From cdcc9adb4474616beb487504bd24862597b1c722 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Fri, 21 Mar 2025 15:48:22 -0300 Subject: [PATCH 001/472] Initial commit --- include/nbl/builtin/hlsl/math/morton.hlsl | 36 +++++++++++++++++++++++ src/nbl/builtin/CMakeLists.txt | 1 + 2 files changed, 37 insertions(+) create mode 100644 include/nbl/builtin/hlsl/math/morton.hlsl diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl new file mode 100644 index 0000000000..22c56f8999 --- /dev/null +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -0,0 +1,36 @@ +#ifndef _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ + +#include "nbl/builtin/hlsl/concepts/core.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace morton +{ + +template) +struct code +{ + using this_t = code; + using U = make_unsigned; + + static this_t create(vector cartesian) + { + //... TODO ... + return this_t(); + } + + //operator+, operator-, operator>>, operator<<, and other bitwise ops + + U value; +}; + +} //namespace morton +} //namespace hlsl +} //namespace nbl + + + +#endif \ No newline at end of file diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 291ee64bad..14e5fe67db 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -289,6 +289,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/fast_affine.hlsl" LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/functions.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/geometry.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/intutil.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/morton.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quadratic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/cubic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quartic.hlsl") From 
5fe6c0837ff53d156b9fc0500f3899c6c1c546c6 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Sun, 23 Mar 2025 19:30:10 -0300 Subject: [PATCH 002/472] CHeckpoint before master merge --- examples_tests | 2 +- include/nbl/builtin/hlsl/math/morton.hlsl | 54 ++++++++++++++++++++++- 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/examples_tests b/examples_tests index 91dc3afe4c..f2ea51d0b3 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 91dc3afe4c66e5bdfd313ec37e7e1863daa52116 +Subproject commit f2ea51d0b3e3388c0f9bae03602ec3b1f658c124 diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index 22c56f8999..bf339f4d6f 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -10,12 +10,64 @@ namespace hlsl namespace morton { -template) +namespace impl +{ + +template +struct decode_mask; + +template +struct decode_mask : integral_constant {}; + +template +struct decode_mask : integral_constant::value << Dim) | T(1)> {}; + +template +NBL_CONSTEXPR T decode_mask_v = decode_mask::value; + +// Compile-time still a bit primitive in HLSL, we can support arbitrary-dimensional morton codes in C++ but HLSL's have to be hand coded +template +struct decode_masks_array; + +#ifndef __HLSL_VERSION + +template +struct decode_masks_array +{ + static consteval vector generateMasks() + { + vector masks; + for (auto i = 0u; i < Dim; i++) + { + masks[i] = decode_mask_v << T(i); + } + return masks; + } + + NBL_CONSTEXPR_STATIC_INLINE vector Masks = generateMasks(); +}; + +#else +template +struct decode_masks_array +{ + NBL_CONSTEXPR_STATIC_INLINE vector Masks = vector(decode_mask_v, decode_mask_v << T(1)); +}; +//template +//NBL_CONSTEXPR_STATIC_INLINE vector decode_masks_array::Masks = vector(decode_mask_v, decode_mask_v << T(1)); +#endif + +} //namespace impl + + +template && 1 < D && D < 5) struct code { using this_t = code; using U = make_unsigned; + + 
static this_t create(vector cartesian) { //... TODO ... From f18b2fa2925cd7f5c5cc94a808cc518b0bd9baaa Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 24 Mar 2025 17:21:37 -0300 Subject: [PATCH 003/472] Checkpoint before merging new type_traits change --- include/nbl/builtin/hlsl/math/morton.hlsl | 56 +++++++++++++++-------- include/nbl/builtin/hlsl/type_traits.hlsl | 6 +++ 2 files changed, 43 insertions(+), 19 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index bf339f4d6f..22081e2b7f 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -13,7 +13,7 @@ namespace morton namespace impl { -template +template struct decode_mask; template @@ -22,15 +22,11 @@ struct decode_mask : integral_constant {}; template struct decode_mask : integral_constant::value << Dim) | T(1)> {}; +#ifndef __HLSL_VERSION + template NBL_CONSTEXPR T decode_mask_v = decode_mask::value; -// Compile-time still a bit primitive in HLSL, we can support arbitrary-dimensional morton codes in C++ but HLSL's have to be hand coded -template -struct decode_masks_array; - -#ifndef __HLSL_VERSION - template struct decode_masks_array { @@ -47,31 +43,50 @@ struct decode_masks_array NBL_CONSTEXPR_STATIC_INLINE vector Masks = generateMasks(); }; -#else -template -struct decode_masks_array -{ - NBL_CONSTEXPR_STATIC_INLINE vector Masks = vector(decode_mask_v, decode_mask_v << T(1)); -}; -//template -//NBL_CONSTEXPR_STATIC_INLINE vector decode_masks_array::Masks = vector(decode_mask_v, decode_mask_v << T(1)); +template +NBL_CONSTEXPR vector decode_masks = decode_masks_array::Masks; + #endif } //namespace impl +// HLSL only supports up to D = 4, and even then having this in a more generic manner is blocked by a DXC issue targeting SPIR-V +#ifndef __HLSL_VERSION + +#define NBL_HLSL_MORTON_MASKS(U, D) impl::decode_masks< U , D > + +#else + +// Up to D = 4 supported +#define NBL_HLSL_MORTON_MASKS(U, 
D) vector< U , 4 >(impl::decode_mask< U , D >::value,\ + impl::decode_mask< U , D >::value << U (1),\ + impl::decode_mask< U , D >::value << U (2),\ + impl::decode_mask< U , D >::value << U (3)\ + ) +#endif + +// Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 +// In particular, `Masks` should be a `const static` member field instead of appearing in every method using it template && 1 < D && D < 5) struct code { using this_t = code; - using U = make_unsigned; - + using U = make_unsigned_t; +#ifdef __HLSL_VERSION + _Static_assert(is_same_v, + "make_signed requires that T shall be a (possibly cv-qualified) " + "integral type or enumeration but not a bool type."); +#endif static this_t create(vector cartesian) { - //... TODO ... - return this_t(); + NBL_CONSTEXPR_STATIC_INLINE vector Masks = NBL_HLSL_MORTON_MASKS(I, D); + printf("%d %d %d %d", Masks[0], Masks[1], Masks[2], Masks[3]); + this_t foo; + foo.value = U(0); + return foo; } //operator+, operator-, operator>>, operator<<, and other bitwise ops @@ -79,6 +94,9 @@ struct code U value; }; +// Don't forget to delete this macro after usage +#undef NBL_HLSL_MORTON_MASKS + } //namespace morton } //namespace hlsl } //namespace nbl diff --git a/include/nbl/builtin/hlsl/type_traits.hlsl b/include/nbl/builtin/hlsl/type_traits.hlsl index 708f643ab0..222dbcdb7c 100644 --- a/include/nbl/builtin/hlsl/type_traits.hlsl +++ b/include/nbl/builtin/hlsl/type_traits.hlsl @@ -688,6 +688,12 @@ NBL_CONSTEXPR uint64_t extent_v = extent::value; template using make_void_t = typename make_void::type; +template +using make_signed_t = typename make_signed::type; + +template +using make_unsigned_t = typename make_unsigned::type; + template struct conditional_value { From 4ebc555d320cc3e678095d72437e07721dc1441b Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 24 Mar 2025 19:18:49 -0300 Subject: [PATCH 004/472] Works, but throws DXC warning --- 
include/nbl/builtin/hlsl/math/morton.hlsl | 25 +++++++++-------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index 22081e2b7f..058bdad862 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -22,11 +22,11 @@ struct decode_mask : integral_constant {}; template struct decode_mask : integral_constant::value << Dim) | T(1)> {}; -#ifndef __HLSL_VERSION - template NBL_CONSTEXPR T decode_mask_v = decode_mask::value; +#ifndef __HLSL_VERSION + template struct decode_masks_array { @@ -58,10 +58,11 @@ NBL_CONSTEXPR vector decode_masks = decode_masks_array::Masks; #else // Up to D = 4 supported -#define NBL_HLSL_MORTON_MASKS(U, D) vector< U , 4 >(impl::decode_mask< U , D >::value,\ - impl::decode_mask< U , D >::value << U (1),\ - impl::decode_mask< U , D >::value << U (2),\ - impl::decode_mask< U , D >::value << U (3)\ +// This will throw a DXC warning about the vector being truncated - no way around that +#define NBL_HLSL_MORTON_MASKS(U, D) vector< U , 4 >(impl::decode_mask_v< U , D >,\ + impl::decode_mask_v< U , D > << U (1),\ + impl::decode_mask_v< U , D > << U (2),\ + impl::decode_mask_v< U , D > << U (3)\ ) #endif @@ -74,18 +75,12 @@ struct code using this_t = code; using U = make_unsigned_t; -#ifdef __HLSL_VERSION - _Static_assert(is_same_v, - "make_signed requires that T shall be a (possibly cv-qualified) " - "integral type or enumeration but not a bool type."); -#endif - static this_t create(vector cartesian) { - NBL_CONSTEXPR_STATIC_INLINE vector Masks = NBL_HLSL_MORTON_MASKS(I, D); - printf("%d %d %d %d", Masks[0], Masks[1], Masks[2], Masks[3]); + NBL_CONSTEXPR_STATIC_INLINE vector Masks = NBL_HLSL_MORTON_MASKS(U, D); + printf("%u %u %u %u", Masks[0], Masks[1], Masks[2]); this_t foo; - foo.value = U(0); + foo.value = Masks[0]; return foo; } From 55a2ef637ca12c6c35b6f8001db6f619acfc2315 Mon Sep 17 00:00:00 
2001 From: Fletterio Date: Mon, 24 Mar 2025 19:41:14 -0300 Subject: [PATCH 005/472] Added concept for valid morton dimensions --- include/nbl/builtin/hlsl/math/morton.hlsl | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index 058bdad862..99980284e9 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -13,6 +13,19 @@ namespace morton namespace impl { +// Valid dimension for a morton code +#ifndef __HLSL_VERSION + +template +NBL_BOOL_CONCEPT MortonDimension = D > 1; + +#else + +template +NBL_BOOL_CONCEPT MortonDimension = 1 < D && D < 5; + +#endif + template struct decode_mask; @@ -69,7 +82,7 @@ NBL_CONSTEXPR vector decode_masks = decode_masks_array::Masks; // Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 // In particular, `Masks` should be a `const static` member field instead of appearing in every method using it -template && 1 < D && D < 5) +template && impl::MortonDimension) struct code { using this_t = code; @@ -78,7 +91,6 @@ struct code static this_t create(vector cartesian) { NBL_CONSTEXPR_STATIC_INLINE vector Masks = NBL_HLSL_MORTON_MASKS(U, D); - printf("%u %u %u %u", Masks[0], Masks[1], Masks[2]); this_t foo; foo.value = Masks[0]; return foo; From f5162561ee2203aa51c8c600aed225d679c9408d Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 24 Mar 2025 21:28:07 -0300 Subject: [PATCH 006/472] Creation from vector working as intended --- include/nbl/builtin/hlsl/math/morton.hlsl | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index 99980284e9..aab8511b95 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -2,6 +2,7 @@ #define 
_NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ #include "nbl/builtin/hlsl/concepts/core.hlsl" +#include "nbl/builtin/hlsl/bit.hlsl" namespace nbl { @@ -88,12 +89,22 @@ struct code using this_t = code; using U = make_unsigned_t; - static this_t create(vector cartesian) + static this_t create(NBL_CONST_REF_ARG(vector) cartesian) { - NBL_CONSTEXPR_STATIC_INLINE vector Masks = NBL_HLSL_MORTON_MASKS(U, D); - this_t foo; - foo.value = Masks[0]; - return foo; + NBL_CONSTEXPR_STATIC U BitWidth = U(8 * sizeof(U)); + const vector unsignedCartesian = bit_cast, vector >(cartesian); + U val = U(0); + [[unroll]] + // We want to interleave the bits of each number in `unsignedCartesian`. We do this by enumerating + // val[0] = bit 0 of unsignedCartesian[0], val[1] = bit 0 of unsignedCartesian[1], ..., val[D-1] = bit 0 of unsignedCartesian[D-1], + // val[D] = bit 1 of unsignedCartesian[0], val[D+1] = bit 1 of unsignedCartesian[1], ..., val[2D-1] = bit 1 of unsignedCartesian[D-1] + // and so on until we get val[BitDwidth - 1] and stop. 
+ for (U i = U(0); i < BitWidth; i++) + { + val |= (unsignedCartesian[i % D] & (U(1) << (i / D))) << (i - (i / D)); + } + this_t retVal = {val}; + return retVal; } //operator+, operator-, operator>>, operator<<, and other bitwise ops From 534d81bfc2ab1136d959a41ecee521990115d7bb Mon Sep 17 00:00:00 2001 From: Fletterio Date: Wed, 26 Mar 2025 13:05:20 -0300 Subject: [PATCH 007/472] Added some extra macro specifiers, vector truncation with no warnings on HLSL side by specializing , a bunch of morton operators --- include/nbl/builtin/hlsl/cpp_compat/basic.h | 8 +- .../nbl/builtin/hlsl/cpp_compat/vector.hlsl | 30 +++ include/nbl/builtin/hlsl/math/morton.hlsl | 181 ++++++++++++++++-- 3 files changed, 198 insertions(+), 21 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index 3802bd69ea..a93727815b 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -40,8 +40,11 @@ inline To _static_cast(From v) #define NBL_CONSTEXPR_FUNC constexpr #define NBL_CONSTEXPR_STATIC constexpr static #define NBL_CONSTEXPR_STATIC_INLINE constexpr static inline +#define NBL_CONSTEXPR_STATIC_FUNC constexpr static #define NBL_CONSTEXPR_INLINE_FUNC constexpr inline -#define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE constexpr +#define NBL_CONSTEXPR_STATIC_INLINE_FUNC constexpr static inline +#define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE NBL_CONSTEXPR_FUNC +#define NBL_CONSTEXPR_STATIC_FORCED_INLINE_FUNC NBL_FORCE_INLINE NBL_CONSTEXPR_STATIC #define NBL_CONST_MEMBER_FUNC const namespace nbl::hlsl @@ -70,8 +73,11 @@ namespace nbl::hlsl #define NBL_CONSTEXPR_FUNC #define NBL_CONSTEXPR_STATIC const static #define NBL_CONSTEXPR_STATIC_INLINE const static +#define NBL_CONSTEXPR_STATIC_FUNC static #define NBL_CONSTEXPR_INLINE_FUNC inline +#define NBL_CONSTEXPR_STATIC_INLINE_FUNC static inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC inline +#define 
NBL_CONSTEXPR_STATIC_FORCED_INLINE_FUNC NBL_CONSTEXPR_STATIC_INLINE_FUNC #define NBL_CONST_MEMBER_FUNC namespace nbl diff --git a/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl b/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl index 354937427a..f6ced52db1 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl @@ -1,6 +1,8 @@ #ifndef _NBL_BUILTIN_HLSL_CPP_COMPAT_VECTOR_INCLUDED_ #define _NBL_BUILTIN_HLSL_CPP_COMPAT_VECTOR_INCLUDED_ +#include "nbl/builtin/hlsl/cpp_compat/basic.h" + // stuff for C++ #ifndef __HLSL_VERSION #include @@ -92,4 +94,32 @@ struct blake3_hasher::update_impl,Dummy> } #endif } + +// To prevent implicit truncation warnings +namespace nbl +{ +namespace hlsl +{ +namespace impl +{ + +template +struct static_cast_helper, vector > +{ + NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(vector) val) + { + vector retVal; + [[unroll]] + for (uint16_t i = 0; i < N; i++) + { + retVal[i] = val[i]; + } + return retVal; + } +}; + +} +} +} + #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index aab8511b95..ecd94ce69e 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -3,6 +3,8 @@ #include "nbl/builtin/hlsl/concepts/core.hlsl" #include "nbl/builtin/hlsl/bit.hlsl" +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/functional.hlsl" namespace nbl { @@ -15,18 +17,9 @@ namespace impl { // Valid dimension for a morton code -#ifndef __HLSL_VERSION - -template -NBL_BOOL_CONCEPT MortonDimension = D > 1; - -#else - template NBL_BOOL_CONCEPT MortonDimension = 1 < D && D < 5; -#endif - template struct decode_mask; @@ -73,11 +66,12 @@ NBL_CONSTEXPR vector decode_masks = decode_masks_array::Masks; // Up to D = 4 supported // This will throw a DXC warning about the vector being truncated - no way around that -#define NBL_HLSL_MORTON_MASKS(U, 
D) vector< U , 4 >(impl::decode_mask_v< U , D >,\ +// The only way to avoid this atm (until they fix issue 7006 below) is to wrap the whole class in a macro and expand it for each possible value of `D` +#define NBL_HLSL_MORTON_MASKS(U, D) _static_cast > (vector< U , 4 >(impl::decode_mask_v< U , D >,\ impl::decode_mask_v< U , D > << U (1),\ impl::decode_mask_v< U , D > << U (2),\ impl::decode_mask_v< U , D > << U (3)\ - ) + )) #endif @@ -88,25 +82,134 @@ struct code { using this_t = code; using U = make_unsigned_t; + NBL_CONSTEXPR_STATIC U BitWidth = U(8 * sizeof(U)); + + // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- + + #ifndef __HLSL_VERSION - static this_t create(NBL_CONST_REF_ARG(vector) cartesian) + code() = default; + + // To immediately get compound operators and functional structs in CPP side + code(const I _value) : value(bit_cast(_value)){} + + #endif + + /** + * @brief Creates a Morton code from a set of cartesian coordinates + * + * @param [in] cartesian Coordinates to encode + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(vector) cartesian) { - NBL_CONSTEXPR_STATIC U BitWidth = U(8 * sizeof(U)); + NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); const vector unsignedCartesian = bit_cast, vector >(cartesian); U val = U(0); + [[unroll]] - // We want to interleave the bits of each number in `unsignedCartesian`. We do this by enumerating - // val[0] = bit 0 of unsignedCartesian[0], val[1] = bit 0 of unsignedCartesian[1], ..., val[D-1] = bit 0 of unsignedCartesian[D-1], - // val[D] = bit 1 of unsignedCartesian[0], val[D+1] = bit 1 of unsignedCartesian[1], ..., val[2D-1] = bit 1 of unsignedCartesian[D-1] - // and so on until we get val[BitDwidth - 1] and stop. 
- for (U i = U(0); i < BitWidth; i++) + for (U dim = 0; dim < U(D); dim++) { - val |= (unsignedCartesian[i % D] & (U(1) << (i / D))) << (i - (i / D)); + [[unroll]] + // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `dim`, but I feel this is clearer + for (U valBit = dim, coordBit = U(1), shift = dim; valBit < BitWidth; valBit += U(D), coordBit <<= 1, shift += U(D) - 1) + { + val |= (unsignedCartesian[dim] & coordBit) << shift; + } } - this_t retVal = {val}; + + this_t retVal; + retVal.value = val; return retVal; } + // CPP can also have a constructor + #ifndef __HLSL_VERSION + + /** + * @brief Creates a Morton code from a set of cartesian coordinates + * + * @param [in] cartesian Coordinates to encode + */ + code(NBL_CONST_REF_ARG(vector) cartesian) + { + *this = create(cartesian); + } + + /** + * @brief Decodes this Morton code back to a set of cartesian coordinates + */ + explicit operator vector() const noexcept + { + // Definition below, we override `impl::static_cast_helper` to have this conversion in both CPP/HLSL + return _static_cast, this_t>(*this); + } + + #endif + + // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value & rhs.value; + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value | rhs.value; + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value ^ rhs.value; + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = ~value; + return retVal; + } + + // Only valid in CPP + #ifndef __HLSL_VERSION + + 
NBL_CONSTEXPR_INLINE_FUNC this_t operator<<(uint16_t bits) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value << bits; + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator>>(uint16_t bits) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value >> bits; + return retVal; + } + + #endif + + // ------------------------------------------------------- UNARY ARITHMETIC OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_t operator-() NBL_CONST_MEMBER_FUNC + { + this_t allOnes; + // allOnes encodes a cartesian coordinate with all values set to 1 + allOnes.value = (U(1) << D) - U(1); + // Using 2's complement property that arithmetic negation can be obtained by bitwise negation then adding 1 + return operator~() + allOnes; + } + + // ------------------------------------------------------- BINARY ARITHMETIC OPERATORS ------------------------------------------------- + + + //operator+, operator-, operator>>, operator<<, and other bitwise ops U value; @@ -116,6 +219,44 @@ struct code #undef NBL_HLSL_MORTON_MASKS } //namespace morton + +namespace impl +{ + +template +struct static_cast_helper, morton::code > +{ + NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(morton::code) val) + { + using U = typename morton::code::U; + NBL_CONSTEXPR_STATIC U BitWidth = morton::code::BitWidth; + // Converting back has an issue with bit-width: when encoding (if template parameter `I` is signed) we cut off the highest bits + // that actually indicated sign. Therefore what we do is set the highest bits instead of the lowest then do an arithmetic right shift + // at the end to preserve sign. + // To this end, we first notice that the coordinate/dimension of index `dim` gets + // `bits(dim) = ceil((BitWidth - dim)/D)` bits when encoded (so the first dimensions get more bits than the last ones if `D` does not + // divide `BitWidth perfectly`). 
+ // Then instead of unpacking all the bits for that coordinate as the lowest bits, we unpack them as the highest ones + // by shifting everything `BitWidth - bits(dim)` bits to the left, then at the end do a final *arithmetic* bitshift right by the same amount. + + vector cartesian; + for (U dim = 0; dim < U(D); dim++) + { + const U bitsDim = (BitWidth - dim + U(D) - 1) / U(D); // <- this computes the ceil + U coordVal = U(0); + // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `dim`, but I feel this is clearer + for (U valBit = dim, coordBit = U(1) << dim, shift = dim; valBit < BitWidth; valBit += U(D), coordBit <<= U(D), shift += U(D) - 1) + { + coordVal |= (val.value & coordBit) << (BitWidth - bitsDim - shift); + } + cartesian[dim] = (bit_cast(coordVal) >> (BitWidth - bitsDim)); + } + return cartesian; + } +}; + +} // namespace impl + } //namespace hlsl } //namespace nbl From 625639031599374d44e8f8a6a79570471f0f4a9c Mon Sep 17 00:00:00 2001 From: Fletterio Date: Wed, 26 Mar 2025 14:53:42 -0300 Subject: [PATCH 008/472] Add safe copile-time vector truncation and some function specifiers for both cpp and hlsl --- include/nbl/builtin/hlsl/cpp_compat.hlsl | 3 + include/nbl/builtin/hlsl/cpp_compat/basic.h | 66 +++++++++---------- .../hlsl/cpp_compat/impl/vector_impl.hlsl | 35 ++++++++++ .../nbl/builtin/hlsl/cpp_compat/vector.hlsl | 30 --------- include/nbl/builtin/hlsl/math/morton.hlsl | 34 ---------- src/nbl/builtin/CMakeLists.txt | 1 + 6 files changed, 72 insertions(+), 97 deletions(-) create mode 100644 include/nbl/builtin/hlsl/cpp_compat/impl/vector_impl.hlsl diff --git a/include/nbl/builtin/hlsl/cpp_compat.hlsl b/include/nbl/builtin/hlsl/cpp_compat.hlsl index 175a3e76c1..cb06447aa1 100644 --- a/include/nbl/builtin/hlsl/cpp_compat.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat.hlsl @@ -6,4 +6,7 @@ #include #include +// Had to push some stuff here to avoid circular dependencies +#include + #endif \ No newline at end of file 
diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index a93727815b..41e920e41e 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -2,35 +2,7 @@ #define _NBL_BUILTIN_HLSL_CPP_COMPAT_BASIC_INCLUDED_ #include - -namespace nbl -{ -namespace hlsl -{ -namespace impl -{ -template -struct static_cast_helper -{ - static inline To cast(From u) - { -#ifndef __HLSL_VERSION - return static_cast(u); -#else - return To(u); -#endif - } -}; -} - -template -inline To _static_cast(From v) -{ - return impl::static_cast_helper::cast(v); -} - -} -} +#include #ifndef __HLSL_VERSION #include @@ -43,8 +15,7 @@ inline To _static_cast(From v) #define NBL_CONSTEXPR_STATIC_FUNC constexpr static #define NBL_CONSTEXPR_INLINE_FUNC constexpr inline #define NBL_CONSTEXPR_STATIC_INLINE_FUNC constexpr static inline -#define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE NBL_CONSTEXPR_FUNC -#define NBL_CONSTEXPR_STATIC_FORCED_INLINE_FUNC NBL_FORCE_INLINE NBL_CONSTEXPR_STATIC +#define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE constexpr #define NBL_CONST_MEMBER_FUNC const namespace nbl::hlsl @@ -68,6 +39,7 @@ namespace nbl::hlsl #else + #define ARROW .arrow(). 
#define NBL_CONSTEXPR const static // TODO: rename to NBL_CONSTEXPR_VAR #define NBL_CONSTEXPR_FUNC @@ -77,8 +49,7 @@ namespace nbl::hlsl #define NBL_CONSTEXPR_INLINE_FUNC inline #define NBL_CONSTEXPR_STATIC_INLINE_FUNC static inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC inline -#define NBL_CONSTEXPR_STATIC_FORCED_INLINE_FUNC NBL_CONSTEXPR_STATIC_INLINE_FUNC -#define NBL_CONST_MEMBER_FUNC +#define NBL_CONST_MEMBER_FUNC namespace nbl { @@ -106,4 +77,33 @@ struct add_pointer #endif +namespace nbl +{ +namespace hlsl +{ +namespace impl +{ +template +struct static_cast_helper +{ + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { +#ifndef __HLSL_VERSION + return static_cast(u); +#else + return To(u); +#endif + } +}; +} + +template +NBL_CONSTEXPR_INLINE_FUNC To _static_cast(From v) +{ +return impl::static_cast_helper::cast(v); +} + +} +} + #endif diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/vector_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/vector_impl.hlsl new file mode 100644 index 0000000000..524d1fa45e --- /dev/null +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/vector_impl.hlsl @@ -0,0 +1,35 @@ +#ifndef _NBL_BUILTIN_HLSL_CPP_COMPAT_IMPL_VECTOR_IMPL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_CPP_COMPAT_IMPL_VECTOR_IMPL_INCLUDED_ + +#include +#include +#include + +// To prevent implicit truncation warnings +namespace nbl +{ +namespace hlsl +{ +namespace impl +{ + +template NBL_PARTIAL_REQ_TOP(N <= M) +struct static_cast_helper, vector NBL_PARTIAL_REQ_BOT(N <= M) > +{ + NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(vector) val) + { + vector retVal; + [[unroll]] + for (uint16_t i = 0; i < N; i++) + { + retVal[i] = val[i]; + } + return retVal; + } +}; + +} +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl b/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl index f6ced52db1..354937427a 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl +++ 
b/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl @@ -1,8 +1,6 @@ #ifndef _NBL_BUILTIN_HLSL_CPP_COMPAT_VECTOR_INCLUDED_ #define _NBL_BUILTIN_HLSL_CPP_COMPAT_VECTOR_INCLUDED_ -#include "nbl/builtin/hlsl/cpp_compat/basic.h" - // stuff for C++ #ifndef __HLSL_VERSION #include @@ -94,32 +92,4 @@ struct blake3_hasher::update_impl,Dummy> } #endif } - -// To prevent implicit truncation warnings -namespace nbl -{ -namespace hlsl -{ -namespace impl -{ - -template -struct static_cast_helper, vector > -{ - NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(vector) val) - { - vector retVal; - [[unroll]] - for (uint16_t i = 0; i < N; i++) - { - retVal[i] = val[i]; - } - return retVal; - } -}; - -} -} -} - #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index ecd94ce69e..50cf78caae 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -32,49 +32,15 @@ struct decode_mask : integral_constant::value template NBL_CONSTEXPR T decode_mask_v = decode_mask::value; -#ifndef __HLSL_VERSION - -template -struct decode_masks_array -{ - static consteval vector generateMasks() - { - vector masks; - for (auto i = 0u; i < Dim; i++) - { - masks[i] = decode_mask_v << T(i); - } - return masks; - } - - NBL_CONSTEXPR_STATIC_INLINE vector Masks = generateMasks(); -}; - -template -NBL_CONSTEXPR vector decode_masks = decode_masks_array::Masks; - -#endif - } //namespace impl -// HLSL only supports up to D = 4, and even then having this in a more generic manner is blocked by a DXC issue targeting SPIR-V -#ifndef __HLSL_VERSION - -#define NBL_HLSL_MORTON_MASKS(U, D) impl::decode_masks< U , D > - -#else - // Up to D = 4 supported -// This will throw a DXC warning about the vector being truncated - no way around that -// The only way to avoid this atm (until they fix issue 7006 below) is to wrap the whole class in a macro and expand it for each possible value of 
`D` #define NBL_HLSL_MORTON_MASKS(U, D) _static_cast > (vector< U , 4 >(impl::decode_mask_v< U , D >,\ impl::decode_mask_v< U , D > << U (1),\ impl::decode_mask_v< U , D > << U (2),\ impl::decode_mask_v< U , D > << U (3)\ )) -#endif - // Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 // In particular, `Masks` should be a `const static` member field instead of appearing in every method using it template && impl::MortonDimension) diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 2e68d1fdf7..fa548e210a 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -248,6 +248,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/matrix.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/promote.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/vector.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/impl/intrinsics_impl.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/impl/vector_impl.hlsl") #glsl compat LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/glsl_compat/core.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/glsl_compat/subgroup_arithmetic.hlsl") From 246cefc422e8ef7b36cd22c90a1f695d643c3b45 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Thu, 27 Mar 2025 18:44:44 -0300 Subject: [PATCH 009/472] Morton class done! 
--- include/nbl/builtin/hlsl/math/morton.hlsl | 241 +++++++++++++++++++--- 1 file changed, 215 insertions(+), 26 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index 50cf78caae..dfe53c3446 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -68,18 +68,17 @@ struct code */ NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(vector) cartesian) { - NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); const vector unsignedCartesian = bit_cast, vector >(cartesian); U val = U(0); [[unroll]] - for (U dim = 0; dim < U(D); dim++) + for (U coord = 0; coord < U(D); coord++) { [[unroll]] - // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `dim`, but I feel this is clearer - for (U valBit = dim, coordBit = U(1), shift = dim; valBit < BitWidth; valBit += U(D), coordBit <<= 1, shift += U(D) - 1) + // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `coord`, but I feel this is clearer + for (U valBitIdx = coord, coordBit = U(1), shift = coord; valBitIdx < BitWidth; valBitIdx += U(D), coordBit <<= 1, shift += U(D) - 1) { - val |= (unsignedCartesian[dim] & coordBit) << shift; + val |= (unsignedCartesian[coord] & coordBit) << shift; } } @@ -112,6 +111,68 @@ struct code #endif + // --------------------------------------------------------- AUX METHODS ------------------------------------------------------------------- + + /** + * @brief Extracts a single coordinate + * + * @param [in] coord The coordinate to extract + */ + NBL_CONSTEXPR_INLINE_FUNC I getCoordinate(uint16_t coord) NBL_CONST_MEMBER_FUNC + { + // Converting back has an issue with bit-width: when encoding (if template parameter `I` is signed) we cut off the highest bits + // that actually indicated sign. 
Therefore what we do is set the highest bits instead of the lowest then do an arithmetic right shift + // at the end to preserve sign. + // To this end, we first notice that the coordinate of index `coord` gets + // `bits(coord) = ceil((BitWidth - coord)/D)` bits when encoded (so the first dimensions get more bits than the last ones if `D` does not + // divide `BitWidth perfectly`). + // Then instead of unpacking all the bits for that coordinate as the lowest bits, we unpack them as the highest ones + // by shifting everything `BitWidth - bits(coord)` bits to the left, then at the end do a final *arithmetic* bitshift right by the same amount. + + const U bitsCoord = BitWidth / U(D) + ((coord < BitWidth % D) ? U(1) : U(0)); // <- this computes the ceil + U coordVal = U(0); + // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `coord`, but I feel this is clearer + [[unroll]] + for (U valBitIdx = U(coord), coordBit = U(1) << U(coord), shift = U(coord); valBitIdx < BitWidth; valBitIdx += U(D), coordBit <<= U(D), shift += U(D) - 1) + { + coordVal |= (value & coordBit) << (BitWidth - bitsCoord - shift); + } + return bit_cast(coordVal) >> (BitWidth - bitsCoord); + } + + /** + * @brief Returns an element of type U with the highest bit of the number encoded in `coord` set to its right value, and all other bits set to 0 + * + * @param [in] coord The coordinate whose highest bit we want to get + */ + /* + NBL_CONSTEXPR_INLINE_FUNC U extractHighestBit(uint16_t coord) NBL_CONST_MEMBER_FUNC + { + // Like above, if the number encoded in `coord` gets `bits(coord) = ceil((BitWidth - coord)/D)` bits for representation, then the highest index of these + // bits is `bits(coord) - 1` + const U coordHighestBitIdx = BitWidth / U(D) - ((U(coord) < BitWidth % U(D)) ? 
U(0) : U(1)); + // This is the index of that bit as an index in the encoded value + const U shift = coordHighestBitIdx * U(D) + U(coord); + return value & (U(1) << shift); + } + */ + + /** + * @brief Returns an element of type U by `or`ing this with rhs and extracting only the highest bit. Useful to know if either coord + * (for each value) has its highest bit set to 1. + * + * @param [in] coord The coordinate whose highest bit we want to get + */ + NBL_CONSTEXPR_INLINE_FUNC U logicalOrHighestBits(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + { + // Like above, if the number encoded in `coord` gets `bits(coord) = ceil((BitWidth - coord)/D)` bits for representation, then the highest index of these + // bits is `bits(coord) - 1` + const U coordHighestBitIdx = BitWidth / U(D) - ((U(coord) < BitWidth % U(D)) ? U(0) : U(1)); + // This is the index of that bit as an index in the encoded value + const U shift = coordHighestBitIdx * U(D) + U(coord); + return (value | rhs.value) & (U(1) << shift); + } + // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC @@ -174,9 +235,153 @@ struct code // ------------------------------------------------------- BINARY ARITHMETIC OPERATORS ------------------------------------------------- + NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); + this_t retVal; + [[unroll]] + for (uint16_t coord = 0; coord < D; coord++) + { + // put 1 bits everywhere in the bits the current axis is not using + // then extract just the axis bits for the right hand coordinate + // carry-1 will propagate the bits across the already set bits + // then clear out the bits not belonging to current axis + // Note: Its possible to clear on `this` and fill on `rhs` 
but that will + // disable optimizations, we expect the compiler to optimize a lot if the + // value of `rhs` is known at compile time, e.g. `static_cast>(glm::ivec3(1,0,0))` + retVal.value |= ((value | (~Masks[coord])) + (rhs.value & Masks[coord])) & Masks[coord]; + } + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); + this_t retVal; + [[unroll]] + for (uint16_t coord = 0; coord < D; coord++) + { + // This is the dual trick of the one used for addition: set all other bits to 0 so borrows propagate + retVal.value |= ((value & Masks[coord]) - (rhs.value & Masks[coord])) & Masks[coord]; + } + return retVal; + } + + // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- + NBL_CONSTEXPR_INLINE_FUNC bool operator!() NBL_CONST_MEMBER_FUNC + { + return value.operator!(); + } - //operator+, operator-, operator>>, operator<<, and other bitwise ops + NBL_CONSTEXPR_INLINE_FUNC bool coordEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + { + NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); + return (value & Masks[coord]) == (rhs.value & Masks[coord]); + } + + NBL_CONSTEXPR_INLINE_FUNC vector operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + vector retVal; + [[unroll]] + for (uint16_t coord = 0; coord < D; coord++) + retVal[coord] = coordEquals(rhs, coord); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC bool allEqual(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return value == rhs.value; + } + + NBL_CONSTEXPR_INLINE_FUNC bool coordNotEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + { + return !coordEquals(rhs, coord); + } + + NBL_CONSTEXPR_INLINE_FUNC vector operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + vector retVal; + [[unroll]] + 
for (uint16_t coord = 0; coord < D; coord++) + retVal[coord] = coordNotEquals(rhs, coord); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC bool notAllEqual(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return ! allEqual(rhs); + } + + + + template + NBL_CONSTEXPR_INLINE_FUNC bool coordOrderCompare(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + { + NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); + Comparison comparison; + OppositeComparison oppositeComparison; + + // When unsigned, bit representation is the same but with 0s inbetween bits. In particular, we can still use unsigned comparison + #ifndef __HLSL_VERSION + if constexpr (is_unsigned_v) + #else + if (is_unsigned_v) + #endif + { + return comparison(value & Masks[coord], rhs.value & Masks[coord]); + } + // When signed, since the representation is unsigned, we need to divide behaviour based on highest bit + else + { + // I will give an example for the case of `Comparison` being `functional::less`, but other cases are similar + // If both are negative (both bits set to 1) then `x < y` iff `z > w` when `z,w` are the bit representations of `x,y` as unsigned + // If this is nonnegative and rhs is negative, it should return false. Since in this case `highestBit = 0` and `rhsHighestBit = 1` this + // is the same as doing `z > w` again + // If this is negative and rhs is nonnegative, it should return true. But in this case we have `highestBit = 1` and `rhsHighestBit = 0` + // so again we can just return `z > w`. + // All three cases end up in the same expression. + if (logicalOrHighestBits(rhs, coord)) + return oppositeComparison(value & Masks[coord], rhs.value & Masks[coord]); + // If neither of them have their highest bit set, both are nonnegative. 
Therefore, we can return the unsigned comparison + else + return comparison(value & Masks[coord], rhs.value & Masks[coord]); + } + } + + NBL_CONSTEXPR_INLINE_FUNC bool coordLessThan(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + { + return coordOrderCompare, greater >(rhs, coord); + } + + NBL_CONSTEXPR_INLINE_FUNC bool coordLessThanEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + { + return coordOrderCompare, greater_equal >(rhs, coord); + } + + NBL_CONSTEXPR_INLINE_FUNC bool coordGreaterThan(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + { + return coordOrderCompare, less >(rhs, coord); + } + + NBL_CONSTEXPR_INLINE_FUNC bool coordGreaterThanEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + { + return coordOrderCompare, less_equal >(rhs, coord); + } + + #define DEFINE_OPERATOR(OP, COMPARISON) NBL_CONSTEXPR_INLINE_FUNC vector operator##OP##(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC \ + { \ + vector retVal; \ + [[unroll]] \ + for (uint16_t coord = 0; coord < D; coord++) \ + retVal[coord] = COMPARISON (rhs, coord); \ + return retVal; \ + } + + DEFINE_OPERATOR(< , coordLessThan); + DEFINE_OPERATOR(<= , coordLessThanEquals); + DEFINE_OPERATOR(> , coordGreaterThan); + DEFINE_OPERATOR(>= , coordGreaterThanEquals); U value; }; @@ -186,6 +391,7 @@ struct code } //namespace morton +// Still in nbl::hlsl we can go to nbl::hlsl::impl and specialize the `static_cast_helper` namespace impl { @@ -194,28 +400,11 @@ struct static_cast_helper, morton::code > { NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(morton::code) val) { - using U = typename morton::code::U; - NBL_CONSTEXPR_STATIC U BitWidth = morton::code::BitWidth; - // Converting back has an issue with bit-width: when encoding (if template parameter `I` is signed) we cut off the highest bits - // that actually indicated sign. 
Therefore what we do is set the highest bits instead of the lowest then do an arithmetic right shift - // at the end to preserve sign. - // To this end, we first notice that the coordinate/dimension of index `dim` gets - // `bits(dim) = ceil((BitWidth - dim)/D)` bits when encoded (so the first dimensions get more bits than the last ones if `D` does not - // divide `BitWidth perfectly`). - // Then instead of unpacking all the bits for that coordinate as the lowest bits, we unpack them as the highest ones - // by shifting everything `BitWidth - bits(dim)` bits to the left, then at the end do a final *arithmetic* bitshift right by the same amount. - vector cartesian; - for (U dim = 0; dim < U(D); dim++) + [[unroll]] + for (uint16_t coord = 0; coord < D; coord++) { - const U bitsDim = (BitWidth - dim + U(D) - 1) / U(D); // <- this computes the ceil - U coordVal = U(0); - // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `dim`, but I feel this is clearer - for (U valBit = dim, coordBit = U(1) << dim, shift = dim; valBit < BitWidth; valBit += U(D), coordBit <<= U(D), shift += U(D) - 1) - { - coordVal |= (val.value & coordBit) << (BitWidth - bitsDim - shift); - } - cartesian[dim] = (bit_cast(coordVal) >> (BitWidth - bitsDim)); + cartesian[coord] = val.getCoordinate(coord); } return cartesian; } From 1c7f7911e416c8ec42ba3055b9da9a9da900d23f Mon Sep 17 00:00:00 2001 From: Fletterio Date: Thu, 27 Mar 2025 18:48:35 -0300 Subject: [PATCH 010/472] Remove some leftover commented code --- include/nbl/builtin/hlsl/math/morton.hlsl | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index dfe53c3446..153ec08bf0 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -140,23 +140,6 @@ struct code return bit_cast(coordVal) >> (BitWidth - bitsCoord); } - /** - * @brief Returns an element of type U 
with the highest bit of the number encoded in `coord` set to its right value, and all other bits set to 0 - * - * @param [in] coord The coordinate whose highest bit we want to get - */ - /* - NBL_CONSTEXPR_INLINE_FUNC U extractHighestBit(uint16_t coord) NBL_CONST_MEMBER_FUNC - { - // Like above, if the number encoded in `coord` gets `bits(coord) = ceil((BitWidth - coord)/D)` bits for representation, then the highest index of these - // bits is `bits(coord) - 1` - const U coordHighestBitIdx = BitWidth / U(D) - ((U(coord) < BitWidth % U(D)) ? U(0) : U(1)); - // This is the index of that bit as an index in the encoded value - const U shift = coordHighestBitIdx * U(D) + U(coord); - return value & (U(1) << shift); - } - */ - /** * @brief Returns an element of type U by `or`ing this with rhs and extracting only the highest bit. Useful to know if either coord * (for each value) has its highest bit set to 1. From 508879948064ff01c05a9e1f2166d2261c17697f Mon Sep 17 00:00:00 2001 From: Fletterio Date: Thu, 27 Mar 2025 18:56:57 -0300 Subject: [PATCH 011/472] Remove leaking macro --- include/nbl/builtin/hlsl/math/morton.hlsl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index 153ec08bf0..4dc05738b6 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -366,6 +366,8 @@ struct code DEFINE_OPERATOR(> , coordGreaterThan); DEFINE_OPERATOR(>= , coordGreaterThanEquals); + #undef DEFINE_OPERATOR + U value; }; From e25a35cce8f0554baf98173f9cc1d1dd93629042 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Fri, 28 Mar 2025 20:16:00 -0300 Subject: [PATCH 012/472] Bugfixes with arithmetic --- include/nbl/builtin/hlsl/math/morton.hlsl | 108 +++++++++++++--------- 1 file changed, 63 insertions(+), 45 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index 4dc05738b6..89d1a99749 100644 --- 
a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -57,7 +57,7 @@ struct code code() = default; // To immediately get compound operators and functional structs in CPP side - code(const I _value) : value(bit_cast(_value)){} + code(const U _value) : value(_value) {} #endif @@ -69,7 +69,7 @@ struct code NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(vector) cartesian) { const vector unsignedCartesian = bit_cast, vector >(cartesian); - U val = U(0); + this_t retVal = { U(0) }; [[unroll]] for (U coord = 0; coord < U(D); coord++) @@ -78,12 +78,10 @@ struct code // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `coord`, but I feel this is clearer for (U valBitIdx = coord, coordBit = U(1), shift = coord; valBitIdx < BitWidth; valBitIdx += U(D), coordBit <<= 1, shift += U(D) - 1) { - val |= (unsignedCartesian[coord] & coordBit) << shift; + retVal.value |= (unsignedCartesian[coord] & coordBit) << shift; } } - this_t retVal; - retVal.value = val; return retVal; } @@ -141,48 +139,43 @@ struct code } /** - * @brief Returns an element of type U by `or`ing this with rhs and extracting only the highest bit. Useful to know if either coord - * (for each value) has its highest bit set to 1. + * @brief Returns an element of type U by extracting only the highest bit (of the bits used to encode `coord`) * - * @param [in] coord The coordinate whose highest bit we want to get + * @param [in] coord The coordinate whose highest bit we want to extract. 
*/ - NBL_CONSTEXPR_INLINE_FUNC U logicalOrHighestBits(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_INLINE_FUNC U extractHighestBit(uint16_t coord) NBL_CONST_MEMBER_FUNC { // Like above, if the number encoded in `coord` gets `bits(coord) = ceil((BitWidth - coord)/D)` bits for representation, then the highest index of these // bits is `bits(coord) - 1` const U coordHighestBitIdx = BitWidth / U(D) - ((U(coord) < BitWidth % U(D)) ? U(0) : U(1)); // This is the index of that bit as an index in the encoded value const U shift = coordHighestBitIdx * U(D) + U(coord); - return (value | rhs.value) & (U(1) << shift); + return value & (U(1) << shift); } // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - this_t retVal; - retVal.value = value & rhs.value; + this_t retVal = { value & rhs.value }; return retVal; } NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - this_t retVal; - retVal.value = value | rhs.value; + this_t retVal = { value | rhs.value }; return retVal; } NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - this_t retVal; - retVal.value = value ^ rhs.value; + this_t retVal = { value ^ rhs.value }; return retVal; } NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC { - this_t retVal; - retVal.value = ~value; + this_t retVal = { ~value }; return retVal; } @@ -191,15 +184,13 @@ struct code NBL_CONSTEXPR_INLINE_FUNC this_t operator<<(uint16_t bits) NBL_CONST_MEMBER_FUNC { - this_t retVal; - retVal.value = value << bits; + this_t retVal = { value << U(bits) }; return retVal; } NBL_CONSTEXPR_INLINE_FUNC this_t operator>>(uint16_t bits) NBL_CONST_MEMBER_FUNC { - this_t retVal; - retVal.value = value >> bits; + this_t retVal = { value >> U(bits) }; 
return retVal; } @@ -209,19 +200,20 @@ struct code NBL_CONSTEXPR_INLINE_FUNC this_t operator-() NBL_CONST_MEMBER_FUNC { - this_t allOnes; // allOnes encodes a cartesian coordinate with all values set to 1 - allOnes.value = (U(1) << D) - U(1); + const static this_t allOnes = { (U(1) << D) - U(1) }; // Using 2's complement property that arithmetic negation can be obtained by bitwise negation then adding 1 return operator~() + allOnes; } // ------------------------------------------------------- BINARY ARITHMETIC OPERATORS ------------------------------------------------- + // CHANGED FOR DEBUG: REMEMBER TO CHANGE BACK + NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); - this_t retVal; + this_t retVal = { U(0) }; [[unroll]] for (uint16_t coord = 0; coord < D; coord++) { @@ -240,7 +232,7 @@ struct code NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); - this_t retVal; + this_t retVal = { U(0) }; [[unroll]] for (uint16_t coord = 0; coord < D; coord++) { @@ -293,17 +285,15 @@ struct code NBL_CONSTEXPR_INLINE_FUNC bool notAllEqual(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - return ! allEqual(rhs); + return !allEqual(rhs); } - - - template + template NBL_CONSTEXPR_INLINE_FUNC bool coordOrderCompare(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC { NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); Comparison comparison; - OppositeComparison oppositeComparison; + OnSignMismatch onSignMismatch; // When unsigned, bit representation is the same but with 0s inbetween bits. 
In particular, we can still use unsigned comparison #ifndef __HLSL_VERSION @@ -317,39 +307,67 @@ struct code // When signed, since the representation is unsigned, we need to divide behaviour based on highest bit else { - // I will give an example for the case of `Comparison` being `functional::less`, but other cases are similar - // If both are negative (both bits set to 1) then `x < y` iff `z > w` when `z,w` are the bit representations of `x,y` as unsigned - // If this is nonnegative and rhs is negative, it should return false. Since in this case `highestBit = 0` and `rhsHighestBit = 1` this - // is the same as doing `z > w` again - // If this is negative and rhs is nonnegative, it should return true. But in this case we have `highestBit = 1` and `rhsHighestBit = 0` - // so again we can just return `z > w`. - // All three cases end up in the same expression. - if (logicalOrHighestBits(rhs, coord)) - return oppositeComparison(value & Masks[coord], rhs.value & Masks[coord]); - // If neither of them have their highest bit set, both are nonnegative. Therefore, we can return the unsigned comparison + // I will give an example for `operator<` but the same reasoning holds for all others. Some abuse of notation but hopefully it's clear. + + // If `this[coord] >= 0` and `rhs[coord] < 0` then `this[coord] < rhs[coord]` returns false. Notice that in this case, the highest bit of + // `value` (of the bits representing the number encoded in `coord`) is `0`, while the highest bit for rhs is `1`. + // Similarly, if `this[coord] < 0` and `rhs[coord] >= 0` then `this[coord] < rhs[coord]` returns true, and the highest bit situation is inverted. + // This means that if the signs of `this[coord]` and `rhs[coord]` are not equal, the result depends on the sign of `this[coord]`. + // What that result should be is controlled by `OnSignMismatch`. 
+ // Finally, notice that if only one of those bits is set to 1, then the `xor` of that highest bit yields 1 as well + const U highestBit = extractHighestBit(coord); + const U rhsHighestBit = rhs.extractHighestBit(coord); + if (highestBit ^ rhsHighestBit) + return onSignMismatch(highestBit); + // If both are nonnegative, then we can just use the comparison as it comes. + // If both are negative, it just so happens that applying the same operator to their unsigned bitcasted representations yields the same result. + // For `operator<`, for example, consider two negative numbers. Starting from the MSB (we know it's `1` for both in this case) and moving to the right, + // consider what happens when we encounter the first bit where they mismatch: the one with a `0` at position `k` (by position I mean counted from the + // left, starting at 0) is adding at most `2^k - 1` in the lowest bits, while the one with a `1` is adding exactly `2^k`. This means that the one + // with a 0 is "more negative". 
else return comparison(value & Masks[coord], rhs.value & Masks[coord]); } } + + struct OnSignMismatchLessThan + { + // On a sign mismatch, `thisrhs` is true if this is non-negative (`highestBit` set to `0`) and false otherwise + // Therefore since it takes a number with only the highest bit set we only have to return the opposite of whether there is in fact a bit set + bool operator()(U highestBit) + { + return !bool(highestBit); + } + }; NBL_CONSTEXPR_INLINE_FUNC bool coordLessThan(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC { - return coordOrderCompare, greater >(rhs, coord); + return coordOrderCompare, OnSignMismatchLessThan>(rhs, coord); } NBL_CONSTEXPR_INLINE_FUNC bool coordLessThanEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC { - return coordOrderCompare, greater_equal >(rhs, coord); + return coordOrderCompare, OnSignMismatchLessThan>(rhs, coord); } NBL_CONSTEXPR_INLINE_FUNC bool coordGreaterThan(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC { - return coordOrderCompare, less >(rhs, coord); + return coordOrderCompare, OnSignMismatchGreaterThan>(rhs, coord); } NBL_CONSTEXPR_INLINE_FUNC bool coordGreaterThanEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC { - return coordOrderCompare, less_equal >(rhs, coord); + return coordOrderCompare, OnSignMismatchGreaterThan>(rhs, coord); } #define DEFINE_OPERATOR(OP, COMPARISON) NBL_CONSTEXPR_INLINE_FUNC vector operator##OP##(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC \ From 0d9dd4afa6190dd029cf0e8e311ec132a818ec4a Mon Sep 17 00:00:00 2001 From: Fletterio Date: Tue, 1 Apr 2025 15:25:38 -0300 Subject: [PATCH 013/472] Checkpoint, have to check why vector compat isn't working --- include/nbl/builtin/hlsl/cpp_compat/basic.h | 6 +- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 34 ++ .../builtin/hlsl/cpp_compat/intrinsics.hlsl | 13 + .../nbl/builtin/hlsl/emulated/uint64_t.hlsl | 153 +++++++ 
include/nbl/builtin/hlsl/functional.hlsl | 34 +- include/nbl/builtin/hlsl/math/morton.hlsl | 423 ------------------ include/nbl/builtin/hlsl/morton.hlsl | 72 +++ .../builtin/hlsl/spirv_intrinsics/core.hlsl | 3 +- src/nbl/builtin/CMakeLists.txt | 4 +- 9 files changed, 311 insertions(+), 431 deletions(-) create mode 100644 include/nbl/builtin/hlsl/emulated/uint64_t.hlsl delete mode 100644 include/nbl/builtin/hlsl/math/morton.hlsl create mode 100644 include/nbl/builtin/hlsl/morton.hlsl diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index 41e920e41e..77d9d887bd 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -17,6 +17,7 @@ #define NBL_CONSTEXPR_STATIC_INLINE_FUNC constexpr static inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE constexpr #define NBL_CONST_MEMBER_FUNC const +#define NBL_IF_CONSTEXPR(...) if constexpr (__VA_ARGS__) namespace nbl::hlsl { @@ -49,7 +50,8 @@ namespace nbl::hlsl #define NBL_CONSTEXPR_INLINE_FUNC inline #define NBL_CONSTEXPR_STATIC_INLINE_FUNC static inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC inline -#define NBL_CONST_MEMBER_FUNC +#define NBL_CONST_MEMBER_FUNC +#define NBL_IF_CONSTEXPR(...) 
if (__VA_ARGS__) namespace nbl { @@ -100,7 +102,7 @@ struct static_cast_helper template NBL_CONSTEXPR_INLINE_FUNC To _static_cast(From v) { -return impl::static_cast_helper::cast(v); + return impl::static_cast_helper::cast(v); } } diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 1d43d9b14a..7b8726566f 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -103,6 +103,10 @@ template struct nMax_helper; template struct nClamp_helper; +template +struct addCarry_helper; +template +struct subBorrow_helper; #ifdef __HLSL_VERSION // HLSL only specializations @@ -162,6 +166,9 @@ template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(refract_hel template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(nMax_helper, nMax, (T), (T)(T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(nMin_helper, nMin, (T), (T)(T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(nClamp_helper, nClamp, (T), (T)(T), T) +// Can use trivial case and not worry about restricting `T` with a concept since `spirv::AddCarryOutput / SubBorrowOutput` already take care of that +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(addCarry_helper, addCarry, (T), (T)(T), spirv::AddCarryOutput) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(subBorrow_helper, subBorrow, (T), (T)(T), spirv::SubBorrowOutput) #define BITCOUNT_HELPER_RETRUN_TYPE conditional_t, vector::Dimension>, int32_t> template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(bitCount_helper, bitCount, (T), (T), BITCOUNT_HELPER_RETRUN_TYPE) @@ -599,6 +606,33 @@ struct nClamp_helper } }; +// Once again no need to restrict the two below with concepts for same reason as HLSL version +template +struct addCarry_helper +{ + using return_t = spirv::AddCarryOutput; + NBL_CONSTEXPR_STATIC_INLINE_FUNC return_t __call(const T operand1, const T operand2) + { + return_t retVal; + retVal.result = operand1 + operand2; + 
retVal.carry = retVal.result < operand1 ? T(1) : T(0); + return retVal; + } +}; + +template +struct subBorrow_helper +{ + using return_t = spirv::SubBorrowOutput; + NBL_CONSTEXPR_STATIC_INLINE_FUNC return_t __call(const T operand1, const T operand2) + { + return_t retVal; + retVal.result = static_cast(operand1 - operand2); + retVal.borrow = operand1 >= operand2 ? T(0) : T(1); + return retVal; + } +}; + #endif // C++ only specializations // C++ and HLSL specializations diff --git a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl index b695c4b82b..1f1957dbbd 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl @@ -217,6 +217,19 @@ inline T refract(NBL_CONST_REF_ARG(T) I, NBL_CONST_REF_ARG(T) N, NBL_CONST_REF_A return cpp_compat_intrinsics_impl::refract_helper::__call(I, N, eta); } +template +NBL_CONSTEXPR_INLINE_FUNC spirv::AddCarryOutput addCarry(NBL_CONST_REF_ARG(T) operand1, NBL_CONST_REF_ARG(T) operand2) +{ + return cpp_compat_intrinsics_impl::addCarry_helper::__call(operand1, operand2); +} + +template +NBL_CONSTEXPR_INLINE_FUNC spirv::SubBorrowOutput subBorrow(NBL_CONST_REF_ARG(T) operand1, NBL_CONST_REF_ARG(T) operand2) +{ + return cpp_compat_intrinsics_impl::subBorrow_helper::__call(operand1, operand2); +} + + #ifdef __HLSL_VERSION #define NAMESPACE spirv #else diff --git a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl new file mode 100644 index 0000000000..3178159794 --- /dev/null +++ b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl @@ -0,0 +1,153 @@ +#ifndef _NBL_BUILTIN_HLSL_EMULATED_UINT64_T_HLSL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_EMULATED_UINT64_T_HLSL_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/functional.hlsl" + +namespace nbl +{ +namespace hlsl +{ + +struct emulated_uint64_t +{ + using storage_t = vector; + using this_t = 
emulated_uint64_t; + + storage_t data; + + // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- + + + #ifndef __HLSL_VERSION + + emulated_uint64_t() = default; + + // To immediately get compound operators and functional structs in CPP side + explicit emulated_uint64_t(const storage_t _data) : data(_data) {} + + #endif + + /** + * @brief Creates an `emulated_uint64_t` from a vector of two `uint32_t`s representing its bitpattern + * + * @param [in] _data Vector of `uint32_t` encoding the `uint64_t` being emulated + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) + { + this_t retVal; + retVal.data = _data; + return retVal; + } + + // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal = create(data & rhs.data); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal = create(data | rhs.data); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal = create(data ^ rhs.data); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC + { + this_t retVal = create(~data); + return retVal; + } + + // Only valid in CPP + #ifndef __HLSL_VERSION + + constexpr inline this_t operator<<(uint16_t bits) const; + + constexpr inline this_t operator>>(uint16_t bits) const; + + #endif + + // ------------------------------------------------------- ARITHMETIC OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + const spirv::AddCarryOutput lowerAddResult = addCarry(data.y, 
rhs.data.y); + const storage_t addResult = { data.x + rhs.data.x + lowerAddResult.carry, lowerAddResult.result }; + const this_t retVal = create(addResult); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + const spirv::SubBorrowOutput lowerSubResult = subBorrow(data.y, rhs.data.y); + const storage_t subResult = { data.x - rhs.data.x - lowerSubResult.borrow, lowerSubResult.result }; + const this_t retVal = create(subResult); + return retVal; + } + +}; + +template<> +struct left_shift_operator +{ + using type_t = emulated_uint64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + const uint32_t _bits = uint32_t(bits); + const uint32_t shift = ComponentBitWidth - _bits; + const uint32_t higherBitsMask = ~uint32_t(0) << shift; + // We need the `x` component of the vector (which represents the higher bits of the emulated uint64) to get the `bits` higher bits of the `y` component + const vector retValData = { (operand.data.x << _bits) | ((operand.data.y & higherBitsMask) >> shift), operand.data.y << _bits }; + return emulated_uint64_t::create(retValData); + } +}; + +template<> +struct arithmetic_right_shift_operator +{ + using type_t = emulated_uint64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + const uint32_t _bits = uint32_t(bits); + const uint32_t shift = ComponentBitWidth - _bits; + const uint32_t lowerBitsMask = ~uint32_t(0) >> shift; + // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component + const vector retValData = { operand.data.x >> _bits, ((operand.data.x & lowerBitsMask) << shift) | (operand.data.y >> _bits) 
}; + return emulated_uint64_t::create(retValData); + } +}; + +#ifndef __HLSL_VERSION + +constexpr inline emulated_uint64_t emulated_uint64_t::operator<<(uint16_t bits) const +{ + left_shift_operator leftShift; + return leftShift(*this, bits); +} + +constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(uint16_t bits) const +{ + arithmetic_right_shift_operator rightShift; + return rightShift(*this, bits); +} + +#endif + +} //namespace nbl +} //namespace hlsl + + + +#endif diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 25d822a940..3cf24193a4 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -165,7 +165,7 @@ COMPOUND_ASSIGN(divides) // ----------------- End of compound assignment ops ---------------- -// Min, Max and Ternary Operator don't use ALIAS_STD because they don't exist in STD +// Min, Max, and Ternary and Shift operators don't use ALIAS_STD because they don't exist in STD // TODO: implement as mix(rhs struct minimum @@ -200,13 +200,39 @@ struct ternary_operator { using type_t = T; - T operator()(bool condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) + NBL_CONSTEXPR_INLINE_FUNC T operator()(bool condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) { return condition ? 
lhs : rhs; } }; -} -} +template +struct left_shift_operator +{ + using type_t = T; + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + return operand << bits; + } +}; + +template +struct arithmetic_right_shift_operator +{ + using type_t = T; + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + return operand >> bits; + } +}; + +// Declare template, but left unimplemented by default +template +struct logical_right_shift_operator; + +} //namespace nbl +} //namespace hlsl #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl deleted file mode 100644 index 89d1a99749..0000000000 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ /dev/null @@ -1,423 +0,0 @@ -#ifndef _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ -#define _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ - -#include "nbl/builtin/hlsl/concepts/core.hlsl" -#include "nbl/builtin/hlsl/bit.hlsl" -#include "nbl/builtin/hlsl/cpp_compat.hlsl" -#include "nbl/builtin/hlsl/functional.hlsl" - -namespace nbl -{ -namespace hlsl -{ -namespace morton -{ - -namespace impl -{ - -// Valid dimension for a morton code -template -NBL_BOOL_CONCEPT MortonDimension = 1 < D && D < 5; - -template -struct decode_mask; - -template -struct decode_mask : integral_constant {}; - -template -struct decode_mask : integral_constant::value << Dim) | T(1)> {}; - -template -NBL_CONSTEXPR T decode_mask_v = decode_mask::value; - -} //namespace impl - -// Up to D = 4 supported -#define NBL_HLSL_MORTON_MASKS(U, D) _static_cast > (vector< U , 4 >(impl::decode_mask_v< U , D >,\ - impl::decode_mask_v< U , D > << U (1),\ - impl::decode_mask_v< U , D > << U (2),\ - impl::decode_mask_v< U , D > << U (3)\ - )) - -// Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 -// In particular, `Masks` should be a `const static` 
member field instead of appearing in every method using it -template && impl::MortonDimension) -struct code -{ - using this_t = code; - using U = make_unsigned_t; - NBL_CONSTEXPR_STATIC U BitWidth = U(8 * sizeof(U)); - - // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- - - #ifndef __HLSL_VERSION - - code() = default; - - // To immediately get compound operators and functional structs in CPP side - code(const U _value) : value(_value) {} - - #endif - - /** - * @brief Creates a Morton code from a set of cartesian coordinates - * - * @param [in] cartesian Coordinates to encode - */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(vector) cartesian) - { - const vector unsignedCartesian = bit_cast, vector >(cartesian); - this_t retVal = { U(0) }; - - [[unroll]] - for (U coord = 0; coord < U(D); coord++) - { - [[unroll]] - // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `coord`, but I feel this is clearer - for (U valBitIdx = coord, coordBit = U(1), shift = coord; valBitIdx < BitWidth; valBitIdx += U(D), coordBit <<= 1, shift += U(D) - 1) - { - retVal.value |= (unsignedCartesian[coord] & coordBit) << shift; - } - } - - return retVal; - } - - // CPP can also have a constructor - #ifndef __HLSL_VERSION - - /** - * @brief Creates a Morton code from a set of cartesian coordinates - * - * @param [in] cartesian Coordinates to encode - */ - code(NBL_CONST_REF_ARG(vector) cartesian) - { - *this = create(cartesian); - } - - /** - * @brief Decodes this Morton code back to a set of cartesian coordinates - */ - explicit operator vector() const noexcept - { - // Definition below, we override `impl::static_cast_helper` to have this conversion in both CPP/HLSL - return _static_cast, this_t>(*this); - } - - #endif - - // --------------------------------------------------------- AUX METHODS 
------------------------------------------------------------------- - - /** - * @brief Extracts a single coordinate - * - * @param [in] coord The coordinate to extract - */ - NBL_CONSTEXPR_INLINE_FUNC I getCoordinate(uint16_t coord) NBL_CONST_MEMBER_FUNC - { - // Converting back has an issue with bit-width: when encoding (if template parameter `I` is signed) we cut off the highest bits - // that actually indicated sign. Therefore what we do is set the highest bits instead of the lowest then do an arithmetic right shift - // at the end to preserve sign. - // To this end, we first notice that the coordinate of index `coord` gets - // `bits(coord) = ceil((BitWidth - coord)/D)` bits when encoded (so the first dimensions get more bits than the last ones if `D` does not - // divide `BitWidth perfectly`). - // Then instead of unpacking all the bits for that coordinate as the lowest bits, we unpack them as the highest ones - // by shifting everything `BitWidth - bits(coord)` bits to the left, then at the end do a final *arithmetic* bitshift right by the same amount. - - const U bitsCoord = BitWidth / U(D) + ((coord < BitWidth % D) ? U(1) : U(0)); // <- this computes the ceil - U coordVal = U(0); - // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `coord`, but I feel this is clearer - [[unroll]] - for (U valBitIdx = U(coord), coordBit = U(1) << U(coord), shift = U(coord); valBitIdx < BitWidth; valBitIdx += U(D), coordBit <<= U(D), shift += U(D) - 1) - { - coordVal |= (value & coordBit) << (BitWidth - bitsCoord - shift); - } - return bit_cast(coordVal) >> (BitWidth - bitsCoord); - } - - /** - * @brief Returns an element of type U by extracting only the highest bit (of the bits used to encode `coord`) - * - * @param [in] coord The coordinate whose highest bit we want to extract. 
- */ - NBL_CONSTEXPR_INLINE_FUNC U extractHighestBit(uint16_t coord) NBL_CONST_MEMBER_FUNC - { - // Like above, if the number encoded in `coord` gets `bits(coord) = ceil((BitWidth - coord)/D)` bits for representation, then the highest index of these - // bits is `bits(coord) - 1` - const U coordHighestBitIdx = BitWidth / U(D) - ((U(coord) < BitWidth % U(D)) ? U(0) : U(1)); - // This is the index of that bit as an index in the encoded value - const U shift = coordHighestBitIdx * U(D) + U(coord); - return value & (U(1) << shift); - } - - // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- - - NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - this_t retVal = { value & rhs.value }; - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - this_t retVal = { value | rhs.value }; - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - this_t retVal = { value ^ rhs.value }; - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC - { - this_t retVal = { ~value }; - return retVal; - } - - // Only valid in CPP - #ifndef __HLSL_VERSION - - NBL_CONSTEXPR_INLINE_FUNC this_t operator<<(uint16_t bits) NBL_CONST_MEMBER_FUNC - { - this_t retVal = { value << U(bits) }; - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator>>(uint16_t bits) NBL_CONST_MEMBER_FUNC - { - this_t retVal = { value >> U(bits) }; - return retVal; - } - - #endif - - // ------------------------------------------------------- UNARY ARITHMETIC OPERATORS ------------------------------------------------- - - NBL_CONSTEXPR_INLINE_FUNC this_t operator-() NBL_CONST_MEMBER_FUNC - { - // allOnes encodes a cartesian coordinate with all values set to 1 - const static this_t allOnes = { (U(1) << D) - U(1) }; - // 
Using 2's complement property that arithmetic negation can be obtained by bitwise negation then adding 1 - return operator~() + allOnes; - } - - // ------------------------------------------------------- BINARY ARITHMETIC OPERATORS ------------------------------------------------- - - // CHANGED FOR DEBUG: REMEMBER TO CHANGE BACK - - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); - this_t retVal = { U(0) }; - [[unroll]] - for (uint16_t coord = 0; coord < D; coord++) - { - // put 1 bits everywhere in the bits the current axis is not using - // then extract just the axis bits for the right hand coordinate - // carry-1 will propagate the bits across the already set bits - // then clear out the bits not belonging to current axis - // Note: Its possible to clear on `this` and fill on `rhs` but that will - // disable optimizations, we expect the compiler to optimize a lot if the - // value of `rhs` is known at compile time, e.g. 
`static_cast>(glm::ivec3(1,0,0))` - retVal.value |= ((value | (~Masks[coord])) + (rhs.value & Masks[coord])) & Masks[coord]; - } - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); - this_t retVal = { U(0) }; - [[unroll]] - for (uint16_t coord = 0; coord < D; coord++) - { - // This is the dual trick of the one used for addition: set all other bits to 0 so borrows propagate - retVal.value |= ((value & Masks[coord]) - (rhs.value & Masks[coord])) & Masks[coord]; - } - return retVal; - } - - // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- - - NBL_CONSTEXPR_INLINE_FUNC bool operator!() NBL_CONST_MEMBER_FUNC - { - return value.operator!(); - } - - NBL_CONSTEXPR_INLINE_FUNC bool coordEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC - { - NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); - return (value & Masks[coord]) == (rhs.value & Masks[coord]); - } - - NBL_CONSTEXPR_INLINE_FUNC vector operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - vector retVal; - [[unroll]] - for (uint16_t coord = 0; coord < D; coord++) - retVal[coord] = coordEquals(rhs, coord); - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC bool allEqual(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - return value == rhs.value; - } - - NBL_CONSTEXPR_INLINE_FUNC bool coordNotEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC - { - return !coordEquals(rhs, coord); - } - - NBL_CONSTEXPR_INLINE_FUNC vector operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - vector retVal; - [[unroll]] - for (uint16_t coord = 0; coord < D; coord++) - retVal[coord] = coordNotEquals(rhs, coord); - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC bool notAllEqual(NBL_CONST_REF_ARG(this_t) rhs) 
NBL_CONST_MEMBER_FUNC - { - return !allEqual(rhs); - } - - template - NBL_CONSTEXPR_INLINE_FUNC bool coordOrderCompare(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC - { - NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); - Comparison comparison; - OnSignMismatch onSignMismatch; - - // When unsigned, bit representation is the same but with 0s inbetween bits. In particular, we can still use unsigned comparison - #ifndef __HLSL_VERSION - if constexpr (is_unsigned_v) - #else - if (is_unsigned_v) - #endif - { - return comparison(value & Masks[coord], rhs.value & Masks[coord]); - } - // When signed, since the representation is unsigned, we need to divide behaviour based on highest bit - else - { - // I will give an example for `operator<` but the same reasoning holds for all others. Some abuse of notation but hopefully it's clear. - - // If `this[coord] >= 0` and `rhs[coord] < 0` then `this[coord] < rhs[coord]` returns false. Notice that in this case, the highest bit of - // `value` (of the bits representing the number encoded in `coord`) is `0`, while the highest bit for rhs is `1`. - // Similarly, if `this[coord] < 0` and `rhs[coord] >= 0` then `this[coord] < rhs[coord]` returns true, and the highest bit situation is inverted. - // This means that if the signs of `this[coord]` and `rhs[coord]` are not equal, the result depends on the sign of `this[coord]`. - // What that result should be is controlled by `OnSignMismatch`. - // Finally, notice that if only one of those bits is set to 1, then the `xor` of that highest bit yields 1 as well - const U highestBit = extractHighestBit(coord); - const U rhsHighestBit = rhs.extractHighestBit(coord); - if (highestBit ^ rhsHighestBit) - return onSignMismatch(highestBit); - // If both are nonnegative, then we can just use the comparison as it comes. - // If both are negative, it just so happens that applying the same operator to their unsigned bitcasted representations yields the same result. 
- // For `operator<`, for example, consider two negative numbers. Starting from the MSB (we know it's `1` for both in this case) and moving to the right, - // consider what happens when we encounter the first bit where they mismatch: the one with a `0` at position `k` (by position I mean counted from the - // left, starting at 0) is adding at most `2^k - 1` in the lowest bits, while the one with a `1` is adding exactly `2^k`. This means that the one - // with a 0 is "more negative". - else - return comparison(value & Masks[coord], rhs.value & Masks[coord]); - } - } - - struct OnSignMismatchLessThan - { - // On a sign mismatch, `thisrhs` is true if this is non-negative (`highestBit` set to `0`) and false otherwise - // Therefore since it takes a number with only the highest bit set we only have to return the opposite of whether there is in fact a bit set - bool operator()(U highestBit) - { - return !bool(highestBit); - } - }; - - NBL_CONSTEXPR_INLINE_FUNC bool coordLessThan(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC - { - return coordOrderCompare, OnSignMismatchLessThan>(rhs, coord); - } - - NBL_CONSTEXPR_INLINE_FUNC bool coordLessThanEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC - { - return coordOrderCompare, OnSignMismatchLessThan>(rhs, coord); - } - - NBL_CONSTEXPR_INLINE_FUNC bool coordGreaterThan(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC - { - return coordOrderCompare, OnSignMismatchGreaterThan>(rhs, coord); - } - - NBL_CONSTEXPR_INLINE_FUNC bool coordGreaterThanEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC - { - return coordOrderCompare, OnSignMismatchGreaterThan>(rhs, coord); - } - - #define DEFINE_OPERATOR(OP, COMPARISON) NBL_CONSTEXPR_INLINE_FUNC vector operator##OP##(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC \ - { \ - vector retVal; \ - [[unroll]] \ - for (uint16_t coord = 0; coord < D; coord++) \ - retVal[coord] = COMPARISON (rhs, 
coord); \ - return retVal; \ - } - - DEFINE_OPERATOR(< , coordLessThan); - DEFINE_OPERATOR(<= , coordLessThanEquals); - DEFINE_OPERATOR(> , coordGreaterThan); - DEFINE_OPERATOR(>= , coordGreaterThanEquals); - - #undef DEFINE_OPERATOR - - U value; -}; - -// Don't forget to delete this macro after usage -#undef NBL_HLSL_MORTON_MASKS - -} //namespace morton - -// Still in nbl::hlsl we can go to nbl::hlsl::impl and specialize the `static_cast_helper` -namespace impl -{ - -template -struct static_cast_helper, morton::code > -{ - NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(morton::code) val) - { - vector cartesian; - [[unroll]] - for (uint16_t coord = 0; coord < D; coord++) - { - cartesian[coord] = val.getCoordinate(coord); - } - return cartesian; - } -}; - -} // namespace impl - -} //namespace hlsl -} //namespace nbl - - - -#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl new file mode 100644 index 0000000000..89eddf8675 --- /dev/null +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -0,0 +1,72 @@ +#ifndef _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/concepts/core.hlsl" +#include "nbl/builtin/hlsl/bit.hlsl" +#include "nbl/builtin/hlsl/functional.hlsl" +#include "nbl/builtin/hlsl/emulated/uint64_t.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace morton +{ + +namespace impl +{ + +// Valid dimension for a morton code +template +NBL_BOOL_CONCEPT MortonDimension = 1 < D && D < 5; + +// Masks + +template +struct decode_mask; + +template +struct decode_mask : integral_constant {}; + +template +struct decode_mask : integral_constant::value << Dim) | T(1)> {}; + +template +NBL_CONSTEXPR T decode_mask_v = decode_mask::value; + +// Decode masks are different for each dimension + +template +struct MortonDecoder; + +} //namespace impl + +// Up to D = 4 supported +#define 
NBL_HLSL_MORTON_MASKS(U, D) _static_cast > (vector< U , 4 >(impl::decode_mask_v< U , D >,\ + impl::decode_mask_v< U , D > << U (1),\ + impl::decode_mask_v< U , D > << U (2),\ + impl::decode_mask_v< U , D > << U (3)\ + )) + +// Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 +// In particular, `Masks` should be a `const static` member field instead of appearing in every method using it +template && D * Bits <= 64) +struct code +{ + using this_t = code; + NBL_CONSTEXPR_STATIC uint16_t TotalBitWidth = D * Bits; + using storage_t = conditional_t<(TotalBitWidth>16), conditional_t<(TotalBitWidth>32), _uint64_t, uint32_t>, uint16_t> ; + + + storage_t value; +}; + +// Don't forget to delete this macro after usage +#undef NBL_HLSL_MORTON_MASKS + +} //namespace morton +} //namespace hlsl +} //namespace nbl + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl index d351cab07d..d8d90de726 100644 --- a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl +++ b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl @@ -4,13 +4,14 @@ #ifndef _NBL_BUILTIN_HLSL_SPIRV_INTRINSICS_CORE_INCLUDED_ #define _NBL_BUILTIN_HLSL_SPIRV_INTRINSICS_CORE_INCLUDED_ +#include + #ifdef __HLSL_VERSION // TODO: AnastZIuk fix public search paths so we don't choke #include "spirv/unified1/spirv.hpp" #include #include #include -#include namespace nbl { diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index fa548e210a..a11a26d69a 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -214,6 +214,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/macros.h") # emulated LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/float64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/float64_t_impl.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED 
"hlsl/emulated/uint64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/vector_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/matrix_t.hlsl") # portable @@ -291,7 +292,6 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/fast_affine.hlsl" LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/functions.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/geometry.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/intutil.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/morton.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quadratic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/cubic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quartic.hlsl") @@ -368,5 +368,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/tgmath/output_structs.hlsl") #blur LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/blur.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/box_sampler.hlsl") +#morton codes +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/morton.hlsl") ADD_CUSTOM_BUILTIN_RESOURCES(nblBuiltinResourceData NBL_RESOURCES_TO_EMBED "${NBL_ROOT_PATH}/include" "nbl/builtin" "nbl::builtin" "${NBL_ROOT_PATH_BINARY}/include" "${NBL_ROOT_PATH_BINARY}/src" "STATIC" "INTERNAL") From 89d2bf2a5d9fab347850babe31fdc8f0a95c64f6 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Wed, 2 Apr 2025 16:19:20 -0300 Subject: [PATCH 014/472] Refactor morton class, get new conversion running --- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 8 +- .../nbl/builtin/hlsl/emulated/uint64_t.hlsl | 11 ++ include/nbl/builtin/hlsl/morton.hlsl | 175 +++++++++++++++++- 3 files changed, 186 insertions(+), 8 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 7b8726566f..92fc9e929b 100644 --- 
a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -611,11 +611,11 @@ template struct addCarry_helper { using return_t = spirv::AddCarryOutput; - NBL_CONSTEXPR_STATIC_INLINE_FUNC return_t __call(const T operand1, const T operand2) + constexpr static inline return_t __call(const T operand1, const T operand2) { return_t retVal; retVal.result = operand1 + operand2; - retVal.carry = retVal.result < operand1 ? T(1) : T(0); + retVal.carry = T(retVal.result < operand1); return retVal; } }; @@ -624,11 +624,11 @@ template struct subBorrow_helper { using return_t = spirv::SubBorrowOutput; - NBL_CONSTEXPR_STATIC_INLINE_FUNC return_t __call(const T operand1, const T operand2) + constexpr static inline return_t __call(const T operand1, const T operand2) { return_t retVal; retVal.result = static_cast(operand1 - operand2); - retVal.borrow = operand1 >= operand2 ? T(0) : T(1); + retVal.borrow = T(operand1 < operand2); return retVal; } }; diff --git a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl index 3178159794..c4f1f1ef1b 100644 --- a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl @@ -40,6 +40,17 @@ struct emulated_uint64_t return retVal; } + /** + * @brief Creates an `emulated_uint64_t` from two `uint32_t`s representing its bitpattern + * + * @param [in] hi Highest 32 bits of the `uint64` being emulated + * @param [in] lo Lowest 32 bits of the `uint64` being emulated + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) hi, NBL_CONST_REF_ARG(uint32_t) lo) + { + return create(storage_t(hi, lo)); + } + // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC diff --git a/include/nbl/builtin/hlsl/morton.hlsl 
b/include/nbl/builtin/hlsl/morton.hlsl index 89eddf8675..d4ada29d70 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -1,11 +1,12 @@ -#ifndef _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ -#define _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ +#ifndef _NBL_BUILTIN_HLSL_MORTON_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MORTON_INCLUDED_ #include "nbl/builtin/hlsl/cpp_compat.hlsl" #include "nbl/builtin/hlsl/concepts/core.hlsl" #include "nbl/builtin/hlsl/bit.hlsl" #include "nbl/builtin/hlsl/functional.hlsl" #include "nbl/builtin/hlsl/emulated/uint64_t.hlsl" +#include "nbl/builtin/hlsl/mpl.hlsl" namespace nbl { @@ -35,11 +36,177 @@ struct decode_mask : integral_constant::value template NBL_CONSTEXPR T decode_mask_v = decode_mask::value; +// ----------------------------------------------------------------- MORTON DECODERS --------------------------------------------------- + // Decode masks are different for each dimension +// Decoder works with unsigned, cast to sign depends on the Morton class +// Bit width checks happen in Morton class as well -template +template struct MortonDecoder; +// Specializations for lack of uint64_t + +template +struct MortonDecoder<2, Bits, emulated_uint64_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(emulated_uint64_t) encodedValue) + { + NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); + + NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[6] = { emulated_uint64_t::create(uint32_t(0x55555555), uint32_t(0x55555555)), // Groups bits by 1 on, 1 off + emulated_uint64_t::create(uint32_t(0x33333333), uint32_t(0x33333333)), // Groups bits by 2 on, 2 off + emulated_uint64_t::create(uint32_t(0x0F0F0F0F), uint32_t(0x0F0F0F0F)), // Groups bits by 4 on, 4 off + emulated_uint64_t::create(uint32_t(0x00FF00FF), uint32_t(0x00FF00FF)), // Groups bits by 8 on, 8 off + emulated_uint64_t::create(uint32_t(0x0000FFFF), uint32_t(0x0000FFFF)), // 
Groups bits by 16 on, 16 off + emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0xFFFFFFFF)) };// Groups bits by 32 on, 32 off + + arithmetic_right_shift_operator rightShift; + + emulated_uint64_t decoded = encodedValue & DecodeMasks[0]; + [[unroll]] + for (uint16_t i = 0, shift = 1; i < MaxIterations; i++, shift <<= 1) + { + decoded = (decoded | rightShift(decoded, shift)) & DecodeMasks[i + 1]; + } + return _static_cast(decoded.data.y); + } +}; + +template +struct MortonDecoder<3, Bits, emulated_uint64_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(emulated_uint64_t) encodedValue) + { + NBL_CONSTEXPR_STATIC uint16_t MaxIterations = conditional_value<(Bits <= 3), uint16_t, uint16_t(1), + conditional_value<(Bits <= 6), uint16_t, uint16_t(2), + conditional_value<(Bits <= 12), uint16_t, uint16_t(3), uint16_t(4)>::value>::value>::value; + + NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[5] = { emulated_uint64_t::create(uint32_t(0x12492492), uint32_t(0x49249249)), // Groups bits by 1 on, 2 off (also only considers 21 bits) + emulated_uint64_t::create(uint32_t(0x01C0E070), uint32_t(0x381C0E07)), // Groups bits by 3 on, 6 off + emulated_uint64_t::create(uint32_t(0x0FC003F0), uint32_t(0x00FC003F)), // Groups bits by 6 on, 12 off + emulated_uint64_t::create(uint32_t(0x0000FFF0), uint32_t(0x00000FFF)), // Groups bits by 12 on, 24 off + emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0x00FFFFFF)) };// Groups bits by 24 on, 48 off (40 off if you're feeling pedantic) + + arithmetic_right_shift_operator rightShift; + + emulated_uint64_t decoded = encodedValue & DecodeMasks[0]; + // First iteration is special + decoded = (decoded | rightShift(decoded, 2) | rightShift(decoded, 4)) & DecodeMasks[1]; + [[unroll]] + for (uint16_t i = 0, shift = 6; i < MaxIterations - 1; i++, shift <<= 1) + { + decoded = (decoded | rightShift(decoded, shift)) & DecodeMasks[i + 2]; + } + return _static_cast(decoded.data.y); + } +}; + +template 
+struct MortonDecoder<4, Bits, emulated_uint64_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(emulated_uint64_t) encodedValue) + { + NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); + + NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[5] = { emulated_uint64_t::create(uint32_t(0x11111111), uint32_t(0x11111111)), // Groups bits by 1 on, 3 off + emulated_uint64_t::create(uint32_t(0x03030303), uint32_t(0x03030303)), // Groups bits by 2 on, 6 off + emulated_uint64_t::create(uint32_t(0x000F000F), uint32_t(0x000F000F)), // Groups bits by 4 on, 12 off + emulated_uint64_t::create(uint32_t(0x000000FF), uint32_t(0x000000FF)), // Groups bits by 8 on, 24 off + emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0x0000FFFF)) };// Groups bits by 16 on, 48 off + + arithmetic_right_shift_operator rightShift; + + emulated_uint64_t decoded = encodedValue & DecodeMasks[0]; + [[unroll]] + for (uint16_t i = 0, shift = 3; i < MaxIterations; i++, shift <<= 1) + { + decoded = (decoded | rightShift(decoded, shift)) & DecodeMasks[i + 1]; + } + return _static_cast(decoded.data.y); + } +}; + +template +struct MortonDecoder<2, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + { + NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); + + NBL_CONSTEXPR_STATIC encode_t DecodeMasks[6] = { _static_cast(0x5555555555555555), // Groups bits by 1 on, 1 off + _static_cast(0x3333333333333333), // Groups bits by 2 on, 2 off + _static_cast(0x0F0F0F0F0F0F0F0F), // Groups bits by 4 on, 4 off + _static_cast(0x00FF00FF00FF00FF), // Groups bits by 8 on, 8 off + _static_cast(0x0000FFFF0000FFFF), // Groups bits by 16 on, 16 off + _static_cast(0x00000000FFFFFFFF) };// Groups bits by 32 on, 32 off + + encode_t decoded = encodedValue & DecodeMasks[0]; + [[unroll]] + for (uint16_t i = 0, shift = 1; i < 
MaxIterations; i++, shift <<= 1) + { + decoded = (decoded | (decoded >> shift)) & DecodeMasks[i + 1]; + } + return _static_cast(decoded); + } +}; + +template +struct MortonDecoder<3, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + { + NBL_CONSTEXPR_STATIC uint16_t MaxIterations = conditional_value<(Bits <= 3), uint16_t, uint16_t(1), + conditional_value<(Bits <= 6), uint16_t, uint16_t(2), + conditional_value<(Bits <= 12), uint16_t, uint16_t(3), uint16_t(4)>::value>::value>::value; + + NBL_CONSTEXPR_STATIC encode_t DecodeMasks[5] = { _static_cast(0x1249249249249249), // Groups bits by 1 on, 2 off (also only considers 21 bits) + _static_cast(0x01C0E070381C0E07), // Groups bits by 3 on, 6 off + _static_cast(0x0FC003F000FC003F), // Groups bits by 6 on, 12 off + _static_cast(0x0000FFF000000FFF), // Groups bits by 12 on, 24 off + _static_cast(0x0000000000FFFFFF) };// Groups bits by 24 on, 48 off (40 off if you're feeling pedantic) + + encode_t decoded = encodedValue & DecodeMasks[0]; + // First iteration is special + decoded = (decoded | (decoded >> 2) | (decoded >> 4)) & DecodeMasks[1]; + [[unroll]] + for (uint16_t i = 0, shift = 6; i < MaxIterations - 1; i++, shift <<= 1) + { + decoded = (decoded | (decoded >> shift)) & DecodeMasks[i + 2]; + } + return _static_cast(decoded); + } +}; + +template +struct MortonDecoder<4, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + { + NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); + + NBL_CONSTEXPR_STATIC encode_t DecodeMasks[5] = { _static_cast(0x1111111111111111), // Groups bits by 1 on, 3 off + _static_cast(0x0303030303030303), // Groups bits by 2 on, 6 off + _static_cast(0x000F000F000F000F), // Groups bits by 4 on, 12 off + _static_cast(0x000000FF000000FF), // Groups bits by 8 on, 24 off + _static_cast(0x000000000000FFFF) };// 
Groups bits by 16 on, 48 off + + encode_t decoded = encodedValue & DecodeMasks[0]; + [[unroll]] + for (uint16_t i = 0, shift = 3; i < MaxIterations; i++, shift <<= 1) + { + decoded = (decoded | (decoded >> shift)) & DecodeMasks[i + 1]; + } + return _static_cast(decoded); + } +}; + } //namespace impl // Up to D = 4 supported @@ -56,7 +223,7 @@ struct code { using this_t = code; NBL_CONSTEXPR_STATIC uint16_t TotalBitWidth = D * Bits; - using storage_t = conditional_t<(TotalBitWidth>16), conditional_t<(TotalBitWidth>32), _uint64_t, uint32_t>, uint16_t> ; + using storage_t = conditional_t<(TotalBitWidth > 16), conditional_t<(TotalBitWidth > 32), _uint64_t, uint32_t>, uint16_t>; storage_t value; From de4d0fb2f266da125d94801c5c38bd81a9260acd Mon Sep 17 00:00:00 2001 From: Fletterio Date: Wed, 2 Apr 2025 23:45:53 -0300 Subject: [PATCH 015/472] Add new classes for encoding/decoding of morton codes --- .../nbl/builtin/hlsl/emulated/uint64_t.hlsl | 57 ++++ include/nbl/builtin/hlsl/morton.hlsl | 287 ++++++++++++++++-- 2 files changed, 312 insertions(+), 32 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl index c4f1f1ef1b..3794031c8e 100644 --- a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl @@ -3,6 +3,7 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" #include "nbl/builtin/hlsl/functional.hlsl" +#include "nbl/builtin/hlsl/concepts/core.hlsl" namespace nbl { @@ -156,6 +157,62 @@ constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(uint16_t bits) #endif +namespace impl +{ + +template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) <= sizeof(uint32_t))) +struct static_cast_helper && (sizeof(Unsigned) <= sizeof(uint32_t))) > +{ + using To = Unsigned; + using From = emulated_uint64_t; + + // Return only the lowest bits + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + return _static_cast(u.data.y); + } +}; + 
+template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) > sizeof(uint32_t))) +struct static_cast_helper && (sizeof(Unsigned) > sizeof(uint32_t))) > +{ + using To = Unsigned; + using From = emulated_uint64_t; + + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + const To highBits = _static_cast(u.data.x) << To(32); + return highBits | _static_cast(u.data.y); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) <= sizeof(uint32_t))) +struct static_cast_helper && (sizeof(Unsigned) <= sizeof(uint32_t))) > +{ + using To = emulated_uint64_t; + using From = Unsigned; + + // Set only lower bits + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + return emulated_uint64_t::create(uint32_t(0), _static_cast(u)); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) > sizeof(uint32_t))) +struct static_cast_helper && (sizeof(Unsigned) > sizeof(uint32_t))) > +{ + using To = emulated_uint64_t; + using From = Unsigned; + + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + return emulated_uint64_t::create(_static_cast(u >> 32), _static_cast(u)); + } +}; + +} //namespace impl + } //namespace nbl } //namespace hlsl diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index d4ada29d70..e2e1596587 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -22,7 +22,7 @@ namespace impl template NBL_BOOL_CONCEPT MortonDimension = 1 < D && D < 5; -// Masks +// Basic decode masks template struct decode_mask; @@ -36,17 +36,240 @@ struct decode_mask : integral_constant::value template NBL_CONSTEXPR T decode_mask_v = decode_mask::value; -// ----------------------------------------------------------------- MORTON DECODERS --------------------------------------------------- +// --------------------------------------------------------- MORTON ENCODE/DECODE MASKS 
--------------------------------------------------- +// Proper encode masks (either generic `T array[masksPerDImension]` or `morton_mask`) impossible to have until at best HLSL202y + +#define NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, HEX_VALUE) template struct morton_mask_##DIM##_##MASK \ +{\ + NBL_CONSTEXPR_STATIC_INLINE T value = _static_cast(HEX_VALUE);\ +}; + +#ifndef __HLSL_VERSION + +#define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_HIGH_VALUE, HEX_LOW_VALUE) template<> struct morton_mask_##DIM##_##MASK##\ +{\ + NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t value = emulated_uint64_t::create(uint32_t(0x##HEX_HIGH_VALUE), uint32_t(0x##HEX_LOW_VALUE));\ +}; + +#else + +#define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_HIGH_VALUE, HEX_LOW_VALUE) template<> struct morton_mask_##DIM##_##MASK##\ +{\ + NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t value;\ +};\ +NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t morton_mask_##DIM##_##MASK##::value = emulated_uint64_t::create(uint32_t(0x##HEX_HIGH_VALUE), uint32_t(0x##HEX_LOW_VALUE)); +#endif + +#define NBL_MORTON_DECODE_MASK(DIM, MASK, HEX_HIGH_VALUE, HEX_LOW_VALUE) template struct morton_mask_##DIM##_##MASK ;\ + NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_HIGH_VALUE, HEX_LOW_VALUE)\ + NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, 0x##HEX_HIGH_VALUE##HEX_LOW_VALUE)\ + template\ + NBL_CONSTEXPR T morton_mask_##DIM##_##MASK##_v = morton_mask_##DIM##_##MASK##::value; + +NBL_MORTON_DECODE_MASK(2, 0, 55555555, 55555555) // Groups bits by 1 on, 1 off +NBL_MORTON_DECODE_MASK(2, 1, 33333333, 33333333) // Groups bits by 2 on, 2 off +NBL_MORTON_DECODE_MASK(2, 2, 0F0F0F0F, 0F0F0F0F) // Groups bits by 4 on, 4 off +NBL_MORTON_DECODE_MASK(2, 3, 00FF00FF, 00FF00FF) // Groups bits by 8 on, 8 off +NBL_MORTON_DECODE_MASK(2, 4, 0000FFFF, 0000FFFF) // Groups bits by 16 on, 16 off +NBL_MORTON_DECODE_MASK(2, 5, 00000000, FFFFFFFF) // Groups bits by 32 on, 32 off + +NBL_MORTON_DECODE_MASK(3, 0, 12492492, 49249249) // Groups bits by 1 
on, 2 off - also limits each dimension to 21 bits +NBL_MORTON_DECODE_MASK(3, 1, 01C0E070, 381C0E07) // Groups bits by 3 on, 6 off +NBL_MORTON_DECODE_MASK(3, 2, 0FC003F0, 00FC003F) // Groups bits by 6 on, 12 off +NBL_MORTON_DECODE_MASK(3, 3, 0000FFF0, 00000FFF) // Groups bits by 12 on, 24 off +NBL_MORTON_DECODE_MASK(3, 4, 00000000, 00FFFFFF) // Groups bits by 24 on, 48 off + +NBL_MORTON_DECODE_MASK(4, 0, 11111111, 11111111) // Groups bits by 1 on, 3 off +NBL_MORTON_DECODE_MASK(4, 1, 03030303, 03030303) // Groups bits by 2 on, 6 off +NBL_MORTON_DECODE_MASK(4, 2, 000F000F, 000F000F) // Groups bits by 4 on, 12 off +NBL_MORTON_DECODE_MASK(4, 3, 000000FF, 000000FF) // Groups bits by 8 on, 24 off +NBL_MORTON_DECODE_MASK(4, 4, 00000000, 0000FFFF) // Groups bits by 16 on, 48 off + +#undef NBL_MORTON_DECODE_MASK +#undef NBL_MORTON_EMULATED_DECODE_MASK +#undef NBL_MORTON_GENERIC_DECODE_MASK + +// ----------------------------------------------------------------- MORTON ENCODERS --------------------------------------------------- + +template +struct MortonEncoder; + +template +struct MortonEncoder<2, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) + { + left_shift_operator leftShift; + encode_t encoded = _static_cast(decodedValue); + NBL_IF_CONSTEXPR(Bits > 16) + { + encoded = (encoded | leftShift(encoded, 16)) & morton_mask_2_4_v; + } + NBL_IF_CONSTEXPR(Bits > 8) + { + encoded = (encoded | leftShift(encoded, 8)) & morton_mask_2_3_v; + } + NBL_IF_CONSTEXPR(Bits > 4) + { + encoded = (encoded | leftShift(encoded, 4)) & morton_mask_2_2_v; + } + NBL_IF_CONSTEXPR(Bits > 2) + { + encoded = (encoded | leftShift(encoded, 2)) & morton_mask_2_1_v; + } + encoded = (encoded | leftShift(encoded, 1)) & morton_mask_2_0_v; + return encoded; + } +}; + +template +struct MortonEncoder<3, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) + { + 
left_shift_operator leftShift; + encode_t encoded = _static_cast(decodedValue); + NBL_IF_CONSTEXPR(Bits > 12) + { + encoded = (encoded | leftShift(encoded, 24)) & morton_mask_3_3_v; + } + NBL_IF_CONSTEXPR(Bits > 6) + { + encoded = (encoded | leftShift(encoded, 12)) & morton_mask_3_2_v; + } + NBL_IF_CONSTEXPR(Bits > 3) + { + encoded = (encoded | leftShift(encoded, 6)) & morton_mask_3_1_v; + } + encoded = (encoded | leftShift(encoded, 2) | leftShift(encoded, 4)) & morton_mask_3_0_v; + return encoded; + } +}; -// Decode masks are different for each dimension -// Decoder works with unsigned, cast to sign depends on the Morton class -// Bit width checks happen in Morton class as well +template +struct MortonEncoder<4, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) + { + left_shift_operator leftShift; + encode_t encoded = _static_cast(decodedValue); + NBL_IF_CONSTEXPR(Bits > 8) + { + encoded = (encoded | leftShift(encoded, 24)) & morton_mask_4_3_v; + } + NBL_IF_CONSTEXPR(Bits > 4) + { + encoded = (encoded | leftShift(encoded, 12)) & morton_mask_4_2_v; + } + NBL_IF_CONSTEXPR(Bits > 2) + { + encoded = (encoded | leftShift(encoded, 6)) & morton_mask_4_1_v; + } + encoded = (encoded | leftShift(encoded, 3)) & morton_mask_4_0_v; + return encoded; + } +}; + +// ----------------------------------------------------------------- MORTON DECODERS --------------------------------------------------- template struct MortonDecoder; -// Specializations for lack of uint64_t +template +struct MortonDecoder<2, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + { + arithmetic_right_shift_operator rightShift; + encode_t decoded = encodedValue & morton_mask_2_0_v; + NBL_IF_CONSTEXPR(Bits > 1) + { + decoded = (decoded | rightShift(decoded, 1)) & morton_mask_2_1_v; + } + NBL_IF_CONSTEXPR(Bits > 2) + { + decoded = (decoded | rightShift(decoded, 2)) 
& morton_mask_2_2_v; + } + NBL_IF_CONSTEXPR(Bits > 4) + { + decoded = (decoded | rightShift(decoded, 4)) & morton_mask_2_3_v; + } + NBL_IF_CONSTEXPR(Bits > 8) + { + decoded = (decoded | rightShift(decoded, 8)) & morton_mask_2_4_v; + } + NBL_IF_CONSTEXPR(Bits > 16) + { + decoded = (decoded | rightShift(decoded, 16)) & morton_mask_2_5_v; + } + + return _static_cast(decoded); + } +}; + +template +struct MortonDecoder<3, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + { + arithmetic_right_shift_operator rightShift; + encode_t decoded = encodedValue & morton_mask_3_0_v; + NBL_IF_CONSTEXPR(Bits > 1) + { + decoded = (decoded | rightShift(decoded, 2) | rightShift(decoded, 4)) & morton_mask_3_1_v; + } + NBL_IF_CONSTEXPR(Bits > 3) + { + decoded = (decoded | rightShift(decoded, 6)) & morton_mask_3_2_v; + } + NBL_IF_CONSTEXPR(Bits > 6) + { + decoded = (decoded | rightShift(decoded, 12)) & morton_mask_3_3_v; + } + NBL_IF_CONSTEXPR(Bits > 12) + { + decoded = (decoded | rightShift(decoded, 24)) & morton_mask_3_4_v; + } + + return _static_cast(decoded); + } +}; +template +struct MortonDecoder<4, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + { + arithmetic_right_shift_operator rightShift; + encode_t decoded = encodedValue & morton_mask_4_0_v; + NBL_IF_CONSTEXPR(Bits > 1) + { + decoded = (decoded | rightShift(decoded, 3)) & morton_mask_4_1_v; + } + NBL_IF_CONSTEXPR(Bits > 2) + { + decoded = (decoded | rightShift(decoded, 6)) & morton_mask_4_2_v; + } + NBL_IF_CONSTEXPR(Bits > 4) + { + decoded = (decoded | rightShift(decoded, 12)) & morton_mask_4_3_v; + } + NBL_IF_CONSTEXPR(Bits > 8) + { + decoded = (decoded | rightShift(decoded, 24)) & morton_mask_4_4_v; + } + + return _static_cast(decoded); + } +}; + +/* template struct MortonDecoder<2, Bits, emulated_uint64_t> { @@ -55,12 +278,12 @@ struct MortonDecoder<2, Bits, 
emulated_uint64_t> { NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); - NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[6] = { emulated_uint64_t::create(uint32_t(0x55555555), uint32_t(0x55555555)), // Groups bits by 1 on, 1 off - emulated_uint64_t::create(uint32_t(0x33333333), uint32_t(0x33333333)), // Groups bits by 2 on, 2 off - emulated_uint64_t::create(uint32_t(0x0F0F0F0F), uint32_t(0x0F0F0F0F)), // Groups bits by 4 on, 4 off - emulated_uint64_t::create(uint32_t(0x00FF00FF), uint32_t(0x00FF00FF)), // Groups bits by 8 on, 8 off - emulated_uint64_t::create(uint32_t(0x0000FFFF), uint32_t(0x0000FFFF)), // Groups bits by 16 on, 16 off - emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0xFFFFFFFF)) };// Groups bits by 32 on, 32 off + NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[6] = { emulated_uint64_t::create(uint32_t(0x55555555), uint32_t(0x55555555)), + emulated_uint64_t::create(uint32_t(0x33333333), uint32_t(0x33333333)), + emulated_uint64_t::create(uint32_t(0x0F0F0F0F), uint32_t(0x0F0F0F0F)), + emulated_uint64_t::create(uint32_t(0x00FF00FF), uint32_t(0x00FF00FF)), + emulated_uint64_t::create(uint32_t(0x0000FFFF), uint32_t(0x0000FFFF)), + emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0xFFFFFFFF)) }; arithmetic_right_shift_operator rightShift; @@ -84,11 +307,11 @@ struct MortonDecoder<3, Bits, emulated_uint64_t> conditional_value<(Bits <= 6), uint16_t, uint16_t(2), conditional_value<(Bits <= 12), uint16_t, uint16_t(3), uint16_t(4)>::value>::value>::value; - NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[5] = { emulated_uint64_t::create(uint32_t(0x12492492), uint32_t(0x49249249)), // Groups bits by 1 on, 2 off (also only considers 21 bits) - emulated_uint64_t::create(uint32_t(0x01C0E070), uint32_t(0x381C0E07)), // Groups bits by 3 on, 6 off - emulated_uint64_t::create(uint32_t(0x0FC003F0), uint32_t(0x00FC003F)), // Groups bits by 6 on, 12 off - 
emulated_uint64_t::create(uint32_t(0x0000FFF0), uint32_t(0x00000FFF)), // Groups bits by 12 on, 24 off - emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0x00FFFFFF)) };// Groups bits by 24 on, 48 off (40 off if you're feeling pedantic) + NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[5] = { emulated_uint64_t::create(uint32_t(0x12492492), uint32_t(0x49249249)), (also only considers 21 bits) + emulated_uint64_t::create(uint32_t(0x01C0E070), uint32_t(0x381C0E07)), + emulated_uint64_t::create(uint32_t(0x0FC003F0), uint32_t(0x00FC003F)), + emulated_uint64_t::create(uint32_t(0x0000FFF0), uint32_t(0x00000FFF)), + emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0x00FFFFFF)) }; (40 off if you're feeling pedantic) arithmetic_right_shift_operator rightShift; @@ -112,11 +335,11 @@ struct MortonDecoder<4, Bits, emulated_uint64_t> { NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); - NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[5] = { emulated_uint64_t::create(uint32_t(0x11111111), uint32_t(0x11111111)), // Groups bits by 1 on, 3 off - emulated_uint64_t::create(uint32_t(0x03030303), uint32_t(0x03030303)), // Groups bits by 2 on, 6 off - emulated_uint64_t::create(uint32_t(0x000F000F), uint32_t(0x000F000F)), // Groups bits by 4 on, 12 off - emulated_uint64_t::create(uint32_t(0x000000FF), uint32_t(0x000000FF)), // Groups bits by 8 on, 24 off - emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0x0000FFFF)) };// Groups bits by 16 on, 48 off + NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[5] = { emulated_uint64_t::create(uint32_t(0x11111111), uint32_t(0x11111111)), + emulated_uint64_t::create(uint32_t(0x03030303), uint32_t(0x03030303)), + emulated_uint64_t::create(uint32_t(0x000F000F), uint32_t(0x000F000F)), + emulated_uint64_t::create(uint32_t(0x000000FF), uint32_t(0x000000FF)), + emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0x0000FFFF)) }; arithmetic_right_shift_operator rightShift; @@ 
-207,14 +430,9 @@ struct MortonDecoder<4, Bits, encode_t> } }; -} //namespace impl +*/ -// Up to D = 4 supported -#define NBL_HLSL_MORTON_MASKS(U, D) _static_cast > (vector< U , 4 >(impl::decode_mask_v< U , D >,\ - impl::decode_mask_v< U , D > << U (1),\ - impl::decode_mask_v< U , D > << U (2),\ - impl::decode_mask_v< U , D > << U (3)\ - )) +} //namespace impl // Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 // In particular, `Masks` should be a `const static` member field instead of appearing in every method using it @@ -227,10 +445,15 @@ struct code storage_t value; -}; -// Don't forget to delete this macro after usage -#undef NBL_HLSL_MORTON_MASKS + // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- + + #ifndef __HLSL_VERSION + + code() = default; + + #endif +}; } //namespace morton } //namespace hlsl From 799420e9dfa1f8bd8039fd724edea4ecf3133a87 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Fri, 4 Apr 2025 16:20:54 -0300 Subject: [PATCH 016/472] Fix conversion operators --- .../nbl/builtin/hlsl/emulated/uint64_t.hlsl | 33 ++- include/nbl/builtin/hlsl/morton.hlsl | 279 ++++++------------ 2 files changed, 116 insertions(+), 196 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl index 3794031c8e..ab08e1ff38 100644 --- a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl @@ -24,9 +24,6 @@ struct emulated_uint64_t emulated_uint64_t() = default; - // To immediately get compound operators and functional structs in CPP side - explicit emulated_uint64_t(const storage_t _data) : data(_data) {} - #endif /** @@ -52,6 +49,16 @@ struct emulated_uint64_t return create(storage_t(hi, lo)); } + /** + * @brief Creates an `emulated_uint64_t` from a `uint64_t`. Useful for compile-time encoding. 
+ * + * @param [in] _data `uint64_t` to be unpacked into high and low bits + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint64_t) u) + { + return create(_static_cast(u >> 32), _static_cast(u)); + } + // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC @@ -115,9 +122,11 @@ struct left_shift_operator NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) { + if (!bits) + return operand; const uint32_t _bits = uint32_t(bits); const uint32_t shift = ComponentBitWidth - _bits; - const uint32_t higherBitsMask = ~uint32_t(0) << shift; + const uint32_t higherBitsMask = (~uint32_t(0)) << shift; // We need the `x` component of the vector (which represents the higher bits of the emulated uint64) to get the `bits` higher bits of the `y` component const vector retValData = { (operand.data.x << _bits) | ((operand.data.y & higherBitsMask) >> shift), operand.data.y << _bits }; return emulated_uint64_t::create(retValData); @@ -132,6 +141,8 @@ struct arithmetic_right_shift_operator NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) { + if (!bits) + return operand; const uint32_t _bits = uint32_t(bits); const uint32_t shift = ComponentBitWidth - _bits; const uint32_t lowerBitsMask = ~uint32_t(0) >> shift; @@ -173,10 +184,10 @@ struct static_cast_helper NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) > sizeof(uint32_t))) -struct static_cast_helper && (sizeof(Unsigned) > sizeof(uint32_t))) > +template<> +struct static_cast_helper { - using To = Unsigned; + using To = uint64_t; using From = emulated_uint64_t; NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) @@ -199,15 +210,15 @@ struct static_cast_helper NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) > 
sizeof(uint32_t))) -struct static_cast_helper && (sizeof(Unsigned) > sizeof(uint32_t))) > +template<> +struct static_cast_helper { using To = emulated_uint64_t; - using From = Unsigned; + using From = uint64_t; NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) { - return emulated_uint64_t::create(_static_cast(u >> 32), _static_cast(u)); + return emulated_uint64_t::create(u); } }; diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index e2e1596587..07aa21b821 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -46,44 +46,41 @@ NBL_CONSTEXPR T decode_mask_v = decode_mask::value; #ifndef __HLSL_VERSION -#define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_HIGH_VALUE, HEX_LOW_VALUE) template<> struct morton_mask_##DIM##_##MASK##\ -{\ - NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t value = emulated_uint64_t::create(uint32_t(0x##HEX_HIGH_VALUE), uint32_t(0x##HEX_LOW_VALUE));\ -}; +#define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_VALUE) #else -#define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_HIGH_VALUE, HEX_LOW_VALUE) template<> struct morton_mask_##DIM##_##MASK##\ +#define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_VALUE) template<> struct morton_mask_##DIM##_##MASK##\ {\ NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t value;\ };\ -NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t morton_mask_##DIM##_##MASK##::value = emulated_uint64_t::create(uint32_t(0x##HEX_HIGH_VALUE), uint32_t(0x##HEX_LOW_VALUE)); +NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t morton_mask_##DIM##_##MASK##::value = _static_cast(HEX_VALUE); #endif -#define NBL_MORTON_DECODE_MASK(DIM, MASK, HEX_HIGH_VALUE, HEX_LOW_VALUE) template struct morton_mask_##DIM##_##MASK ;\ - NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_HIGH_VALUE, HEX_LOW_VALUE)\ - NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, 0x##HEX_HIGH_VALUE##HEX_LOW_VALUE)\ +#define NBL_MORTON_DECODE_MASK(DIM, MASK, HEX_VALUE) template struct 
morton_mask_##DIM##_##MASK ;\ + NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_VALUE)\ + NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, HEX_VALUE)\ template\ NBL_CONSTEXPR T morton_mask_##DIM##_##MASK##_v = morton_mask_##DIM##_##MASK##::value; -NBL_MORTON_DECODE_MASK(2, 0, 55555555, 55555555) // Groups bits by 1 on, 1 off -NBL_MORTON_DECODE_MASK(2, 1, 33333333, 33333333) // Groups bits by 2 on, 2 off -NBL_MORTON_DECODE_MASK(2, 2, 0F0F0F0F, 0F0F0F0F) // Groups bits by 4 on, 4 off -NBL_MORTON_DECODE_MASK(2, 3, 00FF00FF, 00FF00FF) // Groups bits by 8 on, 8 off -NBL_MORTON_DECODE_MASK(2, 4, 0000FFFF, 0000FFFF) // Groups bits by 16 on, 16 off -NBL_MORTON_DECODE_MASK(2, 5, 00000000, FFFFFFFF) // Groups bits by 32 on, 32 off - -NBL_MORTON_DECODE_MASK(3, 0, 12492492, 49249249) // Groups bits by 1 on, 2 off - also limits each dimension to 21 bits -NBL_MORTON_DECODE_MASK(3, 1, 01C0E070, 381C0E07) // Groups bits by 3 on, 6 off -NBL_MORTON_DECODE_MASK(3, 2, 0FC003F0, 00FC003F) // Groups bits by 6 on, 12 off -NBL_MORTON_DECODE_MASK(3, 3, 0000FFF0, 00000FFF) // Groups bits by 12 on, 24 off -NBL_MORTON_DECODE_MASK(3, 4, 00000000, 00FFFFFF) // Groups bits by 24 on, 48 off - -NBL_MORTON_DECODE_MASK(4, 0, 11111111, 11111111) // Groups bits by 1 on, 3 off -NBL_MORTON_DECODE_MASK(4, 1, 03030303, 03030303) // Groups bits by 2 on, 6 off -NBL_MORTON_DECODE_MASK(4, 2, 000F000F, 000F000F) // Groups bits by 4 on, 12 off -NBL_MORTON_DECODE_MASK(4, 3, 000000FF, 000000FF) // Groups bits by 8 on, 24 off -NBL_MORTON_DECODE_MASK(4, 4, 00000000, 0000FFFF) // Groups bits by 16 on, 48 off +NBL_MORTON_DECODE_MASK(2, 0, uint64_t(0x5555555555555555)) // Groups bits by 1 on, 1 off +NBL_MORTON_DECODE_MASK(2, 1, uint64_t(0x3333333333333333)) // Groups bits by 2 on, 2 off +NBL_MORTON_DECODE_MASK(2, 2, uint64_t(0x0F0F0F0F0F0F0F0F)) // Groups bits by 4 on, 4 off +NBL_MORTON_DECODE_MASK(2, 3, uint64_t(0x00FF00FF00FF00FF)) // Groups bits by 8 on, 8 off +NBL_MORTON_DECODE_MASK(2, 4, uint64_t(0x0000FFFF0000FFFF)) // 
Groups bits by 16 on, 16 off +NBL_MORTON_DECODE_MASK(2, 5, uint64_t(0x00000000FFFFFFFF)) // Groups bits by 32 on, 32 off + +NBL_MORTON_DECODE_MASK(3, 0, uint64_t(0x1249249249249249)) // Groups bits by 1 on, 2 off - also limits each dimension to 21 bits +NBL_MORTON_DECODE_MASK(3, 1, uint64_t(0x01C0E070381C0E07)) // Groups bits by 3 on, 6 off +NBL_MORTON_DECODE_MASK(3, 2, uint64_t(0x0FC003F000FC003F)) // Groups bits by 6 on, 12 off +NBL_MORTON_DECODE_MASK(3, 3, uint64_t(0x0000FFF000000FFF)) // Groups bits by 12 on, 24 off +NBL_MORTON_DECODE_MASK(3, 4, uint64_t(0x0000000000FFFFFF)) // Groups bits by 24 on, 48 off + +NBL_MORTON_DECODE_MASK(4, 0, uint64_t(0x1111111111111111)) // Groups bits by 1 on, 3 off +NBL_MORTON_DECODE_MASK(4, 1, uint64_t(0x0303030303030303)) // Groups bits by 2 on, 6 off +NBL_MORTON_DECODE_MASK(4, 2, uint64_t(0x000F000F000F000F)) // Groups bits by 4 on, 12 off +NBL_MORTON_DECODE_MASK(4, 3, uint64_t(0x000000FF000000FF)) // Groups bits by 8 on, 24 off +NBL_MORTON_DECODE_MASK(4, 4, uint64_t(0x000000000000FFFF)) // Groups bits by 16 on, 48 off #undef NBL_MORTON_DECODE_MASK #undef NBL_MORTON_EMULATED_DECODE_MASK @@ -269,193 +266,105 @@ struct MortonDecoder<4, Bits, encode_t> } }; -/* -template -struct MortonDecoder<2, Bits, emulated_uint64_t> -{ - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(emulated_uint64_t) encodedValue) - { - NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); +} //namespace impl - NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[6] = { emulated_uint64_t::create(uint32_t(0x55555555), uint32_t(0x55555555)), - emulated_uint64_t::create(uint32_t(0x33333333), uint32_t(0x33333333)), - emulated_uint64_t::create(uint32_t(0x0F0F0F0F), uint32_t(0x0F0F0F0F)), - emulated_uint64_t::create(uint32_t(0x00FF00FF), uint32_t(0x00FF00FF)), - emulated_uint64_t::create(uint32_t(0x0000FFFF), uint32_t(0x0000FFFF)), - emulated_uint64_t::create(uint32_t(0x00000000), 
uint32_t(0xFFFFFFFF)) }; +// Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 +// In particular, `Masks` should be a `const static` member field instead of appearing in every method using it +template && D * Bits <= 64) +struct code +{ + using this_t = code; + NBL_CONSTEXPR_STATIC uint16_t TotalBitWidth = D * Bits; + using storage_t = conditional_t<(TotalBitWidth > 16), conditional_t<(TotalBitWidth > 32), _uint64_t, uint32_t>, uint16_t>; - arithmetic_right_shift_operator rightShift; + + storage_t value; - emulated_uint64_t decoded = encodedValue & DecodeMasks[0]; - [[unroll]] - for (uint16_t i = 0, shift = 1; i < MaxIterations; i++, shift <<= 1) - { - decoded = (decoded | rightShift(decoded, shift)) & DecodeMasks[i + 1]; - } - return _static_cast(decoded.data.y); - } -}; + // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- -template -struct MortonDecoder<3, Bits, emulated_uint64_t> -{ - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(emulated_uint64_t) encodedValue) - { - NBL_CONSTEXPR_STATIC uint16_t MaxIterations = conditional_value<(Bits <= 3), uint16_t, uint16_t(1), - conditional_value<(Bits <= 6), uint16_t, uint16_t(2), - conditional_value<(Bits <= 12), uint16_t, uint16_t(3), uint16_t(4)>::value>::value>::value; + #ifndef __HLSL_VERSION - NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[5] = { emulated_uint64_t::create(uint32_t(0x12492492), uint32_t(0x49249249)), (also only considers 21 bits) - emulated_uint64_t::create(uint32_t(0x01C0E070), uint32_t(0x381C0E07)), - emulated_uint64_t::create(uint32_t(0x0FC003F0), uint32_t(0x00FC003F)), - emulated_uint64_t::create(uint32_t(0x0000FFF0), uint32_t(0x00000FFF)), - emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0x00FFFFFF)) }; (40 off if you're feeling pedantic) + code() = default; - arithmetic_right_shift_operator rightShift; + 
#endif - emulated_uint64_t decoded = encodedValue & DecodeMasks[0]; - // First iteration is special - decoded = (decoded | rightShift(decoded, 2) | rightShift(decoded, 4)) & DecodeMasks[1]; + /** + * @brief Creates a Morton code from a set of integral cartesian coordinates + * + * @param [in] cartesian Coordinates to encode. Signedness MUST match the signedness of this Morton code class + */ + template + NBL_CONSTEXPR_STATIC_FUNC enable_if_t && is_scalar_v && (is_signed_v == Signed), this_t> + create(NBL_CONST_REF_ARG(vector) cartesian) + { + using U = make_unsigned_t; + left_shift_operator leftShift; + storage_t encodedCartesian = _static_cast(uint64_t(0)); [[unroll]] - for (uint16_t i = 0, shift = 6; i < MaxIterations - 1; i++, shift <<= 1) + for (uint16_t i = 0; i < D; i++) { - decoded = (decoded | rightShift(decoded, shift)) & DecodeMasks[i + 2]; + encodedCartesian = encodedCartesian | leftShift(impl::MortonEncoder::encode(_static_cast(cartesian[i])), i); } - return _static_cast(decoded.data.y); + this_t retVal; + retVal.value = encodedCartesian; + return retVal; } -}; -template -struct MortonDecoder<4, Bits, emulated_uint64_t> -{ - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(emulated_uint64_t) encodedValue) - { - NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); - - NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[5] = { emulated_uint64_t::create(uint32_t(0x11111111), uint32_t(0x11111111)), - emulated_uint64_t::create(uint32_t(0x03030303), uint32_t(0x03030303)), - emulated_uint64_t::create(uint32_t(0x000F000F), uint32_t(0x000F000F)), - emulated_uint64_t::create(uint32_t(0x000000FF), uint32_t(0x000000FF)), - emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0x0000FFFF)) }; + // CPP can also have an actual constructor + #ifndef __HLSL_VERSION - arithmetic_right_shift_operator rightShift; + /** + * @brief Creates a Morton code from a set of cartesian coordinates + * + * 
@param [in] cartesian Coordinates to encode + */ - emulated_uint64_t decoded = encodedValue & DecodeMasks[0]; - [[unroll]] - for (uint16_t i = 0, shift = 3; i < MaxIterations; i++, shift <<= 1) - { - decoded = (decoded | rightShift(decoded, shift)) & DecodeMasks[i + 1]; - } - return _static_cast(decoded.data.y); - } -}; - -template -struct MortonDecoder<2, Bits, encode_t> -{ - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + template + explicit code(NBL_CONST_REF_ARG(vector) cartesian) { - NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); - - NBL_CONSTEXPR_STATIC encode_t DecodeMasks[6] = { _static_cast(0x5555555555555555), // Groups bits by 1 on, 1 off - _static_cast(0x3333333333333333), // Groups bits by 2 on, 2 off - _static_cast(0x0F0F0F0F0F0F0F0F), // Groups bits by 4 on, 4 off - _static_cast(0x00FF00FF00FF00FF), // Groups bits by 8 on, 8 off - _static_cast(0x0000FFFF0000FFFF), // Groups bits by 16 on, 16 off - _static_cast(0x00000000FFFFFFFF) };// Groups bits by 32 on, 32 off - - encode_t decoded = encodedValue & DecodeMasks[0]; - [[unroll]] - for (uint16_t i = 0, shift = 1; i < MaxIterations; i++, shift <<= 1) - { - decoded = (decoded | (decoded >> shift)) & DecodeMasks[i + 1]; - } - return _static_cast(decoded); + *this = create(cartesian); } -}; -template -struct MortonDecoder<3, Bits, encode_t> -{ - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + // This one is defined later since it requires `static_cast_helper` specialization + + /** + * @brief Decodes this Morton code back to a set of cartesian coordinates + */ + + template + explicit operator vector() const noexcept { - NBL_CONSTEXPR_STATIC uint16_t MaxIterations = conditional_value<(Bits <= 3), uint16_t, uint16_t(1), - conditional_value<(Bits <= 6), uint16_t, uint16_t(2), - conditional_value<(Bits <= 12), uint16_t, uint16_t(3), 
uint16_t(4)>::value>::value>::value; - - NBL_CONSTEXPR_STATIC encode_t DecodeMasks[5] = { _static_cast(0x1249249249249249), // Groups bits by 1 on, 2 off (also only considers 21 bits) - _static_cast(0x01C0E070381C0E07), // Groups bits by 3 on, 6 off - _static_cast(0x0FC003F000FC003F), // Groups bits by 6 on, 12 off - _static_cast(0x0000FFF000000FFF), // Groups bits by 12 on, 24 off - _static_cast(0x0000000000FFFFFF) };// Groups bits by 24 on, 48 off (40 off if you're feeling pedantic) - - encode_t decoded = encodedValue & DecodeMasks[0]; - // First iteration is special - decoded = (decoded | (decoded >> 2) | (decoded >> 4)) & DecodeMasks[1]; - [[unroll]] - for (uint16_t i = 0, shift = 6; i < MaxIterations - 1; i++, shift <<= 1) - { - decoded = (decoded | (decoded >> shift)) & DecodeMasks[i + 2]; - } - return _static_cast(decoded); + return _static_cast, morton::code, Bits, D>>(*this); } + + #endif }; -template -struct MortonDecoder<4, Bits, encode_t> +} //namespace morton + +// Specialize the `static_cast_helper` +namespace impl { - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) +// I must be of same signedness as the morton code, and be wide enough to hold each component +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (8 * sizeof(I) >= Bits)) +struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (8 * sizeof(I) >= Bits)) > +{ + NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(morton::code, Bits, D, _uint64_t>) val) { - NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); - - NBL_CONSTEXPR_STATIC encode_t DecodeMasks[5] = { _static_cast(0x1111111111111111), // Groups bits by 1 on, 3 off - _static_cast(0x0303030303030303), // Groups bits by 2 on, 6 off - _static_cast(0x000F000F000F000F), // Groups bits by 4 on, 12 off - _static_cast(0x000000FF000000FF), // Groups bits by 8 on, 24 off - 
_static_cast(0x000000000000FFFF) };// Groups bits by 16 on, 48 off - - encode_t decoded = encodedValue & DecodeMasks[0]; - [[unroll]] - for (uint16_t i = 0, shift = 3; i < MaxIterations; i++, shift <<= 1) + using U = make_unsigned_t; + using storage_t = typename morton::code, Bits, D, _uint64_t>::storage_t; + arithmetic_right_shift_operator rightShift; + vector cartesian; + for (uint16_t i = 0; i < D; i++) { - decoded = (decoded | (decoded >> shift)) & DecodeMasks[i + 1]; + cartesian[i] = _static_cast(morton::impl::MortonDecoder::template decode(rightShift(val.value, i))); } - return _static_cast(decoded); + return cartesian; } }; -*/ - -} //namespace impl +} // namespace impl -// Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 -// In particular, `Masks` should be a `const static` member field instead of appearing in every method using it -template && D * Bits <= 64) -struct code -{ - using this_t = code; - NBL_CONSTEXPR_STATIC uint16_t TotalBitWidth = D * Bits; - using storage_t = conditional_t<(TotalBitWidth > 16), conditional_t<(TotalBitWidth > 32), _uint64_t, uint32_t>, uint16_t>; - - - storage_t value; - - // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- - - #ifndef __HLSL_VERSION - - code() = default; - - #endif -}; - -} //namespace morton } //namespace hlsl } //namespace nbl From 52323bc1f67e58b547c65be11ae9ac9d08e8e4ed Mon Sep 17 00:00:00 2001 From: Fletterio Date: Fri, 4 Apr 2025 23:45:39 -0300 Subject: [PATCH 017/472] Finish the rest of comparison ops and we're done! 
--- .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 2 + include/nbl/builtin/hlsl/functional.hlsl | 23 +- include/nbl/builtin/hlsl/morton.hlsl | 231 +++++++++++++++++- .../nbl/builtin/hlsl/portable/uint64_t.hlsl | 30 +++ .../nbl/builtin/hlsl/portable/vector_t.hlsl | 18 ++ src/nbl/builtin/CMakeLists.txt | 1 + 6 files changed, 294 insertions(+), 11 deletions(-) create mode 100644 include/nbl/builtin/hlsl/portable/uint64_t.hlsl diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index 0053008aa4..a106cec440 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -2,6 +2,7 @@ #define _NBL_BUILTIN_HLSL_EMULATED_VECTOR_T_HLSL_INCLUDED_ #include +#include #include #include #include @@ -329,6 +330,7 @@ struct emulated_vector : CRTP DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) + DEFINE_OPERATORS_FOR_TYPE(emulated_uint64_t) DEFINE_OPERATORS_FOR_TYPE(float32_t) DEFINE_OPERATORS_FOR_TYPE(float64_t) DEFINE_OPERATORS_FOR_TYPE(uint16_t) diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 3cf24193a4..e5486e2727 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -195,7 +195,7 @@ struct maximum NBL_CONSTEXPR_STATIC_INLINE T identity = numeric_limits::lowest; // TODO: `all_components` }; -template +template struct ternary_operator { using type_t = T; @@ -206,7 +206,7 @@ struct ternary_operator } }; -template +template struct left_shift_operator { using type_t = T; @@ -217,7 +217,7 @@ struct left_shift_operator } }; -template +template struct arithmetic_right_shift_operator { using type_t = T; @@ -228,9 +228,20 @@ struct arithmetic_right_shift_operator } }; -// Declare template, but left unimplemented by default -template -struct logical_right_shift_operator; +template 
+struct logical_right_shift_operator +{ + using type_t = T; + using unsigned_type_t = make_unsigned_t; + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + arithmetic_right_shift_operator arithmeticRightShift; + return _static_cast(arithmeticRightShift(_static_cast(operand), _static_cast(bits))); + } +}; + + } //namespace nbl } //namespace hlsl diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 07aa21b821..499deb1db4 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -266,6 +266,47 @@ struct MortonDecoder<4, Bits, encode_t> } }; +// ---------------------------------------------------- COMPARISON OPERATORS --------------------------------------------------------------- +// Here because no partial specialization of methods + +template +struct Equals; + +template +struct Equals +{ + NBL_CONSTEXPR_INLINE_FUNC vector operator()(NBL_CONST_REF_ARG(storage_t) _value, NBL_CONST_REF_ARG(vector) rhs) + { + NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v; + vector retVal; + [[unroll]] + for (uint16_t i = 0; i < D; i++) + { + retVal[i] = (_value & rhs[i]) == rhs[i]; + } + return retVal; + } +}; + +template +struct Equals +{ + template + NBL_CONSTEXPR_INLINE_FUNC enable_if_t&& is_scalar_v && (is_signed_v == Signed), vector > + operator()(NBL_CONST_REF_ARG(storage_t) _value, NBL_CONST_REF_ARG(vector) rhs) + { + using U = make_unsigned_t; + vector interleaved; + [[unroll]] + for (uint16_t i = 0; i < D; i++) + { + interleaved[i] = impl::MortonEncoder::encode(_static_cast(rhs[i])); + } + Equals equals; + return equals(_value, interleaved); + } +}; + } //namespace impl // Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 @@ -274,10 +315,10 @@ template; + using this_signed_t = code; NBL_CONSTEXPR_STATIC uint16_t TotalBitWidth = D * Bits; using storage_t = 
conditional_t<(TotalBitWidth > 16), conditional_t<(TotalBitWidth > 32), _uint64_t, uint32_t>, uint16_t>; - storage_t value; // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- @@ -325,26 +366,205 @@ struct code *this = create(cartesian); } - // This one is defined later since it requires `static_cast_helper` specialization - /** * @brief Decodes this Morton code back to a set of cartesian coordinates */ - template - explicit operator vector() const noexcept + constexpr inline explicit operator vector() const noexcept { return _static_cast, morton::code, Bits, D>>(*this); } #endif + + // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value & rhs.value; + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value | rhs.value; + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value ^ rhs.value; + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = ~value; + return retVal; + } + + // Only valid in CPP + #ifndef __HLSL_VERSION + + constexpr inline this_t operator<<(uint16_t bits) const; + + constexpr inline this_t operator>>(uint16_t bits) const; + + #endif + + // ------------------------------------------------------- UNARY ARITHMETIC OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_signed_t operator-() NBL_CONST_MEMBER_FUNC + { + left_shift_operator leftShift; + // allOnes encodes a cartesian coordinate with all values set to 1 + this_t allOnes; + 
allOnes.value = leftShift(_static_cast(1), D) - _static_cast(1); + // Using 2's complement property that arithmetic negation can be obtained by bitwise negation then adding 1 + this_signed_t retVal; + retVal.value = (operator~() + allOnes).value; + return retVal; + } + + // ------------------------------------------------------- BINARY ARITHMETIC OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v; + left_shift_operator leftShift; + this_t retVal; + retVal.value = _static_cast(uint64_t(0)); + [[unroll]] + for (uint16_t i = 0; i < D; i++) + { + // put 1 bits everywhere in the bits the current axis is not using + // then extract just the axis bits for the right hand coordinate + // carry-1 will propagate the bits across the already set bits + // then clear out the bits not belonging to current axis + // Note: Its possible to clear on `this` and fill on `rhs` but that will + // disable optimizations, we expect the compiler to optimize a lot if the + // value of `rhs` is known at compile time, e.g. 
`static_cast>(glm::ivec3(1,0,0))` + retVal.value |= ((value | (~leftShift(Mask, i))) + (rhs.value & leftShift(Mask, i))) & leftShift(Mask, i); + } + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v; + left_shift_operator leftShift; + this_t retVal; + retVal.value = _static_cast(uint64_t(0)); + [[unroll]] + for (uint16_t i = 0; i < D; i++) + { + // This is the dual trick of the one used for addition: set all other bits to 0 so borrows propagate + retVal.value |= ((value & leftShift(Mask, i)) - (rhs.value & leftShift(Mask, i))) & leftShift(Mask, i); + } + return retVal; + } + + // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return value == rhs.value; + } + + template + enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator==(NBL_CONST_REF_ARG(vector) rhs) + { + impl::Equals equals; + return equals(value, rhs); + } + + NBL_CONSTEXPR_INLINE_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return value != rhs.value; + } + + template + enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator!=(NBL_CONST_REF_ARG(vector) rhs) + { + return !operator==(rhs); + } }; } //namespace morton +template +struct left_shift_operator > +{ + using type_t = morton::code; + using storage_t = typename type_t::storage_t; + + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + left_shift_operator valueLeftShift; + type_t retVal; + // Shift every coordinate by `bits` + retVal.value = valueLeftShift(operand.value, bits * D); + return retVal; + } +}; + +template +struct arithmetic_right_shift_operator > +{ + using type_t = 
morton::code; + using storage_t = typename type_t::storage_t; + + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + arithmetic_right_shift_operator valueArithmeticRightShift; + type_t retVal; + // Shift every coordinate by `bits` + retVal.value = valueArithmeticRightShift(operand.value, bits * D); + return retVal; + } +}; + +// This one's uglier - have to unpack to get the expected behaviour +template +struct arithmetic_right_shift_operator > +{ + using type_t = morton::code; + using scalar_t = conditional_t<(Bits > 16), int32_t, int16_t>; + + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + vector cartesian = _static_cast >(operand); + cartesian >> scalar_t(bits); + return type_t::create(cartesian); + } +}; + +#ifndef __HLSL_VERSION + +template&& D* Bits <= 64) +constexpr inline morton::code morton::code::operator<<(uint16_t bits) const +{ + left_shift_operator> leftShift; + return leftShift(*this, bits); +} + +template&& D* Bits <= 64) +constexpr inline morton::code morton::code::operator>>(uint16_t bits) const +{ + arithmetic_right_shift_operator> rightShift; + return rightShift(*this, bits); +} + +#endif + // Specialize the `static_cast_helper` namespace impl { + // I must be of same signedness as the morton code, and be wide enough to hold each component template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (8 * sizeof(I) >= Bits)) struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (8 * sizeof(I) >= Bits)) > @@ -355,6 +575,7 @@ struct static_cast_helper, morton::code, Bits, D, _u using storage_t = typename morton::code, Bits, D, _uint64_t>::storage_t; arithmetic_right_shift_operator rightShift; vector cartesian; + [[unroll]] for (uint16_t i = 0; i < D; i++) { cartesian[i] = _static_cast(morton::impl::MortonDecoder::template decode(rightShift(val.value, i))); diff --git 
a/include/nbl/builtin/hlsl/portable/uint64_t.hlsl b/include/nbl/builtin/hlsl/portable/uint64_t.hlsl new file mode 100644 index 0000000000..ac081234ac --- /dev/null +++ b/include/nbl/builtin/hlsl/portable/uint64_t.hlsl @@ -0,0 +1,30 @@ +#ifndef _NBL_BUILTIN_HLSL_PORTABLE_UINT64_T_INCLUDED_ +#define _NBL_BUILTIN_HLSL_PORTABLE_UINT64_T_INCLUDED_ + +#include +#include + +// define NBL_FORCE_EMULATED_UINT_64 to force using emulated uint64 + +namespace nbl +{ +namespace hlsl +{ +template +#ifdef __HLSL_VERSION +#ifdef NBL_FORCE_EMULATED_UINT_64 +using portable_uint64_t = emulated_uint64_t; +#else +using portable_uint64_t = typename conditional::shaderInt64, uint64_t, emulated_uint64_t>::type; +#endif + +#else +using portable_uint64_t = uint64_t; +#endif + +//static_assert(sizeof(portable_uint64_t) == sizeof(uint64_t)); + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/portable/vector_t.hlsl b/include/nbl/builtin/hlsl/portable/vector_t.hlsl index ace199e20b..dcaea97739 100644 --- a/include/nbl/builtin/hlsl/portable/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/portable/vector_t.hlsl @@ -36,19 +36,37 @@ template using portable_vector_t4 = portable_vector_t; #ifdef __HLSL_VERSION +// Float template using portable_float64_t2 = portable_vector_t2 >; template using portable_float64_t3 = portable_vector_t3 >; template using portable_float64_t4 = portable_vector_t4 >; + +// Uint +template +using portable_uint64_t2 = portable_vector_t2 >; +template +using portable_uint64_t3 = portable_vector_t3 >; +template +using portable_uint64_t4 = portable_vector_t4 >; #else +// Float template using portable_float64_t2 = portable_vector_t2; template using portable_float64_t3 = portable_vector_t3; template using portable_float64_t4 = portable_vector_t4; + +// Uint +template +using portable_uint64_t2 = portable_vector_t2; +template +using portable_uint64_t3 = portable_vector_t3; +template +using portable_uint64_t4 = portable_vector_t4; #endif } diff --git 
a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index a11a26d69a..d7005a1ed6 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -219,6 +219,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/vector_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/matrix_t.hlsl") # portable LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/float64_t.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/uint64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/vector_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/matrix_t.hlsl") # ieee754 From b6b70030434018a9e70ea4c52c86d48c135cc94e Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 7 Apr 2025 19:41:08 -0300 Subject: [PATCH 018/472] Final Mortons --- .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 488 ++++++++++++++++++ .../nbl/builtin/hlsl/emulated/uint64_t.hlsl | 232 --------- .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 3 +- include/nbl/builtin/hlsl/morton.hlsl | 107 +++- .../nbl/builtin/hlsl/portable/int64_t.hlsl | 31 ++ .../nbl/builtin/hlsl/portable/uint64_t.hlsl | 30 -- .../nbl/builtin/hlsl/portable/vector_t.hlsl | 17 + src/nbl/builtin/CMakeLists.txt | 4 +- 8 files changed, 641 insertions(+), 271 deletions(-) create mode 100644 include/nbl/builtin/hlsl/emulated/int64_t.hlsl delete mode 100644 include/nbl/builtin/hlsl/emulated/uint64_t.hlsl create mode 100644 include/nbl/builtin/hlsl/portable/int64_t.hlsl delete mode 100644 include/nbl/builtin/hlsl/portable/uint64_t.hlsl diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl new file mode 100644 index 0000000000..f3269cc6ba --- /dev/null +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -0,0 +1,488 @@ +#ifndef _NBL_BUILTIN_HLSL_EMULATED_INT64_T_HLSL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_EMULATED_INT64_T_HLSL_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include 
"nbl/builtin/hlsl/functional.hlsl" +#include "nbl/builtin/hlsl/concepts/core.hlsl" + +namespace nbl +{ +namespace hlsl +{ + +struct emulated_uint64_t +{ + using storage_t = vector; + using this_t = emulated_uint64_t; + + storage_t data; + + // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- + + + #ifndef __HLSL_VERSION + + emulated_uint64_t() = default; + + #endif + + /** + * @brief Creates an `emulated_uint64_t` from a vector of two `uint32_t`s representing its bitpattern + * + * @param [in] _data Vector of `uint32_t` encoding the `uint64_t` being emulated + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) + { + this_t retVal; + retVal.data = _data; + return retVal; + } + + /** + * @brief Creates an `emulated_uint64_t` from two `uint32_t`s representing its bitpattern + * + * @param [in] hi Highest 32 bits of the `uint64` being emulated + * @param [in] lo Lowest 32 bits of the `uint64` being emulated + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) hi, NBL_CONST_REF_ARG(uint32_t) lo) + { + return create(storage_t(hi, lo)); + } + + /** + * @brief Creates an `emulated_uint64_t` from a `uint64_t`. Useful for compile-time encoding. 
+ * + * @param [in] _data `uint64_t` to be unpacked into high and low bits + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint64_t) u) + { + return create(_static_cast(u >> 32), _static_cast(u)); + } + + // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal = create(data & rhs.data); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal = create(data | rhs.data); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal = create(data ^ rhs.data); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC + { + this_t retVal = create(~data); + return retVal; + } + + // Only valid in CPP + #ifndef __HLSL_VERSION + + constexpr inline this_t operator<<(uint16_t bits) const; + + constexpr inline this_t operator>>(uint16_t bits) const; + + #endif + + // ------------------------------------------------------- ARITHMETIC OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + const spirv::AddCarryOutput lowerAddResult = addCarry(data.y, rhs.data.y); + const storage_t addResult = { data.x + rhs.data.x + lowerAddResult.carry, lowerAddResult.result }; + const this_t retVal = create(addResult); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + const spirv::SubBorrowOutput lowerSubResult = subBorrow(data.y, rhs.data.y); + const storage_t subResult = { data.x - rhs.data.x - lowerSubResult.borrow, lowerSubResult.result }; + const this_t retVal = create(subResult); + return retVal; + } + + 
// ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- + NBL_CONSTEXPR_INLINE_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return data.x == rhs.data.x && data.y == rhs.data.y; + } + + NBL_CONSTEXPR_INLINE_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return data.x != rhs.data.x || data.y != rhs.data.y; + } + + NBL_CONSTEXPR_INLINE_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + if (data.x != rhs.data.x) + return data.x < rhs.data.x; + else + return data.y < rhs.data.y; + } + + NBL_CONSTEXPR_INLINE_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + if (data.x != rhs.data.x) + return data.x > rhs.data.x; + else + return data.y > rhs.data.y; + } + + NBL_CONSTEXPR_INLINE_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return !operator>(rhs); + } + + NBL_CONSTEXPR_INLINE_FUNC bool operator>=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return !operator<(rhs); + } +}; + +struct emulated_int64_t : emulated_uint64_t +{ + using base_t = emulated_uint64_t; + using base_t::storage_t; + using this_t = emulated_int64_t; + + // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- + + + #ifndef __HLSL_VERSION + + emulated_int64_t() = default; + + #endif + + /** + * @brief Creates an `emulated_int64_t` from a vector of two `uint32_t`s representing its bitpattern + * + * @param [in] _data Vector of `uint32_t` encoding the `int64_t` being emulated + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) + { + return _static_cast(base_t::create(_data)); + } + + /** + * @brief Creates an `emulated_int64_t` from two `uint32_t`s representing its bitpattern + * + * @param [in] hi Highest 32 bits of the `int64` being emulated + * @param [in] 
lo Lowest 32 bits of the `int64` being emulated + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) hi, NBL_CONST_REF_ARG(uint32_t) lo) + { + return _static_cast(base_t::create(hi, lo)); + } + + /** + * @brief Creates an `emulated_int64_t` from a `int64_t`. Useful for compile-time encoding. + * + * @param [in] _data `int64_t` to be unpacked into high and low bits + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(int64_t) i) + { + return _static_cast(base_t::create(_static_cast(i))); + } + + // Only valid in CPP + #ifndef __HLSL_VERSION + + // Only this one needs to be redefined since it's arithmetic + constexpr inline this_t operator>>(uint16_t bits) const; + + #endif + + // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- + + // Same as unsigned but the topmost bits are compared as signed + NBL_CONSTEXPR_INLINE_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + if (data.x != rhs.data.x) + return _static_cast(data.x) < _static_cast(rhs.data.x); + else + return data.y < rhs.data.y; + } + + NBL_CONSTEXPR_INLINE_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + if (data.x != rhs.data.x) + return _static_cast(data.x) > _static_cast(rhs.data.x); + else + return data.y > rhs.data.y; + } + + NBL_CONSTEXPR_INLINE_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return !operator>(rhs); + } + + NBL_CONSTEXPR_INLINE_FUNC bool operator>=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return !operator<(rhs); + } +}; + +template<> +struct left_shift_operator +{ + using type_t = emulated_uint64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + if (!bits) + return operand; + const uint32_t _bits = uint32_t(bits); + const uint32_t 
shift = ComponentBitWidth - _bits; + // We need the `x` component of the vector (which represents the higher bits of the emulated uint64) to get the `bits` higher bits of the `y` component + const vector retValData = { (operand.data.x << _bits) | (operand.data.y >> shift), operand.data.y << _bits }; + return emulated_uint64_t::create(retValData); + } +}; + +template<> +struct arithmetic_right_shift_operator +{ + using type_t = emulated_uint64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + if (!bits) + return operand; + const uint32_t _bits = uint32_t(bits); + const uint32_t shift = ComponentBitWidth - _bits; + // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component + const vector retValData = { operand.data.x >> _bits, (operand.data.x << shift) | (operand.data.y >> _bits) }; + return emulated_uint64_t::create(retValData); + } +}; + +template<> +struct left_shift_operator +{ + using type_t = emulated_int64_t; + + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + left_shift_operator leftShift; + return _static_cast(leftShift(_static_cast(operand), bits)); + } +}; + +template<> +struct arithmetic_right_shift_operator +{ + using type_t = emulated_int64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + if (!bits) + return operand; + const uint32_t _bits = uint32_t(bits); + const uint32_t shift = ComponentBitWidth - _bits; + // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component + // Also the right shift *only* in the top bits happens as a signed arithmetic 
right shift + const vector retValData = { _static_cast(_static_cast(operand.data.x)) >> _bits, (operand.data.x << shift) | (operand.data.y >> _bits) }; + return emulated_int64_t::create(retValData); + } +}; + +#ifndef __HLSL_VERSION + +constexpr inline emulated_uint64_t emulated_uint64_t::operator<<(uint16_t bits) const +{ + left_shift_operator leftShift; + return leftShift(*this, bits); +} + +constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(uint16_t bits) const +{ + arithmetic_right_shift_operator rightShift; + return rightShift(*this, bits); +} + +constexpr inline emulated_int64_t emulated_int64_t::operator>>(uint16_t bits) const +{ + arithmetic_right_shift_operator rightShift; + return rightShift(*this, bits); +} + +#endif + +namespace impl +{ + +template<> +struct static_cast_helper +{ + using To = emulated_uint64_t; + using From = emulated_int64_t; + + // Return only the lowest bits + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + { + To retVal; + retVal.data = i.data; + return retVal; + } +}; + +template<> +struct static_cast_helper +{ + using To = emulated_int64_t; + using From = emulated_uint64_t; + + // Return only the lowest bits + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + To retVal; + retVal.data = u.data; + return retVal; + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) <= sizeof(uint32_t))) +struct static_cast_helper && (sizeof(Unsigned) <= sizeof(uint32_t))) > +{ + using To = Unsigned; + using From = emulated_uint64_t; + + // Return only the lowest bits + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + return _static_cast(u.data.y); + } +}; + +template<> +struct static_cast_helper +{ + using To = uint64_t; + using From = emulated_uint64_t; + + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + const To highBits = _static_cast(u.data.x) << To(32); + return highBits | _static_cast(u.data.y); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && 
(sizeof(Unsigned) <= sizeof(uint32_t))) +struct static_cast_helper && (sizeof(Unsigned) <= sizeof(uint32_t))) > +{ + using To = emulated_uint64_t; + using From = Unsigned; + + // Set only lower bits + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + return emulated_uint64_t::create(uint32_t(0), _static_cast(u)); + } +}; + +template<> +struct static_cast_helper +{ + using To = emulated_uint64_t; + using From = uint64_t; + + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + return emulated_uint64_t::create(u); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::SignedIntegralScalar && (sizeof(Signed) <= sizeof(uint32_t))) +struct static_cast_helper && (sizeof(Signed) <= sizeof(uint32_t))) > +{ + using To = Signed; + using From = emulated_int64_t; + + // Return only the lowest bits + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + { + return _static_cast(i.data.y); + } +}; + +template<> +struct static_cast_helper +{ + using To = int64_t; + using From = emulated_int64_t; + + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + { + const To highBits = _static_cast(i.data.x) << To(32); + return highBits | _static_cast(i.data.y); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::SignedIntegralScalar && (sizeof(Signed) <= sizeof(uint32_t))) +struct static_cast_helper && (sizeof(Signed) <= sizeof(uint32_t))) > +{ + using To = emulated_int64_t; + using From = Signed; + + // Set only lower bits + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + { + return emulated_int64_t::create(uint32_t(0), _static_cast(i)); + } +}; + +template<> +struct static_cast_helper +{ + using To = emulated_int64_t; + using From = int64_t; + + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + { + return emulated_int64_t::create(i); + } +}; + +} //namespace impl + +} //namespace nbl +} //namespace hlsl + +#ifndef __HLSL_VERSION +#define NBL_ADD_STD std:: +#else +#define NBL_ADD_STD nbl::hlsl:: +#endif + +template<> +struct NBL_ADD_STD make_unsigned : type_identity {}; + +template<> 
+struct NBL_ADD_STD make_unsigned : type_identity {}; + +template<> +struct NBL_ADD_STD make_signed : type_identity {}; + +template<> +struct NBL_ADD_STD make_signed : type_identity {}; + +#undef NBL_ADD_STD + + + +#endif diff --git a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl deleted file mode 100644 index ab08e1ff38..0000000000 --- a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl +++ /dev/null @@ -1,232 +0,0 @@ -#ifndef _NBL_BUILTIN_HLSL_EMULATED_UINT64_T_HLSL_INCLUDED_ -#define _NBL_BUILTIN_HLSL_EMULATED_UINT64_T_HLSL_INCLUDED_ - -#include "nbl/builtin/hlsl/cpp_compat.hlsl" -#include "nbl/builtin/hlsl/functional.hlsl" -#include "nbl/builtin/hlsl/concepts/core.hlsl" - -namespace nbl -{ -namespace hlsl -{ - -struct emulated_uint64_t -{ - using storage_t = vector; - using this_t = emulated_uint64_t; - - storage_t data; - - // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- - - - #ifndef __HLSL_VERSION - - emulated_uint64_t() = default; - - #endif - - /** - * @brief Creates an `emulated_uint64_t` from a vector of two `uint32_t`s representing its bitpattern - * - * @param [in] _data Vector of `uint32_t` encoding the `uint64_t` being emulated - */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) - { - this_t retVal; - retVal.data = _data; - return retVal; - } - - /** - * @brief Creates an `emulated_uint64_t` from two `uint32_t`s representing its bitpattern - * - * @param [in] hi Highest 32 bits of the `uint64` being emulated - * @param [in] lo Lowest 32 bits of the `uint64` being emulated - */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) hi, NBL_CONST_REF_ARG(uint32_t) lo) - { - return create(storage_t(hi, lo)); - } - - /** - * @brief Creates an `emulated_uint64_t` from a `uint64_t`. Useful for compile-time encoding. 
- * - * @param [in] _data `uint64_t` to be unpacked into high and low bits - */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint64_t) u) - { - return create(_static_cast(u >> 32), _static_cast(u)); - } - - // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- - - NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - this_t retVal = create(data & rhs.data); - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - this_t retVal = create(data | rhs.data); - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - this_t retVal = create(data ^ rhs.data); - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC - { - this_t retVal = create(~data); - return retVal; - } - - // Only valid in CPP - #ifndef __HLSL_VERSION - - constexpr inline this_t operator<<(uint16_t bits) const; - - constexpr inline this_t operator>>(uint16_t bits) const; - - #endif - - // ------------------------------------------------------- ARITHMETIC OPERATORS ------------------------------------------------- - - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - const spirv::AddCarryOutput lowerAddResult = addCarry(data.y, rhs.data.y); - const storage_t addResult = { data.x + rhs.data.x + lowerAddResult.carry, lowerAddResult.result }; - const this_t retVal = create(addResult); - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - const spirv::SubBorrowOutput lowerSubResult = subBorrow(data.y, rhs.data.y); - const storage_t subResult = { data.x - rhs.data.x - lowerSubResult.borrow, lowerSubResult.result }; - const this_t retVal = create(subResult); - return retVal; - } - -}; 
- -template<> -struct left_shift_operator -{ - using type_t = emulated_uint64_t; - NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); - - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) - { - if (!bits) - return operand; - const uint32_t _bits = uint32_t(bits); - const uint32_t shift = ComponentBitWidth - _bits; - const uint32_t higherBitsMask = (~uint32_t(0)) << shift; - // We need the `x` component of the vector (which represents the higher bits of the emulated uint64) to get the `bits` higher bits of the `y` component - const vector retValData = { (operand.data.x << _bits) | ((operand.data.y & higherBitsMask) >> shift), operand.data.y << _bits }; - return emulated_uint64_t::create(retValData); - } -}; - -template<> -struct arithmetic_right_shift_operator -{ - using type_t = emulated_uint64_t; - NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); - - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) - { - if (!bits) - return operand; - const uint32_t _bits = uint32_t(bits); - const uint32_t shift = ComponentBitWidth - _bits; - const uint32_t lowerBitsMask = ~uint32_t(0) >> shift; - // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component - const vector retValData = { operand.data.x >> _bits, ((operand.data.x & lowerBitsMask) << shift) | (operand.data.y >> _bits) }; - return emulated_uint64_t::create(retValData); - } -}; - -#ifndef __HLSL_VERSION - -constexpr inline emulated_uint64_t emulated_uint64_t::operator<<(uint16_t bits) const -{ - left_shift_operator leftShift; - return leftShift(*this, bits); -} - -constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(uint16_t bits) const -{ - arithmetic_right_shift_operator rightShift; - return rightShift(*this, bits); -} - -#endif - -namespace impl -{ - -template 
NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) <= sizeof(uint32_t))) -struct static_cast_helper && (sizeof(Unsigned) <= sizeof(uint32_t))) > -{ - using To = Unsigned; - using From = emulated_uint64_t; - - // Return only the lowest bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) - { - return _static_cast(u.data.y); - } -}; - -template<> -struct static_cast_helper -{ - using To = uint64_t; - using From = emulated_uint64_t; - - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) - { - const To highBits = _static_cast(u.data.x) << To(32); - return highBits | _static_cast(u.data.y); - } -}; - -template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) <= sizeof(uint32_t))) -struct static_cast_helper && (sizeof(Unsigned) <= sizeof(uint32_t))) > -{ - using To = emulated_uint64_t; - using From = Unsigned; - - // Set only lower bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) - { - return emulated_uint64_t::create(uint32_t(0), _static_cast(u)); - } -}; - -template<> -struct static_cast_helper -{ - using To = emulated_uint64_t; - using From = uint64_t; - - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) - { - return emulated_uint64_t::create(u); - } -}; - -} //namespace impl - -} //namespace nbl -} //namespace hlsl - - - -#endif diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index a106cec440..65a97bbe68 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -2,7 +2,7 @@ #define _NBL_BUILTIN_HLSL_EMULATED_VECTOR_T_HLSL_INCLUDED_ #include -#include +#include #include #include #include @@ -331,6 +331,7 @@ struct emulated_vector : CRTP DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_uint64_t) + DEFINE_OPERATORS_FOR_TYPE(emulated_int64_t) DEFINE_OPERATORS_FOR_TYPE(float32_t) DEFINE_OPERATORS_FOR_TYPE(float64_t) 
DEFINE_OPERATORS_FOR_TYPE(uint16_t) diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 499deb1db4..9c834424a8 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -5,7 +5,7 @@ #include "nbl/builtin/hlsl/concepts/core.hlsl" #include "nbl/builtin/hlsl/bit.hlsl" #include "nbl/builtin/hlsl/functional.hlsl" -#include "nbl/builtin/hlsl/emulated/uint64_t.hlsl" +#include "nbl/builtin/hlsl/emulated/int64_t.hlsl" #include "nbl/builtin/hlsl/mpl.hlsl" namespace nbl @@ -275,14 +275,15 @@ struct Equals; template struct Equals { - NBL_CONSTEXPR_INLINE_FUNC vector operator()(NBL_CONST_REF_ARG(storage_t) _value, NBL_CONST_REF_ARG(vector) rhs) + NBL_CONSTEXPR_INLINE_FUNC vector operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v; + left_shift_operator leftShift; vector retVal; [[unroll]] for (uint16_t i = 0; i < D; i++) { - retVal[i] = (_value & rhs[i]) == rhs[i]; + retVal[i] = (value & leftShift(Mask, i)) == leftShift(rhs[i], i); } return retVal; } @@ -293,7 +294,7 @@ struct Equals { template NBL_CONSTEXPR_INLINE_FUNC enable_if_t&& is_scalar_v && (is_signed_v == Signed), vector > - operator()(NBL_CONST_REF_ARG(storage_t) _value, NBL_CONST_REF_ARG(vector) rhs) + operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { using U = make_unsigned_t; vector interleaved; @@ -303,10 +304,77 @@ struct Equals interleaved[i] = impl::MortonEncoder::encode(_static_cast(rhs[i])); } Equals equals; - return equals(_value, interleaved); + return equals(value, interleaved); + } +}; + +template +struct BaseComparison; + +// Aux method for extracting highest bit, used by the comparison below +template +NBL_CONSTEXPR_INLINE_FUNC storage_t extractHighestBit(storage_t value, uint16_t coord) +{ + // Like above, if the number encoded in `coord` gets `bits(coord) = ceil((BitWidth - coord)/D)` bits for 
representation, then the highest index of these + // bits is `bits(coord) - 1` + const uint16_t coordHighestBitIdx = Bits / D - ((coord < Bits % D) ? uint16_t(0) : uint16_t(1)); + // This is the index of that bit as an index in the encoded value + const uint16_t shift = coordHighestBitIdx * D + coord; + left_shift_operator leftShift; + return value & leftShift(_static_cast(uint16_t(1)), shift); +} + +template +struct BaseComparison +{ + NBL_CONSTEXPR_INLINE_FUNC vector operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + { + NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v; + left_shift_operator leftShift; + vector retVal; + ComparisonOp comparison; + [[unroll]] + for (uint16_t i = 0; i < D; i++) + { + storage_t thisCoord = value & leftShift(Mask, i); + storage_t rhsCoord = leftShift(rhs[i], i); + // If coordinate is negative, we add 1s in every bit not corresponding to coord + if (extractHighestBit(thisCoord) != _static_cast(uint64_t(0))) + thisCoord = thisCoord | ~leftShift(Mask, i); + if (extractHighestBit(rhsCoord) != _static_cast(uint64_t(0))) + rhsCoord = rhsCoord | ~leftShift(Mask, i); + retVal[i] = comparison(thisCoord, rhsCoord); + } + return retVal; + } +}; + +template +struct BaseComparison +{ + template + NBL_CONSTEXPR_INLINE_FUNC enable_if_t&& is_scalar_v && (is_signed_v == Signed), vector > + operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + { + using U = make_unsigned_t; + vector interleaved; + [[unroll]] + for (uint16_t i = 0; i < D; i++) + { + interleaved[i] = impl::MortonEncoder::encode(_static_cast(rhs[i])); + } + BaseComparison baseComparison; + return baseComparison(value, interleaved); } }; +template +struct LessThan : BaseComparison > {}; + +template +struct LessEquals : BaseComparison > {}; + + } //namespace impl // Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 @@ -490,8 +558,35 @@ struct code template 
enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator!=(NBL_CONST_REF_ARG(vector) rhs) { - return !operator==(rhs); + return !operator== (rhs); + } + + template + enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator<(NBL_CONST_REF_ARG(vector) rhs) + { + impl::LessThan lessThan; + return lessThan(value, rhs); } + + template + enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator<=(NBL_CONST_REF_ARG(vector) rhs) + { + impl::LessEquals lessEquals; + return lessEquals(value, rhs); + } + + template + enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator>(NBL_CONST_REF_ARG(vector) rhs) + { + return !operator<= (rhs); + } + + template + enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator>=(NBL_CONST_REF_ARG(vector) rhs) + { + return !operator< (rhs); + } + }; } //namespace morton diff --git a/include/nbl/builtin/hlsl/portable/int64_t.hlsl b/include/nbl/builtin/hlsl/portable/int64_t.hlsl new file mode 100644 index 0000000000..6929e160fa --- /dev/null +++ b/include/nbl/builtin/hlsl/portable/int64_t.hlsl @@ -0,0 +1,31 @@ +#ifndef _NBL_BUILTIN_HLSL_PORTABLE_INT64_T_INCLUDED_ +#define _NBL_BUILTIN_HLSL_PORTABLE_INT64_T_INCLUDED_ + +#include +#include + +// define NBL_FORCE_EMULATED_INT_64 to force using emulated int64 types + +namespace nbl +{ +namespace hlsl +{ +template +#ifdef __HLSL_VERSION +#ifdef NBL_FORCE_EMULATED_INT_64 +using portable_uint64_t = emulated_uint64_t; +using portable_int64_t = emulated_int64_t; +#else +using portable_uint64_t = typename conditional::shaderInt64, uint64_t, emulated_uint64_t>::type; +using portable_int64_t = typename conditional::shaderInt64, int64_t, emulated_int64_t>::type; +#endif + +#else +using portable_uint64_t = uint64_t; +using portable_int64_t = int64_t; +#endif + +} +} + +#endif \ No newline at end of file diff --git 
a/include/nbl/builtin/hlsl/portable/uint64_t.hlsl b/include/nbl/builtin/hlsl/portable/uint64_t.hlsl deleted file mode 100644 index ac081234ac..0000000000 --- a/include/nbl/builtin/hlsl/portable/uint64_t.hlsl +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef _NBL_BUILTIN_HLSL_PORTABLE_UINT64_T_INCLUDED_ -#define _NBL_BUILTIN_HLSL_PORTABLE_UINT64_T_INCLUDED_ - -#include -#include - -// define NBL_FORCE_EMULATED_UINT_64 to force using emulated uint64 - -namespace nbl -{ -namespace hlsl -{ -template -#ifdef __HLSL_VERSION -#ifdef NBL_FORCE_EMULATED_UINT_64 -using portable_uint64_t = emulated_uint64_t; -#else -using portable_uint64_t = typename conditional::shaderInt64, uint64_t, emulated_uint64_t>::type; -#endif - -#else -using portable_uint64_t = uint64_t; -#endif - -//static_assert(sizeof(portable_uint64_t) == sizeof(uint64_t)); - -} -} - -#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/portable/vector_t.hlsl b/include/nbl/builtin/hlsl/portable/vector_t.hlsl index dcaea97739..16d5b40f81 100644 --- a/include/nbl/builtin/hlsl/portable/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/portable/vector_t.hlsl @@ -3,6 +3,7 @@ #include #include +#include namespace nbl { @@ -51,6 +52,14 @@ template using portable_uint64_t3 = portable_vector_t3 >; template using portable_uint64_t4 = portable_vector_t4 >; + +//Int +template +using portable_int64_t2 = portable_vector_t2 >; +template +using portable_int64_t3 = portable_vector_t3 >; +template +using portable_int64_t4 = portable_vector_t4 >; #else // Float template @@ -67,6 +76,14 @@ template using portable_uint64_t3 = portable_vector_t3; template using portable_uint64_t4 = portable_vector_t4; + +// Int +template +using portable_int64_t2 = portable_vector_t2; +template +using portable_int64_t3 = portable_vector_t3; +template +using portable_int64_t4 = portable_vector_t4; #endif } diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index d7005a1ed6..f03d8ae22c 100644 --- 
a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -214,12 +214,12 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/macros.h") # emulated LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/float64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/float64_t_impl.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/uint64_t.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/int64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/vector_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/matrix_t.hlsl") # portable LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/float64_t.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/uint64_t.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/int64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/vector_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/matrix_t.hlsl") # ieee754 From 60ff99a4dadfdecc5abf59e4fb2d95e62d6ed929 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 7 Apr 2025 23:20:42 -0300 Subject: [PATCH 019/472] Clean up the emulated int code, fix some constant creation in the morton code --- .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 317 ++++++++---------- include/nbl/builtin/hlsl/morton.hlsl | 15 +- 2 files changed, 161 insertions(+), 171 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index f3269cc6ba..cad10242f2 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -5,31 +5,35 @@ #include "nbl/builtin/hlsl/functional.hlsl" #include "nbl/builtin/hlsl/concepts/core.hlsl" +// Didn't bother with operator*, operator/, implement if you need them. 
Multiplication is pretty straightforward, division requires switching on signs +// and whether the topmost bits of the divisor are equal to 0 +// - Francisco + namespace nbl { namespace hlsl { -struct emulated_uint64_t +template +struct emulated_int64_base { - using storage_t = vector; - using this_t = emulated_uint64_t; + using storage_t = vector; + using this_t = emulated_int64_base; - storage_t data; + storage_t data; // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- - #ifndef __HLSL_VERSION - emulated_uint64_t() = default; + emulated_int64_base() = default; #endif /** - * @brief Creates an `emulated_uint64_t` from a vector of two `uint32_t`s representing its bitpattern + * @brief Creates an `emulated_int64` from a vector of two `uint32_t`s representing its bitpattern * - * @param [in] _data Vector of `uint32_t` encoding the `uint64_t` being emulated + * @param [in] _data Vector of `uint32_t` encoding the `uint64_t/int64_t` being emulated */ NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) { @@ -39,10 +43,10 @@ struct emulated_uint64_t } /** - * @brief Creates an `emulated_uint64_t` from two `uint32_t`s representing its bitpattern + * @brief Creates an `emulated_int64` from two `uint32_t`s representing its bitpattern * - * @param [in] hi Highest 32 bits of the `uint64` being emulated - * @param [in] lo Lowest 32 bits of the `uint64` being emulated + * @param [in] hi Highest 32 bits of the `uint64_t/int64_t` being emulated + * @param [in] lo Lowest 32 bits of the `uint64_t/int64_t` being emulated */ NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) hi, NBL_CONST_REF_ARG(uint32_t) lo) { @@ -50,9 +54,9 @@ struct emulated_uint64_t } /** - * @brief Creates an `emulated_uint64_t` from a `uint64_t`. Useful for compile-time encoding. + * @brief Creates an `emulated_int64_base` from a `uint64_t` with its bitpattern. 
Useful for compile-time encoding. * - * @param [in] _data `uint64_t` to be unpacked into high and low bits + * @param [in] u `uint64_t` to be unpacked into high and low bits */ NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint64_t) u) { @@ -126,7 +130,15 @@ struct emulated_uint64_t NBL_CONSTEXPR_INLINE_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { if (data.x != rhs.data.x) - return data.x < rhs.data.x; + { + // If signed, compare topmost bits as signed + NBL_IF_CONSTEXPR(Signed) + return _static_cast(data.x) < _static_cast(rhs.data.x); + // If unsigned, compare them as-is + else + return data.x < rhs.data.x; + } + // Lower bits are positive in both signed and unsigned else return data.y < rhs.data.y; } @@ -134,7 +146,14 @@ struct emulated_uint64_t NBL_CONSTEXPR_INLINE_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { if (data.x != rhs.data.x) - return data.x > rhs.data.x; + { + // If signed, compare topmost bits as signed + NBL_IF_CONSTEXPR(Signed) + return _static_cast(data.x) > _static_cast(rhs.data.x); + // If unsigned, compare them as-is + else + return data.x > rhs.data.x; + } else return data.y > rhs.data.y; } @@ -150,94 +169,15 @@ struct emulated_uint64_t } }; -struct emulated_int64_t : emulated_uint64_t -{ - using base_t = emulated_uint64_t; - using base_t::storage_t; - using this_t = emulated_int64_t; - - // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- - +using emulated_uint64_t = emulated_int64_base; +using emulated_int64_t = emulated_int64_base; - #ifndef __HLSL_VERSION - - emulated_int64_t() = default; - - #endif +// ---------------------- Functional operatos ------------------------ - /** - * @brief Creates an `emulated_int64_t` from a vector of two `uint32_t`s representing its bitpattern - * - * @param [in] _data Vector of `uint32_t` encoding the `int64_t` being emulated - */ - 
NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) - { - return _static_cast(base_t::create(_data)); - } - - /** - * @brief Creates an `emulated_int64_t` from two `uint32_t`s representing its bitpattern - * - * @param [in] hi Highest 32 bits of the `int64` being emulated - * @param [in] lo Lowest 32 bits of the `int64` being emulated - */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) hi, NBL_CONST_REF_ARG(uint32_t) lo) - { - return _static_cast(base_t::create(hi, lo)); - } - - /** - * @brief Creates an `emulated_int64_t` from a `int64_t`. Useful for compile-time encoding. - * - * @param [in] _data `int64_t` to be unpacked into high and low bits - */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(int64_t) i) - { - return _static_cast(base_t::create(_static_cast(i))); - } - - // Only valid in CPP - #ifndef __HLSL_VERSION - - // Only this one needs to be redefined since it's arithmetic - constexpr inline this_t operator>>(uint16_t bits) const; - - #endif - - // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- - - // Same as unsigned but the topmost bits are compared as signed - NBL_CONSTEXPR_INLINE_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - if (data.x != rhs.data.x) - return _static_cast(data.x) < _static_cast(rhs.data.x); - else - return data.y < rhs.data.y; - } - - NBL_CONSTEXPR_INLINE_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - if (data.x != rhs.data.x) - return _static_cast(data.x) > _static_cast(rhs.data.x); - else - return data.y > rhs.data.y; - } - - NBL_CONSTEXPR_INLINE_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - return !operator>(rhs); - } - - NBL_CONSTEXPR_INLINE_FUNC bool operator>=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - return !operator<(rhs); - } -}; - -template<> -struct left_shift_operator 
+template +struct left_shift_operator > { - using type_t = emulated_uint64_t; + using type_t = emulated_int64_base; NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) @@ -248,7 +188,7 @@ struct left_shift_operator const uint32_t shift = ComponentBitWidth - _bits; // We need the `x` component of the vector (which represents the higher bits of the emulated uint64) to get the `bits` higher bits of the `y` component const vector retValData = { (operand.data.x << _bits) | (operand.data.y >> shift), operand.data.y << _bits }; - return emulated_uint64_t::create(retValData); + return type_t::create(retValData); } }; @@ -270,18 +210,6 @@ struct arithmetic_right_shift_operator } }; -template<> -struct left_shift_operator -{ - using type_t = emulated_int64_t; - - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) - { - left_shift_operator leftShift; - return _static_cast(leftShift(_static_cast(operand), bits)); - } -}; - template<> struct arithmetic_right_shift_operator { @@ -303,7 +231,8 @@ struct arithmetic_right_shift_operator #ifndef __HLSL_VERSION -constexpr inline emulated_uint64_t emulated_uint64_t::operator<<(uint16_t bits) const +template +constexpr inline emulated_int64_base emulated_int64_base::operator<<(uint16_t bits) const { left_shift_operator leftShift; return leftShift(*this, bits); @@ -356,113 +285,163 @@ struct static_cast_helper } }; -template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) <= sizeof(uint32_t))) -struct static_cast_helper && (sizeof(Unsigned) <= sizeof(uint32_t))) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t)) && (is_signed_v == Signed)) +struct static_cast_helper NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t))) > { - using To = Unsigned; - using From = emulated_uint64_t; + 
using To = I; + using From = emulated_int64_base; // Return only the lowest bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From val) { - return _static_cast(u.data.y); + return _static_cast(val.data.y); } }; -template<> -struct static_cast_helper +template NBL_PARTIAL_REQ_TOP((is_same_v || is_same_v) && (is_signed_v == Signed)) +struct static_cast_helper NBL_PARTIAL_REQ_BOT((is_same_v || is_same_v) && (is_signed_v == Signed)) > { - using To = uint64_t; - using From = emulated_uint64_t; + using To = I; + using From = emulated_int64_base; - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From val) { - const To highBits = _static_cast(u.data.x) << To(32); - return highBits | _static_cast(u.data.y); + const To highBits = _static_cast(val.data.x) << To(32); + return highBits | _static_cast(val.data.y); } }; -template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) <= sizeof(uint32_t))) -struct static_cast_helper && (sizeof(Unsigned) <= sizeof(uint32_t))) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t)) && (is_signed_v == Signed)) +struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t)) && (is_signed_v == Signed)) > { - using To = emulated_uint64_t; - using From = Unsigned; + using To = emulated_int64_base; + using From = I; // Set only lower bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) { - return emulated_uint64_t::create(uint32_t(0), _static_cast(u)); + return To::create(uint32_t(0), _static_cast(i)); } }; -template<> -struct static_cast_helper +template NBL_PARTIAL_REQ_TOP((is_same_v || is_same_v) && (is_signed_v == Signed)) +struct static_cast_helper, I NBL_PARTIAL_REQ_BOT((is_same_v || is_same_v) && (is_signed_v == Signed)) > { - using To = emulated_uint64_t; - using From = uint64_t; + using 
To = emulated_int64_base; + using From = I; - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) { - return emulated_uint64_t::create(u); + return To::create(_static_cast(i)); } }; -template NBL_PARTIAL_REQ_TOP(concepts::SignedIntegralScalar && (sizeof(Signed) <= sizeof(uint32_t))) -struct static_cast_helper && (sizeof(Signed) <= sizeof(uint32_t))) > +} //namespace impl + +// ---------------------- STD arithmetic operators ------------------------ +// Specializations of the structs found in functional.hlsl +// These all have to be specialized because of the identity that can't be initialized inside the struct definition + +template +struct plus > { - using To = Signed; - using From = emulated_int64_t; + using type_t = emulated_int64_base; - // Return only the lowest bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + type_t operator()(NBL_CONST_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) { - return _static_cast(i.data.y); + return lhs + rhs; } + + #ifndef __HLSL_VERSION + NBL_CONSTEXPR_STATIC_INLINE type_t identity = _static_cast(uint64_t(0)); + #else + NBL_CONSTEXPR_STATIC_INLINE type_t identity; + #endif }; -template<> -struct static_cast_helper +template +struct minus > { - using To = int64_t; - using From = emulated_int64_t; + using type_t = emulated_int64_base; - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + type_t operator()(NBL_CONST_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) { - const To highBits = _static_cast(i.data.x) << To(32); - return highBits | _static_cast(i.data.y); + return lhs - rhs; } + + #ifndef __HLSL_VERSION + NBL_CONSTEXPR_STATIC_INLINE type_t identity = _static_cast(uint64_t(0)); + #else + NBL_CONSTEXPR_STATIC_INLINE type_t identity; + #endif }; -template NBL_PARTIAL_REQ_TOP(concepts::SignedIntegralScalar && (sizeof(Signed) <= sizeof(uint32_t))) -struct static_cast_helper && (sizeof(Signed) <= sizeof(uint32_t))) > -{ - using To = emulated_int64_t; - using From = 
Signed; +#ifdef __HLSL_VERSION +template<> +NBL_CONSTEXPR emulated_uint64_t plus::identity = _static_cast(uint64_t(0)); +template<> +NBL_CONSTEXPR emulated_int64_t plus::identity = _static_cast(int64_t(0)); +template<> +NBL_CONSTEXPR emulated_uint64_t minus::identity = _static_cast(uint64_t(0)); +template<> +NBL_CONSTEXPR emulated_int64_t minus::identity = _static_cast(int64_t(0)); +#endif - // Set only lower bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) +// --------------------------------- Compound assignment operators ------------------------------------------ +// Specializations of the structs found in functional.hlsl + +template +struct plus_assign > +{ + using type_t = emulated_int64_base; + using base_t = plus; + base_t baseOp; + void operator()(NBL_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) { - return emulated_int64_t::create(uint32_t(0), _static_cast(i)); + lhs = baseOp(lhs, rhs); } + + #ifndef __HLSL_VERSION + NBL_CONSTEXPR_STATIC_INLINE type_t identity = base_t::identity; + #else + NBL_CONSTEXPR_STATIC_INLINE type_t identity; + #endif }; -template<> -struct static_cast_helper +template +struct minus_assign > { - using To = emulated_int64_t; - using From = int64_t; - - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + using type_t = emulated_int64_base; + using base_t = minus; + base_t baseOp; + void operator()(NBL_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) { - return emulated_int64_t::create(i); + lhs = baseOp(lhs, rhs); } + + #ifndef __HLSL_VERSION + NBL_CONSTEXPR_STATIC_INLINE type_t identity = base_t::identity; + #else + NBL_CONSTEXPR_STATIC_INLINE type_t identity; + #endif }; -} //namespace impl +#ifdef __HLSL_VERSION +template<> +NBL_CONSTEXPR emulated_uint64_t plus_assign::identity = plus::identity; +template<> +NBL_CONSTEXPR emulated_int64_t plus_assign::identity = plus::identity; +template<> +NBL_CONSTEXPR emulated_uint64_t minus_assign::identity = minus::identity; +template<> +NBL_CONSTEXPR emulated_int64_t 
minus_assign::identity = minus::identity; +#endif } //namespace nbl } //namespace hlsl +// Declare them as signed/unsigned versions of each other + #ifndef __HLSL_VERSION #define NBL_ADD_STD std:: #else diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 9c834424a8..e2ae3d8b0a 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -39,17 +39,28 @@ NBL_CONSTEXPR T decode_mask_v = decode_mask::value; // --------------------------------------------------------- MORTON ENCODE/DECODE MASKS --------------------------------------------------- // Proper encode masks (either generic `T array[masksPerDImension]` or `morton_mask`) impossible to have until at best HLSL202y +#ifndef __HLSL_VERSION + #define NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, HEX_VALUE) template struct morton_mask_##DIM##_##MASK \ {\ NBL_CONSTEXPR_STATIC_INLINE T value = _static_cast(HEX_VALUE);\ }; -#ifndef __HLSL_VERSION - #define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_VALUE) #else +#define NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, HEX_VALUE) template struct morton_mask_##DIM##_##MASK \ +{\ + NBL_CONSTEXPR_STATIC_INLINE T value;\ +};\ +template<>\ +NBL_CONSTEXPR_STATIC_INLINE uint16_t morton_mask_##DIM##_##MASK##::value = _static_cast(HEX_VALUE);\ +template<>\ +NBL_CONSTEXPR_STATIC_INLINE uint32_t morton_mask_##DIM##_##MASK##::value = _static_cast(HEX_VALUE);\ +template<>\ +NBL_CONSTEXPR_STATIC_INLINE uint64_t morton_mask_##DIM##_##MASK##::value = _static_cast(HEX_VALUE);\ + #define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_VALUE) template<> struct morton_mask_##DIM##_##MASK##\ {\ NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t value;\ From 55601628733ca20218f0c13d481e0c1df29bed1a Mon Sep 17 00:00:00 2001 From: Fletterio Date: Tue, 8 Apr 2025 19:44:15 -0300 Subject: [PATCH 020/472] Addressing latest PR review. 
Generic overloads for of different functional structs blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7325 --- .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 218 +++++++++--------- include/nbl/builtin/hlsl/functional.hlsl | 102 ++++++++ include/nbl/builtin/hlsl/morton.hlsl | 2 + .../nbl/builtin/hlsl/portable/int64_t.hlsl | 7 +- 4 files changed, 218 insertions(+), 111 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index cad10242f2..45cb82ed78 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -4,6 +4,7 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" #include "nbl/builtin/hlsl/functional.hlsl" #include "nbl/builtin/hlsl/concepts/core.hlsl" +#include "nbl/builtin/hlsl/bit.hlsl" // Didn't bother with operator*, operator/, implement if you need them. Multiplication is pretty straightforward, division requires switching on signs // and whether the topmost bits of the divisor are equal to 0 @@ -35,7 +36,7 @@ struct emulated_int64_base * * @param [in] _data Vector of `uint32_t` encoding the `uint64_t/int64_t` being emulated */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) + NBL_CONSTEXPR_STATIC_INLINE_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) { this_t retVal; retVal.data = _data; @@ -48,19 +49,9 @@ struct emulated_int64_base * @param [in] hi Highest 32 bits of the `uint64_t/int64_t` being emulated * @param [in] lo Lowest 32 bits of the `uint64_t/int64_t` being emulated */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) hi, NBL_CONST_REF_ARG(uint32_t) lo) + NBL_CONSTEXPR_STATIC_INLINE_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) lo, NBL_CONST_REF_ARG(uint32_t) hi) { - return create(storage_t(hi, lo)); - } - - /** - * @brief Creates an `emulated_int64_base` from a `uint64_t` with its bitpattern. Useful for compile-time encoding. 
- * - * @param [in] u `uint64_t` to be unpacked into high and low bits - */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint64_t) u) - { - return create(_static_cast(u >> 32), _static_cast(u)); + return create(storage_t(lo, hi)); } // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- @@ -92,9 +83,9 @@ struct emulated_int64_base // Only valid in CPP #ifndef __HLSL_VERSION - constexpr inline this_t operator<<(uint16_t bits) const; + constexpr inline this_t operator<<(this_t bits) const; - constexpr inline this_t operator>>(uint16_t bits) const; + constexpr inline this_t operator>>(this_t bits) const; #endif @@ -102,16 +93,16 @@ struct emulated_int64_base NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - const spirv::AddCarryOutput lowerAddResult = addCarry(data.y, rhs.data.y); - const storage_t addResult = { data.x + rhs.data.x + lowerAddResult.carry, lowerAddResult.result }; + const spirv::AddCarryOutput lowerAddResult = addCarry(data.x, rhs.data.x); + const storage_t addResult = { lowerAddResult.result, data.y + rhs.data.y + lowerAddResult.carry }; const this_t retVal = create(addResult); return retVal; } NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - const spirv::SubBorrowOutput lowerSubResult = subBorrow(data.y, rhs.data.y); - const storage_t subResult = { data.x - rhs.data.x - lowerSubResult.borrow, lowerSubResult.result }; + const spirv::SubBorrowOutput lowerSubResult = subBorrow(data.x, rhs.data.x); + const storage_t subResult = { lowerSubResult.result, data.y - rhs.data.y - lowerSubResult.borrow }; const this_t retVal = create(subResult); return retVal; } @@ -172,86 +163,6 @@ struct emulated_int64_base using emulated_uint64_t = emulated_int64_base; using emulated_int64_t = emulated_int64_base; -// ---------------------- Functional operatos ------------------------ - 
-template -struct left_shift_operator > -{ - using type_t = emulated_int64_base; - NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); - - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) - { - if (!bits) - return operand; - const uint32_t _bits = uint32_t(bits); - const uint32_t shift = ComponentBitWidth - _bits; - // We need the `x` component of the vector (which represents the higher bits of the emulated uint64) to get the `bits` higher bits of the `y` component - const vector retValData = { (operand.data.x << _bits) | (operand.data.y >> shift), operand.data.y << _bits }; - return type_t::create(retValData); - } -}; - -template<> -struct arithmetic_right_shift_operator -{ - using type_t = emulated_uint64_t; - NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); - - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) - { - if (!bits) - return operand; - const uint32_t _bits = uint32_t(bits); - const uint32_t shift = ComponentBitWidth - _bits; - // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component - const vector retValData = { operand.data.x >> _bits, (operand.data.x << shift) | (operand.data.y >> _bits) }; - return emulated_uint64_t::create(retValData); - } -}; - -template<> -struct arithmetic_right_shift_operator -{ - using type_t = emulated_int64_t; - NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); - - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) - { - if (!bits) - return operand; - const uint32_t _bits = uint32_t(bits); - const uint32_t shift = ComponentBitWidth - _bits; - // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component - // Also the right 
shift *only* in the top bits happens as a signed arithmetic right shift - const vector retValData = { _static_cast(_static_cast(operand.data.x)) >> _bits, (operand.data.x << shift) | (operand.data.y >> _bits) }; - return emulated_int64_t::create(retValData); - } -}; - -#ifndef __HLSL_VERSION - -template -constexpr inline emulated_int64_base emulated_int64_base::operator<<(uint16_t bits) const -{ - left_shift_operator leftShift; - return leftShift(*this, bits); -} - -constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(uint16_t bits) const -{ - arithmetic_right_shift_operator rightShift; - return rightShift(*this, bits); -} - -constexpr inline emulated_int64_t emulated_int64_t::operator>>(uint16_t bits) const -{ - arithmetic_right_shift_operator rightShift; - return rightShift(*this, bits); -} - -#endif - namespace impl { @@ -285,7 +196,7 @@ struct static_cast_helper } }; -template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t)) && (is_signed_v == Signed)) +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t))) struct static_cast_helper NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t))) > { using To = I; @@ -294,25 +205,24 @@ struct static_cast_helper NBL_PARTIAL_REQ_BOT(con // Return only the lowest bits NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From val) { - return _static_cast(val.data.y); + return _static_cast(val.data.x); } }; -template NBL_PARTIAL_REQ_TOP((is_same_v || is_same_v) && (is_signed_v == Signed)) -struct static_cast_helper NBL_PARTIAL_REQ_BOT((is_same_v || is_same_v) && (is_signed_v == Signed)) > +template NBL_PARTIAL_REQ_TOP(is_same_v || is_same_v) +struct static_cast_helper NBL_PARTIAL_REQ_BOT(is_same_v || is_same_v) > { using To = I; using From = emulated_int64_base; NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From val) { - const To highBits = _static_cast(val.data.x) << To(32); - return highBits | _static_cast(val.data.y); + return 
bit_cast(val.data); } }; -template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t)) && (is_signed_v == Signed)) -struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t)) && (is_signed_v == Signed)) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t))) +struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t))) > { using To = emulated_int64_base; using From = I; @@ -324,20 +234,108 @@ struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(con } }; -template NBL_PARTIAL_REQ_TOP((is_same_v || is_same_v) && (is_signed_v == Signed)) -struct static_cast_helper, I NBL_PARTIAL_REQ_BOT((is_same_v || is_same_v) && (is_signed_v == Signed)) > +template NBL_PARTIAL_REQ_TOP(is_same_v || is_same_v ) +struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(is_same_v || is_same_v) > { using To = emulated_int64_base; using From = I; NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) { - return To::create(_static_cast(i)); + To retVal; + retVal.data = bit_cast(i); + return retVal; } }; } //namespace impl +// ---------------------- Functional operators ------------------------ + +template +struct left_shift_operator > +{ + using type_t = emulated_int64_base; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + // Can only be defined with `_bits` being of `type_t`, see: + //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) + { + const uint32_t bits = _static_cast(_bits); + if (!bits) + return operand; + const uint32_t shift = ComponentBitWidth - bits; + // We need the `x` component of the vector (which represents the higher bits of the emulated uint64) to get the `bits` higher bits of the `y` component + const vector retValData = { (operand.data.x << bits) | (operand.data.y >> shift), 
operand.data.y << bits }; + return type_t::create(retValData); + } +}; + +template<> +struct arithmetic_right_shift_operator +{ + using type_t = emulated_uint64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + // Can only be defined with `_bits` being of `type_t`, see: + //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) + { + const uint32_t bits = _static_cast(_bits); + if (!bits) + return operand; + const uint32_t shift = ComponentBitWidth - bits; + // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component + const vector retValData = { operand.data.x >> bits, (operand.data.x << shift) | (operand.data.y >> bits) }; + return emulated_uint64_t::create(retValData); + } +}; + +template<> +struct arithmetic_right_shift_operator +{ + using type_t = emulated_int64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + // Can only be defined with `_bits` being of `type_t`, see: + //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) + { + const uint32_t bits = _static_cast(_bits); + if (!bits) + return operand; + const uint32_t shift = ComponentBitWidth - bits; + // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component + // Also the right shift *only* in the top bits happens as a signed arithmetic right shift + const vector retValData = { _static_cast(_static_cast(operand.data.x)) >> bits, (operand.data.x << shift) | (operand.data.y >> bits) }; + return emulated_int64_t::create(retValData); + } +}; + +#ifndef __HLSL_VERSION + +template +constexpr inline emulated_int64_base 
emulated_int64_base::operator<<(this_t bits) const +{ + left_shift_operator leftShift; + return leftShift(*this, bits); +} + +constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(this_t bits) const +{ + arithmetic_right_shift_operator rightShift; + return rightShift(*this, bits); +} + +constexpr inline emulated_int64_t emulated_int64_t::operator>>(this_t bits) const +{ + arithmetic_right_shift_operator rightShift; + return rightShift(*this, bits); +} + +#endif + // ---------------------- STD arithmetic operators ------------------------ // Specializations of the structs found in functional.hlsl // These all have to be specialized because of the identity that can't be initialized inside the struct definition diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index e5486e2727..cc95633f44 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -7,6 +7,7 @@ #include "nbl/builtin/hlsl/glsl_compat/core.hlsl" #include "nbl/builtin/hlsl/limits.hlsl" +#include "nbl/builtin/hlsl/concepts/vector.hlsl" namespace nbl @@ -217,6 +218,56 @@ struct left_shift_operator } }; +template NBL_PARTIAL_REQ_TOP(concepts::IntVector) +struct left_shift_operator) > +{ + using type_t = T; + using scalar_t = scalar_type_t; + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + return operand << bits; + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + { + return operand << bits; + } +}; + +template NBL_PARTIAL_REQ_TOP(! 
(concepts::IntVector) && concepts::Vectorial) +struct left_shift_operator) && concepts::Vectorial) > +{ + using type_t = T; + using scalar_t = typename vector_traits::scalar_type; + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + left_shift_operator leftShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + shifted.setComponent(i, leftShift(operand.getComponent(i), bits.getComponent(i))); + } + return shifted; + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + { + NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + left_shift_operator leftShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + shifted.setComponent(i, leftShift(operand.getComponent(i), bits)); + } + return shifted; + } +}; + template struct arithmetic_right_shift_operator { @@ -228,6 +279,57 @@ struct arithmetic_right_shift_operator } }; +template NBL_PARTIAL_REQ_TOP(concepts::IntVector) +struct arithmetic_right_shift_operator) > +{ + using type_t = T; + using scalar_t = scalar_type_t; + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + return operand >> bits; + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + { + return operand >> bits; + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::Vectorial) +struct arithmetic_right_shift_operator) > +{ + using type_t = T; + using scalar_t = typename vector_traits::scalar_type; + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + arithmetic_right_shift_operator rightShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + shifted.setComponent(i, 
rightShift(operand.getComponent(i), bits.getComponent(i))); + } + return shifted; + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + { + NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + arithmetic_right_shift_operator rightShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + shifted.setComponent(i, rightShift(operand.getComponent(i), bits)); + } + return shifted; + } +}; + +// Left unimplemented for vectorial types by default template struct logical_right_shift_operator { diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index e2ae3d8b0a..ea583fddfa 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -8,6 +8,8 @@ #include "nbl/builtin/hlsl/emulated/int64_t.hlsl" #include "nbl/builtin/hlsl/mpl.hlsl" +// TODO: mega macro to get functional plus, minus, plus_assign, minus_assign + namespace nbl { namespace hlsl diff --git a/include/nbl/builtin/hlsl/portable/int64_t.hlsl b/include/nbl/builtin/hlsl/portable/int64_t.hlsl index 6929e160fa..2dffa40a2d 100644 --- a/include/nbl/builtin/hlsl/portable/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/portable/int64_t.hlsl @@ -10,18 +10,23 @@ namespace nbl { namespace hlsl { -template #ifdef __HLSL_VERSION #ifdef NBL_FORCE_EMULATED_INT_64 +template using portable_uint64_t = emulated_uint64_t; +template using portable_int64_t = emulated_int64_t; #else +template using portable_uint64_t = typename conditional::shaderInt64, uint64_t, emulated_uint64_t>::type; +template using portable_int64_t = typename conditional::shaderInt64, int64_t, emulated_int64_t>::type; #endif #else +template using portable_uint64_t = uint64_t; +template using portable_int64_t = int64_t; #endif From e50c56b52e873da965804153eba64b3cb133c4a3 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Wed, 9 Apr 2025 00:23:55 -0300 Subject: [PATCH 021/472] Bunch of emulated int64 fixes 
regarding creation, comparison operators and left/right shifts --- .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 95 +++++++++---------- 1 file changed, 44 insertions(+), 51 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 45cb82ed78..98fcf2835b 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -34,7 +34,7 @@ struct emulated_int64_base /** * @brief Creates an `emulated_int64` from a vector of two `uint32_t`s representing its bitpattern * - * @param [in] _data Vector of `uint32_t` encoding the `uint64_t/int64_t` being emulated + * @param [in] _data Vector of `uint32_t` encoding the `uint64_t/int64_t` being emulated. Stored as little endian (first component are the lower 32 bits) */ NBL_CONSTEXPR_STATIC_INLINE_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) { @@ -54,6 +54,18 @@ struct emulated_int64_base return create(storage_t(lo, hi)); } + // ------------------------------------------------------- INTERNAL GETTERS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC uint32_t __getLSB() NBL_CONST_MEMBER_FUNC + { + return data.x; + } + + NBL_CONSTEXPR_INLINE_FUNC uint32_t __getMSB() NBL_CONST_MEMBER_FUNC + { + return data.y; + } + // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC @@ -93,60 +105,42 @@ struct emulated_int64_base NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - const spirv::AddCarryOutput lowerAddResult = addCarry(data.x, rhs.data.x); - const storage_t addResult = { lowerAddResult.result, data.y + rhs.data.y + lowerAddResult.carry }; - const this_t retVal = create(addResult); + const spirv::AddCarryOutput lowerAddResult = addCarry(__getLSB(), rhs.__getLSB()); 
+ const this_t retVal = create(lowerAddResult.result, __getMSB() + rhs.__getMSB() + lowerAddResult.carry); return retVal; } NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - const spirv::SubBorrowOutput lowerSubResult = subBorrow(data.x, rhs.data.x); - const storage_t subResult = { lowerSubResult.result, data.y - rhs.data.y - lowerSubResult.borrow }; - const this_t retVal = create(subResult); + const spirv::SubBorrowOutput lowerSubResult = subBorrow(__getLSB(), rhs.__getLSB()); + const this_t retVal = create(lowerSubResult.result, __getMSB() - rhs.__getMSB() - lowerSubResult.borrow); return retVal; } // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- NBL_CONSTEXPR_INLINE_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - return data.x == rhs.data.x && data.y == rhs.data.y; + return all(data == rhs.data); } NBL_CONSTEXPR_INLINE_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - return data.x != rhs.data.x || data.y != rhs.data.y; + return any(data != rhs.data); } NBL_CONSTEXPR_INLINE_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - if (data.x != rhs.data.x) - { - // If signed, compare topmost bits as signed - NBL_IF_CONSTEXPR(Signed) - return _static_cast(data.x) < _static_cast(rhs.data.x); - // If unsigned, compare them as-is - else - return data.x < rhs.data.x; - } - // Lower bits are positive in both signed and unsigned - else - return data.y < rhs.data.y; + // Either the topmost bits, when interpreted with correct sign, are less than those of `rhs`, or they're equal and the lower bits are less + // (lower bits are always positive in both unsigned and 2's complement so comparison can happen as-is) + const bool MSB = Signed ? 
(_static_cast(__getMSB()) < _static_cast(rhs.__getMSB())) : (__getMSB() < rhs.__getMSB()); + return any(vector(MSB, (__getMSB() == rhs.__getMSB()) && (__getLSB() < rhs.__getLSB()))); } NBL_CONSTEXPR_INLINE_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - if (data.x != rhs.data.x) - { - // If signed, compare topmost bits as signed - NBL_IF_CONSTEXPR(Signed) - return _static_cast(data.x) > _static_cast(rhs.data.x); - // If unsigned, compare them as-is - else - return data.x > rhs.data.x; - } - else - return data.y > rhs.data.y; + // Same reasoning as above + const bool MSB = Signed ? (_static_cast(__getMSB()) > _static_cast(rhs.__getMSB())) : (__getMSB() > rhs.__getMSB()); + return any(vector(MSB, (__getMSB() == rhs.__getMSB()) && (__getLSB() > rhs.__getLSB()))); } NBL_CONSTEXPR_INLINE_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC @@ -260,15 +254,15 @@ struct left_shift_operator > // Can only be defined with `_bits` being of `type_t`, see: //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + + // If `_bits > 63` the result is undefined (current impl returns `0` in LSB and the result of `uint32_t(1) << 32` in your architecture in MSB) NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) { const uint32_t bits = _static_cast(_bits); - if (!bits) - return operand; - const uint32_t shift = ComponentBitWidth - bits; - // We need the `x` component of the vector (which represents the higher bits of the emulated uint64) to get the `bits` higher bits of the `y` component - const vector retValData = { (operand.data.x << bits) | (operand.data.y >> shift), operand.data.y << bits }; - return type_t::create(retValData); + const uint32_t shift = bits >= ComponentBitWidth ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bits >= ComponentBitWidth ? 
vector(0, operand.__getLSB() << shift) + : vector(operand.__getLSB() << bits, (operand.__getMSB() << bits) | (operand.__getLSB() >> shift))); + return bits ? shifted : operand; } }; @@ -280,15 +274,15 @@ struct arithmetic_right_shift_operator // Can only be defined with `_bits` being of `type_t`, see: //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + + // If `_bits > 63` the result is undefined (current impl returns `0` in MSB and the result of `~uint32_t(0) >> 32` in your architecture in LSB) NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) { const uint32_t bits = _static_cast(_bits); - if (!bits) - return operand; - const uint32_t shift = ComponentBitWidth - bits; - // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component - const vector retValData = { operand.data.x >> bits, (operand.data.x << shift) | (operand.data.y >> bits) }; - return emulated_uint64_t::create(retValData); + const uint32_t shift = bits >= ComponentBitWidth ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bits >= ComponentBitWidth ? vector(operand.__getMSB() >> shift, 0) + : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), operand.__getMSB() >> bits)); + return bits ? 
shifted : operand; } }; @@ -300,16 +294,15 @@ struct arithmetic_right_shift_operator // Can only be defined with `_bits` being of `type_t`, see: //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + + // If `_bits > 63` the result is undefined (current impl returns `0xFFFFFFFF` in MSB and the result of `~uint32_t(0) >> 32` in your architecture in LSB) NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) { const uint32_t bits = _static_cast(_bits); - if (!bits) - return operand; - const uint32_t shift = ComponentBitWidth - bits; - // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component - // Also the right shift *only* in the top bits happens as a signed arithmetic right shift - const vector retValData = { _static_cast(_static_cast(operand.data.x)) >> bits, (operand.data.x << shift) | (operand.data.y >> bits) }; - return emulated_int64_t::create(retValData); + const uint32_t shift = bits >= ComponentBitWidth ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bits >= ComponentBitWidth ? vector(uint32_t(int32_t(operand.__getMSB()) >> bits), ~uint32_t(0)) + : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), uint32_t(int32_t(operand.__getMSB()) >> bits))); + return bits ? 
shifted : operand; } }; From b1de9c37b2e2572ea13163f241e9fab0a044bb8e Mon Sep 17 00:00:00 2001 From: Fletterio Date: Wed, 9 Apr 2025 16:24:21 -0300 Subject: [PATCH 022/472] Fix automatic specialize macro in cpp compat intrinsics, add intrinsic and generic ternary operator that should work for all compatible types, address PR review comments --- include/nbl/builtin/hlsl/complex.hlsl | 16 -------- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 38 +++++++++++++++++-- .../builtin/hlsl/cpp_compat/intrinsics.hlsl | 6 +++ .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 36 +++++++++++------- include/nbl/builtin/hlsl/functional.hlsl | 21 +++++++++- .../builtin/hlsl/spirv_intrinsics/core.hlsl | 6 +++ 6 files changed, 89 insertions(+), 34 deletions(-) diff --git a/include/nbl/builtin/hlsl/complex.hlsl b/include/nbl/builtin/hlsl/complex.hlsl index 6728a9bf3d..a3a9f387d0 100644 --- a/include/nbl/builtin/hlsl/complex.hlsl +++ b/include/nbl/builtin/hlsl/complex.hlsl @@ -427,22 +427,6 @@ complex_t rotateRight(NBL_CONST_REF_ARG(complex_t) value) return retVal; } -template -struct ternary_operator< complex_t > -{ - using type_t = complex_t; - - complex_t operator()(bool condition, NBL_CONST_REF_ARG(complex_t) lhs, NBL_CONST_REF_ARG(complex_t) rhs) - { - const vector lhsVector = vector(lhs.real(), lhs.imag()); - const vector rhsVector = vector(rhs.real(), rhs.imag()); - const vector resultVector = condition ? 
lhsVector : rhsVector; - const complex_t result = { resultVector.x, resultVector.y }; - return result; - } -}; - - } } diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 92fc9e929b..e1ba823b9b 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -75,6 +75,8 @@ template struct all_helper; template struct any_helper; +template +struct select_helper; template struct bitReverseAs_helper; template @@ -121,8 +123,8 @@ struct subBorrow_helper; // the template<> needs to be written ourselves // return type is __VA_ARGS__ to protect against `,` in templated return types #define AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(HELPER_NAME, SPIRV_FUNCTION_NAME, ARG_TYPE_LIST, ARG_TYPE_SET, ...)\ -NBL_PARTIAL_REQ_TOP(is_same_v(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) \ -struct HELPER_NAME(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) >\ +NBL_PARTIAL_REQ_TOP(is_same_v(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) \ +struct HELPER_NAME(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) >\ {\ using return_t = __VA_ARGS__;\ static inline return_t __call( BOOST_PP_SEQ_FOR_EACH_I(DECL_ARG, _, ARG_TYPE_SET) )\ @@ -143,8 +145,9 @@ template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(length_helper, length, template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(normalize_helper, normalize, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(rsqrt_helper, inverseSqrt, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(fract_helper, fract, (T), (T), T) -template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(all_helper, any, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(all_helper, all, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(any_helper, any, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(select_helper, 
select, (B)(T), (B)(T)(T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sign_helper, fSign, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sign_helper, sSign, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(radians_helper, radians, (T), (T), T) @@ -633,6 +636,35 @@ struct subBorrow_helper } }; +template +NBL_PARTIAL_REQ_TOP(concepts::BooleanScalar) +struct select_helper) > +{ + NBL_CONSTEXPR_STATIC_INLINE_FUNC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) + { + return condition ? object1 : object2; + } +}; + +template +NBL_PARTIAL_REQ_TOP(concepts::Boolean&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) +struct select_helper&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) > +{ + NBL_CONSTEXPR_STATIC_INLINE_FUNC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) + { + using traits = hlsl::vector_traits; + array_get conditionGetter; + array_get objectGetter; + array_set setter; + + T selected; + for (uint32_t i = 0; i < traits::Dimension; ++i) + setter(selected, i, conditionGetter(condition, i) ? objectGetter(object1, i) : objectGetter(object2, i)); + + return selected; + } +}; + #endif // C++ only specializations // C++ and HLSL specializations diff --git a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl index 1f1957dbbd..284ba564d7 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl @@ -150,6 +150,12 @@ inline bool any(Vector vec) return cpp_compat_intrinsics_impl::any_helper::__call(vec); } +template +NBL_CONSTEXPR_INLINE_FUNC ResultType select(Condition condition, ResultType object1, ResultType object2) +{ + return cpp_compat_intrinsics_impl::select_helper::__call(condition, object1, object2); +} + /** * @brief Returns x - floor(x). 
* diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 98fcf2835b..53881423e9 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -132,15 +132,19 @@ struct emulated_int64_base { // Either the topmost bits, when interpreted with correct sign, are less than those of `rhs`, or they're equal and the lower bits are less // (lower bits are always positive in both unsigned and 2's complement so comparison can happen as-is) + const bool MSBEqual = __getMSB() == rhs.__getMSB(); const bool MSB = Signed ? (_static_cast(__getMSB()) < _static_cast(rhs.__getMSB())) : (__getMSB() < rhs.__getMSB()); - return any(vector(MSB, (__getMSB() == rhs.__getMSB()) && (__getLSB() < rhs.__getLSB()))); + const bool LSB = __getLSB() < rhs.__getLSB(); + return MSBEqual ? LSB : MSB; } NBL_CONSTEXPR_INLINE_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { // Same reasoning as above + const bool MSBEqual = __getMSB() == rhs.__getMSB(); const bool MSB = Signed ? (_static_cast(__getMSB()) > _static_cast(rhs.__getMSB())) : (__getMSB() > rhs.__getMSB()); - return any(vector(MSB, (__getMSB() == rhs.__getMSB()) && (__getLSB() > rhs.__getLSB()))); + const bool LSB = __getLSB() > rhs.__getLSB(); + return MSBEqual ? LSB : MSB; } NBL_CONSTEXPR_INLINE_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC @@ -259,10 +263,12 @@ struct left_shift_operator > NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) { const uint32_t bits = _static_cast(_bits); - const uint32_t shift = bits >= ComponentBitWidth ? bits - ComponentBitWidth : ComponentBitWidth - bits; - const type_t shifted = type_t::create(bits >= ComponentBitWidth ? vector(0, operand.__getLSB() << shift) - : vector(operand.__getLSB() << bits, (operand.__getMSB() << bits) | (operand.__getLSB() >> shift))); - return bits ? 
shifted : operand; + const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites LSB + const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bigShift ? vector(0, operand.__getLSB() << shift) + : vector(operand.__getLSB() << bits, (operand.__getMSB() << bits) | (operand.__getLSB() >> shift))); + ternary_operator ternary; + return ternary(bool(bits), shifted, operand); } }; @@ -279,10 +285,12 @@ struct arithmetic_right_shift_operator NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) { const uint32_t bits = _static_cast(_bits); - const uint32_t shift = bits >= ComponentBitWidth ? bits - ComponentBitWidth : ComponentBitWidth - bits; - const type_t shifted = type_t::create(bits >= ComponentBitWidth ? vector(operand.__getMSB() >> shift, 0) - : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), operand.__getMSB() >> bits)); - return bits ? shifted : operand; + const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites MSB + const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bigShift ? vector(operand.__getMSB() >> shift, 0) + : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), operand.__getMSB() >> bits)); + ternary_operator ternary; + return ternary(bool(bits), shifted, operand); } }; @@ -299,10 +307,12 @@ struct arithmetic_right_shift_operator NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) { const uint32_t bits = _static_cast(_bits); - const uint32_t shift = bits >= ComponentBitWidth ? bits - ComponentBitWidth : ComponentBitWidth - bits; - const type_t shifted = type_t::create(bits >= ComponentBitWidth ? 
vector(uint32_t(int32_t(operand.__getMSB()) >> bits), ~uint32_t(0)) + const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites MSB + const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bigShift ? vector(uint32_t(int32_t(operand.__getMSB()) >> shift), ~uint32_t(0)) : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), uint32_t(int32_t(operand.__getMSB()) >> bits))); - return bits ? shifted : operand; + ternary_operator ternary; + return ternary(bool(bits), shifted, operand); } }; diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index cc95633f44..51ee4f4829 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -201,9 +201,26 @@ struct ternary_operator { using type_t = T; - NBL_CONSTEXPR_INLINE_FUNC T operator()(bool condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(bool) condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) { - return condition ? 
lhs : rhs; + return select(condition, lhs, rhs); + } +}; + +template +struct ternary_operator > +{ + using type_t = T; + using traits = hlsl::vector_traits; + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(bool) condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) + { + return select(condition, lhs, rhs); + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(vector) condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) + { + return select, T>(condition, lhs, rhs); } }; diff --git a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl index d8d90de726..8add7a9ed3 100644 --- a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl +++ b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl @@ -12,6 +12,7 @@ #include #include #include +#include namespace nbl { @@ -335,6 +336,11 @@ template [[vk::ext_instruction(spv::OpAny)]] enable_if_t&& is_same_v::scalar_type, bool>, BooleanVector> any(BooleanVector vec); +// If Condition is a vector, ResultType must be a vector with the same number of components. Using (p -> q) = (~p v q) +template && (! 
concepts::Vector || (concepts::Vector && (extent_v == extent_v)))) +[[vk::ext_instruction(spv::OpSelect)]] +ResultType select(Condition condition, ResultType object1, ResultType object2); + template) [[vk::ext_instruction(spv::OpIAddCarry)]] AddCarryOutput addCarry(T operand1, T operand2); From ea8cd43756146225058dcfbc1ddf4d254b0fd579 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Fri, 11 Apr 2025 12:39:16 -0300 Subject: [PATCH 023/472] Checkpoint: adding a bunch of operators to emulated vector types --- include/nbl/builtin/hlsl/concepts/core.hlsl | 10 + include/nbl/builtin/hlsl/concepts/vector.hlsl | 2 + include/nbl/builtin/hlsl/cpp_compat/basic.h | 2 + .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 108 ++++--- .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 4 +- include/nbl/builtin/hlsl/functional.hlsl | 101 ++++-- include/nbl/builtin/hlsl/morton.hlsl | 290 +++++------------- include/nbl/builtin/hlsl/type_traits.hlsl | 2 + 8 files changed, 236 insertions(+), 283 deletions(-) diff --git a/include/nbl/builtin/hlsl/concepts/core.hlsl b/include/nbl/builtin/hlsl/concepts/core.hlsl index dcbafae8a5..4a8b848cb8 100644 --- a/include/nbl/builtin/hlsl/concepts/core.hlsl +++ b/include/nbl/builtin/hlsl/concepts/core.hlsl @@ -74,12 +74,22 @@ struct is_emulating_floating_point_scalar { NBL_CONSTEXPR_STATIC_INLINE bool value = FloatingPointScalar; }; + +template +struct is_emulating_integral_scalar +{ + NBL_CONSTEXPR_STATIC_INLINE bool value = IntegralScalar; +}; } //! Floating point types are native floating point types or types that imitate native floating point types (for example emulated_float64_t) template NBL_BOOL_CONCEPT FloatingPointLikeScalar = impl::is_emulating_floating_point_scalar::value; +//! 
Integral-like types are native integral types or types that imitate native integral types (for example emulated_uint64_t) +template +NBL_BOOL_CONCEPT IntegralLikeScalar = impl::is_emulating_integral_scalar::value; + } } } diff --git a/include/nbl/builtin/hlsl/concepts/vector.hlsl b/include/nbl/builtin/hlsl/concepts/vector.hlsl index 468838730a..3ea3199951 100644 --- a/include/nbl/builtin/hlsl/concepts/vector.hlsl +++ b/include/nbl/builtin/hlsl/concepts/vector.hlsl @@ -40,6 +40,8 @@ NBL_BOOL_CONCEPT FloatingPointLikeVectorial = concepts::Vectorial && concepts template NBL_BOOL_CONCEPT IntVectorial = concepts::Vectorial && (is_integral_v::scalar_type>); template +NBL_BOOL_CONCEPT IntegralLikeVectorial = concepts::Vectorial && concepts::IntegralLikeScalar::scalar_type>; +template NBL_BOOL_CONCEPT SignedIntVectorial = concepts::Vectorial && concepts::SignedIntegralScalar::scalar_type>; } diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index 77d9d887bd..81bdf32c19 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -11,6 +11,7 @@ #define NBL_CONSTEXPR constexpr // TODO: rename to NBL_CONSTEXPR_VAR #define NBL_CONSTEXPR_FUNC constexpr #define NBL_CONSTEXPR_STATIC constexpr static +#define NBL_CONSTEXPR_INLINE constexpr inline #define NBL_CONSTEXPR_STATIC_INLINE constexpr static inline #define NBL_CONSTEXPR_STATIC_FUNC constexpr static #define NBL_CONSTEXPR_INLINE_FUNC constexpr inline @@ -45,6 +46,7 @@ namespace nbl::hlsl #define NBL_CONSTEXPR const static // TODO: rename to NBL_CONSTEXPR_VAR #define NBL_CONSTEXPR_FUNC #define NBL_CONSTEXPR_STATIC const static +#define NBL_CONSTEXPR_INLINE const static #define NBL_CONSTEXPR_STATIC_INLINE const static #define NBL_CONSTEXPR_STATIC_FUNC static #define NBL_CONSTEXPR_INLINE_FUNC inline diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 
53881423e9..ca51b0060a 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -94,10 +94,8 @@ struct emulated_int64_base // Only valid in CPP #ifndef __HLSL_VERSION - - constexpr inline this_t operator<<(this_t bits) const; - - constexpr inline this_t operator>>(this_t bits) const; + constexpr inline this_t operator<<(uint32_t bits) const; + constexpr inline this_t operator>>(uint32_t bits) const; #endif @@ -256,13 +254,12 @@ struct left_shift_operator > using type_t = emulated_int64_base; NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); - // Can only be defined with `_bits` being of `type_t`, see: + // Can't do generic templated definition, see: //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 - // If `_bits > 63` the result is undefined (current impl returns `0` in LSB and the result of `uint32_t(1) << 32` in your architecture in MSB) - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) { - const uint32_t bits = _static_cast(_bits); const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites LSB const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; const type_t shifted = type_t::create(bigShift ? 
vector(0, operand.__getLSB() << shift) @@ -270,6 +267,12 @@ struct left_shift_operator > ternary_operator ternary; return ternary(bool(bits), shifted, operand); } + + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + { + return operator()(operand, _static_cast(bits)); + } }; template<> @@ -278,13 +281,12 @@ struct arithmetic_right_shift_operator using type_t = emulated_uint64_t; NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); - // Can only be defined with `_bits` being of `type_t`, see: + // Can't do generic templated definition, see: //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 - // If `_bits > 63` the result is undefined (current impl returns `0` in MSB and the result of `~uint32_t(0) >> 32` in your architecture in LSB) - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) + // If `_bits > 63` the result is undefined + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) { - const uint32_t bits = _static_cast(_bits); const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites MSB const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; const type_t shifted = type_t::create(bigShift ? 
vector(operand.__getMSB() >> shift, 0) @@ -292,6 +294,12 @@ struct arithmetic_right_shift_operator ternary_operator ternary; return ternary(bool(bits), shifted, operand); } + + // If `_bits > 63` the result is undefined + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + { + return operator()(operand, _static_cast(bits)); + } }; template<> @@ -300,13 +308,12 @@ struct arithmetic_right_shift_operator using type_t = emulated_int64_t; NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); - // Can only be defined with `_bits` being of `type_t`, see: + // Can't do generic templated definition, see: //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 - // If `_bits > 63` the result is undefined (current impl returns `0xFFFFFFFF` in MSB and the result of `~uint32_t(0) >> 32` in your architecture in LSB) - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) { - const uint32_t bits = _static_cast(_bits); const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites MSB const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; const type_t shifted = type_t::create(bigShift ? 
vector(uint32_t(int32_t(operand.__getMSB()) >> shift), ~uint32_t(0)) @@ -314,24 +321,30 @@ struct arithmetic_right_shift_operator ternary_operator ternary; return ternary(bool(bits), shifted, operand); } + + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + { + return operator()(operand, _static_cast(bits)); + } }; #ifndef __HLSL_VERSION template -constexpr inline emulated_int64_base emulated_int64_base::operator<<(this_t bits) const +constexpr inline emulated_int64_base emulated_int64_base::operator<<(uint32_t bits) const { left_shift_operator leftShift; return leftShift(*this, bits); } -constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(this_t bits) const +constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(uint32_t bits) const { arithmetic_right_shift_operator rightShift; return rightShift(*this, bits); } -constexpr inline emulated_int64_t emulated_int64_t::operator>>(this_t bits) const +constexpr inline emulated_int64_t emulated_int64_t::operator>>(uint32_t bits) const { arithmetic_right_shift_operator rightShift; return rightShift(*this, bits); @@ -353,11 +366,7 @@ struct plus > return lhs + rhs; } - #ifndef __HLSL_VERSION - NBL_CONSTEXPR_STATIC_INLINE type_t identity = _static_cast(uint64_t(0)); - #else - NBL_CONSTEXPR_STATIC_INLINE type_t identity; - #endif + const static type_t identity; }; template @@ -370,23 +379,17 @@ struct minus > return lhs - rhs; } - #ifndef __HLSL_VERSION - NBL_CONSTEXPR_STATIC_INLINE type_t identity = _static_cast(uint64_t(0)); - #else - NBL_CONSTEXPR_STATIC_INLINE type_t identity; - #endif + const static type_t identity; }; -#ifdef __HLSL_VERSION template<> -NBL_CONSTEXPR emulated_uint64_t plus::identity = _static_cast(uint64_t(0)); +NBL_CONSTEXPR_INLINE emulated_uint64_t plus::identity = _static_cast(uint64_t(0)); template<> -NBL_CONSTEXPR emulated_int64_t plus::identity = _static_cast(int64_t(0)); 
+NBL_CONSTEXPR_INLINE emulated_int64_t plus::identity = _static_cast(int64_t(0)); template<> -NBL_CONSTEXPR emulated_uint64_t minus::identity = _static_cast(uint64_t(0)); +NBL_CONSTEXPR_INLINE emulated_uint64_t minus::identity = _static_cast(uint64_t(0)); template<> -NBL_CONSTEXPR emulated_int64_t minus::identity = _static_cast(int64_t(0)); -#endif +NBL_CONSTEXPR_INLINE emulated_int64_t minus::identity = _static_cast(int64_t(0)); // --------------------------------- Compound assignment operators ------------------------------------------ // Specializations of the structs found in functional.hlsl @@ -402,11 +405,7 @@ struct plus_assign > lhs = baseOp(lhs, rhs); } - #ifndef __HLSL_VERSION - NBL_CONSTEXPR_STATIC_INLINE type_t identity = base_t::identity; - #else - NBL_CONSTEXPR_STATIC_INLINE type_t identity; - #endif + const static type_t identity; }; template @@ -420,23 +419,30 @@ struct minus_assign > lhs = baseOp(lhs, rhs); } - #ifndef __HLSL_VERSION - NBL_CONSTEXPR_STATIC_INLINE type_t identity = base_t::identity; - #else - NBL_CONSTEXPR_STATIC_INLINE type_t identity; - #endif + const static type_t identity; }; -#ifdef __HLSL_VERSION template<> -NBL_CONSTEXPR emulated_uint64_t plus_assign::identity = plus::identity; +NBL_CONSTEXPR_INLINE emulated_uint64_t plus_assign::identity = plus::identity; template<> -NBL_CONSTEXPR emulated_int64_t plus_assign::identity = plus::identity; +NBL_CONSTEXPR_INLINE emulated_int64_t plus_assign::identity = plus::identity; template<> -NBL_CONSTEXPR emulated_uint64_t minus_assign::identity = minus::identity; +NBL_CONSTEXPR_INLINE emulated_uint64_t minus_assign::identity = minus::identity; template<> -NBL_CONSTEXPR emulated_int64_t minus_assign::identity = minus::identity; -#endif +NBL_CONSTEXPR_INLINE emulated_int64_t minus_assign::identity = minus::identity; + +// --------------------------------------------------- CONCEPTS SATISFIED ----------------------------------------------------- +namespace concepts +{ +namespace impl +{ 
+template +struct is_emulating_integral_scalar > +{ + NBL_CONSTEXPR_STATIC_INLINE bool value = true; +}; +} +} } //namespace nbl } //namespace hlsl diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index 65a97bbe68..4d7c3839d9 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -330,8 +330,8 @@ struct emulated_vector : CRTP DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) - DEFINE_OPERATORS_FOR_TYPE(emulated_uint64_t) - DEFINE_OPERATORS_FOR_TYPE(emulated_int64_t) + //DEFINE_OPERATORS_FOR_TYPE(emulated_uint64_t) + //DEFINE_OPERATORS_FOR_TYPE(emulated_int64_t) DEFINE_OPERATORS_FOR_TYPE(float32_t) DEFINE_OPERATORS_FOR_TYPE(float64_t) DEFINE_OPERATORS_FOR_TYPE(uint16_t) diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 51ee4f4829..93687bdb6a 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -207,23 +207,6 @@ struct ternary_operator } }; -template -struct ternary_operator > -{ - using type_t = T; - using traits = hlsl::vector_traits; - - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(bool) condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) - { - return select(condition, lhs, rhs); - } - - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(vector) condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) - { - return select, T>(condition, lhs, rhs); - } -}; - template struct left_shift_operator { @@ -252,34 +235,68 @@ struct left_shift_operator) > } }; -template NBL_PARTIAL_REQ_TOP(! 
(concepts::IntVector) && concepts::Vectorial) -struct left_shift_operator) && concepts::Vectorial) > +template NBL_PARTIAL_REQ_TOP(!concepts::Vector && concepts::IntegralLikeVectorial) +struct left_shift_operator && concepts::IntegralLikeVectorial) > { using type_t = T; using scalar_t = typename vector_traits::scalar_type; NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { + array_get getter; + array_set setter; NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; [[unroll]] for (uint16_t i = 0; i < extent; i++) { - shifted.setComponent(i, leftShift(operand.getComponent(i), bits.getComponent(i))); + setter(shifted, i, leftShift(getter(operand, i), getter(bits, i))); } return shifted; } NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) { + array_get getter; + array_set setter; + NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + left_shift_operator leftShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, leftShift(getter(operand, i), bits)); + } + return shifted; + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(vector::Dimension>) bits) + { + array_get getter; + array_set setter; + NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + left_shift_operator leftShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, leftShift(getter(operand, i), bits[i])); + } + return shifted; + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(uint32_t) bits) + { + array_get getter; + array_set setter; NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; [[unroll]] for (uint16_t i = 0; i < extent; i++) { - shifted.setComponent(i, leftShift(operand.getComponent(i), bits)); + 
setter(shifted, i, leftShift(getter(operand, i), bits)); } return shifted; } @@ -313,34 +330,68 @@ struct arithmetic_right_shift_operator NBL_PARTIAL_REQ_TOP(concepts::Vectorial) -struct arithmetic_right_shift_operator) > +template NBL_PARTIAL_REQ_TOP(!concepts::Vector&& concepts::IntegralLikeVectorial) +struct arithmetic_right_shift_operator&& concepts::IntegralLikeVectorial) > { using type_t = T; using scalar_t = typename vector_traits::scalar_type; NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { + array_get getter; + array_set setter; NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] for (uint16_t i = 0; i < extent; i++) { - shifted.setComponent(i, rightShift(operand.getComponent(i), bits.getComponent(i))); + setter(shifted, i, rightShift(getter(operand, i), getter(bits, i))); } return shifted; } NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) { + array_get getter; + array_set setter; + NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + arithmetic_right_shift_operator rightShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, rightShift(getter(operand, i), bits)); + } + return shifted; + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(vector::Dimension>) bits) + { + array_get getter; + array_set setter; + NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + arithmetic_right_shift_operator rightShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, rightShift(getter(operand, i), bits[i])); + } + return shifted; + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(uint32_t) bits) + { + array_get getter; + array_set setter; NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = 
uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] for (uint16_t i = 0; i < extent; i++) { - shifted.setComponent(i, rightShift(operand.getComponent(i), bits)); + setter(shifted, i, rightShift(getter(operand, i), bits)); } return shifted; } diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index ea583fddfa..9e62e40c2a 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -7,6 +7,7 @@ #include "nbl/builtin/hlsl/functional.hlsl" #include "nbl/builtin/hlsl/emulated/int64_t.hlsl" #include "nbl/builtin/hlsl/mpl.hlsl" +#include "nbl/builtin/hlsl/portable/vector_t.hlsl" // TODO: mega macro to get functional plus, minus, plus_assign, minus_assign @@ -22,90 +23,67 @@ namespace impl // Valid dimension for a morton code template -NBL_BOOL_CONCEPT MortonDimension = 1 < D && D < 5; +NBL_BOOL_CONCEPT Dimension = 1 < D && D < 5; -// Basic decode masks - -template -struct decode_mask; +// --------------------------------------------------------- MORTON ENCODE/DECODE MASKS --------------------------------------------------- -template -struct decode_mask : integral_constant {}; +NBL_CONSTEXPR uint16_t CodingStages = 5; -template -struct decode_mask : integral_constant::value << Dim) | T(1)> {}; +template +struct coding_mask; -template -NBL_CONSTEXPR T decode_mask_v = decode_mask::value; +template +NBL_CONSTEXPR uint64_t coding_mask_v = coding_mask::value; -// --------------------------------------------------------- MORTON ENCODE/DECODE MASKS --------------------------------------------------- -// Proper encode masks (either generic `T array[masksPerDImension]` or `morton_mask`) impossible to have until at best HLSL202y +// 0th stage will be special: to avoid masking twice during encode/decode, and to get a proper mask that only gets the relevant bits out of a morton code, the 0th stage +// mask also considers the total number of bits we're cnsidering for a code (all 
other masks operate on a bit-agnostic basis). +#define NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(DIM, BASE_VALUE) template struct coding_mask\ +{\ + enum : uint64_t { _Bits = Bits };\ + NBL_CONSTEXPR_STATIC_INLINE uint64_t KilloffMask = _Bits * DIM < 64 ? (uint64_t(1) << (_Bits * DIM)) - 1 : ~uint64_t(0);\ + NBL_CONSTEXPR_STATIC_INLINE uint64_t value = uint64_t(BASE_VALUE) & KilloffMask;\ +}; -#ifndef __HLSL_VERSION +#define NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(DIM, STAGE, BASE_VALUE) template struct coding_mask\ +{\ + NBL_CONSTEXPR_STATIC_INLINE uint64_t value = uint64_t(BASE_VALUE);\ +}; -#define NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, HEX_VALUE) template struct morton_mask_##DIM##_##MASK \ +// Final stage mask also counts exact number of bits, although maybe it's not necessary +#define NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS template struct coding_mask\ {\ - NBL_CONSTEXPR_STATIC_INLINE T value = _static_cast(HEX_VALUE);\ + enum : uint64_t { _Bits = Bits };\ + NBL_CONSTEXPR_STATIC_INLINE uint64_t value = (uint64_t(1) << _Bits) - 1;\ }; -#define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_VALUE) +NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(2, 0x5555555555555555) // Groups bits by 1 on, 1 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 1, uint64_t(0x3333333333333333)) // Groups bits by 2 on, 2 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 2, uint64_t(0x0F0F0F0F0F0F0F0F)) // Groups bits by 4 on, 4 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 3, uint64_t(0x00FF00FF00FF00FF)) // Groups bits by 8 on, 8 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 4, uint64_t(0x0000FFFF0000FFFF)) // Groups bits by 16 on, 16 off -#else +NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(3, 0x9249249249249249) // Groups bits by 1 on, 2 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 1, uint64_t(0x30C30C30C30C30C3)) // Groups bits by 2 on, 4 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 2, uint64_t(0xF00F00F00F00F00F)) // Groups bits by 4 on, 8 off 
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 3, uint64_t(0x00FF0000FF0000FF)) // Groups bits by 8 on, 16 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 4, uint64_t(0xFFFF00000000FFFF)) // Groups bits by 16 on, 32 off -#define NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, HEX_VALUE) template struct morton_mask_##DIM##_##MASK \ -{\ - NBL_CONSTEXPR_STATIC_INLINE T value;\ -};\ -template<>\ -NBL_CONSTEXPR_STATIC_INLINE uint16_t morton_mask_##DIM##_##MASK##::value = _static_cast(HEX_VALUE);\ -template<>\ -NBL_CONSTEXPR_STATIC_INLINE uint32_t morton_mask_##DIM##_##MASK##::value = _static_cast(HEX_VALUE);\ -template<>\ -NBL_CONSTEXPR_STATIC_INLINE uint64_t morton_mask_##DIM##_##MASK##::value = _static_cast(HEX_VALUE);\ - -#define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_VALUE) template<> struct morton_mask_##DIM##_##MASK##\ -{\ - NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t value;\ -};\ -NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t morton_mask_##DIM##_##MASK##::value = _static_cast(HEX_VALUE); -#endif +NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(4, 0x1111111111111111) // Groups bits by 1 on, 3 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 1, uint64_t(0x0303030303030303)) // Groups bits by 2 on, 6 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 2, uint64_t(0x000F000F000F000F)) // Groups bits by 4 on, 12 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 3, uint64_t(0x000000FF000000FF)) // Groups bits by 8 on, 24 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 4, uint64_t(0x000000000000FFFF)) // Groups bits by 16 on, 48 off (unused but here for completion + likely keeps compiler from complaining) -#define NBL_MORTON_DECODE_MASK(DIM, MASK, HEX_VALUE) template struct morton_mask_##DIM##_##MASK ;\ - NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_VALUE)\ - NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, HEX_VALUE)\ - template\ - NBL_CONSTEXPR T morton_mask_##DIM##_##MASK##_v = morton_mask_##DIM##_##MASK##::value; - -NBL_MORTON_DECODE_MASK(2, 0, uint64_t(0x5555555555555555)) // Groups 
bits by 1 on, 1 off -NBL_MORTON_DECODE_MASK(2, 1, uint64_t(0x3333333333333333)) // Groups bits by 2 on, 2 off -NBL_MORTON_DECODE_MASK(2, 2, uint64_t(0x0F0F0F0F0F0F0F0F)) // Groups bits by 4 on, 4 off -NBL_MORTON_DECODE_MASK(2, 3, uint64_t(0x00FF00FF00FF00FF)) // Groups bits by 8 on, 8 off -NBL_MORTON_DECODE_MASK(2, 4, uint64_t(0x0000FFFF0000FFFF)) // Groups bits by 16 on, 16 off -NBL_MORTON_DECODE_MASK(2, 5, uint64_t(0x00000000FFFFFFFF)) // Groups bits by 32 on, 32 off - -NBL_MORTON_DECODE_MASK(3, 0, uint64_t(0x1249249249249249)) // Groups bits by 1 on, 2 off - also limits each dimension to 21 bits -NBL_MORTON_DECODE_MASK(3, 1, uint64_t(0x01C0E070381C0E07)) // Groups bits by 3 on, 6 off -NBL_MORTON_DECODE_MASK(3, 2, uint64_t(0x0FC003F000FC003F)) // Groups bits by 6 on, 12 off -NBL_MORTON_DECODE_MASK(3, 3, uint64_t(0x0000FFF000000FFF)) // Groups bits by 12 on, 24 off -NBL_MORTON_DECODE_MASK(3, 4, uint64_t(0x0000000000FFFFFF)) // Groups bits by 24 on, 48 off - -NBL_MORTON_DECODE_MASK(4, 0, uint64_t(0x1111111111111111)) // Groups bits by 1 on, 3 off -NBL_MORTON_DECODE_MASK(4, 1, uint64_t(0x0303030303030303)) // Groups bits by 2 on, 6 off -NBL_MORTON_DECODE_MASK(4, 2, uint64_t(0x000F000F000F000F)) // Groups bits by 4 on, 12 off -NBL_MORTON_DECODE_MASK(4, 3, uint64_t(0x000000FF000000FF)) // Groups bits by 8 on, 24 off -NBL_MORTON_DECODE_MASK(4, 4, uint64_t(0x000000000000FFFF)) // Groups bits by 16 on, 48 off - -#undef NBL_MORTON_DECODE_MASK -#undef NBL_MORTON_EMULATED_DECODE_MASK -#undef NBL_MORTON_GENERIC_DECODE_MASK - -// ----------------------------------------------------------------- MORTON ENCODERS --------------------------------------------------- - -template -struct MortonEncoder; - -template -struct MortonEncoder<2, Bits, encode_t> +NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS + +#undef NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASK +#undef NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK +#undef NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK + +// 
----------------------------------------------------------------- MORTON ENCODER --------------------------------------------------- + +template && (Dim * Bits <= 64) && (sizeof(encode_t) * 8 >= Dim * Bits)) +struct MortonEncoder { template NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) @@ -114,168 +92,70 @@ struct MortonEncoder<2, Bits, encode_t> encode_t encoded = _static_cast(decodedValue); NBL_IF_CONSTEXPR(Bits > 16) { - encoded = (encoded | leftShift(encoded, 16)) & morton_mask_2_4_v; + encoded = encoded | leftShift(encoded, 16 * (Dim - 1)); } NBL_IF_CONSTEXPR(Bits > 8) { - encoded = (encoded | leftShift(encoded, 8)) & morton_mask_2_3_v; + encoded = encoded & _static_cast(coding_mask_v); + encoded = encoded | leftShift(encoded, 8 * (Dim - 1)); } NBL_IF_CONSTEXPR(Bits > 4) { - encoded = (encoded | leftShift(encoded, 4)) & morton_mask_2_2_v; + encoded = encoded & _static_cast(coding_mask_v); + encoded = encoded | leftShift(encoded, 4 * (Dim - 1)); } NBL_IF_CONSTEXPR(Bits > 2) { - encoded = (encoded | leftShift(encoded, 2)) & morton_mask_2_1_v; + encoded = encoded & _static_cast(coding_mask_v); + encoded = encoded | leftShift(encoded, 2 * (Dim - 1)); } - encoded = (encoded | leftShift(encoded, 1)) & morton_mask_2_0_v; - return encoded; - } -}; - -template -struct MortonEncoder<3, Bits, encode_t> -{ - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) - { - left_shift_operator leftShift; - encode_t encoded = _static_cast(decodedValue); - NBL_IF_CONSTEXPR(Bits > 12) - { - encoded = (encoded | leftShift(encoded, 24)) & morton_mask_3_3_v; - } - NBL_IF_CONSTEXPR(Bits > 6) - { - encoded = (encoded | leftShift(encoded, 12)) & morton_mask_3_2_v; - } - NBL_IF_CONSTEXPR(Bits > 3) - { - encoded = (encoded | leftShift(encoded, 6)) & morton_mask_3_1_v; - } - encoded = (encoded | leftShift(encoded, 2) | leftShift(encoded, 4)) & morton_mask_3_0_v; - return encoded; - } -}; - -template 
-struct MortonEncoder<4, Bits, encode_t> -{ - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) - { - left_shift_operator leftShift; - encode_t encoded = _static_cast(decodedValue); - NBL_IF_CONSTEXPR(Bits > 8) - { - encoded = (encoded | leftShift(encoded, 24)) & morton_mask_4_3_v; - } - NBL_IF_CONSTEXPR(Bits > 4) - { - encoded = (encoded | leftShift(encoded, 12)) & morton_mask_4_2_v; - } - NBL_IF_CONSTEXPR(Bits > 2) + NBL_IF_CONSTEXPR(Bits > 1) { - encoded = (encoded | leftShift(encoded, 6)) & morton_mask_4_1_v; + encoded = encoded & _static_cast(coding_mask_v); + encoded = encoded | leftShift(encoded, 1 * (Dim - 1)); } - encoded = (encoded | leftShift(encoded, 3)) & morton_mask_4_0_v; - return encoded; + return encoded & _static_cast(coding_mask_v); } }; -// ----------------------------------------------------------------- MORTON DECODERS --------------------------------------------------- +// ----------------------------------------------------------------- MORTON DECODER --------------------------------------------------- -template -struct MortonDecoder; - -template -struct MortonDecoder<2, Bits, encode_t> +template && (Dim* Bits <= 64) && (sizeof(encode_t) * 8 >= Dim * Bits)) +struct MortonDecoder { - template + template 16), uint32_t, uint16_t> + NBL_FUNC_REQUIRES(concepts::IntVector && sizeof(vector_traits::scalar_type) * 8 >= Bits) NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) { - arithmetic_right_shift_operator rightShift; - encode_t decoded = encodedValue & morton_mask_2_0_v; + arithmetic_right_shift_operator > rightShift; + portable_vector_t decoded; NBL_IF_CONSTEXPR(Bits > 1) { - decoded = (decoded | rightShift(decoded, 1)) & morton_mask_2_1_v; + decoded = decoded & _static_cast(coding_mask_v); + decoded = decoded | rightShift(decoded, 1 * (Dim - 1)); } NBL_IF_CONSTEXPR(Bits > 2) { - decoded = (decoded | rightShift(decoded, 2)) & morton_mask_2_2_v; + decoded 
= decoded & _static_cast(coding_mask_v); + decoded = decoded | rightShift(decoded, 2 * (Dim - 1)); } NBL_IF_CONSTEXPR(Bits > 4) { - decoded = (decoded | rightShift(decoded, 4)) & morton_mask_2_3_v; + decoded = decoded & _static_cast(coding_mask_v); + decoded = decoded | rightShift(decoded, 4 * (Dim - 1)); } NBL_IF_CONSTEXPR(Bits > 8) { - decoded = (decoded | rightShift(decoded, 8)) & morton_mask_2_4_v; + decoded = decoded & _static_cast(coding_mask_v); + decoded = decoded | rightShift(decoded, 8 * (Dim - 1)); } NBL_IF_CONSTEXPR(Bits > 16) { - decoded = (decoded | rightShift(decoded, 16)) & morton_mask_2_5_v; - } - - return _static_cast(decoded); - } -}; - -template -struct MortonDecoder<3, Bits, encode_t> -{ - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) - { - arithmetic_right_shift_operator rightShift; - encode_t decoded = encodedValue & morton_mask_3_0_v; - NBL_IF_CONSTEXPR(Bits > 1) - { - decoded = (decoded | rightShift(decoded, 2) | rightShift(decoded, 4)) & morton_mask_3_1_v; - } - NBL_IF_CONSTEXPR(Bits > 3) - { - decoded = (decoded | rightShift(decoded, 6)) & morton_mask_3_2_v; - } - NBL_IF_CONSTEXPR(Bits > 6) - { - decoded = (decoded | rightShift(decoded, 12)) & morton_mask_3_3_v; - } - NBL_IF_CONSTEXPR(Bits > 12) - { - decoded = (decoded | rightShift(decoded, 24)) & morton_mask_3_4_v; - } - - return _static_cast(decoded); - } -}; - -template -struct MortonDecoder<4, Bits, encode_t> -{ - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) - { - arithmetic_right_shift_operator rightShift; - encode_t decoded = encodedValue & morton_mask_4_0_v; - NBL_IF_CONSTEXPR(Bits > 1) - { - decoded = (decoded | rightShift(decoded, 3)) & morton_mask_4_1_v; - } - NBL_IF_CONSTEXPR(Bits > 2) - { - decoded = (decoded | rightShift(decoded, 6)) & morton_mask_4_2_v; - } - NBL_IF_CONSTEXPR(Bits > 4) - { - decoded = (decoded | rightShift(decoded, 12)) & morton_mask_4_3_v; - } - 
NBL_IF_CONSTEXPR(Bits > 8) - { - decoded = (decoded | rightShift(decoded, 24)) & morton_mask_4_4_v; + decoded = decoded & _static_cast(coding_mask_v); + decoded = decoded | rightShift(decoded, 16 * (Dim - 1)); } - return _static_cast(decoded); + return _static_cast(decoded & _static_cast(coding_mask_v)); } }; @@ -290,7 +170,7 @@ struct Equals { NBL_CONSTEXPR_INLINE_FUNC vector operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { - NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v; + NBL_CONSTEXPR_STATIC storage_t Mask = _static_cast(impl::coding_mask_v); left_shift_operator leftShift; vector retVal; [[unroll]] @@ -342,7 +222,7 @@ struct BaseComparison { NBL_CONSTEXPR_INLINE_FUNC vector operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { - NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v; + NBL_CONSTEXPR_STATIC storage_t Mask = _static_cast(impl::coding_mask_v); left_shift_operator leftShift; vector retVal; ComparisonOp comparison; @@ -392,7 +272,7 @@ struct LessEquals : BaseComparison && D * Bits <= 64) +template && D * Bits <= 64) struct code { using this_t = code; @@ -515,7 +395,7 @@ struct code NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v; + NBL_CONSTEXPR_STATIC storage_t Mask = _static_cast(impl::coding_mask_v); left_shift_operator leftShift; this_t retVal; retVal.value = _static_cast(uint64_t(0)); @@ -536,7 +416,7 @@ struct code NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v; + NBL_CONSTEXPR_STATIC storage_t Mask = _static_cast(impl::coding_mask_v); left_shift_operator leftShift; this_t retVal; retVal.value = _static_cast(uint64_t(0)); @@ -653,14 +533,14 @@ struct arithmetic_right_shift_operator > #ifndef __HLSL_VERSION -template&& D* Bits <= 64) +template&& D* Bits <= 
64) constexpr inline morton::code morton::code::operator<<(uint16_t bits) const { left_shift_operator> leftShift; return leftShift(*this, bits); } -template&& D* Bits <= 64) +template&& D* Bits <= 64) constexpr inline morton::code morton::code::operator>>(uint16_t bits) const { arithmetic_right_shift_operator> rightShift; diff --git a/include/nbl/builtin/hlsl/type_traits.hlsl b/include/nbl/builtin/hlsl/type_traits.hlsl index 5bfc7ca89b..bc160de788 100644 --- a/include/nbl/builtin/hlsl/type_traits.hlsl +++ b/include/nbl/builtin/hlsl/type_traits.hlsl @@ -664,6 +664,8 @@ using conditional_t = typename conditional::type; // Template Variables +template +NBL_CONSTEXPR T integral_constant_v = integral_constant::value; template NBL_CONSTEXPR bool is_same_v = is_same::value; template From 53a5f6a8cd4c19718694ff701c3723bbfffcf0f5 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Fri, 11 Apr 2025 17:04:15 -0300 Subject: [PATCH 024/472] Vectorized encode/decode for better pipelining --- .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 152 ++++++++++-------- include/nbl/builtin/hlsl/morton.hlsl | 29 +++- 2 files changed, 106 insertions(+), 75 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index 4d7c3839d9..c4938fc9c2 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -147,93 +147,107 @@ struct emulated_vector : CRTP return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(component_t val) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, this_t::getComponent(i) + val); - - return output; + #define NBL_EMULATED_VECTOR_DEFINE_OPERATOR(OP)\ + NBL_CONSTEXPR_INLINE_FUNC this_t operator##OP (component_t val)\ + {\ + this_t output;\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP val);\ + return output;\ + }\ + NBL_CONSTEXPR_INLINE_FUNC 
this_t operator##OP (this_t other)\ + {\ + this_t output;\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP other.getComponent(i));\ + return output;\ + }\ + NBL_CONSTEXPR_INLINE_FUNC this_t operator##OP (vector other)\ + {\ + this_t output;\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP other[i]);\ + return output;\ } - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(this_t other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, this_t::getComponent(i) + other.getComponent(i)); - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(vector other) - { - this_t output; + NBL_EMULATED_VECTOR_DEFINE_OPERATOR(&) + NBL_EMULATED_VECTOR_DEFINE_OPERATOR(|) + NBL_EMULATED_VECTOR_DEFINE_OPERATOR(^) + NBL_EMULATED_VECTOR_DEFINE_OPERATOR(+) + NBL_EMULATED_VECTOR_DEFINE_OPERATOR(-) + NBL_EMULATED_VECTOR_DEFINE_OPERATOR(*) + NBL_EMULATED_VECTOR_DEFINE_OPERATOR(/) - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, this_t::getComponent(i) + other[i]); + #undef NBL_EMULATED_VECTOR_DEFINE_OPERATOR - return output; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(component_t val) + NBL_CONSTEXPR_INLINE_FUNC component_t calcComponentSum() { - this_t output; - + component_t sum = 0; for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) - val); + sum = sum + CRTP::getComponent(i); - return output; + return sum; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(this_t other) - { - this_t output; +}; - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) - other.getComponent(i)); +template +struct emulated_vector : CRTP +{ + using component_t = ComponentType; + using this_t = emulated_vector; - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(vector other) + NBL_CONSTEXPR_STATIC_INLINE this_t create(this_t 
other) { this_t output; - + [[unroll]] for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) - other[i]); + output.setComponent(i, other.getComponent(i)); return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(component_t val) + template + NBL_CONSTEXPR_STATIC_INLINE this_t create(vector other) { this_t output; - + [[unroll]] for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) * val); + output.setComponent(i, ComponentType::create(other[i])); return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(this_t other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) * other.getComponent(i)); - return output; + #define NBL_EMULATED_VECTOR_OPERATOR(OP, ENABLE_CONDITION) NBL_CONSTEXPR_INLINE_FUNC enable_if_t< ENABLE_CONDITION , this_t> operator##OP (component_t val)\ + {\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, CRTP::getComponent(i) + val);\ + return output;\ + }\ + NBL_CONSTEXPR_INLINE_FUNC enable_if_t< ENABLE_CONDITION , this_t> operator##OP (this_t other)\ + {\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, CRTP::getComponent(i) + other.getComponent(i));\ + return output;\ } - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(vector other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) * other[i]); - return output; - } + NBL_EMULATED_VECTOR_OPERATOR(&, concepts::IntegralLikeScalar) + NBL_EMULATED_VECTOR_OPERATOR(|, concepts::IntegralLikeScalar) + NBL_EMULATED_VECTOR_OPERATOR(^, concepts::IntegralLikeScalar) + NBL_EMULATED_VECTOR_OPERATOR(+, true) + NBL_EMULATED_VECTOR_OPERATOR(-, true) + NBL_EMULATED_VECTOR_OPERATOR(*, true) + NBL_EMULATED_VECTOR_OPERATOR(/, true) + + #undef NBL_EMULATED_VECTOR_OPERATOR - 
NBL_CONSTEXPR_INLINE_FUNC component_t calcComponentSum() + NBL_CONSTEXPR_INLINE_FUNC ComponentType calcComponentSum() { - component_t sum = 0; + ComponentType sum = ComponentType::create(0); + [[unroll]] for (uint32_t i = 0u; i < CRTP::Dimension; ++i) sum = sum + CRTP::getComponent(i); @@ -241,6 +255,7 @@ struct emulated_vector : CRTP } }; + #define DEFINE_OPERATORS_FOR_TYPE(...)\ NBL_CONSTEXPR_INLINE_FUNC this_t operator+(__VA_ARGS__ val)\ {\ @@ -270,12 +285,13 @@ NBL_CONSTEXPR_INLINE_FUNC this_t operator*(__VA_ARGS__ val)\ }\ \ -// TODO: some of code duplication could be avoided -template -struct emulated_vector : CRTP +// ----------------------------------------------------- EMULATED FLOAT SPECIALIZATION -------------------------------------------------------------------- + +template +struct emulated_vector, CRTP, false> : CRTP { - using component_t = ComponentType; - using this_t = emulated_vector; + using component_t = emulated_float64_t; + using this_t = emulated_vector; NBL_CONSTEXPR_STATIC_INLINE this_t create(this_t other) { @@ -293,7 +309,7 @@ struct emulated_vector : CRTP this_t output; for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, ComponentType::create(other[i])); + output.setComponent(i, component_t::create(other[i])); return output; } @@ -330,8 +346,6 @@ struct emulated_vector : CRTP DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) - //DEFINE_OPERATORS_FOR_TYPE(emulated_uint64_t) - //DEFINE_OPERATORS_FOR_TYPE(emulated_int64_t) DEFINE_OPERATORS_FOR_TYPE(float32_t) DEFINE_OPERATORS_FOR_TYPE(float64_t) DEFINE_OPERATORS_FOR_TYPE(uint16_t) @@ -341,9 +355,9 @@ struct emulated_vector : CRTP DEFINE_OPERATORS_FOR_TYPE(int32_t) DEFINE_OPERATORS_FOR_TYPE(int64_t) - NBL_CONSTEXPR_INLINE_FUNC ComponentType calcComponentSum() + NBL_CONSTEXPR_INLINE_FUNC component_t calcComponentSum() { - ComponentType sum = ComponentType::create(0); + component_t sum 
= component_t::create(0); for (uint32_t i = 0u; i < CRTP::Dimension; ++i) sum = sum + CRTP::getComponent(i); diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 9e62e40c2a..e8cb2b73bf 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -85,11 +85,12 @@ NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS template && (Dim * Bits <= 64) && (sizeof(encode_t) * 8 >= Dim * Bits)) struct MortonEncoder { - template + template 16), vector, vector > + NBL_FUNC_REQUIRES(concepts::IntVector && sizeof(typename vector_traits::scalar_type) * 8 >= Bits) NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) { - left_shift_operator leftShift; - encode_t encoded = _static_cast(decodedValue); + left_shift_operator > leftShift; + portable_vector_t encoded = _static_cast >(decodedValue); NBL_IF_CONSTEXPR(Bits > 16) { encoded = encoded | leftShift(encoded, 16 * (Dim - 1)); @@ -114,7 +115,16 @@ struct MortonEncoder encoded = encoded & _static_cast(coding_mask_v); encoded = encoded | leftShift(encoded, 1 * (Dim - 1)); } - return encoded & _static_cast(coding_mask_v); + encoded = encoded & _static_cast(coding_mask_v); + encoded = leftShift(encoded, _static_cast >(vector(0, 1, 2, 3))); + // The `encoded` above is vectorized for each coord, here we collapse all coords into a single element + encode_t actualEncoded = _static_cast(uint64_t(0)); + array_get, encode_t> getter; + [[unroll]] + for (uint16_t i = 0; i < Dim; i++) + actualEncoded = actualEncoded | getter(encoded, i); + + return actualEncoded; } }; @@ -123,12 +133,19 @@ struct MortonEncoder template && (Dim* Bits <= 64) && (sizeof(encode_t) * 8 >= Dim * Bits)) struct MortonDecoder { - template 16), uint32_t, uint16_t> - NBL_FUNC_REQUIRES(concepts::IntVector && sizeof(vector_traits::scalar_type) * 8 >= Bits) + template 16), vector, vector > + NBL_FUNC_REQUIRES(concepts::IntVector && sizeof(typename 
vector_traits::scalar_type) * 8 >= Bits) NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) { arithmetic_right_shift_operator > rightShift; portable_vector_t decoded; + array_set, encode_t> setter; + // Write initial values into decoded + [[unroll]] + for (uint16_t i = 0; i < Dim; i++) + setter(decoded, i, encodedValue); + decoded = rightShift(decoded, _static_cast >(vector(0, 1, 2, 3))); + NBL_IF_CONSTEXPR(Bits > 1) { decoded = decoded & _static_cast(coding_mask_v); From cf52d9cbf2d99e3ceb16495ef9049511cbde2096 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 14 Apr 2025 16:02:17 -0300 Subject: [PATCH 025/472] Adress the last of PR review changes: vectorize more operators, add a bunch of operators and functional structs for vectorial types --- include/nbl/builtin/hlsl/cpp_compat/basic.h | 20 +- .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 8 + .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 113 ++++++- include/nbl/builtin/hlsl/functional.hlsl | 28 +- include/nbl/builtin/hlsl/morton.hlsl | 319 ++++++++---------- include/nbl/builtin/hlsl/mpl.hlsl | 28 +- 6 files changed, 310 insertions(+), 206 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index 81bdf32c19..f01d2d78ec 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -90,7 +90,7 @@ namespace impl template struct static_cast_helper { - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(NBL_CONST_REF_ARG(From) u) { #ifndef __HLSL_VERSION return static_cast(u); @@ -99,10 +99,26 @@ struct static_cast_helper #endif } }; + +// CPP-side, this can invoke the copy constructor if the copy is non-trivial in generic code +// HLSL-side, this enables generic conversion code between types, contemplating the case where no conversion is needed +template +struct static_cast_helper +{ + NBL_CONSTEXPR_STATIC_INLINE_FUNC Same 
cast(NBL_CONST_REF_ARG(Same) s) + { +#ifndef __HLSL_VERSION + return static_cast(s); +#else + return s; +#endif + } +}; + } template -NBL_CONSTEXPR_INLINE_FUNC To _static_cast(From v) +NBL_CONSTEXPR_INLINE_FUNC To _static_cast(NBL_CONST_REF_ARG(From) v) { return impl::static_cast_helper::cast(v); } diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index ca51b0060a..4f354c900e 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -431,6 +431,14 @@ NBL_CONSTEXPR_INLINE emulated_uint64_t minus_assign::identity template<> NBL_CONSTEXPR_INLINE emulated_int64_t minus_assign::identity = minus::identity; +// ------------------------------------------------ TYPE TRAITS SATISFIED ----------------------------------------------------- + +template<> +struct is_signed : bool_constant {}; + +template<> +struct is_unsigned : bool_constant {}; + // --------------------------------------------------- CONCEPTS SATISFIED ----------------------------------------------------- namespace concepts { diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index c4938fc9c2..fd5f5e3c34 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -147,7 +147,7 @@ struct emulated_vector : CRTP return output; } - #define NBL_EMULATED_VECTOR_DEFINE_OPERATOR(OP)\ + #define NBL_EMULATED_VECTOR_OPERATOR(OP)\ NBL_CONSTEXPR_INLINE_FUNC this_t operator##OP (component_t val)\ {\ this_t output;\ @@ -170,15 +170,33 @@ struct emulated_vector : CRTP return output;\ } - NBL_EMULATED_VECTOR_DEFINE_OPERATOR(&) - NBL_EMULATED_VECTOR_DEFINE_OPERATOR(|) - NBL_EMULATED_VECTOR_DEFINE_OPERATOR(^) - NBL_EMULATED_VECTOR_DEFINE_OPERATOR(+) - NBL_EMULATED_VECTOR_DEFINE_OPERATOR(-) - NBL_EMULATED_VECTOR_DEFINE_OPERATOR(*) - NBL_EMULATED_VECTOR_DEFINE_OPERATOR(/) + 
NBL_EMULATED_VECTOR_OPERATOR(&) + NBL_EMULATED_VECTOR_OPERATOR(|) + NBL_EMULATED_VECTOR_OPERATOR(^) + NBL_EMULATED_VECTOR_OPERATOR(+) + NBL_EMULATED_VECTOR_OPERATOR(-) + NBL_EMULATED_VECTOR_OPERATOR(*) + NBL_EMULATED_VECTOR_OPERATOR(/) - #undef NBL_EMULATED_VECTOR_DEFINE_OPERATOR + #undef NBL_EMULATED_VECTOR_OPERATOR + + #define NBL_EMULATED_VECTOR_COMPARISON(OP) NBL_CONSTEXPR_INLINE_FUNC vector operator##OP (this_t other)\ + {\ + vector output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output[i] = CRTP::getComponent(i) OP other.getComponent(i);\ + return output;\ + } + + NBL_EMULATED_VECTOR_COMPARISON(==) + NBL_EMULATED_VECTOR_COMPARISON(!=) + NBL_EMULATED_VECTOR_COMPARISON(<) + NBL_EMULATED_VECTOR_COMPARISON(<=) + NBL_EMULATED_VECTOR_COMPARISON(>) + NBL_EMULATED_VECTOR_COMPARISON(>=) + + #undef NBL_EMULATED_VECTOR_COMPARISON NBL_CONSTEXPR_INLINE_FUNC component_t calcComponentSum() { @@ -222,7 +240,7 @@ struct emulated_vector : CRTP this_t output;\ [[unroll]]\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, CRTP::getComponent(i) + val);\ + output.setComponent(i, CRTP::getComponent(i) OP val);\ return output;\ }\ NBL_CONSTEXPR_INLINE_FUNC enable_if_t< ENABLE_CONDITION , this_t> operator##OP (this_t other)\ @@ -230,7 +248,7 @@ struct emulated_vector : CRTP this_t output;\ [[unroll]]\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, CRTP::getComponent(i) + other.getComponent(i));\ + output.setComponent(i, CRTP::getComponent(i) OP other.getComponent(i));\ return output;\ } @@ -244,6 +262,24 @@ struct emulated_vector : CRTP #undef NBL_EMULATED_VECTOR_OPERATOR + #define NBL_EMULATED_VECTOR_COMPARISON(OP) NBL_CONSTEXPR_INLINE_FUNC vector operator##OP (this_t other)\ + {\ + vector output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output[i] = CRTP::getComponent(i) OP other.getComponent(i);\ + return output;\ + } + + NBL_EMULATED_VECTOR_COMPARISON(==) + 
NBL_EMULATED_VECTOR_COMPARISON(!=) + NBL_EMULATED_VECTOR_COMPARISON(<) + NBL_EMULATED_VECTOR_COMPARISON(<=) + NBL_EMULATED_VECTOR_COMPARISON(>) + NBL_EMULATED_VECTOR_COMPARISON(>=) + + #undef NBL_EMULATED_VECTOR_COMPARISON + NBL_CONSTEXPR_INLINE_FUNC ComponentType calcComponentSum() { ComponentType sum = ComponentType::create(0); @@ -442,7 +478,7 @@ namespace impl template struct static_cast_helper, vector, void> { - static inline emulated_vector_t2 cast(vector vec) + NBL_CONSTEXPR_STATIC_INLINE emulated_vector_t2 cast(vector vec) { emulated_vector_t2 output; output.x = _static_cast(vec.x); @@ -455,7 +491,7 @@ struct static_cast_helper, vector, void> template struct static_cast_helper, vector, void> { - static inline emulated_vector_t3 cast(vector vec) + NBL_CONSTEXPR_STATIC_INLINE emulated_vector_t3 cast(vector vec) { emulated_vector_t3 output; output.x = _static_cast(vec.x); @@ -469,7 +505,7 @@ struct static_cast_helper, vector, void> template struct static_cast_helper, vector, void> { - static inline emulated_vector_t4 cast(vector vec) + NBL_CONSTEXPR_STATIC_INLINE emulated_vector_t4 cast(vector vec) { emulated_vector_t4 output; output.x = _static_cast(vec.x); @@ -487,7 +523,7 @@ struct static_cast_helper, emulated_vector_t; using InputVecType = emulated_vector_t; - static inline OutputVecType cast(InputVecType vec) + NBL_CONSTEXPR_STATIC_INLINE OutputVecType cast(InputVecType vec) { array_get getter; array_set setter; @@ -500,6 +536,53 @@ struct static_cast_helper, emulated_vector_t\ +struct static_cast_helper, emulated_vector_t##N , void>\ +{\ + using OutputVecType = emulated_vector_t##N ;\ + using InputVecType = emulated_vector_t##N ;\ + NBL_CONSTEXPR_STATIC_INLINE OutputVecType cast(InputVecType vec)\ + {\ + array_get getter;\ + array_set setter;\ + OutputVecType output;\ + for (int i = 0; i < N; ++i)\ + setter(output, i, _static_cast(getter(vec, i)));\ + return output;\ + }\ +}; + +NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST(2) 
+NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST(3) +NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST(4) + +#undef NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST + +#define NBL_EMULATED_VEC_TRUNCATION(N, M) template\ +struct static_cast_helper, emulated_vector_t##M , void>\ +{\ + using OutputVecType = emulated_vector_t##N ;\ + using InputVecType = emulated_vector_t##M ;\ + NBL_CONSTEXPR_STATIC_INLINE OutputVecType cast(InputVecType vec)\ + {\ + array_get getter;\ + array_set setter;\ + OutputVecType output;\ + for (int i = 0; i < N; ++i)\ + setter(output, i, getter(vec, i));\ + return output;\ + }\ +}; + +NBL_EMULATED_VEC_TRUNCATION(2, 2) +NBL_EMULATED_VEC_TRUNCATION(2, 3) +NBL_EMULATED_VEC_TRUNCATION(2, 4) +NBL_EMULATED_VEC_TRUNCATION(3, 3) +NBL_EMULATED_VEC_TRUNCATION(3, 4) +NBL_EMULATED_VEC_TRUNCATION(4, 4) + +#undef NBL_EMULATED_VEC_TRUNCATION + } } diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 93687bdb6a..45198cbe7a 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -80,7 +80,7 @@ struct reference_wrapper : enable_if_t< // TODO: partial specializations for T being a special SPIR-V type for image ops, etc. -#define ALIAS_STD(NAME,OP) template struct NAME { \ +#define ALIAS_STD(NAME,OP) template struct NAME { \ using type_t = T; \ \ T operator()(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) \ @@ -92,7 +92,7 @@ struct reference_wrapper : enable_if_t< #else // CPP -#define ALIAS_STD(NAME,OP) template struct NAME : std::NAME { \ +#define ALIAS_STD(NAME,OP) template struct NAME : std::NAME { \ using type_t = T; #endif @@ -136,13 +136,35 @@ ALIAS_STD(divides,/) }; +ALIAS_STD(equal_to,==) }; +ALIAS_STD(not_equal_to,!=) }; ALIAS_STD(greater,>) }; ALIAS_STD(less,<) }; ALIAS_STD(greater_equal,>=) }; -ALIAS_STD(less_equal,<=) }; +ALIAS_STD(less_equal, <= ) }; #undef ALIAS_STD +// The above comparison operators return bool on STD. 
Here's a specialization so that they return `vector` for vectorial types +#define NBL_COMPARISON_VECTORIAL_SPECIALIZATION(NAME, OP) template NBL_PARTIAL_REQ_TOP(concepts::Vectorial)\ +struct NAME ) >\ +{\ + using type_t = T;\ + vector::Dimension> operator()(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs)\ + {\ + return lhs OP rhs;\ + }\ +}; + +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(equal_to, ==) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(not_equal_to, !=) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(greater, >) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less, <) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(greater_equal, >=) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less_equal, <=) + +#undef NBL_COMPARISON_VECTORIAL_SPECIALIZATION + // ------------------------ Compound assignment operators ---------------------- #define COMPOUND_ASSIGN(NAME) template struct NAME##_assign { \ diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index e8cb2b73bf..d2fca1165f 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -8,6 +8,7 @@ #include "nbl/builtin/hlsl/emulated/int64_t.hlsl" #include "nbl/builtin/hlsl/mpl.hlsl" #include "nbl/builtin/hlsl/portable/vector_t.hlsl" +#include "nbl/builtin/hlsl/mpl.hlsl" // TODO: mega macro to get functional plus, minus, plus_assign, minus_assign @@ -82,61 +83,65 @@ NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS // ----------------------------------------------------------------- MORTON ENCODER --------------------------------------------------- -template && (Dim * Bits <= 64) && (sizeof(encode_t) * 8 >= Dim * Bits)) +template && Dim * Bits <= 64 && 8 * sizeof(encode_t) == mpl::round_up_to_pot_v) struct MortonEncoder { template 16), vector, vector > - NBL_FUNC_REQUIRES(concepts::IntVector && sizeof(typename vector_traits::scalar_type) * 8 >= Bits) - NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) + 
NBL_FUNC_REQUIRES(concepts::IntVector && 8 * sizeof(typename vector_traits::scalar_type) >= Bits) + /** + * @brief Interleaves each coordinate with `Dim - 1` zeros inbetween each bit, and left-shifts each by their coordinate index + * + * @param [in] decodedValue Cartesian coordinates to interleave and shift + */ + NBL_CONSTEXPR_STATIC_INLINE_FUNC portable_vector_t interleaveShift(NBL_CONST_REF_ARG(decode_t) decodedValue) { + NBL_CONSTEXPR_STATIC encode_t EncodeMasks[CodingStages + 1] = { _static_cast(coding_mask_v), _static_cast(coding_mask_v), _static_cast(coding_mask_v) , _static_cast(coding_mask_v) , _static_cast(coding_mask_v) , _static_cast(coding_mask_v) }; left_shift_operator > leftShift; - portable_vector_t encoded = _static_cast >(decodedValue); - NBL_IF_CONSTEXPR(Bits > 16) - { - encoded = encoded | leftShift(encoded, 16 * (Dim - 1)); - } - NBL_IF_CONSTEXPR(Bits > 8) - { - encoded = encoded & _static_cast(coding_mask_v); - encoded = encoded | leftShift(encoded, 8 * (Dim - 1)); - } - NBL_IF_CONSTEXPR(Bits > 4) - { - encoded = encoded & _static_cast(coding_mask_v); - encoded = encoded | leftShift(encoded, 4 * (Dim - 1)); - } - NBL_IF_CONSTEXPR(Bits > 2) - { - encoded = encoded & _static_cast(coding_mask_v); - encoded = encoded | leftShift(encoded, 2 * (Dim - 1)); - } - NBL_IF_CONSTEXPR(Bits > 1) + portable_vector_t interleaved = _static_cast >(decodedValue)& EncodeMasks[CodingStages]; + + NBL_CONSTEXPR_STATIC uint16_t Stages = mpl::log2_ceil_v; + [[unroll]] + for (uint16_t i = Stages; i > 0; i--) { - encoded = encoded & _static_cast(coding_mask_v); - encoded = encoded | leftShift(encoded, 1 * (Dim - 1)); + interleaved = interleaved | leftShift(interleaved, (uint32_t(1) << (i - 1)) * (Dim - 1)); + interleaved = interleaved & EncodeMasks[i - 1]; } - encoded = encoded & _static_cast(coding_mask_v); - encoded = leftShift(encoded, _static_cast >(vector(0, 1, 2, 3))); - // The `encoded` above is vectorized for each coord, here we collapse all coords into a 
single element - encode_t actualEncoded = _static_cast(uint64_t(0)); + + // After interleaving, shift each coordinate left by their index + return leftShift(interleaved, _static_cast >(vector(0, 1, 2, 3))); + } + + template 16), vector, vector > + NBL_FUNC_REQUIRES(concepts::IntVector && 8 * sizeof(typename vector_traits::scalar_type) >= Bits) + /** + * @brief Encodes a vector of cartesian coordinates as a Morton code + * + * @param [in] decodedValue Cartesian coordinates to encode + */ + NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) + { + portable_vector_t interleaveShifted = interleaveShift(decodedValue); + + encode_t encoded = _static_cast(uint64_t(0)); array_get, encode_t> getter; [[unroll]] for (uint16_t i = 0; i < Dim; i++) - actualEncoded = actualEncoded | getter(encoded, i); - - return actualEncoded; + encoded = encoded | getter(interleaveShifted, i); + + return encoded; } }; // ----------------------------------------------------------------- MORTON DECODER --------------------------------------------------- -template && (Dim* Bits <= 64) && (sizeof(encode_t) * 8 >= Dim * Bits)) +template && Dim * Bits <= 64 && 8 * sizeof(encode_t) == mpl::round_up_to_pot_v) struct MortonDecoder { template 16), vector, vector > - NBL_FUNC_REQUIRES(concepts::IntVector && sizeof(typename vector_traits::scalar_type) * 8 >= Bits) + NBL_FUNC_REQUIRES(concepts::IntVector && 8 * sizeof(typename vector_traits::scalar_type) >= Bits) NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) { + NBL_CONSTEXPR_STATIC encode_t DecodeMasks[CodingStages + 1] = { _static_cast(coding_mask_v), _static_cast(coding_mask_v), _static_cast(coding_mask_v) , _static_cast(coding_mask_v) , _static_cast(coding_mask_v) , _static_cast(coding_mask_v) }; arithmetic_right_shift_operator > rightShift; portable_vector_t decoded; array_set, encode_t> setter; @@ -146,38 +151,28 @@ struct MortonDecoder setter(decoded, i, 
encodedValue); decoded = rightShift(decoded, _static_cast >(vector(0, 1, 2, 3))); - NBL_IF_CONSTEXPR(Bits > 1) - { - decoded = decoded & _static_cast(coding_mask_v); - decoded = decoded | rightShift(decoded, 1 * (Dim - 1)); - } - NBL_IF_CONSTEXPR(Bits > 2) - { - decoded = decoded & _static_cast(coding_mask_v); - decoded = decoded | rightShift(decoded, 2 * (Dim - 1)); - } - NBL_IF_CONSTEXPR(Bits > 4) - { - decoded = decoded & _static_cast(coding_mask_v); - decoded = decoded | rightShift(decoded, 4 * (Dim - 1)); - } - NBL_IF_CONSTEXPR(Bits > 8) - { - decoded = decoded & _static_cast(coding_mask_v); - decoded = decoded | rightShift(decoded, 8 * (Dim - 1)); - } - NBL_IF_CONSTEXPR(Bits > 16) + NBL_CONSTEXPR_STATIC uint16_t Stages = mpl::log2_ceil_v; + [[unroll]] + for (uint16_t i = 0; i < Stages; i++) { - decoded = decoded & _static_cast(coding_mask_v); - decoded = decoded | rightShift(decoded, 16 * (Dim - 1)); + decoded = decoded & DecodeMasks[i]; + decoded = decoded | rightShift(decoded, (uint32_t(1) << i) * (Dim - 1)); } - return _static_cast(decoded & _static_cast(coding_mask_v)); + // If `Bits` is greater than half the bitwidth of the decode type, then we can avoid `&`ing against the last mask since duplicated MSB get truncated + NBL_IF_CONSTEXPR(Bits > 4 * sizeof(typename vector_traits::scalar_type)) + return _static_cast(decoded); + else + return _static_cast(decoded & DecodeMasks[CodingStages]); } }; // ---------------------------------------------------- COMPARISON OPERATORS --------------------------------------------------------------- // Here because no partial specialization of methods +// `BitsAlreadySpread` assumes both pre-interleaved and pre-shifted + +template +NBL_BOOL_CONCEPT Comparable = concepts::IntegralLikeScalar && is_signed_v == Signed && ((BitsAlreadySpread && sizeof(I) == sizeof(storage_t)) || (!BitsAlreadySpread && 8 * sizeof(I) == mpl::round_up_to_pot_v)); template struct Equals; @@ -185,105 +180,76 @@ struct Equals; template struct Equals 
{ - NBL_CONSTEXPR_INLINE_FUNC vector operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_STATIC_INLINE_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { - NBL_CONSTEXPR_STATIC storage_t Mask = _static_cast(impl::coding_mask_v); - left_shift_operator leftShift; - vector retVal; - [[unroll]] - for (uint16_t i = 0; i < D; i++) - { - retVal[i] = (value & leftShift(Mask, i)) == leftShift(rhs[i], i); - } - return retVal; + NBL_CONSTEXPR portable_vector_t zeros = _static_cast >(_static_cast >(vector(0,0,0,0))); + + portable_vector_t rhsCasted = _static_cast >(rhs); + portable_vector_t xored = rhsCasted ^ value; + return xored == zeros; } }; template struct Equals { - template - NBL_CONSTEXPR_INLINE_FUNC enable_if_t&& is_scalar_v && (is_signed_v == Signed), vector > - operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_STATIC_INLINE_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { - using U = make_unsigned_t; - vector interleaved; - [[unroll]] - for (uint16_t i = 0; i < D; i++) - { - interleaved[i] = impl::MortonEncoder::encode(_static_cast(rhs[i])); - } - Equals equals; - return equals(value, interleaved); + const portable_vector_t interleaved = MortonEncoder::interleaveShift(rhs); + return Equals::__call(value, interleaved); } }; template struct BaseComparison; -// Aux method for extracting highest bit, used by the comparison below -template -NBL_CONSTEXPR_INLINE_FUNC storage_t extractHighestBit(storage_t value, uint16_t coord) -{ - // Like above, if the number encoded in `coord` gets `bits(coord) = ceil((BitWidth - coord)/D)` bits for representation, then the highest index of these - // bits is `bits(coord) - 1` - const uint16_t coordHighestBitIdx = Bits / D - ((coord < Bits % D) ? 
uint16_t(0) : uint16_t(1)); - // This is the index of that bit as an index in the encoded value - const uint16_t shift = coordHighestBitIdx * D + coord; - left_shift_operator leftShift; - return value & leftShift(_static_cast(uint16_t(1)), shift); -} +// Aux variable that has only the sign bit for the first of D dimensions +template +NBL_CONSTEXPR uint64_t SignMask = uint64_t(1) << (D * (Bits - 1)); template struct BaseComparison { - NBL_CONSTEXPR_INLINE_FUNC vector operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_STATIC_INLINE_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { - NBL_CONSTEXPR_STATIC storage_t Mask = _static_cast(impl::coding_mask_v); - left_shift_operator leftShift; - vector retVal; + NBL_CONSTEXPR_STATIC portable_vector_t InterleaveMasks = _static_cast >(_static_cast >(vector(coding_mask_v, coding_mask_v << 1, coding_mask_v << 2, coding_mask_v << 3))); + NBL_CONSTEXPR_STATIC portable_vector_t SignMasks = _static_cast >(_static_cast >(vector(SignMask, SignMask << 1, SignMask << 2, SignMask << 3))); ComparisonOp comparison; - [[unroll]] - for (uint16_t i = 0; i < D; i++) - { - storage_t thisCoord = value & leftShift(Mask, i); - storage_t rhsCoord = leftShift(rhs[i], i); - // If coordinate is negative, we add 1s in every bit not corresponding to coord - if (extractHighestBit(thisCoord) != _static_cast(uint64_t(0))) - thisCoord = thisCoord | ~leftShift(Mask, i); - if (extractHighestBit(rhsCoord) != _static_cast(uint64_t(0))) - rhsCoord = rhsCoord | ~leftShift(Mask, i); - retVal[i] = comparison(thisCoord, rhsCoord); - } - return retVal; + // Obtain a vector of deinterleaved coordinates and flip their sign bits + const portable_vector_t thisCoord = (InterleaveMasks & value) ^ SignMasks; + // rhs already deinterleaved, just have to cast type and flip sign + const portable_vector_t rhsCoord = _static_cast >(rhs) ^ SignMasks; + + return 
comparison(thisCoord, rhsCoord); } }; template struct BaseComparison { - template - NBL_CONSTEXPR_INLINE_FUNC enable_if_t&& is_scalar_v && (is_signed_v == Signed), vector > - operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_STATIC_INLINE_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { - using U = make_unsigned_t; - vector interleaved; - [[unroll]] - for (uint16_t i = 0; i < D; i++) - { - interleaved[i] = impl::MortonEncoder::encode(_static_cast(rhs[i])); - } + const vector interleaved = MortonEncoder::interleaveShift(rhs); BaseComparison baseComparison; return baseComparison(value, interleaved); } }; template -struct LessThan : BaseComparison > {}; +struct LessThan : BaseComparison > > {}; template -struct LessEquals : BaseComparison > {}; +struct LessEquals : BaseComparison > > {}; +template +struct GreaterThan : BaseComparison > > {}; + +template +struct GreaterEquals : BaseComparison > > {}; } //namespace impl @@ -313,19 +279,11 @@ struct code * @param [in] cartesian Coordinates to encode. 
Signedness MUST match the signedness of this Morton code class */ template - NBL_CONSTEXPR_STATIC_FUNC enable_if_t && is_scalar_v && (is_signed_v == Signed), this_t> + NBL_CONSTEXPR_STATIC_FUNC enable_if_t && is_scalar_v && (is_signed_v == Signed) && (8 * sizeof(I) >= Bits), this_t> create(NBL_CONST_REF_ARG(vector) cartesian) { - using U = make_unsigned_t; - left_shift_operator leftShift; - storage_t encodedCartesian = _static_cast(uint64_t(0)); - [[unroll]] - for (uint16_t i = 0; i < D; i++) - { - encodedCartesian = encodedCartesian | leftShift(impl::MortonEncoder::encode(_static_cast(cartesian[i])), i); - } this_t retVal; - retVal.value = encodedCartesian; + retVal.value = impl::MortonEncoder::encode(cartesian); return retVal; } @@ -337,8 +295,7 @@ struct code * * @param [in] cartesian Coordinates to encode */ - - template + template= Bits) explicit code(NBL_CONST_REF_ARG(vector) cartesian) { *this = create(cartesian); @@ -347,11 +304,8 @@ struct code /** * @brief Decodes this Morton code back to a set of cartesian coordinates */ - template - constexpr inline explicit operator vector() const noexcept - { - return _static_cast, morton::code, Bits, D>>(*this); - } + template= Bits) + constexpr inline explicit operator vector() const noexcept; #endif @@ -398,14 +352,13 @@ struct code NBL_CONSTEXPR_INLINE_FUNC this_signed_t operator-() NBL_CONST_MEMBER_FUNC { - left_shift_operator leftShift; - // allOnes encodes a cartesian coordinate with all values set to 1 - this_t allOnes; - allOnes.value = leftShift(_static_cast(1), D) - _static_cast(1); - // Using 2's complement property that arithmetic negation can be obtained by bitwise negation then adding 1 - this_signed_t retVal; - retVal.value = (operator~() + allOnes).value; - return retVal; + this_t zero; + zero.value = _static_cast(0); + #ifndef __HLSL_VERSION + return zero - *this; + #else + return zero - this; + #endif } // ------------------------------------------------------- BINARY ARITHMETIC OPERATORS 
------------------------------------------------- @@ -453,48 +406,51 @@ struct code return value == rhs.value; } - template - enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator==(NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_INLINE_FUNC vector equals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - impl::Equals equals; - return equals(value, rhs); - } + return impl::Equals::__call(value, rhs); + } NBL_CONSTEXPR_INLINE_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { return value != rhs.value; } - template - enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator!=(NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_INLINE_FUNC vector notEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return !operator== (rhs); + return !equals(rhs); } - template - enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator<(NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_INLINE_FUNC vector less(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - impl::LessThan lessThan; - return lessThan(value, rhs); + return impl::LessThan::__call(value, rhs); } - template - enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator<=(NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_INLINE_FUNC vector lessEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - impl::LessEquals lessEquals; - return lessEquals(value, rhs); + return impl::LessEquals::__call(value, rhs); } - template - enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator>(NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_INLINE_FUNC vector greater(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return !operator<= (rhs); + return impl::GreaterThan::__call(value, rhs); } - template - enable_if_t<(is_signed_v == Signed) || (is_same_v && 
BitsAlreadySpread), vector > operator>=(NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_INLINE_FUNC vector greaterEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return !operator< (rhs); + return impl::GreaterEquals::__call(value, rhs); } }; @@ -571,26 +527,29 @@ namespace impl { // I must be of same signedness as the morton code, and be wide enough to hold each component -template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (8 * sizeof(I) >= Bits)) -struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (8 * sizeof(I) >= Bits)) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) +struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) > { NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(morton::code, Bits, D, _uint64_t>) val) { - using U = make_unsigned_t; using storage_t = typename morton::code, Bits, D, _uint64_t>::storage_t; - arithmetic_right_shift_operator rightShift; - vector cartesian; - [[unroll]] - for (uint16_t i = 0; i < D; i++) - { - cartesian[i] = _static_cast(morton::impl::MortonDecoder::template decode(rightShift(val.value, i))); - } - return cartesian; + return morton::impl::MortonDecoder::decode(val.value); } }; } // namespace impl +#ifndef __HLSL_VERSION + +template && D* Bits <= 64) +template = Bits) +constexpr inline morton::code::operator vector() const noexcept +{ + return _static_cast, morton::code, Bits, D>>(*this); +} + +#endif + } //namespace hlsl } //namespace nbl diff --git a/include/nbl/builtin/hlsl/mpl.hlsl b/include/nbl/builtin/hlsl/mpl.hlsl index 2015b05b3d..67f6445324 100644 --- a/include/nbl/builtin/hlsl/mpl.hlsl +++ b/include/nbl/builtin/hlsl/mpl.hlsl @@ -43,13 +43,23 @@ struct countl_zero : impl::countl_zero template NBL_CONSTEXPR T countl_zero_v = countl_zero::value; +template +struct is_pot : bool_constant< (N > 0 && !(N & 
(N - 1))) > {}; +template +NBL_CONSTEXPR bool is_pot_v = is_pot::value; + template struct log2 { NBL_CONSTEXPR_STATIC_INLINE uint16_t value = X ? (1ull<<6)-countl_zero::value-1 : -1ull; }; template -NBL_CONSTEXPR uint64_t log2_v = log2::value; +NBL_CONSTEXPR uint16_t log2_v = log2::value; + +template +struct log2_ceil : integral_constant + uint16_t(!is_pot_v)> {}; +template +NBL_CONSTEXPR uint16_t log2_ceil_v = log2_ceil::value; template struct rotl @@ -79,11 +89,6 @@ struct align_up template NBL_CONSTEXPR uint64_t align_up_v = align_up::value; -template -struct is_pot : bool_constant< (N > 0 && !(N & (N - 1))) > {}; -template -NBL_CONSTEXPR bool is_pot_v = is_pot::value; - template struct max { @@ -99,6 +104,17 @@ struct min }; template NBL_CONSTEXPR T min_v = min::value; + +template +struct round_up_to_pot : integral_constant > {}; +template +NBL_CONSTEXPR uint64_t round_up_to_pot_v = round_up_to_pot::value; + +template +struct round_down_to_pot : integral_constant > {}; +template +NBL_CONSTEXPR uint64_t round_down_to_pot_v = round_down_to_pot::value; + } } } From f954522001947a4f7f4c74696b71571924a5c590 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Thu, 24 Apr 2025 15:57:18 -0300 Subject: [PATCH 026/472] Removed `NBL_CONSTEXPR_INLINE_FUNC` macro, replaced all usages with `NBL_CONSTEXPR_FUNC` Adds `OpUndef` to spirv `intrinsics.hlsl` and `cpp_compat.hlsl` Adds an explicit `truncate` function for vectors and emulated vectors Adds a bunch of specializations for vectorial types in `functional.hlsl` Bugfixes and changes to Morton codes, very close to them working properly with emulated ints --- include/nbl/builtin/hlsl/algorithm.hlsl | 18 +- include/nbl/builtin/hlsl/cpp_compat.hlsl | 3 +- include/nbl/builtin/hlsl/cpp_compat/basic.h | 27 +- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 17 +- .../hlsl/cpp_compat/impl/vector_impl.hlsl | 35 -- .../builtin/hlsl/cpp_compat/intrinsics.hlsl | 12 +- .../nbl/builtin/hlsl/cpp_compat/promote.hlsl | 12 +- 
.../nbl/builtin/hlsl/cpp_compat/truncate.hlsl | 76 ++++ .../nbl/builtin/hlsl/emulated/float64_t.hlsl | 16 +- .../builtin/hlsl/emulated/float64_t_impl.hlsl | 16 +- .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 160 ++++--- .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 423 ++++++++++-------- include/nbl/builtin/hlsl/functional.hlsl | 144 ++++-- include/nbl/builtin/hlsl/ieee754.hlsl | 16 +- include/nbl/builtin/hlsl/ieee754/impl.hlsl | 16 +- include/nbl/builtin/hlsl/morton.hlsl | 358 ++++++++------- .../builtin/hlsl/spirv_intrinsics/core.hlsl | 7 +- include/nbl/builtin/hlsl/type_traits.hlsl | 2 + src/nbl/builtin/CMakeLists.txt | 2 +- 19 files changed, 798 insertions(+), 562 deletions(-) delete mode 100644 include/nbl/builtin/hlsl/cpp_compat/impl/vector_impl.hlsl create mode 100644 include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl diff --git a/include/nbl/builtin/hlsl/algorithm.hlsl b/include/nbl/builtin/hlsl/algorithm.hlsl index 3a7c4963c2..0178673f4e 100644 --- a/include/nbl/builtin/hlsl/algorithm.hlsl +++ b/include/nbl/builtin/hlsl/algorithm.hlsl @@ -18,7 +18,7 @@ namespace impl // TODO: use structs template - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) { T tmp = lhs; lhs = rhs; @@ -26,7 +26,7 @@ namespace impl } template<> - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(uint16_t) lhs, NBL_REF_ARG(uint16_t) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(uint16_t) lhs, NBL_REF_ARG(uint16_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -34,7 +34,7 @@ namespace impl } template<> - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(uint32_t) lhs, NBL_REF_ARG(uint32_t) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(uint32_t) lhs, NBL_REF_ARG(uint32_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -42,7 +42,7 @@ namespace impl } template<> - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(uint64_t) lhs, NBL_REF_ARG(uint64_t) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(uint64_t) lhs, 
NBL_REF_ARG(uint64_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -50,7 +50,7 @@ namespace impl } template<> - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(int16_t) lhs, NBL_REF_ARG(int16_t) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(int16_t) lhs, NBL_REF_ARG(int16_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -58,7 +58,7 @@ namespace impl } template<> - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(int32_t) lhs, NBL_REF_ARG(int32_t) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(int32_t) lhs, NBL_REF_ARG(int32_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -66,7 +66,7 @@ namespace impl } template<> - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(int64_t) lhs, NBL_REF_ARG(int64_t) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(int64_t) lhs, NBL_REF_ARG(int64_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -74,7 +74,7 @@ namespace impl } #else template - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) { std::swap(lhs, rhs); } @@ -82,7 +82,7 @@ namespace impl } template -NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) +NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) { impl::swap(lhs, rhs); } diff --git a/include/nbl/builtin/hlsl/cpp_compat.hlsl b/include/nbl/builtin/hlsl/cpp_compat.hlsl index cb06447aa1..03d47864fb 100644 --- a/include/nbl/builtin/hlsl/cpp_compat.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat.hlsl @@ -5,8 +5,9 @@ // it includes vector and matrix #include #include +#include // Had to push some stuff here to avoid circular dependencies -#include +#include #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index f01d2d78ec..0985af6eb3 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -14,8 +14,6 @@ #define NBL_CONSTEXPR_INLINE constexpr inline #define NBL_CONSTEXPR_STATIC_INLINE constexpr 
static inline #define NBL_CONSTEXPR_STATIC_FUNC constexpr static -#define NBL_CONSTEXPR_INLINE_FUNC constexpr inline -#define NBL_CONSTEXPR_STATIC_INLINE_FUNC constexpr static inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE constexpr #define NBL_CONST_MEMBER_FUNC const #define NBL_IF_CONSTEXPR(...) if constexpr (__VA_ARGS__) @@ -44,13 +42,11 @@ namespace nbl::hlsl #define ARROW .arrow(). #define NBL_CONSTEXPR const static // TODO: rename to NBL_CONSTEXPR_VAR -#define NBL_CONSTEXPR_FUNC +#define NBL_CONSTEXPR_FUNC inline #define NBL_CONSTEXPR_STATIC const static #define NBL_CONSTEXPR_INLINE const static #define NBL_CONSTEXPR_STATIC_INLINE const static -#define NBL_CONSTEXPR_STATIC_FUNC static -#define NBL_CONSTEXPR_INLINE_FUNC inline -#define NBL_CONSTEXPR_STATIC_INLINE_FUNC static inline +#define NBL_CONSTEXPR_STATIC_FUNC static inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC inline #define NBL_CONST_MEMBER_FUNC #define NBL_IF_CONSTEXPR(...) if (__VA_ARGS__) @@ -90,7 +86,7 @@ namespace impl template struct static_cast_helper { - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(NBL_CONST_REF_ARG(From) u) + NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) u) { #ifndef __HLSL_VERSION return static_cast(u); @@ -100,25 +96,10 @@ struct static_cast_helper } }; -// CPP-side, this can invoke the copy constructor if the copy is non-trivial in generic code -// HLSL-side, this enables generic conversion code between types, contemplating the case where no conversion is needed -template -struct static_cast_helper -{ - NBL_CONSTEXPR_STATIC_INLINE_FUNC Same cast(NBL_CONST_REF_ARG(Same) s) - { -#ifndef __HLSL_VERSION - return static_cast(s); -#else - return s; -#endif - } -}; - } template -NBL_CONSTEXPR_INLINE_FUNC To _static_cast(NBL_CONST_REF_ARG(From) v) +NBL_CONSTEXPR_FUNC To _static_cast(NBL_CONST_REF_ARG(From) v) { return impl::static_cast_helper::cast(v); } diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl 
b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index e1ba823b9b..4f7c7370bc 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -109,6 +109,8 @@ template struct addCarry_helper; template struct subBorrow_helper; +template +struct undef_helper; #ifdef __HLSL_VERSION // HLSL only specializations @@ -172,6 +174,7 @@ template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(nClamp_helper, nClamp, // Can use trivial case and not worry about restricting `T` with a concept since `spirv::AddCarryOutput / SubBorrowOutput` already take care of that template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(addCarry_helper, addCarry, (T), (T)(T), spirv::AddCarryOutput) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(subBorrow_helper, subBorrow, (T), (T)(T), spirv::SubBorrowOutput) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(undef_helper, undef, (T), , T) #define BITCOUNT_HELPER_RETRUN_TYPE conditional_t, vector::Dimension>, int32_t> template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(bitCount_helper, bitCount, (T), (T), BITCOUNT_HELPER_RETRUN_TYPE) @@ -640,7 +643,7 @@ template NBL_PARTIAL_REQ_TOP(concepts::BooleanScalar) struct select_helper) > { - NBL_CONSTEXPR_STATIC_INLINE_FUNC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) + NBL_CONSTEXPR_STATIC_FUNC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) { return condition ? 
object1 : object2; } @@ -650,7 +653,7 @@ template NBL_PARTIAL_REQ_TOP(concepts::Boolean&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) struct select_helper&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) > { - NBL_CONSTEXPR_STATIC_INLINE_FUNC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) + NBL_CONSTEXPR_STATIC_FUNC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) { using traits = hlsl::vector_traits; array_get conditionGetter; @@ -665,6 +668,16 @@ struct select_helper&& concepts::V } }; +template +struct undef_helper +{ + NBL_CONSTEXPR_STATIC_FUNC T __call() + { + T t; + return t; + } +}; + #endif // C++ only specializations // C++ and HLSL specializations diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/vector_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/vector_impl.hlsl deleted file mode 100644 index 524d1fa45e..0000000000 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/vector_impl.hlsl +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef _NBL_BUILTIN_HLSL_CPP_COMPAT_IMPL_VECTOR_IMPL_INCLUDED_ -#define _NBL_BUILTIN_HLSL_CPP_COMPAT_IMPL_VECTOR_IMPL_INCLUDED_ - -#include -#include -#include - -// To prevent implicit truncation warnings -namespace nbl -{ -namespace hlsl -{ -namespace impl -{ - -template NBL_PARTIAL_REQ_TOP(N <= M) -struct static_cast_helper, vector NBL_PARTIAL_REQ_BOT(N <= M) > -{ - NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(vector) val) - { - vector retVal; - [[unroll]] - for (uint16_t i = 0; i < N; i++) - { - retVal[i] = val[i]; - } - return retVal; - } -}; - -} -} -} - -#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl index 284ba564d7..c511042c27 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl @@ -23,6 +23,12 @@ 
namespace nbl namespace hlsl { +template +NBL_CONSTEXPR_FUNC T undef() +{ + return cpp_compat_intrinsics_impl::undef_helper::__call(); +} + template inline typename cpp_compat_intrinsics_impl::bitCount_helper::return_t bitCount(NBL_CONST_REF_ARG(T) val) { @@ -151,7 +157,7 @@ inline bool any(Vector vec) } template -NBL_CONSTEXPR_INLINE_FUNC ResultType select(Condition condition, ResultType object1, ResultType object2) +NBL_CONSTEXPR_FUNC ResultType select(Condition condition, ResultType object1, ResultType object2) { return cpp_compat_intrinsics_impl::select_helper::__call(condition, object1, object2); } @@ -224,13 +230,13 @@ inline T refract(NBL_CONST_REF_ARG(T) I, NBL_CONST_REF_ARG(T) N, NBL_CONST_REF_A } template -NBL_CONSTEXPR_INLINE_FUNC spirv::AddCarryOutput addCarry(NBL_CONST_REF_ARG(T) operand1, NBL_CONST_REF_ARG(T) operand2) +NBL_CONSTEXPR_FUNC spirv::AddCarryOutput addCarry(NBL_CONST_REF_ARG(T) operand1, NBL_CONST_REF_ARG(T) operand2) { return cpp_compat_intrinsics_impl::addCarry_helper::__call(operand1, operand2); } template -NBL_CONSTEXPR_INLINE_FUNC spirv::SubBorrowOutput subBorrow(NBL_CONST_REF_ARG(T) operand1, NBL_CONST_REF_ARG(T) operand2) +NBL_CONSTEXPR_FUNC spirv::SubBorrowOutput subBorrow(NBL_CONST_REF_ARG(T) operand1, NBL_CONST_REF_ARG(T) operand2) { return cpp_compat_intrinsics_impl::subBorrow_helper::__call(operand1, operand2); } diff --git a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl index 51ca73f6d3..0afe214de7 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl @@ -15,7 +15,7 @@ namespace impl template struct Promote { - T operator()(U v) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(U) v) { return T(v); } @@ -26,7 +26,7 @@ struct Promote template struct Promote, U> { - enable_if_t::value && is_scalar::value, vector > operator()(U v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > 
operator()(NBL_CONST_REF_ARG(U) v) { vector promoted = {Scalar(v)}; return promoted; @@ -36,7 +36,7 @@ struct Promote, U> template struct Promote, U> { - enable_if_t::value && is_scalar::value, vector > operator()(U v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(NBL_CONST_REF_ARG(U) v) { vector promoted = {Scalar(v), Scalar(v)}; return promoted; @@ -46,7 +46,7 @@ struct Promote, U> template struct Promote, U> { - enable_if_t::value && is_scalar::value, vector > operator()(U v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(NBL_CONST_REF_ARG(U) v) { vector promoted = {Scalar(v), Scalar(v), Scalar(v)}; return promoted; @@ -56,7 +56,7 @@ struct Promote, U> template struct Promote, U> { - enable_if_t::value && is_scalar::value, vector > operator()(U v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(NBL_CONST_REF_ARG(U) v) { vector promoted = {Scalar(v), Scalar(v), Scalar(v), Scalar(v)}; return promoted; @@ -68,7 +68,7 @@ struct Promote, U> } template -T promote(const U v) // TODO: use NBL_CONST_REF_ARG(U) instead of U v (circular ref) +NBL_CONSTEXPR_FUNC T promote(const U v) // TODO: use NBL_CONST_REF_ARG(U) instead of U v (circular ref) { impl::Promote _promote; return _promote(v); diff --git a/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl new file mode 100644 index 0000000000..a95df183be --- /dev/null +++ b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl @@ -0,0 +1,76 @@ +#ifndef _NBL_BUILTIN_HLSL_CPP_COMPAT_TRUNCATE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_CPP_COMPAT_TRUNCATE_INCLUDED_ + +#include "nbl/builtin/hlsl/type_traits.hlsl" +#include "nbl/builtin/hlsl/concepts/core.hlsl" + +namespace nbl +{ +namespace hlsl +{ + +namespace impl +{ + +template +struct Truncate +{ + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(U) v) + { + return T(v); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::Scalar) +struct 
Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar) > +{ + NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + { + vector truncated = { v[0] }; + return truncated; + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::Scalar && N >= 2) +struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar && N >= 2) > +{ + NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + { + vector truncated = { v[0], v[1]}; + return truncated; + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 3) +struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar&& N >= 3) > +{ + NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + { + vector truncated = { v[0], v[1], v[2] }; + return truncated; + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 4) +struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar&& N >= 4) > +{ + NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + { + vector truncated = { v[0], v[1], v[2], v[3] }; + return truncated; + } +}; + +} //namespace impl + +template +NBL_CONSTEXPR_FUNC T truncate(NBL_CONST_REF_ARG(U) v) +{ + impl::Truncate _truncate; + return _truncate(v); +} + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl index a0cde90df9..2dfc52c957 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl @@ -412,25 +412,25 @@ inline int extractExponent(__VA_ARGS__ x)\ }\ \ template<>\ -NBL_CONSTEXPR_INLINE_FUNC __VA_ARGS__ replaceBiasedExponent(__VA_ARGS__ x, typename unsigned_integer_of_size::type biasedExp)\ +NBL_CONSTEXPR_FUNC __VA_ARGS__ replaceBiasedExponent(__VA_ARGS__ x, typename unsigned_integer_of_size::type biasedExp)\ {\ return __VA_ARGS__(replaceBiasedExponent(x.data, biasedExp));\ }\ \ template <>\ -NBL_CONSTEXPR_INLINE_FUNC __VA_ARGS__ fastMulExp2(__VA_ARGS__ x, int n)\ +NBL_CONSTEXPR_FUNC 
__VA_ARGS__ fastMulExp2(__VA_ARGS__ x, int n)\ {\ return __VA_ARGS__(replaceBiasedExponent(x.data, extractBiasedExponent(x) + uint32_t(n)));\ }\ \ template <>\ -NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size::type extractMantissa(__VA_ARGS__ x)\ +NBL_CONSTEXPR_FUNC unsigned_integer_of_size::type extractMantissa(__VA_ARGS__ x)\ {\ return extractMantissa(x.data);\ }\ \ template <>\ -NBL_CONSTEXPR_INLINE_FUNC uint64_t extractNormalizeMantissa(__VA_ARGS__ x)\ +NBL_CONSTEXPR_FUNC uint64_t extractNormalizeMantissa(__VA_ARGS__ x)\ {\ return extractNormalizeMantissa(x.data);\ }\ @@ -577,10 +577,10 @@ namespace ieee754 { namespace impl { -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } } IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); diff --git a/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl index 44b881345d..df785e3e8f 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl @@ -41,7 +41,7 @@ namespace hlsl { namespace emulated_float64_t_impl { -NBL_CONSTEXPR_INLINE_FUNC uint64_t2 shiftMantissaLeftBy53(uint64_t mantissa64) +NBL_CONSTEXPR_FUNC uint64_t2 
shiftMantissaLeftBy53(uint64_t mantissa64) { uint64_t2 output; output.x = mantissa64 >> (64 - ieee754::traits::mantissaBitCnt); @@ -74,7 +74,7 @@ inline uint64_t castFloat32ToStorageType(float32_t val) } }; -NBL_CONSTEXPR_INLINE_FUNC bool isZero(uint64_t val) +NBL_CONSTEXPR_FUNC bool isZero(uint64_t val) { return (val << 1) == 0ull; } @@ -137,18 +137,18 @@ inline uint64_t reinterpretAsFloat64BitPattern(int64_t val) return sign | reinterpretAsFloat64BitPattern(absVal); }; -NBL_CONSTEXPR_INLINE_FUNC uint64_t flushDenormToZero(uint64_t value) +NBL_CONSTEXPR_FUNC uint64_t flushDenormToZero(uint64_t value) { const uint64_t biasBits = value & ieee754::traits::exponentMask; return biasBits ? value : (value & ieee754::traits::signMask); } -NBL_CONSTEXPR_INLINE_FUNC uint64_t assembleFloat64(uint64_t signShifted, uint64_t expShifted, uint64_t mantissa) +NBL_CONSTEXPR_FUNC uint64_t assembleFloat64(uint64_t signShifted, uint64_t expShifted, uint64_t mantissa) { return signShifted | expShifted | mantissa; } -NBL_CONSTEXPR_INLINE_FUNC bool areBothInfinity(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool areBothInfinity(uint64_t lhs, uint64_t rhs) { lhs &= ~ieee754::traits::signMask; rhs &= ~ieee754::traits::signMask; @@ -156,18 +156,18 @@ NBL_CONSTEXPR_INLINE_FUNC bool areBothInfinity(uint64_t lhs, uint64_t rhs) return lhs == rhs && lhs == ieee754::traits::inf; } -NBL_CONSTEXPR_INLINE_FUNC bool areBothZero(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool areBothZero(uint64_t lhs, uint64_t rhs) { return !bool((lhs | rhs) << 1); } -NBL_CONSTEXPR_INLINE_FUNC bool areBothSameSignZero(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool areBothSameSignZero(uint64_t lhs, uint64_t rhs) { return !bool((lhs) << 1) && (lhs == rhs); } template -NBL_CONSTEXPR_INLINE_FUNC bool operatorLessAndGreaterCommonImplementation(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool operatorLessAndGreaterCommonImplementation(uint64_t lhs, uint64_t rhs) { if (!FastMath) { diff --git 
a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 4f354c900e..8a3fd42faf 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -20,6 +20,7 @@ struct emulated_int64_base { using storage_t = vector; using this_t = emulated_int64_base; + using this_signed_t = emulated_int64_base; storage_t data; @@ -29,6 +30,12 @@ struct emulated_int64_base emulated_int64_base() = default; + // GLM requires these to cast vectors because it uses a native `static_cast` + template + constexpr explicit emulated_int64_base(const I& toEmulate); + + constexpr explicit emulated_int64_base(const emulated_int64_base& other) : data(other.data) {} + #endif /** @@ -36,7 +43,7 @@ struct emulated_int64_base * * @param [in] _data Vector of `uint32_t` encoding the `uint64_t/int64_t` being emulated. Stored as little endian (first component are the lower 32 bits) */ - NBL_CONSTEXPR_STATIC_INLINE_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) { this_t retVal; retVal.data = _data; @@ -46,47 +53,57 @@ struct emulated_int64_base /** * @brief Creates an `emulated_int64` from two `uint32_t`s representing its bitpattern * - * @param [in] hi Highest 32 bits of the `uint64_t/int64_t` being emulated * @param [in] lo Lowest 32 bits of the `uint64_t/int64_t` being emulated + * @param [in] hi Highest 32 bits of the `uint64_t/int64_t` being emulated */ - NBL_CONSTEXPR_STATIC_INLINE_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) lo, NBL_CONST_REF_ARG(uint32_t) hi) + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) lo, NBL_CONST_REF_ARG(uint32_t) hi) { return create(storage_t(lo, hi)); } + // ------------------------------------------------------- CONVERSION OPERATORS--------------------------------------------------------------- + // GLM requires these for vector casts + + #ifndef __HLSL_VERSION + + 
template + constexpr explicit operator I() const noexcept; + + #endif + // ------------------------------------------------------- INTERNAL GETTERS ------------------------------------------------- - NBL_CONSTEXPR_INLINE_FUNC uint32_t __getLSB() NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC uint32_t __getLSB() NBL_CONST_MEMBER_FUNC { return data.x; } - NBL_CONSTEXPR_INLINE_FUNC uint32_t __getMSB() NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC uint32_t __getMSB() NBL_CONST_MEMBER_FUNC { return data.y; } // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- - NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { this_t retVal = create(data & rhs.data); return retVal; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { this_t retVal = create(data | rhs.data); return retVal; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { this_t retVal = create(data ^ rhs.data); return retVal; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC { this_t retVal = create(~data); return retVal; @@ -101,56 +118,62 @@ struct emulated_int64_base // ------------------------------------------------------- ARITHMETIC OPERATORS ------------------------------------------------- - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_signed_t operator-() NBL_CONST_MEMBER_FUNC + { + vector negated = -data; + return this_signed_t::create(_static_cast(negated)); + } + + 
NBL_CONSTEXPR_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { const spirv::AddCarryOutput lowerAddResult = addCarry(__getLSB(), rhs.__getLSB()); - const this_t retVal = create(lowerAddResult.result, __getMSB() + rhs.__getMSB() + lowerAddResult.carry); - return retVal; + return create(lowerAddResult.result, __getMSB() + rhs.__getMSB() + lowerAddResult.carry); } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { const spirv::SubBorrowOutput lowerSubResult = subBorrow(__getLSB(), rhs.__getLSB()); - const this_t retVal = create(lowerSubResult.result, __getMSB() - rhs.__getMSB() - lowerSubResult.borrow); - return retVal; + return create(lowerSubResult.result, __getMSB() - rhs.__getMSB() - lowerSubResult.borrow); } // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- - NBL_CONSTEXPR_INLINE_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - return all(data == rhs.data); + equal_to equals; + return all(equals(data, rhs.data)); } - NBL_CONSTEXPR_INLINE_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - return any(data != rhs.data); + not_equal_to notEquals; + return any(notEquals(data, rhs.data)); } - NBL_CONSTEXPR_INLINE_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { // Either the topmost bits, when interpreted with correct sign, are less than those of `rhs`, or they're equal and the lower bits are less // (lower bits are always positive in both unsigned and 2's complement so comparison 
can happen as-is) const bool MSBEqual = __getMSB() == rhs.__getMSB(); - const bool MSB = Signed ? (_static_cast(__getMSB()) < _static_cast(rhs.__getMSB())) : (__getMSB() < rhs.__getMSB()); + const bool MSB = Signed ? (bit_cast(__getMSB()) < bit_cast(rhs.__getMSB())) : (__getMSB() < rhs.__getMSB()); const bool LSB = __getLSB() < rhs.__getLSB(); return MSBEqual ? LSB : MSB; } - NBL_CONSTEXPR_INLINE_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { // Same reasoning as above const bool MSBEqual = __getMSB() == rhs.__getMSB(); - const bool MSB = Signed ? (_static_cast(__getMSB()) > _static_cast(rhs.__getMSB())) : (__getMSB() > rhs.__getMSB()); + const bool MSB = Signed ? (bit_cast(__getMSB()) > bit_cast(rhs.__getMSB())) : (__getMSB() > rhs.__getMSB()); const bool LSB = __getLSB() > rhs.__getLSB(); return MSBEqual ? LSB : MSB; } - NBL_CONSTEXPR_INLINE_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { return !operator>(rhs); } - NBL_CONSTEXPR_INLINE_FUNC bool operator>=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC bool operator>=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { return !operator<(rhs); } @@ -162,32 +185,16 @@ using emulated_int64_t = emulated_int64_base; namespace impl { -template<> -struct static_cast_helper -{ - using To = emulated_uint64_t; - using From = emulated_int64_t; - - // Return only the lowest bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) - { - To retVal; - retVal.data = i.data; - return retVal; - } -}; - -template<> -struct static_cast_helper +template +struct static_cast_helper, emulated_int64_base > { - using To = emulated_int64_t; - using From = emulated_uint64_t; + using To = emulated_int64_base; + using From = emulated_int64_base; - // Return only the lowest bits - 
NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) other) { To retVal; - retVal.data = u.data; + retVal.data = other.data; return retVal; } }; @@ -199,19 +206,19 @@ struct static_cast_helper NBL_PARTIAL_REQ_BOT(con using From = emulated_int64_base; // Return only the lowest bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From val) + NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) val) { return _static_cast(val.data.x); } }; -template NBL_PARTIAL_REQ_TOP(is_same_v || is_same_v) -struct static_cast_helper NBL_PARTIAL_REQ_BOT(is_same_v || is_same_v) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) > sizeof(uint32_t))) +struct static_cast_helper NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) > sizeof(uint32_t))) > { using To = I; using From = emulated_int64_base; - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From val) + NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) val) { return bit_cast(val.data); } @@ -224,28 +231,53 @@ struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(con using From = I; // Set only lower bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) i) { - return To::create(uint32_t(0), _static_cast(i)); + return To::create(_static_cast(i), uint32_t(0)); } }; -template NBL_PARTIAL_REQ_TOP(is_same_v || is_same_v ) -struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(is_same_v || is_same_v) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) > sizeof(uint32_t))) +struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) > sizeof(uint32_t))) > { using To = emulated_int64_base; using From = I; - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) i) { + // `bit_cast` blocked by GLM vectors using a union + #ifndef __HLSL_VERSION + return To::create(_static_cast(i), _static_cast(i >> 32)); + 
#else To retVal; - retVal.data = bit_cast(i); + retVal.data = bit_cast >(i); return retVal; + #endif } }; } //namespace impl +// Define constructor and conversion operators + +#ifndef __HLSL_VERSION + +template +template +constexpr emulated_int64_base::emulated_int64_base(const I& toEmulate) +{ + *this = _static_cast>(toEmulate); +} + +template +template +constexpr emulated_int64_base::operator I() const noexcept +{ + return _static_cast(*this); +} + +#endif + // ---------------------- Functional operators ------------------------ template @@ -258,7 +290,7 @@ struct left_shift_operator > //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 // If `_bits > 63` or `_bits < 0` the result is undefined - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) { const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites LSB const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; @@ -269,7 +301,7 @@ struct left_shift_operator > } // If `_bits > 63` or `_bits < 0` the result is undefined - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) { return operator()(operand, _static_cast(bits)); } @@ -285,7 +317,7 @@ struct arithmetic_right_shift_operator //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 // If `_bits > 63` the result is undefined - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) { const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites MSB const uint32_t shift = bigShift ? 
bits - ComponentBitWidth : ComponentBitWidth - bits; @@ -296,7 +328,7 @@ struct arithmetic_right_shift_operator } // If `_bits > 63` the result is undefined - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) { return operator()(operand, _static_cast(bits)); } @@ -312,18 +344,18 @@ struct arithmetic_right_shift_operator //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 // If `_bits > 63` or `_bits < 0` the result is undefined - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) { const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites MSB const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; - const type_t shifted = type_t::create(bigShift ? vector(uint32_t(int32_t(operand.__getMSB()) >> shift), ~uint32_t(0)) + const type_t shifted = type_t::create(bigShift ? vector(uint32_t(int32_t(operand.__getMSB()) >> shift), int32_t(operand.__getMSB()) < 0 ? 
~uint32_t(0) : uint32_t(0)) : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), uint32_t(int32_t(operand.__getMSB()) >> bits))); ternary_operator ternary; return ternary(bool(bits), shifted, operand); } // If `_bits > 63` or `_bits < 0` the result is undefined - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) { return operator()(operand, _static_cast(bits)); } diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index fd5f5e3c34..3780ce001b 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -24,7 +24,7 @@ struct _2_component_vec static_assert(sizeof(T) <= 8); - NBL_CONSTEXPR_INLINE_FUNC void setComponent(uint32_t componentIdx, T val) + NBL_CONSTEXPR_FUNC void setComponent(uint32_t componentIdx, T val) { if (componentIdx == 0) x = val; @@ -32,7 +32,7 @@ struct _2_component_vec y = val; } - NBL_CONSTEXPR_INLINE_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC { if (componentIdx == 0) return x; @@ -40,9 +40,10 @@ struct _2_component_vec return y; // TODO: avoid code duplication, make it constexpr - using TAsUint = typename unsigned_integer_of_size::type; - TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull); - return nbl::hlsl::bit_cast(invalidComponentValue); + //using TAsUint = typename unsigned_integer_of_size::type; + //TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull); + //return nbl::hlsl::bit_cast(invalidComponentValue); + return nbl::hlsl::undef(); } NBL_CONSTEXPR_STATIC uint32_t Dimension = 2; @@ -56,7 +57,7 @@ struct _3_component_vec T z; - NBL_CONSTEXPR_INLINE_FUNC void setComponent(uint32_t componentIdx, T val) + NBL_CONSTEXPR_FUNC void 
setComponent(uint32_t componentIdx, T val) { if (componentIdx == 0) x = val; @@ -66,7 +67,7 @@ struct _3_component_vec z = val; } - NBL_CONSTEXPR_INLINE_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC { if (componentIdx == 0) return x; @@ -76,9 +77,10 @@ struct _3_component_vec return z; // TODO: avoid code duplication, make it constexpr - using TAsUint = typename unsigned_integer_of_size::type; - TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); - return nbl::hlsl::bit_cast(invalidComponentValue); + //using TAsUint = typename unsigned_integer_of_size::type; + //TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); + //return nbl::hlsl::bit_cast(invalidComponentValue); + return nbl::hlsl::undef(); } NBL_CONSTEXPR_STATIC uint32_t Dimension = 3; @@ -92,7 +94,7 @@ struct _4_component_vec T z; T w; - NBL_CONSTEXPR_INLINE_FUNC void setComponent(uint32_t componentIdx, T val) + NBL_CONSTEXPR_FUNC void setComponent(uint32_t componentIdx, T val) { if (componentIdx == 0) x = val; @@ -104,7 +106,7 @@ struct _4_component_vec w = val; } - NBL_CONSTEXPR_INLINE_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC { if (componentIdx == 0) return x; @@ -116,184 +118,210 @@ struct _4_component_vec return w; // TODO: avoid code duplication, make it constexpr - using TAsUint = typename unsigned_integer_of_size::type; - uint64_t invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); - return nbl::hlsl::bit_cast(invalidComponentValue); + //using TAsUint = typename unsigned_integer_of_size::type; + //uint64_t invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); + //return nbl::hlsl::bit_cast(invalidComponentValue); 
+ return nbl::hlsl::undef(); } NBL_CONSTEXPR_STATIC uint32_t Dimension = 4; }; -template ::value> -struct emulated_vector : CRTP -{ - using this_t = emulated_vector; - using component_t = ComponentType; +template +struct emulated_vector; - NBL_CONSTEXPR_STATIC_INLINE this_t create(this_t other) - { - CRTP output; +// Generic ComponentType vectors still have to be partial specialized based on whether they're fundamental and/or integral - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, other.getComponent(i)); - } - NBL_CONSTEXPR_STATIC_INLINE this_t create(vector other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, other[i]); - - return output; - } - - #define NBL_EMULATED_VECTOR_OPERATOR(OP)\ - NBL_CONSTEXPR_INLINE_FUNC this_t operator##OP (component_t val)\ - {\ - this_t output;\ - for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, this_t::getComponent(i) OP val);\ - return output;\ - }\ - NBL_CONSTEXPR_INLINE_FUNC this_t operator##OP (this_t other)\ - {\ - this_t output;\ - for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, this_t::getComponent(i) OP other.getComponent(i));\ - return output;\ - }\ - NBL_CONSTEXPR_INLINE_FUNC this_t operator##OP (vector other)\ - {\ - this_t output;\ - for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, this_t::getComponent(i) OP other[i]);\ - return output;\ - } +#define NBL_EMULATED_VECTOR_UNARY_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC this_t operator##OP() NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i).operator##OP());\ + return output;\ +} - NBL_EMULATED_VECTOR_OPERATOR(&) - NBL_EMULATED_VECTOR_OPERATOR(|) - NBL_EMULATED_VECTOR_OPERATOR(^) - NBL_EMULATED_VECTOR_OPERATOR(+) - NBL_EMULATED_VECTOR_OPERATOR(-) - NBL_EMULATED_VECTOR_OPERATOR(*) - NBL_EMULATED_VECTOR_OPERATOR(/) +#define 
NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC this_t operator##OP (component_t val) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP val);\ + return output;\ +}\ +NBL_CONSTEXPR_FUNC this_t operator##OP (this_t other) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP other.getComponent(i));\ + return output;\ +} - #undef NBL_EMULATED_VECTOR_OPERATOR +#define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(OP) NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC this_t operator##OP(vector other) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP other[i]);\ + return output;\ +} - #define NBL_EMULATED_VECTOR_COMPARISON(OP) NBL_CONSTEXPR_INLINE_FUNC vector operator##OP (this_t other)\ - {\ - vector output;\ - [[unroll]]\ - for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output[i] = CRTP::getComponent(i) OP other.getComponent(i);\ - return output;\ - } +#define NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(OP) NBL_CONSTEXPR_FUNC vector operator##OP (this_t other) NBL_CONST_MEMBER_FUNC \ +{\ + vector output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output[i] = CRTP::getComponent(i) OP other.getComponent(i);\ + return output;\ +} - NBL_EMULATED_VECTOR_COMPARISON(==) - NBL_EMULATED_VECTOR_COMPARISON(!=) - NBL_EMULATED_VECTOR_COMPARISON(<) - NBL_EMULATED_VECTOR_COMPARISON(<=) - NBL_EMULATED_VECTOR_COMPARISON(>) - NBL_EMULATED_VECTOR_COMPARISON(>=) +#define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(OP) NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC vector operator##OP (vector other) NBL_CONST_MEMBER_FUNC \ +{\ + vector output;\ + [[unroll]]\ + for 
(uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output[i] = CRTP::getComponent(i) OP other[i];\ + return output;\ +} - #undef NBL_EMULATED_VECTOR_COMPARISON +#define NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM \ +using this_t = emulated_vector;\ +using component_t = ComponentType;\ +NBL_CONSTEXPR_STATIC_FUNC this_t create(this_t other)\ +{\ + CRTP output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, other.getComponent(i));\ +}\ +NBL_CONSTEXPR_FUNC component_t calcComponentSum() NBL_CONST_MEMBER_FUNC \ +{\ + component_t sum = CRTP::getComponent(0);\ + [[unroll]]\ + for (uint32_t i = 1u; i < CRTP::Dimension; ++i)\ + sum = sum + CRTP::getComponent(i);\ + return sum;\ +} - NBL_CONSTEXPR_INLINE_FUNC component_t calcComponentSum() - { - component_t sum = 0; - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - sum = sum + CRTP::getComponent(i); +#define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM \ +NBL_CONSTEXPR_STATIC_FUNC this_t create(vector other)\ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, other[i]);\ + return output;\ +} - return sum; - } +// Fundamental, integral +template NBL_PARTIAL_REQ_TOP(is_fundamental_v && concepts::IntegralLikeScalar) +struct emulated_vector&& concepts::IntegralLikeScalar) > : CRTP +{ + // Creation for fundamental type + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM + // Operators, including integral + NBL_EMULATED_VECTOR_UNARY_OPERATOR(~) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(&) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(|) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(^) + NBL_EMULATED_VECTOR_UNARY_OPERATOR(-) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(+) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(-) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(*) 
+ NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(/) + // Comparison operators + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(==) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(!=) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<=) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>=) }; -template -struct emulated_vector : CRTP +// Fundamental, not integral +template NBL_PARTIAL_REQ_TOP(is_fundamental_v && !concepts::IntegralLikeScalar) +struct emulated_vector && !concepts::IntegralLikeScalar) > : CRTP { - using component_t = ComponentType; - using this_t = emulated_vector; - - NBL_CONSTEXPR_STATIC_INLINE this_t create(this_t other) - { - this_t output; - [[unroll]] - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, other.getComponent(i)); - - return output; - } - - template - NBL_CONSTEXPR_STATIC_INLINE this_t create(vector other) - { - this_t output; - [[unroll]] - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, ComponentType::create(other[i])); - - return output; - } - - #define NBL_EMULATED_VECTOR_OPERATOR(OP, ENABLE_CONDITION) NBL_CONSTEXPR_INLINE_FUNC enable_if_t< ENABLE_CONDITION , this_t> operator##OP (component_t val)\ - {\ - this_t output;\ - [[unroll]]\ - for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, CRTP::getComponent(i) OP val);\ - return output;\ - }\ - NBL_CONSTEXPR_INLINE_FUNC enable_if_t< ENABLE_CONDITION , this_t> operator##OP (this_t other)\ - {\ - this_t output;\ - [[unroll]]\ - for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, CRTP::getComponent(i) OP other.getComponent(i));\ - return output;\ - } - - NBL_EMULATED_VECTOR_OPERATOR(&, concepts::IntegralLikeScalar) - NBL_EMULATED_VECTOR_OPERATOR(|, concepts::IntegralLikeScalar) - NBL_EMULATED_VECTOR_OPERATOR(^, 
concepts::IntegralLikeScalar) - NBL_EMULATED_VECTOR_OPERATOR(+, true) - NBL_EMULATED_VECTOR_OPERATOR(-, true) - NBL_EMULATED_VECTOR_OPERATOR(*, true) - NBL_EMULATED_VECTOR_OPERATOR(/, true) - - #undef NBL_EMULATED_VECTOR_OPERATOR - - #define NBL_EMULATED_VECTOR_COMPARISON(OP) NBL_CONSTEXPR_INLINE_FUNC vector operator##OP (this_t other)\ - {\ - vector output;\ - [[unroll]]\ - for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output[i] = CRTP::getComponent(i) OP other.getComponent(i);\ - return output;\ - } - - NBL_EMULATED_VECTOR_COMPARISON(==) - NBL_EMULATED_VECTOR_COMPARISON(!=) - NBL_EMULATED_VECTOR_COMPARISON(<) - NBL_EMULATED_VECTOR_COMPARISON(<=) - NBL_EMULATED_VECTOR_COMPARISON(>) - NBL_EMULATED_VECTOR_COMPARISON(>=) - - #undef NBL_EMULATED_VECTOR_COMPARISON + // Creation for fundamental type + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM + // Operators + NBL_EMULATED_VECTOR_UNARY_OPERATOR(-) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(+) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(-) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(*) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(/) + // Comparison operators + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(==) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(!=) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<=) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>=) +}; - NBL_CONSTEXPR_INLINE_FUNC ComponentType calcComponentSum() - { - ComponentType sum = ComponentType::create(0); - [[unroll]] - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - sum = sum + CRTP::getComponent(i); +// Not fundamental, integral +template NBL_PARTIAL_REQ_TOP(!is_fundamental_v && concepts::IntegralLikeScalar) +struct emulated_vector && concepts::IntegralLikeScalar) > : CRTP +{ + // Creation + 
NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM + // Operators, including integral + NBL_EMULATED_VECTOR_UNARY_OPERATOR(~) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(&) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(|) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(^) + NBL_EMULATED_VECTOR_UNARY_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(+) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(*) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(/) + // Comparison operators + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(==) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(!=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>=) +}; - return sum; - } +// Not fundamental, not integral +template NBL_PARTIAL_REQ_TOP(!is_fundamental_v && !concepts::IntegralLikeScalar) +struct emulated_vector && !concepts::IntegralLikeScalar) > : CRTP +{ + // Creation + NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM + // Operators + NBL_EMULATED_VECTOR_UNARY_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(+) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(*) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(/) + // Comparison operators + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(==) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(!=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>=) }; +#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM +#undef NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM +#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR +#undef NBL_EMULATED_VECTOR_COMPARISON_OPERATOR +#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR +#undef NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR +#undef NBL_EMULATED_VECTOR_UNARY_OPERATOR + +// 
----------------------------------------------------- EMULATED FLOAT SPECIALIZATION -------------------------------------------------------------------- #define DEFINE_OPERATORS_FOR_TYPE(...)\ -NBL_CONSTEXPR_INLINE_FUNC this_t operator+(__VA_ARGS__ val)\ +NBL_CONSTEXPR_FUNC this_t operator+(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ @@ -302,7 +330,7 @@ NBL_CONSTEXPR_INLINE_FUNC this_t operator+(__VA_ARGS__ val)\ return output;\ }\ \ -NBL_CONSTEXPR_INLINE_FUNC this_t operator-(__VA_ARGS__ val)\ +NBL_CONSTEXPR_FUNC this_t operator-(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ @@ -311,7 +339,7 @@ NBL_CONSTEXPR_INLINE_FUNC this_t operator-(__VA_ARGS__ val)\ return output;\ }\ \ -NBL_CONSTEXPR_INLINE_FUNC this_t operator*(__VA_ARGS__ val)\ +NBL_CONSTEXPR_FUNC this_t operator*(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ @@ -321,15 +349,14 @@ NBL_CONSTEXPR_INLINE_FUNC this_t operator*(__VA_ARGS__ val)\ }\ \ -// ----------------------------------------------------- EMULATED FLOAT SPECIALIZATION -------------------------------------------------------------------- template -struct emulated_vector, CRTP, false> : CRTP +struct emulated_vector, CRTP> : CRTP { using component_t = emulated_float64_t; - using this_t = emulated_vector; + using this_t = emulated_vector; - NBL_CONSTEXPR_STATIC_INLINE this_t create(this_t other) + NBL_CONSTEXPR_STATIC_FUNC this_t create(this_t other) { this_t output; @@ -340,7 +367,7 @@ struct emulated_vector, CRTP, fa } template - NBL_CONSTEXPR_STATIC_INLINE this_t create(vector other) + NBL_CONSTEXPR_STATIC_FUNC this_t create(vector other) { this_t output; @@ -350,7 +377,7 @@ struct emulated_vector, CRTP, fa return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(this_t other) + NBL_CONSTEXPR_FUNC this_t operator+(this_t other) NBL_CONST_MEMBER_FUNC { 
this_t output; @@ -359,7 +386,7 @@ struct emulated_vector, CRTP, fa return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(this_t other) + NBL_CONSTEXPR_FUNC this_t operator-(this_t other) NBL_CONST_MEMBER_FUNC { this_t output; @@ -368,7 +395,7 @@ struct emulated_vector, CRTP, fa return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(this_t other) + NBL_CONSTEXPR_FUNC this_t operator*(this_t other) NBL_CONST_MEMBER_FUNC { this_t output; @@ -391,7 +418,7 @@ struct emulated_vector, CRTP, fa DEFINE_OPERATORS_FOR_TYPE(int32_t) DEFINE_OPERATORS_FOR_TYPE(int64_t) - NBL_CONSTEXPR_INLINE_FUNC component_t calcComponentSum() + NBL_CONSTEXPR_FUNC component_t calcComponentSum() NBL_CONST_MEMBER_FUNC { component_t sum = component_t::create(0); for (uint32_t i = 0u; i < CRTP::Dimension; ++i) @@ -478,7 +505,7 @@ namespace impl template struct static_cast_helper, vector, void> { - NBL_CONSTEXPR_STATIC_INLINE emulated_vector_t2 cast(vector vec) + NBL_CONSTEXPR_STATIC_FUNC emulated_vector_t2 cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t2 output; output.x = _static_cast(vec.x); @@ -491,7 +518,7 @@ struct static_cast_helper, vector, void> template struct static_cast_helper, vector, void> { - NBL_CONSTEXPR_STATIC_INLINE emulated_vector_t3 cast(vector vec) + NBL_CONSTEXPR_STATIC_FUNC emulated_vector_t3 cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t3 output; output.x = _static_cast(vec.x); @@ -505,7 +532,7 @@ struct static_cast_helper, vector, void> template struct static_cast_helper, vector, void> { - NBL_CONSTEXPR_STATIC_INLINE emulated_vector_t4 cast(vector vec) + NBL_CONSTEXPR_STATIC_FUNC emulated_vector_t4 cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t4 output; output.x = _static_cast(vec.x); @@ -523,12 +550,13 @@ struct static_cast_helper, emulated_vector_t; using InputVecType = emulated_vector_t; - NBL_CONSTEXPR_STATIC_INLINE OutputVecType cast(InputVecType vec) + NBL_CONSTEXPR_STATIC_FUNC OutputVecType cast(NBL_CONST_REF_ARG(InputVecType) 
vec) { array_get getter; array_set setter; OutputVecType output; + [[unroll]] for (int i = 0; i < N; ++i) setter(output, i, _static_cast(getter(vec, i))); @@ -541,11 +569,12 @@ struct static_cast_helper, emulated_vecto {\ using OutputVecType = emulated_vector_t##N ;\ using InputVecType = emulated_vector_t##N ;\ - NBL_CONSTEXPR_STATIC_INLINE OutputVecType cast(InputVecType vec)\ + NBL_CONSTEXPR_STATIC_FUNC OutputVecType cast(NBL_CONST_REF_ARG(InputVecType) vec)\ {\ array_get getter;\ array_set setter;\ OutputVecType output;\ + [[unroll]]\ for (int i = 0; i < N; ++i)\ setter(output, i, _static_cast(getter(vec, i)));\ return output;\ @@ -558,16 +587,38 @@ NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST(4) #undef NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST +#define NBL_EMULATED_VEC_PROMOTION(N) template\ +struct Promote, ComponentType>\ +{\ + using VecType = emulated_vector_t##N ;\ + NBL_CONSTEXPR_FUNC VecType operator()(NBL_CONST_REF_ARG(ComponentType) v)\ + {\ + array_set setter;\ + VecType promoted;\ + [[unroll]]\ + for (int i = 0; i < N; ++i)\ + setter(promoted, i, v);\ + return promoted;\ + }\ +}; + +NBL_EMULATED_VEC_PROMOTION(2) +NBL_EMULATED_VEC_PROMOTION(3) +NBL_EMULATED_VEC_PROMOTION(4) + +#undef NBL_EMULATED_VEC_PROMOTION + #define NBL_EMULATED_VEC_TRUNCATION(N, M) template\ -struct static_cast_helper, emulated_vector_t##M , void>\ +struct Truncate, emulated_vector_t##M >\ {\ using OutputVecType = emulated_vector_t##N ;\ using InputVecType = emulated_vector_t##M ;\ - NBL_CONSTEXPR_STATIC_INLINE OutputVecType cast(InputVecType vec)\ + NBL_CONSTEXPR_FUNC OutputVecType operator()(NBL_CONST_REF_ARG(InputVecType) vec)\ {\ array_get getter;\ array_set setter;\ OutputVecType output;\ + [[unroll]]\ for (int i = 0; i < N; ++i)\ setter(output, i, getter(vec, i));\ return output;\ @@ -583,7 +634,7 @@ NBL_EMULATED_VEC_TRUNCATION(4, 4) #undef NBL_EMULATED_VEC_TRUNCATION -} +} //namespace impl } } diff --git a/include/nbl/builtin/hlsl/functional.hlsl 
b/include/nbl/builtin/hlsl/functional.hlsl index 45198cbe7a..76b527f6bd 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -91,7 +91,6 @@ struct reference_wrapper : enable_if_t< #else // CPP - #define ALIAS_STD(NAME,OP) template struct NAME : std::NAME { \ using type_t = T; @@ -135,18 +134,69 @@ ALIAS_STD(divides,/) NBL_CONSTEXPR_STATIC_INLINE T identity = T(1); }; +#ifndef __HLSL_VERSION + +template +struct bit_not : std::bit_not +{ + using type_t = T; +}; + +#else + +template +struct bit_not +{ + using type_t = T; + + T operator()(NBL_CONST_REF_ARG(T) operand) + { + return ~operand; + } +}; + +// The default version above only works for fundamental scalars, vectors and matrices. This is because you can't call `~x` unless `x` is one of the former. +// Similarly, calling `x.operator~()` is not valid for the aforementioned, and only for types overriding this operator. So, we need a specialization. +template NBL_PARTIAL_REQ_TOP(!(concepts::Scalar || concepts::Vector || concepts::Matrix)) +struct bit_not || concepts::Vector || concepts::Matrix)) > +{ + using type_t = T; + + T operator()(NBL_CONST_REF_ARG(T) operand) + { + return operand.operator~(); + } +}; + +#endif -ALIAS_STD(equal_to,==) }; -ALIAS_STD(not_equal_to,!=) }; -ALIAS_STD(greater,>) }; -ALIAS_STD(less,<) }; -ALIAS_STD(greater_equal,>=) }; -ALIAS_STD(less_equal, <= ) }; +ALIAS_STD(equal_to, ==) }; +ALIAS_STD(not_equal_to, !=) }; +ALIAS_STD(greater, >) }; +ALIAS_STD(less, <) }; +ALIAS_STD(greater_equal, >=) }; +ALIAS_STD(less_equal, <=) }; #undef ALIAS_STD -// The above comparison operators return bool on STD. 
Here's a specialization so that they return `vector` for vectorial types -#define NBL_COMPARISON_VECTORIAL_SPECIALIZATION(NAME, OP) template NBL_PARTIAL_REQ_TOP(concepts::Vectorial)\ +// The above comparison operators return bool on STD, but in HLSL they're supposed to yield bool vectors, so here's a specialization so that they return `vector` for vectorial types + +// GLM doesn't have operators on vectors +#ifndef __HLSL_VERSION + +#define NBL_COMPARISON_VECTORIAL_SPECIALIZATION(NAME, OP, GLM_OP) template NBL_PARTIAL_REQ_TOP(concepts::Vectorial)\ +struct NAME ) >\ +{\ + using type_t = T;\ + vector::Dimension> operator()(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs)\ + {\ + return glm::GLM_OP (lhs, rhs);\ + }\ +}; + +#else + +#define NBL_COMPARISON_VECTORIAL_SPECIALIZATION(NAME, OP, GLM_OP) template NBL_PARTIAL_REQ_TOP(concepts::Vectorial)\ struct NAME ) >\ {\ using type_t = T;\ @@ -156,16 +206,18 @@ struct NAME ) >\ }\ }; -NBL_COMPARISON_VECTORIAL_SPECIALIZATION(equal_to, ==) -NBL_COMPARISON_VECTORIAL_SPECIALIZATION(not_equal_to, !=) -NBL_COMPARISON_VECTORIAL_SPECIALIZATION(greater, >) -NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less, <) -NBL_COMPARISON_VECTORIAL_SPECIALIZATION(greater_equal, >=) -NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less_equal, <=) +#endif + +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(equal_to, ==, equal) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(not_equal_to, !=, notEqual) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(greater, >, greaterThan) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less, <, lessThan) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(greater_equal, >=, greaterThanEqual) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less_equal, <=, lessThanEqual) #undef NBL_COMPARISON_VECTORIAL_SPECIALIZATION -// ------------------------ Compound assignment operators ---------------------- +// ------------------------------------------------------------- COMPOUND ASSIGNMENT OPERATORS -------------------------------------------------------------------- #define 
COMPOUND_ASSIGN(NAME) template struct NAME##_assign { \ using type_t = T; \ @@ -186,7 +238,7 @@ COMPOUND_ASSIGN(divides) #undef COMPOUND_ASSIGN -// ----------------- End of compound assignment ops ---------------- +// ---------------------------------------------------------------- MIN, MAX, TERNARY ------------------------------------------------------------------------- // Min, Max, and Ternary and Shift operators don't use ALIAS_STD because they don't exist in STD // TODO: implement as mix(rhs(condition, lhs, rhs); } }; +// ----------------------------------------------------------------- SHIFT OPERATORS -------------------------------------------------------------------- + template struct left_shift_operator { using type_t = T; - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { return operand << bits; } @@ -246,28 +300,28 @@ struct left_shift_operator) > using type_t = T; using scalar_t = scalar_type_t; - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { return operand << bits; } - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) { return operand << bits; } }; -template NBL_PARTIAL_REQ_TOP(!concepts::Vector && concepts::IntegralLikeVectorial) -struct left_shift_operator && concepts::IntegralLikeVectorial) > +template NBL_PARTIAL_REQ_TOP(!concepts::IntVector && concepts::IntegralLikeVectorial) +struct left_shift_operator && concepts::IntegralLikeVectorial) > { using type_t = T; using scalar_t = typename vector_traits::scalar_type; - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + 
NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { array_get getter; array_set setter; - NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; [[unroll]] @@ -278,11 +332,11 @@ struct left_shift_operator && concept return shifted; } - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) { array_get getter; array_set setter; - NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; [[unroll]] @@ -293,11 +347,11 @@ struct left_shift_operator && concept return shifted; } - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(vector::Dimension>) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(vector::Dimension>) bits) { array_get getter; array_set setter; - NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; [[unroll]] @@ -308,11 +362,11 @@ struct left_shift_operator && concept return shifted; } - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(uint32_t) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(uint16_t) bits) { array_get getter; array_set setter; - NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; [[unroll]] @@ -329,7 +383,7 @@ struct arithmetic_right_shift_operator { using type_t = T; - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, 
NBL_CONST_REF_ARG(T) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { return operand >> bits; } @@ -341,28 +395,28 @@ struct arithmetic_right_shift_operator; - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { return operand >> bits; } - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) { return operand >> bits; } }; -template NBL_PARTIAL_REQ_TOP(!concepts::Vector&& concepts::IntegralLikeVectorial) -struct arithmetic_right_shift_operator&& concepts::IntegralLikeVectorial) > +template NBL_PARTIAL_REQ_TOP(!concepts::IntVector&& concepts::IntegralLikeVectorial) +struct arithmetic_right_shift_operator&& concepts::IntegralLikeVectorial) > { using type_t = T; using scalar_t = typename vector_traits::scalar_type; - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { array_get getter; array_set setter; - NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] @@ -373,11 +427,11 @@ struct arithmetic_right_shift_operator getter; array_set setter; - NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] @@ -388,11 +442,11 @@ struct arithmetic_right_shift_operator::Dimension>) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(vector::Dimension>) bits) { array_get getter; array_set setter; - 
NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] @@ -403,11 +457,11 @@ struct arithmetic_right_shift_operator getter; array_set setter; - NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] @@ -426,7 +480,7 @@ struct logical_right_shift_operator using type_t = T; using unsigned_type_t = make_unsigned_t; - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { arithmetic_right_shift_operator arithmeticRightShift; return _static_cast(arithmeticRightShift(_static_cast(operand), _static_cast(bits))); diff --git a/include/nbl/builtin/hlsl/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754.hlsl index 8d9c78a9f0..e81ff08c7b 100644 --- a/include/nbl/builtin/hlsl/ieee754.hlsl +++ b/include/nbl/builtin/hlsl/ieee754.hlsl @@ -89,7 +89,7 @@ inline int extractExponent(T x) } template -NBL_CONSTEXPR_INLINE_FUNC T replaceBiasedExponent(T x, typename unsigned_integer_of_size::type biasedExp) +NBL_CONSTEXPR_FUNC T replaceBiasedExponent(T x, typename unsigned_integer_of_size::type biasedExp) { using AsFloat = typename float_of_size::type; return impl::castBackToFloatType(glsl::bitfieldInsert(ieee754::impl::bitCastToUintType(x), biasedExp, traits::mantissaBitCnt, traits::exponentBitCnt)); @@ -97,20 +97,20 @@ NBL_CONSTEXPR_INLINE_FUNC T replaceBiasedExponent(T x, typename unsigned_integer // performs no overflow tests, returns x*exp2(n) template -NBL_CONSTEXPR_INLINE_FUNC T fastMulExp2(T x, int n) +NBL_CONSTEXPR_FUNC T fastMulExp2(T x, int n) { return replaceBiasedExponent(x, extractBiasedExponent(x) + uint32_t(n)); } template -NBL_CONSTEXPR_INLINE_FUNC typename 
unsigned_integer_of_size::type extractMantissa(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractMantissa(T x) { using AsUint = typename unsigned_integer_of_size::type; return ieee754::impl::bitCastToUintType(x) & traits::type>::mantissaMask; } template -NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractNormalizeMantissa(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractNormalizeMantissa(T x) { using AsUint = typename unsigned_integer_of_size::type; using AsFloat = typename float_of_size::type; @@ -118,21 +118,21 @@ NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type ext } template -NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractSign(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractSign(T x) { using AsFloat = typename float_of_size::type; return (ieee754::impl::bitCastToUintType(x) & traits::signMask) >> ((sizeof(T) * 8) - 1); } template -NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractSignPreserveBitPattern(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractSignPreserveBitPattern(T x) { using AsFloat = typename float_of_size::type; return ieee754::impl::bitCastToUintType(x) & traits::signMask; } template ) -NBL_CONSTEXPR_INLINE_FUNC FloatingPoint copySign(FloatingPoint to, FloatingPoint from) +NBL_CONSTEXPR_FUNC FloatingPoint copySign(FloatingPoint to, FloatingPoint from) { using AsUint = typename unsigned_integer_of_size::type; @@ -143,7 +143,7 @@ NBL_CONSTEXPR_INLINE_FUNC FloatingPoint copySign(FloatingPoint to, FloatingPoint } template ) -NBL_CONSTEXPR_INLINE_FUNC FloatingPoint flipSign(FloatingPoint val, bool flip = true) +NBL_CONSTEXPR_FUNC FloatingPoint flipSign(FloatingPoint val, bool flip = true) { using AsFloat = typename float_of_size::type; using AsUint = typename unsigned_integer_of_size::type; diff --git a/include/nbl/builtin/hlsl/ieee754/impl.hlsl 
b/include/nbl/builtin/hlsl/ieee754/impl.hlsl index ad8a3f9228..69fba9795f 100644 --- a/include/nbl/builtin/hlsl/ieee754/impl.hlsl +++ b/include/nbl/builtin/hlsl/ieee754/impl.hlsl @@ -15,25 +15,25 @@ namespace ieee754 namespace impl { template -NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t bitCastToUintType(T x) +NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t bitCastToUintType(T x) { using AsUint = unsigned_integer_of_size_t; return bit_cast(x); } // to avoid bit cast from uintN_t to uintN_t -template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t<2> bitCastToUintType(uint16_t x) { return x; } -template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t<4> bitCastToUintType(uint32_t x) { return x; } -template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t<8> bitCastToUintType(uint64_t x) { return x; } +template <> NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t<2> bitCastToUintType(uint16_t x) { return x; } +template <> NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t<4> bitCastToUintType(uint32_t x) { return x; } +template <> NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t<8> bitCastToUintType(uint64_t x) { return x; } template -NBL_CONSTEXPR_INLINE_FUNC T castBackToFloatType(T x) +NBL_CONSTEXPR_FUNC T castBackToFloatType(T x) { using AsFloat = typename float_of_size::type; return bit_cast(x); } -template<> NBL_CONSTEXPR_INLINE_FUNC uint16_t castBackToFloatType(uint16_t x) { return x; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint32_t castBackToFloatType(uint32_t x) { return x; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t castBackToFloatType(uint64_t x) { return x; } +template<> NBL_CONSTEXPR_FUNC uint16_t castBackToFloatType(uint16_t x) { return x; } +template<> NBL_CONSTEXPR_FUNC uint32_t castBackToFloatType(uint32_t x) { return x; } +template<> NBL_CONSTEXPR_FUNC uint64_t castBackToFloatType(uint64_t x) { return x; } } } diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index d2fca1165f..650d9ce6ba 
100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -8,7 +8,6 @@ #include "nbl/builtin/hlsl/emulated/int64_t.hlsl" #include "nbl/builtin/hlsl/mpl.hlsl" #include "nbl/builtin/hlsl/portable/vector_t.hlsl" -#include "nbl/builtin/hlsl/mpl.hlsl" // TODO: mega macro to get functional plus, minus, plus_assign, minus_assign @@ -33,8 +32,30 @@ NBL_CONSTEXPR uint16_t CodingStages = 5; template struct coding_mask; -template -NBL_CONSTEXPR uint64_t coding_mask_v = coding_mask::value; +template +NBL_CONSTEXPR T coding_mask_v = _static_cast(coding_mask::value); + +template +NBL_CONSTEXPR portable_vector_t InterleaveMasks = _static_cast >( + truncate >( + vector(coding_mask_v, + coding_mask_v << 1, + coding_mask_v << 2, + coding_mask_v << 3))); + +template +struct sign_mask : integral_constant {}; + +template +NBL_CONSTEXPR T sign_mask_v = _static_cast(sign_mask::value); + +template +NBL_CONSTEXPR portable_vector_t SignMasks = _static_cast >( + truncate >( + vector(sign_mask_v, + sign_mask_v << 1, + sign_mask_v << 2, + sign_mask_v << 3))); // 0th stage will be special: to avoid masking twice during encode/decode, and to get a proper mask that only gets the relevant bits out of a morton code, the 0th stage // mask also considers the total number of bits we're cnsidering for a code (all other masks operate on a bit-agnostic basis). 
@@ -57,23 +78,23 @@ NBL_CONSTEXPR uint64_t coding_mask_v = coding_mask::value; NBL_CONSTEXPR_STATIC_INLINE uint64_t value = (uint64_t(1) << _Bits) - 1;\ }; -NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(2, 0x5555555555555555) // Groups bits by 1 on, 1 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 1, uint64_t(0x3333333333333333)) // Groups bits by 2 on, 2 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 2, uint64_t(0x0F0F0F0F0F0F0F0F)) // Groups bits by 4 on, 4 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 3, uint64_t(0x00FF00FF00FF00FF)) // Groups bits by 8 on, 8 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 4, uint64_t(0x0000FFFF0000FFFF)) // Groups bits by 16 on, 16 off +NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(2, 0x5555555555555555ull) // Groups bits by 1 on, 1 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 1, 0x3333333333333333ull) // Groups bits by 2 on, 2 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 2, 0x0F0F0F0F0F0F0F0Full) // Groups bits by 4 on, 4 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 3, 0x00FF00FF00FF00FFull) // Groups bits by 8 on, 8 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 4, 0x0000FFFF0000FFFFull) // Groups bits by 16 on, 16 off -NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(3, 0x9249249249249249) // Groups bits by 1 on, 2 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 1, uint64_t(0x30C30C30C30C30C3)) // Groups bits by 2 on, 4 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 2, uint64_t(0xF00F00F00F00F00F)) // Groups bits by 4 on, 8 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 3, uint64_t(0x00FF0000FF0000FF)) // Groups bits by 8 on, 16 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 4, uint64_t(0xFFFF00000000FFFF)) // Groups bits by 16 on, 32 off +NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(3, 0x9249249249249249ull) // Groups bits by 1 on, 2 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 1, 0x30C30C30C30C30C3ull) // Groups bits by 2 on, 4 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 2, 0xF00F00F00F00F00Full) // Groups bits by 4 
on, 8 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 3, 0x00FF0000FF0000FFull) // Groups bits by 8 on, 16 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 4, 0xFFFF00000000FFFFull) // Groups bits by 16 on, 32 off -NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(4, 0x1111111111111111) // Groups bits by 1 on, 3 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 1, uint64_t(0x0303030303030303)) // Groups bits by 2 on, 6 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 2, uint64_t(0x000F000F000F000F)) // Groups bits by 4 on, 12 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 3, uint64_t(0x000000FF000000FF)) // Groups bits by 8 on, 24 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 4, uint64_t(0x000000000000FFFF)) // Groups bits by 16 on, 48 off (unused but here for completion + likely keeps compiler from complaining) +NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(4, 0x1111111111111111ull) // Groups bits by 1 on, 3 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 1, 0x0303030303030303ull) // Groups bits by 2 on, 6 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 2, 0x000F000F000F000Full) // Groups bits by 4 on, 12 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 3, 0x000000FF000000FFull) // Groups bits by 8 on, 24 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 4, 0x000000000000FFFFull) // Groups bits by 16 on, 48 off (unused but here for completion + likely keeps compiler from complaining) NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS @@ -81,10 +102,9 @@ NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS #undef NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK #undef NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK -// ----------------------------------------------------------------- MORTON ENCODER --------------------------------------------------- - -template && Dim * Bits <= 64 && 8 * sizeof(encode_t) == mpl::round_up_to_pot_v) -struct MortonEncoder +// ----------------------------------------------------------------- MORTON TRANSCODER --------------------------------------------------- +template && Dim * 
Bits <= 64 && 8 * sizeof(encode_t) == mpl::max_v, uint64_t(16)>) +struct Transcoder { template 16), vector, vector > NBL_FUNC_REQUIRES(concepts::IntVector && 8 * sizeof(typename vector_traits::scalar_type) >= Bits) @@ -93,22 +113,26 @@ struct MortonEncoder * * @param [in] decodedValue Cartesian coordinates to interleave and shift */ - NBL_CONSTEXPR_STATIC_INLINE_FUNC portable_vector_t interleaveShift(NBL_CONST_REF_ARG(decode_t) decodedValue) + NBL_CONSTEXPR_STATIC_FUNC portable_vector_t interleaveShift(NBL_CONST_REF_ARG(decode_t) decodedValue) { - NBL_CONSTEXPR_STATIC encode_t EncodeMasks[CodingStages + 1] = { _static_cast(coding_mask_v), _static_cast(coding_mask_v), _static_cast(coding_mask_v) , _static_cast(coding_mask_v) , _static_cast(coding_mask_v) , _static_cast(coding_mask_v) }; left_shift_operator > leftShift; - portable_vector_t interleaved = _static_cast >(decodedValue)& EncodeMasks[CodingStages]; + portable_vector_t interleaved = _static_cast >(decodedValue) & coding_mask_v; - NBL_CONSTEXPR_STATIC uint16_t Stages = mpl::log2_ceil_v; - [[unroll]] - for (uint16_t i = Stages; i > 0; i--) - { - interleaved = interleaved | leftShift(interleaved, (uint32_t(1) << (i - 1)) * (Dim - 1)); - interleaved = interleaved & EncodeMasks[i - 1]; + #define ENCODE_LOOP_ITERATION(I) NBL_IF_CONSTEXPR(Bits > (uint16_t(1) << I))\ + {\ + interleaved = interleaved | leftShift(interleaved, (uint16_t(1) << I) * (Dim - 1));\ + interleaved = interleaved & coding_mask_v;\ } + ENCODE_LOOP_ITERATION(4) + ENCODE_LOOP_ITERATION(3) + ENCODE_LOOP_ITERATION(2) + ENCODE_LOOP_ITERATION(1) + ENCODE_LOOP_ITERATION(0) + + #undef ENCODE_LOOP_ITERATION // After interleaving, shift each coordinate left by their index - return leftShift(interleaved, _static_cast >(vector(0, 1, 2, 3))); + return leftShift(interleaved, truncate >(vector(0, 1, 2, 3))); } template 16), vector, vector > @@ -118,52 +142,58 @@ struct MortonEncoder * * @param [in] decodedValue Cartesian coordinates to encode */ - 
NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) + NBL_CONSTEXPR_STATIC_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) { - portable_vector_t interleaveShifted = interleaveShift(decodedValue); + const portable_vector_t interleaveShifted = interleaveShift(decodedValue); - encode_t encoded = _static_cast(uint64_t(0)); array_get, encode_t> getter; + encode_t encoded = getter(interleaveShifted, 0); + [[unroll]] - for (uint16_t i = 0; i < Dim; i++) + for (uint16_t i = 1; i < Dim; i++) encoded = encoded | getter(interleaveShifted, i); return encoded; } -}; - -// ----------------------------------------------------------------- MORTON DECODER --------------------------------------------------- -template && Dim * Bits <= 64 && 8 * sizeof(encode_t) == mpl::round_up_to_pot_v) -struct MortonDecoder -{ template 16), vector, vector > NBL_FUNC_REQUIRES(concepts::IntVector && 8 * sizeof(typename vector_traits::scalar_type) >= Bits) - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + /** + * @brief Decodes a Morton code back to a vector of cartesian coordinates + * + * @param [in] encodedValue Representation of a Morton code (binary code, not the morton class defined below) + */ + NBL_CONSTEXPR_STATIC_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) { - NBL_CONSTEXPR_STATIC encode_t DecodeMasks[CodingStages + 1] = { _static_cast(coding_mask_v), _static_cast(coding_mask_v), _static_cast(coding_mask_v) , _static_cast(coding_mask_v) , _static_cast(coding_mask_v) , _static_cast(coding_mask_v) }; - arithmetic_right_shift_operator > rightShift; + arithmetic_right_shift_operator encodedRightShift; portable_vector_t decoded; array_set, encode_t> setter; // Write initial values into decoded [[unroll]] for (uint16_t i = 0; i < Dim; i++) - setter(decoded, i, encodedValue); - decoded = rightShift(decoded, _static_cast >(vector(0, 1, 2, 3))); + setter(decoded, i, 
encodedRightShift(encodedValue, i)); - NBL_CONSTEXPR_STATIC uint16_t Stages = mpl::log2_ceil_v; - [[unroll]] - for (uint16_t i = 0; i < Stages; i++) - { - decoded = decoded & DecodeMasks[i]; - decoded = decoded | rightShift(decoded, (uint32_t(1) << i) * (Dim - 1)); + arithmetic_right_shift_operator > rightShift; + + #define DECODE_LOOP_ITERATION(I) NBL_IF_CONSTEXPR(Bits > (uint16_t(1) << I))\ + {\ + decoded = decoded & coding_mask_v;\ + decoded = decoded | rightShift(decoded, (uint16_t(1) << I) * (Dim - 1));\ } + DECODE_LOOP_ITERATION(0) + DECODE_LOOP_ITERATION(1) + DECODE_LOOP_ITERATION(2) + DECODE_LOOP_ITERATION(3) + DECODE_LOOP_ITERATION(4) + + #undef DECODE_LOOP_ITERATION + // If `Bits` is greater than half the bitwidth of the decode type, then we can avoid `&`ing against the last mask since duplicated MSB get truncated NBL_IF_CONSTEXPR(Bits > 4 * sizeof(typename vector_traits::scalar_type)) return _static_cast(decoded); else - return _static_cast(decoded & DecodeMasks[CodingStages]); + return _static_cast(decoded & coding_mask_v); } }; @@ -172,7 +202,7 @@ struct MortonDecoder // `BitsAlreadySpread` assumes both pre-interleaved and pre-shifted template -NBL_BOOL_CONCEPT Comparable = concepts::IntegralLikeScalar && is_signed_v == Signed && ((BitsAlreadySpread && sizeof(I) == sizeof(storage_t)) || (!BitsAlreadySpread && 8 * sizeof(I) == mpl::round_up_to_pot_v)); +NBL_BOOL_CONCEPT Comparable = concepts::IntegralLikeScalar && is_signed_v == Signed && ((BitsAlreadySpread && sizeof(I) == sizeof(storage_t)) || (!BitsAlreadySpread && 8 * sizeof(I) == mpl::max_v, uint64_t(16)>)); template struct Equals; @@ -181,13 +211,14 @@ template struct Equals { template) - NBL_CONSTEXPR_STATIC_INLINE_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) + NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { - NBL_CONSTEXPR portable_vector_t zeros = _static_cast 
>(_static_cast >(vector(0,0,0,0))); + const portable_vector_t zeros = _static_cast >(truncate >(vector(0,0,0,0))); - portable_vector_t rhsCasted = _static_cast >(rhs); - portable_vector_t xored = rhsCasted ^ value; - return xored == zeros; + const portable_vector_t rhsCasted = _static_cast >(rhs); + const portable_vector_t xored = rhsCasted ^ value; + equal_to > equal; + return equal(xored, zeros); } }; @@ -195,10 +226,11 @@ template struct Equals { template) - NBL_CONSTEXPR_STATIC_INLINE_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) + NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { - const portable_vector_t interleaved = MortonEncoder::interleaveShift(rhs); - return Equals::__call(value, interleaved); + using right_sign_t = conditional_t, make_unsigned_t >; + const portable_vector_t interleaved = _static_cast >(Transcoder::interleaveShift(rhs)); + return Equals::template __call(value, interleaved); } }; @@ -213,17 +245,28 @@ template { template) - NBL_CONSTEXPR_STATIC_INLINE_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) + NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { - NBL_CONSTEXPR_STATIC portable_vector_t InterleaveMasks = _static_cast >(_static_cast >(vector(coding_mask_v, coding_mask_v << 1, coding_mask_v << 2, coding_mask_v << 3))); - NBL_CONSTEXPR_STATIC portable_vector_t SignMasks = _static_cast >(_static_cast >(vector(SignMask, SignMask << 1, SignMask << 2, SignMask << 3))); ComparisonOp comparison; - // Obtain a vector of deinterleaved coordinates and flip their sign bits - const portable_vector_t thisCoord = (InterleaveMasks & value) ^ SignMasks; - // rhs already deinterleaved, just have to cast type and flip sign - const portable_vector_t rhsCoord = _static_cast >(rhs) ^ SignMasks; + NBL_IF_CONSTEXPR(Signed) + { + // 
Obtain a vector of deinterleaved coordinates and flip their sign bits + portable_vector_t thisCoord = (InterleaveMasks & value) ^ SignMasks; + // rhs already deinterleaved, just have to cast type and flip sign + const portable_vector_t rhsCoord = _static_cast >(rhs) ^ SignMasks; + + return comparison(thisCoord, rhsCoord); + } + else + { + // Obtain a vector of deinterleaved coordinates + portable_vector_t thisCoord = InterleaveMasks & value; + // rhs already deinterleaved, just have to cast type + const portable_vector_t rhsCoord = _static_cast >(rhs); - return comparison(thisCoord, rhsCoord); + return comparison(thisCoord, rhsCoord); + } + } }; @@ -231,11 +274,11 @@ template { template) - NBL_CONSTEXPR_STATIC_INLINE_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) + NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { - const vector interleaved = MortonEncoder::interleaveShift(rhs); - BaseComparison baseComparison; - return baseComparison(value, interleaved); + using right_sign_t = conditional_t, make_unsigned_t >; + const portable_vector_t interleaved = _static_cast >(Transcoder::interleaveShift(rhs)); + return BaseComparison::template __call(value, interleaved); } }; @@ -283,7 +326,7 @@ struct code create(NBL_CONST_REF_ARG(vector) cartesian) { this_t retVal; - retVal.value = impl::MortonEncoder::encode(cartesian); + retVal.value = impl::Transcoder::encode(cartesian); return retVal; } @@ -296,7 +339,7 @@ struct code * @param [in] cartesian Coordinates to encode */ template= Bits) - explicit code(NBL_CONST_REF_ARG(vector) cartesian) + inline explicit code(NBL_CONST_REF_ARG(vector) cartesian) { *this = create(cartesian); } @@ -304,35 +347,35 @@ struct code /** * @brief Decodes this Morton code back to a set of cartesian coordinates */ - template= Bits) - constexpr inline explicit operator vector() const noexcept; + template= Bits && is_signed_v == Signed) + 
constexpr explicit operator vector() const noexcept; #endif // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- - NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { this_t retVal; retVal.value = value & rhs.value; return retVal; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { this_t retVal; retVal.value = value | rhs.value; return retVal; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { this_t retVal; retVal.value = value ^ rhs.value; return retVal; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC { this_t retVal; retVal.value = ~value; @@ -342,15 +385,15 @@ struct code // Only valid in CPP #ifndef __HLSL_VERSION - constexpr inline this_t operator<<(uint16_t bits) const; + constexpr this_t operator<<(uint16_t bits) const; - constexpr inline this_t operator>>(uint16_t bits) const; + constexpr this_t operator>>(uint16_t bits) const; #endif // ------------------------------------------------------- UNARY ARITHMETIC OPERATORS ------------------------------------------------- - NBL_CONSTEXPR_INLINE_FUNC this_signed_t operator-() NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_signed_t operator-() NBL_CONST_MEMBER_FUNC { this_t zero; zero.value = _static_cast(0); @@ -363,107 +406,135 @@ struct code // ------------------------------------------------------- BINARY ARITHMETIC OPERATORS ------------------------------------------------- - NBL_CONSTEXPR_INLINE_FUNC this_t 
operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + // put 1 bits everywhere in the bits the current axis is not using + // then extract just the axis bits for the right hand coordinate + // carry-1 will propagate the bits across the already set bits + // then clear out the bits not belonging to current axis + // Note: Its possible to clear on `this` and fill on `rhs` but that will + // disable optimizations, we expect the compiler to optimize a lot if the + // value of `rhs` is known at compile time, e.g. `static_cast>(glm::ivec3(1,0,0))` + NBL_CONSTEXPR_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - NBL_CONSTEXPR_STATIC storage_t Mask = _static_cast(impl::coding_mask_v); - left_shift_operator leftShift; + bit_not > bitnot; + // For each coordinate, leave its bits intact and turn every other bit ON + const portable_vector_t counterMaskedValue = bitnot(impl::InterleaveMasks) | value; + // For each coordinate in rhs, leave its bits intact and turn every other bit OFF + const portable_vector_t maskedRhsValue = impl::InterleaveMasks & rhs.value; + // Add these coordinate-wise, then turn all bits not belonging to the current coordinate OFF + const portable_vector_t interleaveShiftedResult = (counterMaskedValue + maskedRhsValue) & impl::InterleaveMasks; + // Re-encode the result + array_get, storage_t> getter; this_t retVal; - retVal.value = _static_cast(uint64_t(0)); + retVal.value = getter(interleaveShiftedResult, 0); [[unroll]] - for (uint16_t i = 0; i < D; i++) - { - // put 1 bits everywhere in the bits the current axis is not using - // then extract just the axis bits for the right hand coordinate - // carry-1 will propagate the bits across the already set bits - // then clear out the bits not belonging to current axis - // Note: Its possible to clear on `this` and fill on `rhs` but that will - // disable optimizations, we expect the compiler to optimize a lot if the - // value of `rhs` is known at compile time, e.g. 
`static_cast>(glm::ivec3(1,0,0))` - retVal.value |= ((value | (~leftShift(Mask, i))) + (rhs.value & leftShift(Mask, i))) & leftShift(Mask, i); - } + for (uint16_t i = 1; i < D; i++) + retVal.value = retVal.value | getter(interleaveShiftedResult, i); + return retVal; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + // This is the dual trick of the one used for addition: set all other bits to 0 so borrows propagate + NBL_CONSTEXPR_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - NBL_CONSTEXPR_STATIC storage_t Mask = _static_cast(impl::coding_mask_v); - left_shift_operator leftShift; + // For each coordinate, leave its bits intact and turn every other bit OFF + const portable_vector_t maskedValue = impl::InterleaveMasks & value; + // Do the same for each coordinate in rhs + const portable_vector_t maskedRhsValue = impl::InterleaveMasks & rhs.value; + // Subtract these coordinate-wise, then turn all bits not belonging to the current coordinate OFF + const portable_vector_t interleaveShiftedResult = (maskedValue - maskedRhsValue) & impl::InterleaveMasks; + // Re-encode the result + array_get, storage_t> getter; this_t retVal; - retVal.value = _static_cast(uint64_t(0)); + retVal.value = getter(interleaveShiftedResult, 0); [[unroll]] - for (uint16_t i = 0; i < D; i++) - { - // This is the dual trick of the one used for addition: set all other bits to 0 so borrows propagate - retVal.value |= ((value & leftShift(Mask, i)) - (rhs.value & leftShift(Mask, i))) & leftShift(Mask, i); - } + for (uint16_t i = 1; i < D; i++) + retVal.value = retVal.value | getter(interleaveShiftedResult, i); + return retVal; } // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- - NBL_CONSTEXPR_INLINE_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) 
NBL_CONST_MEMBER_FUNC { return value == rhs.value; } template) - NBL_CONSTEXPR_INLINE_FUNC vector equals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC vector equal(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return impl::Equals::__call(value, rhs); + return impl::Equals::template __call(value, rhs); } - NBL_CONSTEXPR_INLINE_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { return value != rhs.value; } template) - NBL_CONSTEXPR_INLINE_FUNC vector notEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC vector notEqual(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return !equals(rhs); + return !equal(rhs); } template) - NBL_CONSTEXPR_INLINE_FUNC vector less(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC vector lessThan(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return impl::LessThan::__call(value, rhs); + return impl::LessThan::template __call(value, rhs); } template) - NBL_CONSTEXPR_INLINE_FUNC vector lessEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC vector lessThanEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return impl::LessEquals::__call(value, rhs); + return impl::LessEquals::template __call(value, rhs); } template) - NBL_CONSTEXPR_INLINE_FUNC vector greater(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC vector greaterThan(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return impl::GreaterThan::__call(value, rhs); + return impl::GreaterThan::template __call(value, rhs); } template) - NBL_CONSTEXPR_INLINE_FUNC vector greaterEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC vector greaterThanEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return impl::GreaterEquals::__call(value, rhs); + return 
impl::GreaterEquals::template __call(value, rhs); } }; } //namespace morton +// Specialize the `static_cast_helper` +namespace impl +{ + +// I must be of same signedness as the morton code, and be wide enough to hold each component +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) +struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) > +{ + NBL_CONSTEXPR_STATIC_FUNC vector cast(NBL_CONST_REF_ARG(morton::code, Bits, D, _uint64_t>) val) + { + using storage_t = typename morton::code, Bits, D, _uint64_t>::storage_t; + return morton::impl::Transcoder::decode(val.value); + } +}; + +} // namespace impl + template struct left_shift_operator > { using type_t = morton::code; using storage_t = typename type_t::storage_t; - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) { left_shift_operator valueLeftShift; type_t retVal; @@ -479,7 +550,7 @@ struct arithmetic_right_shift_operator > using type_t = morton::code; using storage_t = typename type_t::storage_t; - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) { arithmetic_right_shift_operator valueArithmeticRightShift; type_t retVal; @@ -496,10 +567,10 @@ struct arithmetic_right_shift_operator > using type_t = morton::code; using scalar_t = conditional_t<(Bits > 16), int32_t, int16_t>; - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) { vector cartesian = _static_cast >(operand); - cartesian >> scalar_t(bits); + cartesian >>= scalar_t(bits); return type_t::create(cartesian); } }; @@ -507,45 +578,24 @@ struct 
arithmetic_right_shift_operator > #ifndef __HLSL_VERSION template&& D* Bits <= 64) -constexpr inline morton::code morton::code::operator<<(uint16_t bits) const +constexpr morton::code morton::code::operator<<(uint16_t bits) const { left_shift_operator> leftShift; return leftShift(*this, bits); } template&& D* Bits <= 64) -constexpr inline morton::code morton::code::operator>>(uint16_t bits) const +constexpr morton::code morton::code::operator>>(uint16_t bits) const { arithmetic_right_shift_operator> rightShift; return rightShift(*this, bits); } -#endif - -// Specialize the `static_cast_helper` -namespace impl -{ - -// I must be of same signedness as the morton code, and be wide enough to hold each component -template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) -struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) > -{ - NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(morton::code, Bits, D, _uint64_t>) val) - { - using storage_t = typename morton::code, Bits, D, _uint64_t>::storage_t; - return morton::impl::MortonDecoder::decode(val.value); - } -}; - -} // namespace impl - -#ifndef __HLSL_VERSION - template && D* Bits <= 64) -template = Bits) -constexpr inline morton::code::operator vector() const noexcept +template = Bits && is_signed_v == Signed) +constexpr morton::code::operator vector() const noexcept { - return _static_cast, morton::code, Bits, D>>(*this); + return _static_cast, morton::code>(*this); } #endif diff --git a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl index 8add7a9ed3..901a8e419a 100644 --- a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl +++ b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl @@ -114,7 +114,12 @@ NBL_CONSTEXPR_STATIC_INLINE bool is_bda_pointer_v = is_bda_pointer::value; //! General Operations - + +//! 
Miscellaneous Instructions +template +[[vk::ext_instruction(spv::OpUndef)]] +T undef(); + // template [[vk::ext_instruction(spv::OpAccessChain)]] diff --git a/include/nbl/builtin/hlsl/type_traits.hlsl b/include/nbl/builtin/hlsl/type_traits.hlsl index bc160de788..a6b3db6708 100644 --- a/include/nbl/builtin/hlsl/type_traits.hlsl +++ b/include/nbl/builtin/hlsl/type_traits.hlsl @@ -684,6 +684,8 @@ template NBL_CONSTEXPR uint32_t alignment_of_v = alignment_of::value; template NBL_CONSTEXPR uint64_t extent_v = extent::value; +template +NBL_CONSTEXPR bool is_fundamental_v = is_fundamental::value; // Overlapping definitions diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index f03d8ae22c..c57eec4e61 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -248,9 +248,9 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/basic.h") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/intrinsics.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/matrix.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/promote.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/truncate.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/vector.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/impl/intrinsics_impl.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/impl/vector_impl.hlsl") #glsl compat LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/glsl_compat/core.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/glsl_compat/subgroup_arithmetic.hlsl") From 2d0ffbadf914f84e4f7d5bfc8fec3b860121f655 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 28 Apr 2025 15:16:08 -0300 Subject: [PATCH 027/472] Fix the last of the operators --- include/nbl/builtin/hlsl/morton.hlsl | 70 +++++++++++++++++----------- 1 file changed, 44 insertions(+), 26 deletions(-) diff --git 
a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 650d9ce6ba..d570e249c8 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -35,13 +35,16 @@ struct coding_mask; template NBL_CONSTEXPR T coding_mask_v = _static_cast(coding_mask::value); -template -NBL_CONSTEXPR portable_vector_t InterleaveMasks = _static_cast >( - truncate >( - vector(coding_mask_v, - coding_mask_v << 1, - coding_mask_v << 2, - coding_mask_v << 3))); +// It's a complete cointoss whether template variables work or not, since it's a C++14 feature (not supported in HLSL2021). Most of the ones we use in Nabla work, +// but this one will only work for some parameters and not for others. Therefore, this was made into a macro to inline where used + +#define NBL_MORTON_INTERLEAVE_MASKS(STORAGE_T, DIM, BITS, NAMESPACE_PREFIX) _static_cast >(\ + truncate >(\ + vector(NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0>,\ + NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0> << 1,\ + NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0> << 2,\ + NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0> << 3))) + template struct sign_mask : integral_constant {}; @@ -49,13 +52,12 @@ struct sign_mask : integral_constant NBL_CONSTEXPR T sign_mask_v = _static_cast(sign_mask::value); -template -NBL_CONSTEXPR portable_vector_t SignMasks = _static_cast >( - truncate >( - vector(sign_mask_v, - sign_mask_v << 1, - sign_mask_v << 2, - sign_mask_v << 3))); +#define NBL_MORTON_SIGN_MASKS(STORAGE_T, DIM, BITS) _static_cast >(\ + truncate >(\ + vector(sign_mask_v< DIM, BITS >,\ + sign_mask_v< DIM, BITS > << 1,\ + sign_mask_v< DIM, BITS > << 2,\ + sign_mask_v< DIM, BITS > << 3))) // 0th stage will be special: to avoid masking twice during encode/decode, and to get a proper mask that only gets the relevant bits out of a morton code, the 0th stage // mask also considers the total number of bits we're cnsidering for a code (all other masks operate on a bit-agnostic basis). 
@@ -213,10 +215,11 @@ struct Equals template) NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { + const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, ); const portable_vector_t zeros = _static_cast >(truncate >(vector(0,0,0,0))); const portable_vector_t rhsCasted = _static_cast >(rhs); - const portable_vector_t xored = rhsCasted ^ value; + const portable_vector_t xored = rhsCasted ^ (InterleaveMasks & value); equal_to > equal; return equal(xored, zeros); } @@ -247,20 +250,22 @@ struct BaseComparison template) NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { + const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, ); + const portable_vector_t SignMasks = NBL_MORTON_SIGN_MASKS(storage_t, D, Bits); ComparisonOp comparison; NBL_IF_CONSTEXPR(Signed) { // Obtain a vector of deinterleaved coordinates and flip their sign bits - portable_vector_t thisCoord = (InterleaveMasks & value) ^ SignMasks; + portable_vector_t thisCoord = (InterleaveMasks & value) ^ SignMasks; // rhs already deinterleaved, just have to cast type and flip sign - const portable_vector_t rhsCoord = _static_cast >(rhs) ^ SignMasks; + const portable_vector_t rhsCoord = _static_cast >(rhs) ^ SignMasks; return comparison(thisCoord, rhsCoord); } else { // Obtain a vector of deinterleaved coordinates - portable_vector_t thisCoord = InterleaveMasks & value; + portable_vector_t thisCoord = InterleaveMasks & value; // rhs already deinterleaved, just have to cast type const portable_vector_t rhsCoord = _static_cast >(rhs); @@ -415,13 +420,14 @@ struct code // value of `rhs` is known at compile time, e.g. 
`static_cast>(glm::ivec3(1,0,0))` NBL_CONSTEXPR_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { + const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, impl::); bit_not > bitnot; // For each coordinate, leave its bits intact and turn every other bit ON - const portable_vector_t counterMaskedValue = bitnot(impl::InterleaveMasks) | value; + const portable_vector_t counterMaskedValue = bitnot(InterleaveMasks) | value; // For each coordinate in rhs, leave its bits intact and turn every other bit OFF - const portable_vector_t maskedRhsValue = impl::InterleaveMasks & rhs.value; + const portable_vector_t maskedRhsValue = InterleaveMasks & rhs.value; // Add these coordinate-wise, then turn all bits not belonging to the current coordinate OFF - const portable_vector_t interleaveShiftedResult = (counterMaskedValue + maskedRhsValue) & impl::InterleaveMasks; + const portable_vector_t interleaveShiftedResult = (counterMaskedValue + maskedRhsValue) & InterleaveMasks; // Re-encode the result array_get, storage_t> getter; this_t retVal; @@ -429,19 +435,19 @@ struct code [[unroll]] for (uint16_t i = 1; i < D; i++) retVal.value = retVal.value | getter(interleaveShiftedResult, i); - return retVal; } // This is the dual trick of the one used for addition: set all other bits to 0 so borrows propagate NBL_CONSTEXPR_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { + const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, impl::); // For each coordinate, leave its bits intact and turn every other bit OFF - const portable_vector_t maskedValue = impl::InterleaveMasks & value; + const portable_vector_t maskedValue = InterleaveMasks & value; // Do the same for each coordinate in rhs - const portable_vector_t maskedRhsValue = impl::InterleaveMasks & rhs.value; + const portable_vector_t maskedRhsValue = InterleaveMasks & rhs.value; // Subtract these coordinate-wise, 
then turn all bits not belonging to the current coordinate OFF - const portable_vector_t interleaveShiftedResult = (maskedValue - maskedRhsValue) & impl::InterleaveMasks; + const portable_vector_t interleaveShiftedResult = (maskedValue - maskedRhsValue) & InterleaveMasks; // Re-encode the result array_get, storage_t> getter; this_t retVal; @@ -540,6 +546,10 @@ struct left_shift_operator > type_t retVal; // Shift every coordinate by `bits` retVal.value = valueLeftShift(operand.value, bits * D); + // Previous shift might move bits to positions that storage has available but the morton code does not use + // Un-decoding the resulting morton is still fine and produces expected results, but some operations such as equality expect these unused bits to be 0 so we mask them off + const uint64_t UsedBitsMask = Bits * D < 64 ? (uint64_t(1) << (Bits * D)) - 1 : ~uint64_t(0); + retVal.value = retVal.value & _static_cast(UsedBitsMask); return retVal; } }; @@ -570,7 +580,12 @@ struct arithmetic_right_shift_operator > NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) { vector cartesian = _static_cast >(operand); - cartesian >>= scalar_t(bits); + // To avoid branching, we left-shift each coordinate to put the MSB (of the encoded Morton) at the position of the MSB (of the `scalar_t` used for the decoded coordinate), + // then right-shift again to get correct sign on each coordinate + // The number of bits we shift by to put MSB of Morton at MSB of `scalar_t` is the difference between the bitwidth of `scalar_t` and Bits + const scalar_t ShiftFactor = scalar_t(8 * sizeof(scalar_t) - Bits); + cartesian <<= ShiftFactor; + cartesian >>= ShiftFactor + scalar_t(bits); return type_t::create(cartesian); } }; @@ -600,6 +615,9 @@ constexpr morton::code::operator vector() cons #endif +#undef NBL_MORTON_INTERLEAVE_MASKS +#undef NBL_MORTON_SIGN_MASKS + } //namespace hlsl } //namespace nbl From 68edc322f2ba9c19ab0bd8068da2bae2390d7182 Mon Sep 17 00:00:00 2001 
From: Fletterio Date: Mon, 28 Apr 2025 15:19:48 -0300 Subject: [PATCH 028/472] Change examples test submodule for master merge --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index f2ea51d0b3..f4cc4cd22e 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit f2ea51d0b3e3388c0f9bae03602ec3b1f658c124 +Subproject commit f4cc4cd22ee4bd5506d794e63caafddf974ed7a4 From ca8612807ff4f025f9d923436c065699ec5a0002 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 1 Jul 2025 11:07:58 +0700 Subject: [PATCH 029/472] latest example --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 5929be13ea..ac569917eb 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 5929be13ea1bfbb1d04bbe6a39321d519a3cbf92 +Subproject commit ac569917eb494b68e78133618bfc911bb04c3f47 From cd2ef9570b96b9184af85edb841a3d8ea5d2d068 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 4 Jul 2025 11:50:00 +0700 Subject: [PATCH 030/472] latest example --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 00185f2822..da63edf598 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 00185f28224460b5de9fe1dcf6a2fa0176ab489a +Subproject commit da63edf598390448a2cb5835b61ecb38ec8393c4 From 98ccfb233f6056d6cf47d286446565594493c075 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 8 Jul 2025 15:44:23 +0700 Subject: [PATCH 031/472] added debug draw aabb extension, moved from ex --- examples_tests | 2 +- include/nbl/ext/DebugDraw/CDrawAABB.h | 84 ++++ .../builtin/hlsl/aabb_instances.fragment.hlsl | 13 + .../builtin/hlsl/aabb_instances.vertex.hlsl | 30 ++ .../ext/DebugDraw/builtin/hlsl/common.hlsl | 45 +++ src/nbl/ext/CMakeLists.txt | 13 + src/nbl/ext/DebugDraw/CDrawAABB.cpp | 360 ++++++++++++++++++ src/nbl/ext/DebugDraw/CMakeLists.txt | 
42 ++ 8 files changed, 588 insertions(+), 1 deletion(-) create mode 100644 include/nbl/ext/DebugDraw/CDrawAABB.h create mode 100644 include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.fragment.hlsl create mode 100644 include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl create mode 100644 include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl create mode 100644 src/nbl/ext/DebugDraw/CDrawAABB.cpp create mode 100644 src/nbl/ext/DebugDraw/CMakeLists.txt diff --git a/examples_tests b/examples_tests index 7469300793..738269ede1 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 7469300793cafeebba1ba3b5c77a66fdbdc7744e +Subproject commit 738269ede1b9ee83cd5e44f86e290852ce6b0127 diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h new file mode 100644 index 0000000000..6be529ecfa --- /dev/null +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -0,0 +1,84 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +// TODO move this into nabla + +#ifndef _NBL_EXT_DRAW_AABB_H_ +#define _NBL_EXT_DRAW_AABB_H_ + +#include "nbl/video/declarations.h" +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/ext/DebugDraw/builtin/hlsl/common.hlsl" + +namespace nbl::ext::debugdraw +{ +class DrawAABB final : public core::IReferenceCounted +{ +public: + struct SCachedCreationParameters + { + using streaming_buffer_t = video::StreamingTransientDataBufferST>; + + static constexpr inline auto RequiredAllocateFlags = core::bitflag(video::IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); + static constexpr inline auto RequiredUsageFlags = core::bitflag(asset::IBuffer::EUF_STORAGE_BUFFER_BIT) | asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; + + core::smart_refctd_ptr utilities; + + //! 
optional, default MDI buffer allocated if not provided + core::smart_refctd_ptr streamingBuffer = nullptr; + }; + + struct SCreationParameters : SCachedCreationParameters + { + core::smart_refctd_ptr assetManager = nullptr; + + core::smart_refctd_ptr pipelineLayout; + core::smart_refctd_ptr renderpass = nullptr; + }; + + // creates an instance that can draw one AABB via push constant or multiple using streaming buffer + static core::smart_refctd_ptr create(SCreationParameters&& params); + + // creates default pipeline layout for push constant version + static core::smart_refctd_ptr createDefaultPipelineLayout(video::ILogicalDevice* device, const asset::SPushConstantRange& pcRange); + + // creates default pipeline layout for streaming version + static core::smart_refctd_ptr createDefaultPipelineLayout(video::ILogicalDevice* device); + + static core::smart_refctd_ptr createDefaultPipeline(video::ILogicalDevice* device, video::IGPUPipelineLayout* layout, video::IGPURenderpass* renderpass, video::IGPUGraphicsPipeline::SShaderSpecInfo& vertex, video::IGPUGraphicsPipeline::SShaderSpecInfo& fragment); + + //! 
mounts the extension's archive to given system - useful if you want to create your own shaders with common header included + static const core::smart_refctd_ptr mount(core::smart_refctd_ptr logger, system::ISystem* system, const std::string_view archiveAlias = ""); + + inline const SCachedCreationParameters& getCreationParameters() const { return m_cachedCreationParams; } + + // records draw command for single AABB, user has to set pipeline outside + bool renderSingle(video::IGPUCommandBuffer* commandBuffer); + + bool render(video::IGPUCommandBuffer* commandBuffer, video::ISemaphore::SWaitInfo waitInfo, float* cameraMat3x4); + + static std::array getVerticesFromAABB(const core::aabbox3d& aabb); + + void addAABB(const core::aabbox3d& aabb, const hlsl::float32_t4& color = { 1,0,0,1 }); + + void clearAABBs(); + +protected: + DrawAABB(SCreationParameters&& _params, core::smart_refctd_ptr pipeline); + ~DrawAABB() override; + +private: + static core::smart_refctd_ptr createPipeline(SCreationParameters& params); + static bool createStreamingBuffer(SCreationParameters& params); + + std::vector m_instances; + std::array m_unitAABBVertices; + + SCachedCreationParameters m_cachedCreationParams; + + core::smart_refctd_ptr m_pipeline; +}; +} + +#endif diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.fragment.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.fragment.hlsl new file mode 100644 index 0000000000..f17e028f91 --- /dev/null +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.fragment.hlsl @@ -0,0 +1,13 @@ +#pragma shader_stage(fragment) + +#include "common.hlsl" + +using namespace nbl::ext::debugdraw; + +[shader("pixel")] +float32_t4 main(PSInput input) : SV_TARGET +{ + float32_t4 outColor = input.color; + + return outColor; +} \ No newline at end of file diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl new file mode 100644 index 
0000000000..8a54d40c5a --- /dev/null +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl @@ -0,0 +1,30 @@ +#pragma shader_stage(vertex) + +#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" +#include "nbl/builtin/hlsl/bda/__ptr.hlsl" +#include "common.hlsl" + +using namespace nbl::hlsl; +using namespace nbl::ext::debugdraw; + +[[vk::push_constant]] SPushConstants pc; + +[shader("vertex")] +PSInput main() +{ + PSInput output; + + float32_t3 vertex = (bda::__ptr::create(pc.pVertexBuffer) + glsl::gl_VertexIndex()).deref_restrict().load(); + InstanceData instance = vk::RawBufferLoad(pc.pInstanceBuffer + sizeof(InstanceData) * glsl::gl_InstanceIndex()); + + float32_t4x4 transform; + transform[0] = instance.transform[0]; + transform[1] = instance.transform[1]; + transform[2] = instance.transform[2]; + transform[3] = float32_t4(0, 0, 0, 1); + float32_t4 position = mul(transform, float32_t4(vertex, 1)); + output.position = mul(pc.MVP, position); + output.color = instance.color; + + return output; +} \ No newline at end of file diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl new file mode 100644 index 0000000000..2bcd378e40 --- /dev/null +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl @@ -0,0 +1,45 @@ +#ifndef _DRAW_AABB_COMMON_HLSL +#define _DRAW_AABB_COMMON_HLSL + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +namespace nbl +{ +namespace ext +{ +namespace debugdraw +{ + +struct InstanceData +{ +#ifdef __HLSL_VERSION + float32_t3x4 transform; +#else + float transform[3*4]; +#endif + nbl::hlsl::float32_t4 color; +}; + +struct SPushConstants +{ +#ifdef __HLSL_VERSION + float32_t4x4 MVP; +#else + float MVP[4*4]; +#endif + uint64_t pVertexBuffer; + uint64_t pInstanceBuffer; +}; + +#ifdef __HLSL_VERSION +struct PSInput +{ + float32_t4 position : SV_Position; + float32_t4 color : TEXCOORD0; +}; +#endif + +} +} +} +#endif diff --git a/src/nbl/ext/CMakeLists.txt 
b/src/nbl/ext/CMakeLists.txt index 27f9464f3e..da7835a4df 100644 --- a/src/nbl/ext/CMakeLists.txt +++ b/src/nbl/ext/CMakeLists.txt @@ -6,6 +6,7 @@ start_tracking_variables_for_propagation_to_parent() # TODO: all of those options bellow should be defined here option(NBL_BUILD_TEXT_RENDERING "Enable Nabla Text Rendering extension building and integration?" OFF) +option(NBL_BUILD_DEBUG_DRAW "Enable Nabla Debug Draw extension building and integration?" OFF) # TODO: also all variables bellow should be killed from build system since we have logical # targets which properties (like include search directories or outputs) can be queried @@ -54,4 +55,16 @@ if(NBL_BUILD_TEXT_RENDERING) add_subdirectory(TextRendering) endif() +if(NBL_BUILD_DEBUG_DRAW) + add_subdirectory(DebugDraw) + set(NBL_EXT_DEBUG_DRAW_INCLUDE_DIRS + ${NBL_EXT_DEBUG_DRAW_INCLUDE_DIRS} + PARENT_SCOPE + ) + set(NBL_EXT_DEBUG_DRAW_LIB + ${NBL_EXT_DEBUG_DRAW_LIB} + PARENT_SCOPE + ) +endif() + propagate_changed_variables_to_parent_scope() \ No newline at end of file diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp new file mode 100644 index 0000000000..3a17cf1b90 --- /dev/null +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -0,0 +1,360 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +// TODO move this into nabla + +#include "nbl/ext/DebugDraw/CDrawAABB.h" + +using namespace nbl; +using namespace core; +using namespace video; +using namespace system; +using namespace asset; +using namespace hlsl; + +namespace nbl::ext::debugdraw +{ + +core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) +{ + auto* const logger = params.utilities->getLogger(); + + auto pipeline = createPipeline(params); + if (!pipeline) + { + logger->log("Failed to create pipeline!", ILogger::ELL_ERROR); + return nullptr; + } + + if (!createStreamingBuffer(params)) + { + logger->log("Failed to create streaming buffer!", ILogger::ELL_ERROR); + return nullptr; + } + + return core::smart_refctd_ptr(new DrawAABB(std::move(params), pipeline)); +} + +DrawAABB::DrawAABB(SCreationParameters&& params, smart_refctd_ptr pipeline) + : m_cachedCreationParams(std::move(params)), m_pipeline(pipeline) +{ + const auto unitAABB = core::aabbox3d({ 0, 0, 0 }, { 1, 1, 1 }); + m_unitAABBVertices = getVerticesFromAABB(unitAABB); +} + +DrawAABB::~DrawAABB() +{ +} + +// note we use archive entry explicitly for temporary compiler include search path & asset cwd to use keys directly +constexpr std::string_view NBL_ARCHIVE_ENTRY = _ARCHIVE_ENTRY_KEY_; + +const smart_refctd_ptr DrawAABB::mount(smart_refctd_ptr logger, ISystem* system, const std::string_view archiveAlias) +{ + assert(system); + + if (!system) + return nullptr; + + // extension should mount everything for you, regardless if content goes from virtual filesystem + // or disk directly - and you should never rely on application framework to expose extension data + +#ifdef NBL_EMBED_BUILTIN_RESOURCES + auto archive = make_smart_refctd_ptr(smart_refctd_ptr(logger)); + system->mount(smart_refctd_ptr(archive), archiveAlias.data()); +#else + auto NBL_EXTENSION_MOUNT_DIRECTORY_ENTRY = (path(_ARCHIVE_ABSOLUTE_ENTRY_PATH_) / NBL_ARCHIVE_ENTRY).make_preferred(); 
+ auto archive = make_smart_refctd_ptr(std::move(NBL_EXTENSION_MOUNT_DIRECTORY_ENTRY), smart_refctd_ptr(logger), system); + system->mount(smart_refctd_ptr(archive), archiveAlias.data()); +#endif + + return smart_refctd_ptr(archive); +} + +smart_refctd_ptr DrawAABB::createPipeline(SCreationParameters& params) +{ + auto system = smart_refctd_ptr(params.assetManager->getSystem()); + auto* set = params.assetManager->getCompilerSet(); + auto compiler = set->getShaderCompiler(IShader::E_CONTENT_TYPE::ECT_HLSL); + auto includeFinder = make_smart_refctd_ptr(smart_refctd_ptr(system)); + auto includeLoader = includeFinder->getDefaultFileSystemLoader(); + includeFinder->addSearchPath(NBL_ARCHIVE_ENTRY.data(), includeLoader); + + auto compileShader = [&](const std::string& filePath, IShader::E_SHADER_STAGE stage) -> smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lparams = {}; + lparams.logger = params.utilities->getLogger(); + lparams.workingDirectory = NBL_ARCHIVE_ENTRY.data(); + auto bundle = params.assetManager->getAsset(filePath, lparams); + if (bundle.getContents().empty() || bundle.getAssetType() != IAsset::ET_SHADER) + { + params.utilities->getLogger()->log("Shader %s not found!", ILogger::ELL_ERROR, filePath.c_str()); + exit(-1); + } + + const auto assets = bundle.getContents(); + assert(assets.size() == 1); + smart_refctd_ptr shaderSrc = IAsset::castDown(assets[0]); + if (!shaderSrc) + return nullptr; + + CHLSLCompiler::SOptions options = {}; + options.stage = stage; + options.preprocessorOptions.sourceIdentifier = filePath; + options.preprocessorOptions.logger = params.utilities->getLogger(); + options.preprocessorOptions.includeFinder = includeFinder.get(); + shaderSrc = compiler->compileToSPIRV((const char*)shaderSrc->getContent()->getPointer(), options); + + return params.utilities->getLogicalDevice()->compileShader({ shaderSrc.get() }); + }; + + if (!system->isDirectory(path(NBL_ARCHIVE_ENTRY.data()))) + 
mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), NBL_ARCHIVE_ENTRY); + + auto vertexShader = compileShader("aabb_instances.vertex.hlsl", IShader::E_SHADER_STAGE::ESS_VERTEX); + auto fragmentShader = compileShader("aabb_instances.fragment.hlsl", IShader::E_SHADER_STAGE::ESS_FRAGMENT); + + video::IGPUGraphicsPipeline::SCreationParams pipelineParams[1] = {}; + pipelineParams[0].layout = params.pipelineLayout.get(); + pipelineParams[0].vertexShader = { .shader = vertexShader.get(), .entryPoint = "main" }; + pipelineParams[0].fragmentShader = { .shader = fragmentShader.get(), .entryPoint = "main" }; + pipelineParams[0].cached = { + .primitiveAssembly = { + .primitiveType = asset::E_PRIMITIVE_TOPOLOGY::EPT_LINE_LIST, + } + }; + pipelineParams[0].renderpass = params.renderpass.get(); + + smart_refctd_ptr pipeline; + params.utilities->getLogicalDevice()->createGraphicsPipelines(nullptr, pipelineParams, &pipeline); + if (!pipeline) + { + params.utilities->getLogger()->log("Could not create streaming pipeline!", ILogger::ELL_ERROR); + return nullptr; + } + + return pipeline; +} + +bool DrawAABB::createStreamingBuffer(SCreationParameters& params) +{ + const uint32_t minStreamingBufferAllocationSize = 128u, maxStreamingBufferAllocationAlignment = 4096u, mdiBufferDefaultSize = /* 2MB */ 1024u * 1024u * 2u; + + auto getRequiredAccessFlags = [&](const bitflag& properties) + { + bitflag flags(IDeviceMemoryAllocation::EMCAF_NO_MAPPING_ACCESS); + + if (properties.hasFlags(IDeviceMemoryAllocation::EMPF_HOST_READABLE_BIT)) + flags |= IDeviceMemoryAllocation::EMCAF_READ; + if (properties.hasFlags(IDeviceMemoryAllocation::EMPF_HOST_WRITABLE_BIT)) + flags |= IDeviceMemoryAllocation::EMCAF_WRITE; + + return flags; + }; + + if (!params.streamingBuffer) + { + IGPUBuffer::SCreationParams mdiCreationParams = {}; + mdiCreationParams.usage = SCachedCreationParameters::RequiredUsageFlags; + mdiCreationParams.size = mdiBufferDefaultSize; + + auto buffer = 
params.utilities->getLogicalDevice()->createBuffer(std::move(mdiCreationParams)); + buffer->setObjectDebugName("AABB Streaming Buffer"); + + auto memoryReqs = buffer->getMemoryReqs(); + memoryReqs.memoryTypeBits &= params.utilities->getLogicalDevice()->getPhysicalDevice()->getUpStreamingMemoryTypeBits(); + + auto allocation = params.utilities->getLogicalDevice()->allocate(memoryReqs, buffer.get(), SCachedCreationParameters::RequiredAllocateFlags); + { + const bool allocated = allocation.isValid(); + assert(allocated); + } + auto memory = allocation.memory; + + if (!memory->map({ 0ull, memoryReqs.size }, getRequiredAccessFlags(memory->getMemoryPropertyFlags()))) + params.utilities->getLogger()->log("Could not map device memory!", ILogger::ELL_ERROR); + + params.streamingBuffer = make_smart_refctd_ptr(SBufferRange{0ull, mdiCreationParams.size, std::move(buffer)}, maxStreamingBufferAllocationAlignment, minStreamingBufferAllocationSize); + } + + auto buffer = params.streamingBuffer->getBuffer(); + auto binding = buffer->getBoundMemory(); + + const auto validation = std::to_array + ({ + std::make_pair(buffer->getCreationParams().usage.hasFlags(SCachedCreationParameters::RequiredUsageFlags), "Streaming buffer must be created with IBuffer::EUF_STORAGE_BUFFER_BIT | IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT enabled!"), + std::make_pair(bool(buffer->getMemoryReqs().memoryTypeBits & params.utilities->getLogicalDevice()->getPhysicalDevice()->getUpStreamingMemoryTypeBits()), "Streaming buffer must have up-streaming memory type bits enabled!"), + std::make_pair(binding.memory->getAllocateFlags().hasFlags(SCachedCreationParameters::RequiredAllocateFlags), "Streaming buffer's memory must be allocated with IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT enabled!"), + std::make_pair(binding.memory->isCurrentlyMapped(), "Streaming buffer's memory must be mapped!"), // streaming buffer contructor already validates it, but cannot assume user won't unmap its own buffer for some reason 
(sorry if you have just hit it) + std::make_pair(binding.memory->getCurrentMappingAccess().hasFlags(getRequiredAccessFlags(binding.memory->getMemoryPropertyFlags())), "Streaming buffer's memory current mapping access flags don't meet requirements!") + }); + + for (const auto& [ok, error] : validation) + if (!ok) + { + params.utilities->getLogger()->log(error, ILogger::ELL_ERROR); + return false; + } + + return true; +} + +core::smart_refctd_ptr DrawAABB::createDefaultPipelineLayout(video::ILogicalDevice* device, const asset::SPushConstantRange& pcRange) +{ + return device->createPipelineLayout({ &pcRange , 1 }, nullptr, nullptr, nullptr, nullptr); +} + +core::smart_refctd_ptr DrawAABB::createDefaultPipelineLayout(video::ILogicalDevice* device) +{ + SPushConstantRange pcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX, + .offset = 0, + .size = sizeof(SPushConstants) + }; + return device->createPipelineLayout({ &pcRange , 1 }, nullptr, nullptr, nullptr, nullptr); +} + +smart_refctd_ptr DrawAABB::createDefaultPipeline(video::ILogicalDevice* device, video::IGPUPipelineLayout* layout, video::IGPURenderpass* renderpass, video::IGPUGraphicsPipeline::SShaderSpecInfo& vertex, video::IGPUGraphicsPipeline::SShaderSpecInfo& fragment) +{ + smart_refctd_ptr pipeline; + + video::IGPUGraphicsPipeline::SCreationParams params[1] = {}; + params[0].layout = layout; + params[0].vertexShader = vertex; + params[0].fragmentShader = fragment; + params[0].cached = { + .primitiveAssembly = { + .primitiveType = asset::E_PRIMITIVE_TOPOLOGY::EPT_LINE_LIST, + } + }; + params[0].renderpass = renderpass; + + device->createGraphicsPipelines(nullptr, params, &pipeline); + + return pipeline; +} + +bool DrawAABB::renderSingle(IGPUCommandBuffer* commandBuffer) +{ + commandBuffer->setLineWidth(1.f); + commandBuffer->draw(24, 1, 0, 0); + + return true; +} + +bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo waitInfo, float* cameraMat3x4) +{ + using offset_t = 
SCachedCreationParameters::streaming_buffer_t::size_type; + constexpr auto MdiSizes = std::to_array({ sizeof(float32_t3), sizeof(InstanceData) }); + // shared nPoT alignment needs to be divisible by all smaller ones to satisfy an allocation from all + constexpr offset_t MaxAlignment = std::reduce(MdiSizes.begin(), MdiSizes.end(), 1, [](const offset_t a, const offset_t b)->offset_t {return std::lcm(a, b); }); + // allocator initialization needs us to round up to PoT + const auto MaxPOTAlignment = roundUpToPoT(MaxAlignment); + + auto* streaming = m_cachedCreationParams.streamingBuffer.get(); + + auto* const streamingPtr = reinterpret_cast(streaming->getBufferPointer()); + assert(streamingPtr); + + commandBuffer->bindGraphicsPipeline(m_pipeline.get()); // move outside of loop, only bind once + + auto instancesIt = m_instances.begin(); + const uint32_t verticesByteSize = sizeof(float32_t3) * m_unitAABBVertices.size(); + const uint32_t availableInstancesByteSize = streaming->getBuffer()->getSize() - verticesByteSize; + const uint32_t instancesPerIter = availableInstancesByteSize / sizeof(InstanceData); + using suballocator_t = core::LinearAddressAllocatorST; + while (instancesIt != m_instances.end()) + { + const uint32_t instanceCount = min(instancesPerIter, m_instances.size()); + offset_t inputOffset = 0u; + offset_t ImaginarySizeUpperBound = 0x1 << 30; + suballocator_t imaginaryChunk(nullptr, inputOffset, 0, roundUpToPoT(MaxAlignment), ImaginarySizeUpperBound); + uint32_t vertexByteOffset = imaginaryChunk.alloc_addr(verticesByteSize, sizeof(float32_t3)); + uint32_t instancesByteOffset = imaginaryChunk.alloc_addr(sizeof(InstanceData) * instanceCount, sizeof(InstanceData)); + const uint32_t totalSize = imaginaryChunk.get_allocated_size(); + + inputOffset = SCachedCreationParameters::streaming_buffer_t::invalid_value; + std::chrono::steady_clock::time_point waitTill = std::chrono::steady_clock::now() + std::chrono::milliseconds(1u); + streaming->multi_allocate(waitTill, 
1, &inputOffset, &totalSize, &MaxAlignment); + + memcpy(streamingPtr + vertexByteOffset, m_unitAABBVertices.data(), sizeof(m_unitAABBVertices[0]) * m_unitAABBVertices.size()); + memcpy(streamingPtr + instancesByteOffset, std::addressof(*instancesIt), sizeof(InstanceData) * instanceCount); + instancesIt += instanceCount; + + assert(!streaming->needsManualFlushOrInvalidate()); + + SPushConstants pc; + memcpy(pc.MVP, cameraMat3x4, sizeof(pc.MVP)); + pc.pVertexBuffer = m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + vertexByteOffset; + pc.pInstanceBuffer = m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + instancesByteOffset; + + commandBuffer->pushConstants(m_pipeline->getLayout(), ESS_VERTEX, 0, sizeof(SPushConstants), &pc); + commandBuffer->draw(m_unitAABBVertices.size(), instanceCount, 0, 0); + + streaming->multi_deallocate(1, &inputOffset, &totalSize, waitInfo); + } + // end loop + + return true; +} + +std::array DrawAABB::getVerticesFromAABB(const core::aabbox3d& aabb) +{ + const auto& pMin = aabb.MinEdge; + const auto& pMax = aabb.MaxEdge; + + std::array vertices; + vertices[0] = float32_t3(pMin.X, pMin.Y, pMin.Z); + vertices[1] = float32_t3(pMax.X, pMin.Y, pMin.Z); + vertices[2] = float32_t3(pMin.X, pMin.Y, pMin.Z); + vertices[3] = float32_t3(pMin.X, pMin.Y, pMax.Z); + + vertices[4] = float32_t3(pMax.X, pMin.Y, pMax.Z); + vertices[5] = float32_t3(pMax.X, pMin.Y, pMin.Z); + vertices[6] = float32_t3(pMax.X, pMin.Y, pMax.Z); + vertices[7] = float32_t3(pMin.X, pMin.Y, pMax.Z); + + vertices[8] = float32_t3(pMin.X, pMax.Y, pMin.Z); + vertices[9] = float32_t3(pMax.X, pMax.Y, pMin.Z); + vertices[10] = float32_t3(pMin.X, pMax.Y, pMin.Z); + vertices[11] = float32_t3(pMin.X, pMax.Y, pMax.Z); + + vertices[12] = float32_t3(pMax.X, pMax.Y, pMax.Z); + vertices[13] = float32_t3(pMax.X, pMax.Y, pMin.Z); + vertices[14] = float32_t3(pMax.X, pMax.Y, pMax.Z); + vertices[15] = float32_t3(pMin.X, pMax.Y, pMax.Z); + + vertices[16] = 
float32_t3(pMin.X, pMin.Y, pMin.Z); + vertices[17] = float32_t3(pMin.X, pMax.Y, pMin.Z); + vertices[18] = float32_t3(pMax.X, pMin.Y, pMin.Z); + vertices[19] = float32_t3(pMax.X, pMax.Y, pMin.Z); + + vertices[20] = float32_t3(pMin.X, pMin.Y, pMax.Z); + vertices[21] = float32_t3(pMin.X, pMax.Y, pMax.Z); + vertices[22] = float32_t3(pMax.X, pMin.Y, pMax.Z); + vertices[23] = float32_t3(pMax.X, pMax.Y, pMax.Z); + + return vertices; +} + +void DrawAABB::addAABB(const core::aabbox3d& aabb, const hlsl::float32_t4& color) +{ + InstanceData instance; + instance.color = color; + + core::matrix3x4SIMD instanceTransform; + instanceTransform.setTranslation(core::vectorSIMDf(aabb.MinEdge.X, aabb.MinEdge.Y, aabb.MinEdge.Z, 0)); + const auto diagonal = aabb.MaxEdge - aabb.MinEdge; + instanceTransform.setScale(core::vectorSIMDf(diagonal.X, diagonal.Y, diagonal.Z)); + memcpy(instance.transform, instanceTransform.pointer(), sizeof(core::matrix3x4SIMD)); + + m_instances.push_back(instance); +} + +void DrawAABB::clearAABBs() +{ + m_instances.clear(); +} + +} diff --git a/src/nbl/ext/DebugDraw/CMakeLists.txt b/src/nbl/ext/DebugDraw/CMakeLists.txt new file mode 100644 index 0000000000..3011fe5b4c --- /dev/null +++ b/src/nbl/ext/DebugDraw/CMakeLists.txt @@ -0,0 +1,42 @@ +include(${NBL_ROOT_PATH}/cmake/common.cmake) + +set(NBL_EXT_INTERNAL_INCLUDE_DIR "${NBL_ROOT_PATH}/include") + +set(NBL_EXT_DEBUG_DRAW_H + ${NBL_EXT_INTERNAL_INCLUDE_DIR}/nbl/ext/DebugDraw/CDrawAABB.h +) + +set(NBL_EXT_DEBUG_DRAW_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/CDrawAABB.cpp" +) + +nbl_create_ext_library_project( + DEBUG_DRAW + "${NBL_EXT_DEBUG_DRAW_H}" + "${NBL_EXT_DEBUG_DRAW_SRC}" + "${NBL_EXT_DEBUG_DRAW_EXTERNAL_INCLUDE}" + "" + "" +) + +# this should be standard for all extensions +set(_ARCHIVE_ENTRY_KEY_ "DebugDraw/builtin/hlsl") # then each one has unique archive key +get_filename_component(_ARCHIVE_ABSOLUTE_ENTRY_PATH_ "${NBL_EXT_INTERNAL_INCLUDE_DIR}/nbl/ext" ABSOLUTE) 
+get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) +get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) + +target_compile_definitions(${LIB_NAME} PRIVATE _ARCHIVE_ABSOLUTE_ENTRY_PATH_="${_ARCHIVE_ABSOLUTE_ENTRY_PATH_}") +target_compile_definitions(${LIB_NAME} PRIVATE _ARCHIVE_ENTRY_KEY_="${_ARCHIVE_ENTRY_KEY_}") + +if(NBL_EMBED_BUILTIN_RESOURCES) + set(_BR_TARGET_ extDebugDrawbuiltinResourceData) + + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "common.hlsl") + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "aabb_instances.vertex.hlsl") # (*) + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "aabb_instances.fragment.hlsl") # (*) + + ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_ARCHIVE_ABSOLUTE_ENTRY_PATH_}" "${_ARCHIVE_ENTRY_KEY_}" "nbl::ext::debugdraw::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") + LINK_BUILTIN_RESOURCES_TO_TARGET(${LIB_NAME} ${_BR_TARGET_}) +endif() + +set(NBL_EXT_DEBUG_DRAW_TARGET ${LIB_NAME} CACHE INTERNAL "Nabla's Debug Draw logical target name") From a755514bf184f0ddb112637d3fad972d5be35f68 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 8 Jul 2025 15:49:14 +0700 Subject: [PATCH 032/472] removed todos --- include/nbl/ext/DebugDraw/CDrawAABB.h | 2 -- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 3 --- 2 files changed, 5 deletions(-) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 6be529ecfa..13ca3a1ece 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -2,8 +2,6 @@ // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h -// TODO move this into nabla - #ifndef _NBL_EXT_DRAW_AABB_H_ #define _NBL_EXT_DRAW_AABB_H_ diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index 3a17cf1b90..b77630a2c2 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -2,8 +2,6 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h -// TODO move this into nabla - #include "nbl/ext/DebugDraw/CDrawAABB.h" using namespace nbl; @@ -294,7 +292,6 @@ bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo wa streaming->multi_deallocate(1, &inputOffset, &totalSize, waitInfo); } - // end loop return true; } From 473592b00651360d8694921757bf5ecf1d9caa8e Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 9 Jul 2025 15:19:50 +0700 Subject: [PATCH 033/472] support hlsl AABBs, also OBBs with transform --- CMakeLists.txt | 1 + examples_tests | 2 +- include/nbl/config/BuildConfigOptions.h.in | 2 ++ include/nbl/ext/DebugDraw/CDrawAABB.h | 4 ++++ src/nbl/ext/CMakeLists.txt | 1 - src/nbl/ext/DebugDraw/CDrawAABB.cpp | 23 +++++++++++++++++++--- 6 files changed, 28 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4e29839399..ad8ceffba8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -182,6 +182,7 @@ option(NBL_FAST_MATH "Enable fast low-precision math" ON) option(NBL_BUILD_EXAMPLES "Enable building examples" ON) option(NBL_BUILD_MITSUBA_LOADER "Enable nbl::ext::MitsubaLoader?" OFF) # TODO: once it compies turn this ON by default! option(NBL_BUILD_IMGUI "Enable nbl::ext::ImGui?" ON) +option(NBL_BUILD_DEBUG_DRAW "Enable Nabla Debug Draw extension?" OFF) option(NBL_BUILD_OPTIX "Enable nbl::ext::OptiX?" 
OFF) if(NBL_COMPILE_WITH_CUDA) diff --git a/examples_tests b/examples_tests index 738269ede1..4f1fabdb78 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 738269ede1b9ee83cd5e44f86e290852ce6b0127 +Subproject commit 4f1fabdb786b87e4609649a36059c33cd54ae843 diff --git a/include/nbl/config/BuildConfigOptions.h.in b/include/nbl/config/BuildConfigOptions.h.in index 61b980f71e..578796d384 100644 --- a/include/nbl/config/BuildConfigOptions.h.in +++ b/include/nbl/config/BuildConfigOptions.h.in @@ -65,6 +65,8 @@ #cmakedefine _NBL_BUILD_DPL_ +#cmakedefine NBL_BUILD_DEBUG_DRAW + // TODO: This has to disapppear from the main header and go to the OptiX extension header + config #cmakedefine OPTIX_INCLUDE_DIR "@OPTIX_INCLUDE_DIR@" diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 13ca3a1ece..08b3cf5a3c 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -7,6 +7,7 @@ #include "nbl/video/declarations.h" #include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/shapes/aabb.hlsl" #include "nbl/ext/DebugDraw/builtin/hlsl/common.hlsl" namespace nbl::ext::debugdraw @@ -59,6 +60,9 @@ class DrawAABB final : public core::IReferenceCounted static std::array getVerticesFromAABB(const core::aabbox3d& aabb); void addAABB(const core::aabbox3d& aabb, const hlsl::float32_t4& color = { 1,0,0,1 }); + void addAABB(const hlsl::shapes::AABB<3,float>& aabb, const hlsl::float32_t4& color = { 1,0,0,1 }); + + void addOBB(const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t3x4 transform, const hlsl::float32_t4& color = { 1,0,0,1 }); void clearAABBs(); diff --git a/src/nbl/ext/CMakeLists.txt b/src/nbl/ext/CMakeLists.txt index da7835a4df..6271e912b9 100644 --- a/src/nbl/ext/CMakeLists.txt +++ b/src/nbl/ext/CMakeLists.txt @@ -6,7 +6,6 @@ start_tracking_variables_for_propagation_to_parent() # TODO: all of those options bellow should be defined here 
option(NBL_BUILD_TEXT_RENDERING "Enable Nabla Text Rendering extension building and integration?" OFF) -option(NBL_BUILD_DEBUG_DRAW "Enable Nabla Debug Draw extension building and integration?" OFF) # TODO: also all variables bellow should be killed from build system since we have logical # targets which properties (like include search directories or outputs) can be queried diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index b77630a2c2..6cb2f365ec 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -256,6 +256,7 @@ bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo wa assert(streamingPtr); commandBuffer->bindGraphicsPipeline(m_pipeline.get()); // move outside of loop, only bind once + commandBuffer->setLineWidth(1.f); auto instancesIt = m_instances.begin(); const uint32_t verticesByteSize = sizeof(float32_t3) * m_unitAABBVertices.size(); @@ -336,14 +337,30 @@ std::array DrawAABB::getVerticesFromAABB(const core::aabbox3d& aabb, const hlsl::float32_t4& color) +{ + addAABB(shapes::AABB<3, float>{{aabb.MinEdge.X, aabb.MinEdge.Y, aabb.MinEdge.Z}, { aabb.MaxEdge.X, aabb.MaxEdge.Y, aabb.MaxEdge.Z }}, color); +} + +void DrawAABB::addAABB(const hlsl::shapes::AABB<3,float>& aabb, const hlsl::float32_t4& color) +{ + const auto transform = hlsl::float32_t3x4(1); + addOBB(aabb, transform, color); +} + +void DrawAABB::addOBB(const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t3x4 transform, const hlsl::float32_t4& color) { InstanceData instance; instance.color = color; core::matrix3x4SIMD instanceTransform; - instanceTransform.setTranslation(core::vectorSIMDf(aabb.MinEdge.X, aabb.MinEdge.Y, aabb.MinEdge.Z, 0)); - const auto diagonal = aabb.MaxEdge - aabb.MinEdge; - instanceTransform.setScale(core::vectorSIMDf(diagonal.X, diagonal.Y, diagonal.Z)); + instanceTransform.setTranslation(core::vectorSIMDf(aabb.minVx.x, aabb.minVx.y, aabb.minVx.z, 0)); + const auto 
diagonal = aabb.getExtent(); + instanceTransform.setScale(core::vectorSIMDf(diagonal.x, diagonal.y, diagonal.z)); + + core::matrix3x4SIMD worldTransform; + memcpy(worldTransform.pointer(), &transform, sizeof(transform)); + + instanceTransform = core::concatenateBFollowedByA(worldTransform, instanceTransform); memcpy(instance.transform, instanceTransform.pointer(), sizeof(core::matrix3x4SIMD)); m_instances.push_back(instance); From daf34e0697a30cb970adeca4ef8dbddda1515f2c Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 18 Aug 2025 16:47:58 +0700 Subject: [PATCH 034/472] minor syntax changes --- include/nbl/ext/DebugDraw/CDrawAABB.h | 92 +++++++++---------- .../builtin/hlsl/aabb_instances.fragment.hlsl | 2 +- .../builtin/hlsl/aabb_instances.vertex.hlsl | 7 +- .../ext/DebugDraw/builtin/hlsl/common.hlsl | 6 +- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 8 +- src/nbl/ext/DebugDraw/CMakeLists.txt | 2 +- 6 files changed, 62 insertions(+), 55 deletions(-) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 08b3cf5a3c..034a0321bf 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -10,76 +10,76 @@ #include "nbl/builtin/hlsl/shapes/aabb.hlsl" #include "nbl/ext/DebugDraw/builtin/hlsl/common.hlsl" -namespace nbl::ext::debugdraw +namespace nbl::ext::debug_draw { class DrawAABB final : public core::IReferenceCounted { -public: - struct SCachedCreationParameters - { - using streaming_buffer_t = video::StreamingTransientDataBufferST>; + public: + struct SCachedCreationParameters + { + using streaming_buffer_t = video::StreamingTransientDataBufferST>; - static constexpr inline auto RequiredAllocateFlags = core::bitflag(video::IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); - static constexpr inline auto RequiredUsageFlags = core::bitflag(asset::IBuffer::EUF_STORAGE_BUFFER_BIT) | asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; + static constexpr inline auto RequiredAllocateFlags = 
core::bitflag(video::IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); + static constexpr inline auto RequiredUsageFlags = core::bitflag(asset::IBuffer::EUF_STORAGE_BUFFER_BIT) | asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; - core::smart_refctd_ptr utilities; + core::smart_refctd_ptr utilities; - //! optional, default MDI buffer allocated if not provided - core::smart_refctd_ptr streamingBuffer = nullptr; - }; - - struct SCreationParameters : SCachedCreationParameters - { - core::smart_refctd_ptr assetManager = nullptr; + //! optional, default MDI buffer allocated if not provided + core::smart_refctd_ptr streamingBuffer = nullptr; + }; + + struct SCreationParameters : SCachedCreationParameters + { + core::smart_refctd_ptr assetManager = nullptr; - core::smart_refctd_ptr pipelineLayout; - core::smart_refctd_ptr renderpass = nullptr; - }; + core::smart_refctd_ptr pipelineLayout; + core::smart_refctd_ptr renderpass = nullptr; + }; - // creates an instance that can draw one AABB via push constant or multiple using streaming buffer - static core::smart_refctd_ptr create(SCreationParameters&& params); + // creates an instance that can draw one AABB via push constant or multiple using streaming buffer + static core::smart_refctd_ptr create(SCreationParameters&& params); - // creates default pipeline layout for push constant version - static core::smart_refctd_ptr createDefaultPipelineLayout(video::ILogicalDevice* device, const asset::SPushConstantRange& pcRange); + // creates default pipeline layout for push constant version + static core::smart_refctd_ptr createDefaultPipelineLayout(video::ILogicalDevice* device, const asset::SPushConstantRange& pcRange); - // creates default pipeline layout for streaming version - static core::smart_refctd_ptr createDefaultPipelineLayout(video::ILogicalDevice* device); + // creates default pipeline layout for streaming version + static core::smart_refctd_ptr createDefaultPipelineLayout(video::ILogicalDevice* device); - static 
core::smart_refctd_ptr createDefaultPipeline(video::ILogicalDevice* device, video::IGPUPipelineLayout* layout, video::IGPURenderpass* renderpass, video::IGPUGraphicsPipeline::SShaderSpecInfo& vertex, video::IGPUGraphicsPipeline::SShaderSpecInfo& fragment); + static core::smart_refctd_ptr createDefaultPipeline(video::ILogicalDevice* device, video::IGPUPipelineLayout* layout, video::IGPURenderpass* renderpass, video::IGPUGraphicsPipeline::SShaderSpecInfo& vertex, video::IGPUGraphicsPipeline::SShaderSpecInfo& fragment); - //! mounts the extension's archive to given system - useful if you want to create your own shaders with common header included - static const core::smart_refctd_ptr mount(core::smart_refctd_ptr logger, system::ISystem* system, const std::string_view archiveAlias = ""); + //! mounts the extension's archive to given system - useful if you want to create your own shaders with common header included + static const core::smart_refctd_ptr mount(core::smart_refctd_ptr logger, system::ISystem* system, const std::string_view archiveAlias = ""); - inline const SCachedCreationParameters& getCreationParameters() const { return m_cachedCreationParams; } + inline const SCachedCreationParameters& getCreationParameters() const { return m_cachedCreationParams; } - // records draw command for single AABB, user has to set pipeline outside - bool renderSingle(video::IGPUCommandBuffer* commandBuffer); + // records draw command for single AABB, user has to set pipeline outside + bool renderSingle(video::IGPUCommandBuffer* commandBuffer); - bool render(video::IGPUCommandBuffer* commandBuffer, video::ISemaphore::SWaitInfo waitInfo, float* cameraMat3x4); + bool render(video::IGPUCommandBuffer* commandBuffer, video::ISemaphore::SWaitInfo waitInfo, float* cameraMat3x4); - static std::array getVerticesFromAABB(const core::aabbox3d& aabb); + static std::array getVerticesFromAABB(const core::aabbox3d& aabb); - void addAABB(const core::aabbox3d& aabb, const hlsl::float32_t4& color 
= { 1,0,0,1 }); - void addAABB(const hlsl::shapes::AABB<3,float>& aabb, const hlsl::float32_t4& color = { 1,0,0,1 }); + void addAABB(const core::aabbox3d& aabb, const hlsl::float32_t4& color = { 1,0,0,1 }); + void addAABB(const hlsl::shapes::AABB<3,float>& aabb, const hlsl::float32_t4& color = { 1,0,0,1 }); - void addOBB(const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t3x4 transform, const hlsl::float32_t4& color = { 1,0,0,1 }); + void addOBB(const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t3x4 transform, const hlsl::float32_t4& color = { 1,0,0,1 }); - void clearAABBs(); + void clearAABBs(); -protected: - DrawAABB(SCreationParameters&& _params, core::smart_refctd_ptr pipeline); - ~DrawAABB() override; + protected: + DrawAABB(SCreationParameters&& _params, core::smart_refctd_ptr pipeline); + ~DrawAABB() override; -private: - static core::smart_refctd_ptr createPipeline(SCreationParameters& params); - static bool createStreamingBuffer(SCreationParameters& params); + private: + static core::smart_refctd_ptr createPipeline(SCreationParameters& params); + static bool createStreamingBuffer(SCreationParameters& params); - std::vector m_instances; - std::array m_unitAABBVertices; + std::vector m_instances; + std::array m_unitAABBVertices; - SCachedCreationParameters m_cachedCreationParams; + SCachedCreationParameters m_cachedCreationParams; - core::smart_refctd_ptr m_pipeline; + core::smart_refctd_ptr m_pipeline; }; } diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.fragment.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.fragment.hlsl index f17e028f91..686e8934db 100644 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.fragment.hlsl +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.fragment.hlsl @@ -2,7 +2,7 @@ #include "common.hlsl" -using namespace nbl::ext::debugdraw; +using namespace nbl::ext::debug_draw; [shader("pixel")] float32_t4 main(PSInput input) : SV_TARGET diff --git 
a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl index 8a54d40c5a..ff993f8541 100644 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl @@ -1,11 +1,12 @@ #pragma shader_stage(vertex) +#include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" #include "nbl/builtin/hlsl/glsl_compat/core.hlsl" #include "nbl/builtin/hlsl/bda/__ptr.hlsl" #include "common.hlsl" using namespace nbl::hlsl; -using namespace nbl::ext::debugdraw; +using namespace nbl::ext::debug_draw; [[vk::push_constant]] SPushConstants pc; @@ -15,14 +16,14 @@ PSInput main() PSInput output; float32_t3 vertex = (bda::__ptr::create(pc.pVertexBuffer) + glsl::gl_VertexIndex()).deref_restrict().load(); - InstanceData instance = vk::RawBufferLoad(pc.pInstanceBuffer + sizeof(InstanceData) * glsl::gl_InstanceIndex()); + InstanceData instance = vk::BufferPointer(pc.pInstanceBuffer + sizeof(InstanceData) * glsl::gl_InstanceIndex()).Get(); float32_t4x4 transform; transform[0] = instance.transform[0]; transform[1] = instance.transform[1]; transform[2] = instance.transform[2]; transform[3] = float32_t4(0, 0, 0, 1); - float32_t4 position = mul(transform, float32_t4(vertex, 1)); + float32_t4 position = math::linalg::promoted_mul(transform, vertex); output.position = mul(pc.MVP, position); output.color = instance.color; diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl index 2bcd378e40..ec05d5c73b 100644 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl @@ -1,5 +1,5 @@ -#ifndef _DRAW_AABB_COMMON_HLSL -#define _DRAW_AABB_COMMON_HLSL +#ifndef _NBL_DEBUG_DRAW_EXT_COMMON_HLSL +#define _NBL_DEBUG_DRAW_EXT_COMMON_HLSL #include "nbl/builtin/hlsl/cpp_compat.hlsl" @@ -7,7 +7,7 @@ namespace nbl { namespace ext { -namespace 
debugdraw +namespace debug_draw { struct InstanceData diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index 6cb2f365ec..2a6f6f67ea 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -11,7 +11,7 @@ using namespace system; using namespace asset; using namespace hlsl; -namespace nbl::ext::debugdraw +namespace nbl::ext::debug_draw { core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) @@ -113,6 +113,12 @@ smart_refctd_ptr DrawAABB::createPipeline(SCreationParamet auto vertexShader = compileShader("aabb_instances.vertex.hlsl", IShader::E_SHADER_STAGE::ESS_VERTEX); auto fragmentShader = compileShader("aabb_instances.fragment.hlsl", IShader::E_SHADER_STAGE::ESS_FRAGMENT); + if (!vertexShader || !fragmentShader) + { + params.utilities->getLogger()->log("Could not compile shaders!", ILogger::ELL_ERROR); + return nullptr; + } + video::IGPUGraphicsPipeline::SCreationParams pipelineParams[1] = {}; pipelineParams[0].layout = params.pipelineLayout.get(); pipelineParams[0].vertexShader = { .shader = vertexShader.get(), .entryPoint = "main" }; diff --git a/src/nbl/ext/DebugDraw/CMakeLists.txt b/src/nbl/ext/DebugDraw/CMakeLists.txt index 3011fe5b4c..7e97cb74a4 100644 --- a/src/nbl/ext/DebugDraw/CMakeLists.txt +++ b/src/nbl/ext/DebugDraw/CMakeLists.txt @@ -35,7 +35,7 @@ if(NBL_EMBED_BUILTIN_RESOURCES) LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "aabb_instances.vertex.hlsl") # (*) LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "aabb_instances.fragment.hlsl") # (*) - ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_ARCHIVE_ABSOLUTE_ENTRY_PATH_}" "${_ARCHIVE_ENTRY_KEY_}" "nbl::ext::debugdraw::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") + ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_ARCHIVE_ABSOLUTE_ENTRY_PATH_}" "${_ARCHIVE_ENTRY_KEY_}" "nbl::ext::debug_draw::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") 
LINK_BUILTIN_RESOURCES_TO_TARGET(${LIB_NAME} ${_BR_TARGET_}) endif() From 33692fd374fa03fcee7a04b348e32ff419401bc1 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 19 Aug 2025 11:31:33 +0700 Subject: [PATCH 035/472] use hlsl cpp compat matrices, aabb --- examples_tests | 2 +- include/nbl/ext/DebugDraw/CDrawAABB.h | 7 +-- .../builtin/hlsl/aabb_instances.vertex.hlsl | 8 +--- .../ext/DebugDraw/builtin/hlsl/common.hlsl | 13 +----- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 45 ++++++++++--------- 5 files changed, 29 insertions(+), 46 deletions(-) diff --git a/examples_tests b/examples_tests index b31cfbae4f..8518c2b342 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit b31cfbae4f6801c592593d39c3045dd62b6c83da +Subproject commit 8518c2b342217548d0c6797b26b5c5e20bc4df60 diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 034a0321bf..721f39b796 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -55,15 +55,12 @@ class DrawAABB final : public core::IReferenceCounted // records draw command for single AABB, user has to set pipeline outside bool renderSingle(video::IGPUCommandBuffer* commandBuffer); - bool render(video::IGPUCommandBuffer* commandBuffer, video::ISemaphore::SWaitInfo waitInfo, float* cameraMat3x4); + bool render(video::IGPUCommandBuffer* commandBuffer, video::ISemaphore::SWaitInfo waitInfo, const hlsl::float32_t4x4& cameraMat); static std::array getVerticesFromAABB(const core::aabbox3d& aabb); - void addAABB(const core::aabbox3d& aabb, const hlsl::float32_t4& color = { 1,0,0,1 }); void addAABB(const hlsl::shapes::AABB<3,float>& aabb, const hlsl::float32_t4& color = { 1,0,0,1 }); - - void addOBB(const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t3x4 transform, const hlsl::float32_t4& color = { 1,0,0,1 }); - + void addOBB(const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t4x4& transform, const hlsl::float32_t4& color = { 
1,0,0,1 }); void clearAABBs(); protected: diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl index ff993f8541..929ff2e60d 100644 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl @@ -18,13 +18,7 @@ PSInput main() float32_t3 vertex = (bda::__ptr::create(pc.pVertexBuffer) + glsl::gl_VertexIndex()).deref_restrict().load(); InstanceData instance = vk::BufferPointer(pc.pInstanceBuffer + sizeof(InstanceData) * glsl::gl_InstanceIndex()).Get(); - float32_t4x4 transform; - transform[0] = instance.transform[0]; - transform[1] = instance.transform[1]; - transform[2] = instance.transform[2]; - transform[3] = float32_t4(0, 0, 0, 1); - float32_t4 position = math::linalg::promoted_mul(transform, vertex); - output.position = mul(pc.MVP, position); + output.position = math::linalg::promoted_mul(instance.transform, vertex); output.color = instance.color; return output; diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl index ec05d5c73b..4502b04aa0 100644 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl @@ -12,21 +12,12 @@ namespace debug_draw struct InstanceData { -#ifdef __HLSL_VERSION - float32_t3x4 transform; -#else - float transform[3*4]; -#endif - nbl::hlsl::float32_t4 color; + hlsl::float32_t4x4 transform; + hlsl::float32_t4 color; }; struct SPushConstants { -#ifdef __HLSL_VERSION - float32_t4x4 MVP; -#else - float MVP[4*4]; -#endif uint64_t pVertexBuffer; uint64_t pInstanceBuffer; }; diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index 2a6f6f67ea..799c9f2c9e 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -3,6 +3,7 @@ // For conditions of distribution and use, see copyright 
notice in nabla.h #include "nbl/ext/DebugDraw/CDrawAABB.h" +#include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" using namespace nbl; using namespace core; @@ -247,7 +248,7 @@ bool DrawAABB::renderSingle(IGPUCommandBuffer* commandBuffer) return true; } -bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo waitInfo, float* cameraMat3x4) +bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo waitInfo, const hlsl::float32_t4x4& cameraMat) { using offset_t = SCachedCreationParameters::streaming_buffer_t::size_type; constexpr auto MdiSizes = std::to_array({ sizeof(float32_t3), sizeof(InstanceData) }); @@ -261,17 +262,23 @@ bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo wa auto* const streamingPtr = reinterpret_cast(streaming->getBufferPointer()); assert(streamingPtr); - commandBuffer->bindGraphicsPipeline(m_pipeline.get()); // move outside of loop, only bind once + commandBuffer->bindGraphicsPipeline(m_pipeline.get()); commandBuffer->setLineWidth(1.f); - auto instancesIt = m_instances.begin(); + auto instances = m_instances; + for (auto& inst : instances) + { + inst.transform = hlsl::mul(cameraMat, inst.transform); + } + + auto instancesIt = instances.begin(); const uint32_t verticesByteSize = sizeof(float32_t3) * m_unitAABBVertices.size(); const uint32_t availableInstancesByteSize = streaming->getBuffer()->getSize() - verticesByteSize; const uint32_t instancesPerIter = availableInstancesByteSize / sizeof(InstanceData); using suballocator_t = core::LinearAddressAllocatorST; - while (instancesIt != m_instances.end()) + while (instancesIt != instances.end()) { - const uint32_t instanceCount = min(instancesPerIter, m_instances.size()); + const uint32_t instanceCount = min(instancesPerIter, instances.size()); offset_t inputOffset = 0u; offset_t ImaginarySizeUpperBound = 0x1 << 30; suballocator_t imaginaryChunk(nullptr, inputOffset, 0, roundUpToPoT(MaxAlignment), ImaginarySizeUpperBound); @@ 
-290,7 +297,6 @@ bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo wa assert(!streaming->needsManualFlushOrInvalidate()); SPushConstants pc; - memcpy(pc.MVP, cameraMat3x4, sizeof(pc.MVP)); pc.pVertexBuffer = m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + vertexByteOffset; pc.pInstanceBuffer = m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + instancesByteOffset; @@ -342,33 +348,28 @@ std::array DrawAABB::getVerticesFromAABB(const core::aabbox3d& aabb, const hlsl::float32_t4& color) -{ - addAABB(shapes::AABB<3, float>{{aabb.MinEdge.X, aabb.MinEdge.Y, aabb.MinEdge.Z}, { aabb.MaxEdge.X, aabb.MaxEdge.Y, aabb.MaxEdge.Z }}, color); -} - void DrawAABB::addAABB(const hlsl::shapes::AABB<3,float>& aabb, const hlsl::float32_t4& color) { - const auto transform = hlsl::float32_t3x4(1); + const auto transform = hlsl::float32_t4x4(1); addOBB(aabb, transform, color); } -void DrawAABB::addOBB(const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t3x4 transform, const hlsl::float32_t4& color) +void DrawAABB::addOBB(const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t4x4& transform, const hlsl::float32_t4& color) { InstanceData instance; instance.color = color; - - core::matrix3x4SIMD instanceTransform; - instanceTransform.setTranslation(core::vectorSIMDf(aabb.minVx.x, aabb.minVx.y, aabb.minVx.z, 0)); const auto diagonal = aabb.getExtent(); - instanceTransform.setScale(core::vectorSIMDf(diagonal.x, diagonal.y, diagonal.z)); - - core::matrix3x4SIMD worldTransform; - memcpy(worldTransform.pointer(), &transform, sizeof(transform)); - instanceTransform = core::concatenateBFollowedByA(worldTransform, instanceTransform); - memcpy(instance.transform, instanceTransform.pointer(), sizeof(core::matrix3x4SIMD)); + hlsl::float32_t4x4 instanceTransform; + instanceTransform[0][3] = aabb.minVx.x; + instanceTransform[1][3] = aabb.minVx.y; + instanceTransform[2][3] = aabb.minVx.z; + 
instanceTransform[3][3] = 1.f; + instanceTransform[0][0] = diagonal.x; + instanceTransform[1][1] = diagonal.y; + instanceTransform[2][2] = diagonal.z; + instance.transform = math::linalg::promoted_mul(transform, instanceTransform); m_instances.push_back(instance); } From 72e35698ec59a141aaca94085116868a01ad1c0e Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 19 Aug 2025 15:30:43 +0700 Subject: [PATCH 036/472] change batch render to use indexed draw --- include/nbl/ext/DebugDraw/CDrawAABB.h | 11 +- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 152 ++++++++++++++++++-------- 2 files changed, 117 insertions(+), 46 deletions(-) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 721f39b796..78b32638e2 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -15,6 +15,8 @@ namespace nbl::ext::debug_draw class DrawAABB final : public core::IReferenceCounted { public: + static constexpr inline uint32_t IndicesCount = 24u; + struct SCachedCreationParameters { using streaming_buffer_t = video::StreamingTransientDataBufferST>; @@ -30,6 +32,7 @@ class DrawAABB final : public core::IReferenceCounted struct SCreationParameters : SCachedCreationParameters { + video::IQueue* transfer = nullptr; core::smart_refctd_ptr assetManager = nullptr; core::smart_refctd_ptr pipelineLayout; @@ -57,22 +60,24 @@ class DrawAABB final : public core::IReferenceCounted bool render(video::IGPUCommandBuffer* commandBuffer, video::ISemaphore::SWaitInfo waitInfo, const hlsl::float32_t4x4& cameraMat); - static std::array getVerticesFromAABB(const core::aabbox3d& aabb); + //static std::array getVerticesFromAABB(const core::aabbox3d& aabb); void addAABB(const hlsl::shapes::AABB<3,float>& aabb, const hlsl::float32_t4& color = { 1,0,0,1 }); void addOBB(const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t4x4& transform, const hlsl::float32_t4& color = { 1,0,0,1 }); void clearAABBs(); protected: - 
DrawAABB(SCreationParameters&& _params, core::smart_refctd_ptr pipeline); + DrawAABB(SCreationParameters&& _params, core::smart_refctd_ptr pipeline, core::smart_refctd_ptr indicesBuffer); ~DrawAABB() override; private: static core::smart_refctd_ptr createPipeline(SCreationParameters& params); static bool createStreamingBuffer(SCreationParameters& params); + static core::smart_refctd_ptr createIndicesBuffer(SCreationParameters& params); std::vector m_instances; - std::array m_unitAABBVertices; + std::array m_unitAABBVertices; + core::smart_refctd_ptr m_indicesBuffer; SCachedCreationParameters m_cachedCreationParams; diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index 799c9f2c9e..676f8eafa2 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -32,14 +32,31 @@ core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) return nullptr; } - return core::smart_refctd_ptr(new DrawAABB(std::move(params), pipeline)); + auto indicesBuffer = createIndicesBuffer(params); + if (!indicesBuffer) + { + logger->log("Failed to create indices buffer!", ILogger::ELL_ERROR); + return nullptr; + } + + return core::smart_refctd_ptr(new DrawAABB(std::move(params), pipeline, indicesBuffer)); } -DrawAABB::DrawAABB(SCreationParameters&& params, smart_refctd_ptr pipeline) - : m_cachedCreationParams(std::move(params)), m_pipeline(pipeline) +DrawAABB::DrawAABB(SCreationParameters&& params, smart_refctd_ptr pipeline, smart_refctd_ptr indicesBuffer) + : m_cachedCreationParams(std::move(params)), m_pipeline(std::move(pipeline)), m_indicesBuffer(std::move(indicesBuffer)) { const auto unitAABB = core::aabbox3d({ 0, 0, 0 }, { 1, 1, 1 }); - m_unitAABBVertices = getVerticesFromAABB(unitAABB); + float32_t3 pMin = { 0, 0, 0 }; + float32_t3 pMax = { 1, 1, 1 }; + + m_unitAABBVertices[0] = float32_t3(pMin.x, pMin.y, pMin.z); + m_unitAABBVertices[1] = float32_t3(pMax.x, pMin.y, pMin.z); + m_unitAABBVertices[2] = 
float32_t3(pMin.x, pMin.y, pMax.z); + m_unitAABBVertices[3] = float32_t3(pMax.x, pMin.y, pMax.z); + m_unitAABBVertices[4] = float32_t3(pMin.x, pMax.y, pMin.z); + m_unitAABBVertices[5] = float32_t3(pMax.x, pMax.y, pMin.z); + m_unitAABBVertices[6] = float32_t3(pMin.x, pMax.y, pMax.z); + m_unitAABBVertices[7] = float32_t3(pMax.x, pMax.y, pMax.z); } DrawAABB::~DrawAABB() @@ -205,6 +222,53 @@ bool DrawAABB::createStreamingBuffer(SCreationParameters& params) return true; } +smart_refctd_ptr DrawAABB::createIndicesBuffer(SCreationParameters& params) +{ + std::array unitAABBIndices; + unitAABBIndices[0] = 0; + unitAABBIndices[1] = 1; + unitAABBIndices[2] = 0; + unitAABBIndices[3] = 2; + + unitAABBIndices[4] = 3; + unitAABBIndices[5] = 1; + unitAABBIndices[6] = 3; + unitAABBIndices[7] = 2; + + unitAABBIndices[8] = 4; + unitAABBIndices[9] = 5; + unitAABBIndices[10] = 4; + unitAABBIndices[11] = 6; + + unitAABBIndices[12] = 7; + unitAABBIndices[13] = 5; + unitAABBIndices[14] = 7; + unitAABBIndices[15] = 6; + + unitAABBIndices[16] = 0; + unitAABBIndices[17] = 4; + unitAABBIndices[18] = 1; + unitAABBIndices[19] = 5; + + unitAABBIndices[20] = 2; + unitAABBIndices[21] = 6; + unitAABBIndices[22] = 3; + unitAABBIndices[23] = 7; + + IGPUBuffer::SCreationParams bufparams; + bufparams.size = sizeof(uint32_t) * unitAABBIndices.size(); + bufparams.usage = IGPUBuffer::EUF_INDEX_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT; + + smart_refctd_ptr indicesBuffer; + params.utilities->createFilledDeviceLocalBufferOnDedMem( + SIntendedSubmitInfo{ .queue = params.transfer }, + std::move(bufparams), + unitAABBIndices.data() + ).move_into(indicesBuffer); + + return indicesBuffer; +} + core::smart_refctd_ptr DrawAABB::createDefaultPipelineLayout(video::ILogicalDevice* device, const asset::SPushConstantRange& pcRange) { return device->createPipelineLayout({ &pcRange , 1 }, nullptr, nullptr, nullptr, nullptr); @@ -264,6 +328,8 @@ bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, 
ISemaphore::SWaitInfo wa commandBuffer->bindGraphicsPipeline(m_pipeline.get()); commandBuffer->setLineWidth(1.f); + asset::SBufferBinding indexBinding = { .offset = 0, .buffer = m_indicesBuffer }; + commandBuffer->bindIndexBuffer(indexBinding, asset::EIT_32BIT); auto instances = m_instances; for (auto& inst : instances) @@ -301,7 +367,7 @@ bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo wa pc.pInstanceBuffer = m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + instancesByteOffset; commandBuffer->pushConstants(m_pipeline->getLayout(), ESS_VERTEX, 0, sizeof(SPushConstants), &pc); - commandBuffer->draw(m_unitAABBVertices.size(), instanceCount, 0, 0); + commandBuffer->drawIndexed(IndicesCount, instanceCount, 0, 0, 0); streaming->multi_deallocate(1, &inputOffset, &totalSize, waitInfo); } @@ -309,44 +375,44 @@ bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo wa return true; } -std::array DrawAABB::getVerticesFromAABB(const core::aabbox3d& aabb) -{ - const auto& pMin = aabb.MinEdge; - const auto& pMax = aabb.MaxEdge; - - std::array vertices; - vertices[0] = float32_t3(pMin.X, pMin.Y, pMin.Z); - vertices[1] = float32_t3(pMax.X, pMin.Y, pMin.Z); - vertices[2] = float32_t3(pMin.X, pMin.Y, pMin.Z); - vertices[3] = float32_t3(pMin.X, pMin.Y, pMax.Z); - - vertices[4] = float32_t3(pMax.X, pMin.Y, pMax.Z); - vertices[5] = float32_t3(pMax.X, pMin.Y, pMin.Z); - vertices[6] = float32_t3(pMax.X, pMin.Y, pMax.Z); - vertices[7] = float32_t3(pMin.X, pMin.Y, pMax.Z); - - vertices[8] = float32_t3(pMin.X, pMax.Y, pMin.Z); - vertices[9] = float32_t3(pMax.X, pMax.Y, pMin.Z); - vertices[10] = float32_t3(pMin.X, pMax.Y, pMin.Z); - vertices[11] = float32_t3(pMin.X, pMax.Y, pMax.Z); - - vertices[12] = float32_t3(pMax.X, pMax.Y, pMax.Z); - vertices[13] = float32_t3(pMax.X, pMax.Y, pMin.Z); - vertices[14] = float32_t3(pMax.X, pMax.Y, pMax.Z); - vertices[15] = float32_t3(pMin.X, pMax.Y, pMax.Z); - - vertices[16] = 
float32_t3(pMin.X, pMin.Y, pMin.Z); - vertices[17] = float32_t3(pMin.X, pMax.Y, pMin.Z); - vertices[18] = float32_t3(pMax.X, pMin.Y, pMin.Z); - vertices[19] = float32_t3(pMax.X, pMax.Y, pMin.Z); - - vertices[20] = float32_t3(pMin.X, pMin.Y, pMax.Z); - vertices[21] = float32_t3(pMin.X, pMax.Y, pMax.Z); - vertices[22] = float32_t3(pMax.X, pMin.Y, pMax.Z); - vertices[23] = float32_t3(pMax.X, pMax.Y, pMax.Z); - - return vertices; -} +//std::array DrawAABB::getVerticesFromAABB(const core::aabbox3d& aabb) +//{ +// const auto& pMin = aabb.MinEdge; +// const auto& pMax = aabb.MaxEdge; +// +// std::array vertices; +// vertices[0] = float32_t3(pMin.X, pMin.Y, pMin.Z); // 0 +// vertices[1] = float32_t3(pMax.X, pMin.Y, pMin.Z); // 1 +// vertices[2] = float32_t3(pMin.X, pMin.Y, pMin.Z); // 0 +// vertices[3] = float32_t3(pMin.X, pMin.Y, pMax.Z); // 2 +// +// vertices[4] = float32_t3(pMax.X, pMin.Y, pMax.Z); // 3 +// vertices[5] = float32_t3(pMax.X, pMin.Y, pMin.Z); // 1 +// vertices[6] = float32_t3(pMax.X, pMin.Y, pMax.Z); // 3 +// vertices[7] = float32_t3(pMin.X, pMin.Y, pMax.Z); // 2 +// +// vertices[8] = float32_t3(pMin.X, pMax.Y, pMin.Z); // 4 +// vertices[9] = float32_t3(pMax.X, pMax.Y, pMin.Z); // 5 +// vertices[10] = float32_t3(pMin.X, pMax.Y, pMin.Z); // 4 +// vertices[11] = float32_t3(pMin.X, pMax.Y, pMax.Z); // 6 +// +// vertices[12] = float32_t3(pMax.X, pMax.Y, pMax.Z); // 7 +// vertices[13] = float32_t3(pMax.X, pMax.Y, pMin.Z); // 5 +// vertices[14] = float32_t3(pMax.X, pMax.Y, pMax.Z); // 7 +// vertices[15] = float32_t3(pMin.X, pMax.Y, pMax.Z); // 6 +// +// vertices[16] = float32_t3(pMin.X, pMin.Y, pMin.Z); // 0 +// vertices[17] = float32_t3(pMin.X, pMax.Y, pMin.Z); // 4 +// vertices[18] = float32_t3(pMax.X, pMin.Y, pMin.Z); // 1 +// vertices[19] = float32_t3(pMax.X, pMax.Y, pMin.Z); // 5 +// +// vertices[20] = float32_t3(pMin.X, pMin.Y, pMax.Z); // 2 +// vertices[21] = float32_t3(pMin.X, pMax.Y, pMax.Z); // 6 +// vertices[22] = float32_t3(pMax.X, pMin.Y, pMax.Z); 
// 3 +// vertices[23] = float32_t3(pMax.X, pMax.Y, pMax.Z); // 7 +// +// return vertices; +//} void DrawAABB::addAABB(const hlsl::shapes::AABB<3,float>& aabb, const hlsl::float32_t4& color) { From 5285e78b92a907c39f067696564110757e904a67 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 20 Aug 2025 10:22:49 +0700 Subject: [PATCH 037/472] simplified single AABB draw --- include/nbl/ext/DebugDraw/CDrawAABB.h | 25 +-- .../ext/DebugDraw/builtin/hlsl/common.hlsl | 6 + .../DebugDraw/builtin/hlsl/single.vertex.hlsl | 23 +++ src/nbl/ext/DebugDraw/CDrawAABB.cpp | 184 ++++++++---------- src/nbl/ext/DebugDraw/CMakeLists.txt | 1 + 5 files changed, 129 insertions(+), 110 deletions(-) create mode 100644 include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 78b32638e2..dad33bea27 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -16,6 +16,7 @@ class DrawAABB final : public core::IReferenceCounted { public: static constexpr inline uint32_t IndicesCount = 24u; + static constexpr inline uint32_t VerticesCount = 8u; struct SCachedCreationParameters { @@ -35,53 +36,55 @@ class DrawAABB final : public core::IReferenceCounted video::IQueue* transfer = nullptr; core::smart_refctd_ptr assetManager = nullptr; - core::smart_refctd_ptr pipelineLayout; + core::smart_refctd_ptr singlePipelineLayout; + core::smart_refctd_ptr batchPipelineLayout; core::smart_refctd_ptr renderpass = nullptr; }; // creates an instance that can draw one AABB via push constant or multiple using streaming buffer static core::smart_refctd_ptr create(SCreationParameters&& params); - // creates default pipeline layout for push constant version - static core::smart_refctd_ptr createDefaultPipelineLayout(video::ILogicalDevice* device, const asset::SPushConstantRange& pcRange); + // creates pipeline layout from push constant range + static core::smart_refctd_ptr 
createPipelineLayoutFromPCRange(video::ILogicalDevice* device, const asset::SPushConstantRange& pcRange); // creates default pipeline layout for streaming version static core::smart_refctd_ptr createDefaultPipelineLayout(video::ILogicalDevice* device); - static core::smart_refctd_ptr createDefaultPipeline(video::ILogicalDevice* device, video::IGPUPipelineLayout* layout, video::IGPURenderpass* renderpass, video::IGPUGraphicsPipeline::SShaderSpecInfo& vertex, video::IGPUGraphicsPipeline::SShaderSpecInfo& fragment); - //! mounts the extension's archive to given system - useful if you want to create your own shaders with common header included static const core::smart_refctd_ptr mount(core::smart_refctd_ptr logger, system::ISystem* system, const std::string_view archiveAlias = ""); inline const SCachedCreationParameters& getCreationParameters() const { return m_cachedCreationParams; } // records draw command for single AABB, user has to set pipeline outside - bool renderSingle(video::IGPUCommandBuffer* commandBuffer); + bool renderSingle(video::IGPUCommandBuffer* commandBuffer, const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t4& color, const hlsl::float32_t4x4& cameraMat); bool render(video::IGPUCommandBuffer* commandBuffer, video::ISemaphore::SWaitInfo waitInfo, const hlsl::float32_t4x4& cameraMat); - //static std::array getVerticesFromAABB(const core::aabbox3d& aabb); + static hlsl::float32_t4x4 getTransformFromAABB(const hlsl::shapes::AABB<3, float>& aabb); void addAABB(const hlsl::shapes::AABB<3,float>& aabb, const hlsl::float32_t4& color = { 1,0,0,1 }); void addOBB(const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t4x4& transform, const hlsl::float32_t4& color = { 1,0,0,1 }); void clearAABBs(); protected: - DrawAABB(SCreationParameters&& _params, core::smart_refctd_ptr pipeline, core::smart_refctd_ptr indicesBuffer); + DrawAABB(SCreationParameters&& _params, core::smart_refctd_ptr singlePipeline, core::smart_refctd_ptr batchPipeline, + 
core::smart_refctd_ptr indicesBuffer, core::smart_refctd_ptr verticesBuffer); ~DrawAABB() override; private: - static core::smart_refctd_ptr createPipeline(SCreationParameters& params); + static core::smart_refctd_ptr createPipeline(SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout, const std::string& vsPath, const std::string& fsPath); static bool createStreamingBuffer(SCreationParameters& params); static core::smart_refctd_ptr createIndicesBuffer(SCreationParameters& params); + static core::smart_refctd_ptr createVerticesBuffer(SCreationParameters& params); std::vector m_instances; - std::array m_unitAABBVertices; core::smart_refctd_ptr m_indicesBuffer; + core::smart_refctd_ptr m_verticesBuffer; SCachedCreationParameters m_cachedCreationParams; - core::smart_refctd_ptr m_pipeline; + core::smart_refctd_ptr m_singlePipeline; + core::smart_refctd_ptr m_batchPipeline; }; } diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl index 4502b04aa0..03a3bbfa49 100644 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl @@ -16,6 +16,12 @@ struct InstanceData hlsl::float32_t4 color; }; +struct SSinglePushConstants +{ + uint64_t pVertexBuffer; + InstanceData instance; +}; + struct SPushConstants { uint64_t pVertexBuffer; diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl new file mode 100644 index 0000000000..e9b68a811c --- /dev/null +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl @@ -0,0 +1,23 @@ +#pragma shader_stage(vertex) + +#include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" +#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" +#include "nbl/builtin/hlsl/bda/__ptr.hlsl" +#include "common.hlsl" + +using namespace nbl::hlsl; +using namespace nbl::ext::debug_draw; + +[[vk::push_constant]] SSinglePushConstants pc; + 
+[shader("vertex")] +PSInput main() +{ + PSInput output; + float32_t3 vertex = (bda::__ptr::create(pc.pVertexBuffer) + glsl::gl_VertexIndex()).deref_restrict().load(); + + output.position = math::linalg::promoted_mul(pc.instance.transform, vertex); + output.color = pc.instance.color; + + return output; +} \ No newline at end of file diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index 676f8eafa2..53a6e3cebb 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -19,8 +19,14 @@ core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) { auto* const logger = params.utilities->getLogger(); - auto pipeline = createPipeline(params); - if (!pipeline) + auto singlePipeline = createPipeline(params, params.singlePipelineLayout.get(), "single.vertex.hlsl", "aabb_instances.fragment.hlsl"); + if (!singlePipeline) + { + logger->log("Failed to create pipeline!", ILogger::ELL_ERROR); + return nullptr; + } + auto batchPipeline = createPipeline(params, params.batchPipelineLayout.get(), "aabb_instances.vertex.hlsl", "aabb_instances.fragment.hlsl"); + if (!batchPipeline) { logger->log("Failed to create pipeline!", ILogger::ELL_ERROR); return nullptr; @@ -38,25 +44,21 @@ core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) logger->log("Failed to create indices buffer!", ILogger::ELL_ERROR); return nullptr; } + auto verticesBuffer = createVerticesBuffer(params); + if (!verticesBuffer) + { + logger->log("Failed to create vertices buffer!", ILogger::ELL_ERROR); + return nullptr; + } - return core::smart_refctd_ptr(new DrawAABB(std::move(params), pipeline, indicesBuffer)); + return core::smart_refctd_ptr(new DrawAABB(std::move(params), singlePipeline, batchPipeline, indicesBuffer, verticesBuffer)); } -DrawAABB::DrawAABB(SCreationParameters&& params, smart_refctd_ptr pipeline, smart_refctd_ptr indicesBuffer) - : m_cachedCreationParams(std::move(params)), 
m_pipeline(std::move(pipeline)), m_indicesBuffer(std::move(indicesBuffer)) +DrawAABB::DrawAABB(SCreationParameters&& params, core::smart_refctd_ptr singlePipeline, smart_refctd_ptr batchPipeline, + smart_refctd_ptr indicesBuffer, smart_refctd_ptr verticesBuffer) + : m_cachedCreationParams(std::move(params)), m_singlePipeline(std::move(singlePipeline)), m_batchPipeline(std::move(batchPipeline)), + m_indicesBuffer(std::move(indicesBuffer)), m_verticesBuffer(std::move(verticesBuffer)) { - const auto unitAABB = core::aabbox3d({ 0, 0, 0 }, { 1, 1, 1 }); - float32_t3 pMin = { 0, 0, 0 }; - float32_t3 pMax = { 1, 1, 1 }; - - m_unitAABBVertices[0] = float32_t3(pMin.x, pMin.y, pMin.z); - m_unitAABBVertices[1] = float32_t3(pMax.x, pMin.y, pMin.z); - m_unitAABBVertices[2] = float32_t3(pMin.x, pMin.y, pMax.z); - m_unitAABBVertices[3] = float32_t3(pMax.x, pMin.y, pMax.z); - m_unitAABBVertices[4] = float32_t3(pMin.x, pMax.y, pMin.z); - m_unitAABBVertices[5] = float32_t3(pMax.x, pMax.y, pMin.z); - m_unitAABBVertices[6] = float32_t3(pMin.x, pMax.y, pMax.z); - m_unitAABBVertices[7] = float32_t3(pMax.x, pMax.y, pMax.z); } DrawAABB::~DrawAABB() @@ -88,7 +90,7 @@ const smart_refctd_ptr DrawAABB::mount(smart_refctd_ptr l return smart_refctd_ptr(archive); } -smart_refctd_ptr DrawAABB::createPipeline(SCreationParameters& params) +smart_refctd_ptr DrawAABB::createPipeline(SCreationParameters& params, const IGPUPipelineLayout* pipelineLayout, const std::string& vsPath, const std::string& fsPath) { auto system = smart_refctd_ptr(params.assetManager->getSystem()); auto* set = params.assetManager->getCompilerSet(); @@ -128,8 +130,8 @@ smart_refctd_ptr DrawAABB::createPipeline(SCreationParamet if (!system->isDirectory(path(NBL_ARCHIVE_ENTRY.data()))) mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), NBL_ARCHIVE_ENTRY); - auto vertexShader = compileShader("aabb_instances.vertex.hlsl", IShader::E_SHADER_STAGE::ESS_VERTEX); - auto fragmentShader = 
compileShader("aabb_instances.fragment.hlsl", IShader::E_SHADER_STAGE::ESS_FRAGMENT); + auto vertexShader = compileShader(vsPath, IShader::E_SHADER_STAGE::ESS_VERTEX); + auto fragmentShader = compileShader(fsPath, IShader::E_SHADER_STAGE::ESS_FRAGMENT); if (!vertexShader || !fragmentShader) { @@ -138,7 +140,7 @@ smart_refctd_ptr DrawAABB::createPipeline(SCreationParamet } video::IGPUGraphicsPipeline::SCreationParams pipelineParams[1] = {}; - pipelineParams[0].layout = params.pipelineLayout.get(); + pipelineParams[0].layout = pipelineLayout; pipelineParams[0].vertexShader = { .shader = vertexShader.get(), .entryPoint = "main" }; pipelineParams[0].fragmentShader = { .shader = fragmentShader.get(), .entryPoint = "main" }; pipelineParams[0].cached = { @@ -269,7 +271,37 @@ smart_refctd_ptr DrawAABB::createIndicesBuffer(SCreationParameters& return indicesBuffer; } -core::smart_refctd_ptr DrawAABB::createDefaultPipelineLayout(video::ILogicalDevice* device, const asset::SPushConstantRange& pcRange) +smart_refctd_ptr DrawAABB::createVerticesBuffer(SCreationParameters& params) +{ + const auto unitAABB = core::aabbox3d({ 0, 0, 0 }, { 1, 1, 1 }); + float32_t3 pMin = { 0, 0, 0 }; + float32_t3 pMax = { 1, 1, 1 }; + + std::array unitAABBVertices; + unitAABBVertices[0] = float32_t3(pMin.x, pMin.y, pMin.z); + unitAABBVertices[1] = float32_t3(pMax.x, pMin.y, pMin.z); + unitAABBVertices[2] = float32_t3(pMin.x, pMin.y, pMax.z); + unitAABBVertices[3] = float32_t3(pMax.x, pMin.y, pMax.z); + unitAABBVertices[4] = float32_t3(pMin.x, pMax.y, pMin.z); + unitAABBVertices[5] = float32_t3(pMax.x, pMax.y, pMin.z); + unitAABBVertices[6] = float32_t3(pMin.x, pMax.y, pMax.z); + unitAABBVertices[7] = float32_t3(pMax.x, pMax.y, pMax.z); + + IGPUBuffer::SCreationParams bufparams; + bufparams.size = sizeof(float32_t3) * unitAABBVertices.size(); + bufparams.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; + + smart_refctd_ptr 
vertexBuffer; + params.utilities->createFilledDeviceLocalBufferOnDedMem( + SIntendedSubmitInfo{ .queue = params.transfer }, + std::move(bufparams), + unitAABBVertices.data() + ).move_into(vertexBuffer); + + return vertexBuffer; +} + +core::smart_refctd_ptr DrawAABB::createPipelineLayoutFromPCRange(video::ILogicalDevice* device, const asset::SPushConstantRange& pcRange) { return device->createPipelineLayout({ &pcRange , 1 }, nullptr, nullptr, nullptr, nullptr); } @@ -284,30 +316,22 @@ core::smart_refctd_ptr DrawAABB::createDefaultPipelin return device->createPipelineLayout({ &pcRange , 1 }, nullptr, nullptr, nullptr, nullptr); } -smart_refctd_ptr DrawAABB::createDefaultPipeline(video::ILogicalDevice* device, video::IGPUPipelineLayout* layout, video::IGPURenderpass* renderpass, video::IGPUGraphicsPipeline::SShaderSpecInfo& vertex, video::IGPUGraphicsPipeline::SShaderSpecInfo& fragment) +bool DrawAABB::renderSingle(IGPUCommandBuffer* commandBuffer, const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t4& color, const hlsl::float32_t4x4& cameraMat) { - smart_refctd_ptr pipeline; - - video::IGPUGraphicsPipeline::SCreationParams params[1] = {}; - params[0].layout = layout; - params[0].vertexShader = vertex; - params[0].fragmentShader = fragment; - params[0].cached = { - .primitiveAssembly = { - .primitiveType = asset::E_PRIMITIVE_TOPOLOGY::EPT_LINE_LIST, - } - }; - params[0].renderpass = renderpass; - - device->createGraphicsPipelines(nullptr, params, &pipeline); + commandBuffer->bindGraphicsPipeline(m_singlePipeline.get()); + commandBuffer->setLineWidth(1.f); + asset::SBufferBinding indexBinding = { .offset = 0, .buffer = m_indicesBuffer }; + commandBuffer->bindIndexBuffer(indexBinding, asset::EIT_32BIT); - return pipeline; -} + SSinglePushConstants pc; + pc.pVertexBuffer = m_verticesBuffer->getDeviceAddress(); -bool DrawAABB::renderSingle(IGPUCommandBuffer* commandBuffer) -{ - commandBuffer->setLineWidth(1.f); - commandBuffer->draw(24, 1, 0, 0); + 
hlsl::float32_t4x4 instanceTransform = getTransformFromAABB(aabb); + pc.instance.transform = hlsl::mul(cameraMat, instanceTransform); + pc.instance.color = color; + + commandBuffer->pushConstants(m_singlePipeline->getLayout(), ESS_VERTEX, 0, sizeof(SSinglePushConstants), &pc); + commandBuffer->drawIndexed(IndicesCount, 1, 0, 0, 0); return true; } @@ -326,7 +350,7 @@ bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo wa auto* const streamingPtr = reinterpret_cast(streaming->getBufferPointer()); assert(streamingPtr); - commandBuffer->bindGraphicsPipeline(m_pipeline.get()); + commandBuffer->bindGraphicsPipeline(m_batchPipeline.get()); commandBuffer->setLineWidth(1.f); asset::SBufferBinding indexBinding = { .offset = 0, .buffer = m_indicesBuffer }; commandBuffer->bindIndexBuffer(indexBinding, asset::EIT_32BIT); @@ -338,9 +362,7 @@ bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo wa } auto instancesIt = instances.begin(); - const uint32_t verticesByteSize = sizeof(float32_t3) * m_unitAABBVertices.size(); - const uint32_t availableInstancesByteSize = streaming->getBuffer()->getSize() - verticesByteSize; - const uint32_t instancesPerIter = availableInstancesByteSize / sizeof(InstanceData); + const uint32_t instancesPerIter = streaming->getBuffer()->getSize() / sizeof(InstanceData); using suballocator_t = core::LinearAddressAllocatorST; while (instancesIt != instances.end()) { @@ -348,7 +370,6 @@ bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo wa offset_t inputOffset = 0u; offset_t ImaginarySizeUpperBound = 0x1 << 30; suballocator_t imaginaryChunk(nullptr, inputOffset, 0, roundUpToPoT(MaxAlignment), ImaginarySizeUpperBound); - uint32_t vertexByteOffset = imaginaryChunk.alloc_addr(verticesByteSize, sizeof(float32_t3)); uint32_t instancesByteOffset = imaginaryChunk.alloc_addr(sizeof(InstanceData) * instanceCount, sizeof(InstanceData)); const uint32_t totalSize = 
imaginaryChunk.get_allocated_size(); @@ -356,17 +377,16 @@ bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo wa std::chrono::steady_clock::time_point waitTill = std::chrono::steady_clock::now() + std::chrono::milliseconds(1u); streaming->multi_allocate(waitTill, 1, &inputOffset, &totalSize, &MaxAlignment); - memcpy(streamingPtr + vertexByteOffset, m_unitAABBVertices.data(), sizeof(m_unitAABBVertices[0]) * m_unitAABBVertices.size()); memcpy(streamingPtr + instancesByteOffset, std::addressof(*instancesIt), sizeof(InstanceData) * instanceCount); instancesIt += instanceCount; assert(!streaming->needsManualFlushOrInvalidate()); SPushConstants pc; - pc.pVertexBuffer = m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + vertexByteOffset; + pc.pVertexBuffer = m_verticesBuffer->getDeviceAddress(); pc.pInstanceBuffer = m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + instancesByteOffset; - commandBuffer->pushConstants(m_pipeline->getLayout(), ESS_VERTEX, 0, sizeof(SPushConstants), &pc); + commandBuffer->pushConstants(m_batchPipeline->getLayout(), ESS_VERTEX, 0, sizeof(SPushConstants), &pc); commandBuffer->drawIndexed(IndicesCount, instanceCount, 0, 0, 0); streaming->multi_deallocate(1, &inputOffset, &totalSize, waitInfo); @@ -375,44 +395,19 @@ bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo wa return true; } -//std::array DrawAABB::getVerticesFromAABB(const core::aabbox3d& aabb) -//{ -// const auto& pMin = aabb.MinEdge; -// const auto& pMax = aabb.MaxEdge; -// -// std::array vertices; -// vertices[0] = float32_t3(pMin.X, pMin.Y, pMin.Z); // 0 -// vertices[1] = float32_t3(pMax.X, pMin.Y, pMin.Z); // 1 -// vertices[2] = float32_t3(pMin.X, pMin.Y, pMin.Z); // 0 -// vertices[3] = float32_t3(pMin.X, pMin.Y, pMax.Z); // 2 -// -// vertices[4] = float32_t3(pMax.X, pMin.Y, pMax.Z); // 3 -// vertices[5] = float32_t3(pMax.X, pMin.Y, pMin.Z); // 1 -// vertices[6] = float32_t3(pMax.X, 
pMin.Y, pMax.Z); // 3 -// vertices[7] = float32_t3(pMin.X, pMin.Y, pMax.Z); // 2 -// -// vertices[8] = float32_t3(pMin.X, pMax.Y, pMin.Z); // 4 -// vertices[9] = float32_t3(pMax.X, pMax.Y, pMin.Z); // 5 -// vertices[10] = float32_t3(pMin.X, pMax.Y, pMin.Z); // 4 -// vertices[11] = float32_t3(pMin.X, pMax.Y, pMax.Z); // 6 -// -// vertices[12] = float32_t3(pMax.X, pMax.Y, pMax.Z); // 7 -// vertices[13] = float32_t3(pMax.X, pMax.Y, pMin.Z); // 5 -// vertices[14] = float32_t3(pMax.X, pMax.Y, pMax.Z); // 7 -// vertices[15] = float32_t3(pMin.X, pMax.Y, pMax.Z); // 6 -// -// vertices[16] = float32_t3(pMin.X, pMin.Y, pMin.Z); // 0 -// vertices[17] = float32_t3(pMin.X, pMax.Y, pMin.Z); // 4 -// vertices[18] = float32_t3(pMax.X, pMin.Y, pMin.Z); // 1 -// vertices[19] = float32_t3(pMax.X, pMax.Y, pMin.Z); // 5 -// -// vertices[20] = float32_t3(pMin.X, pMin.Y, pMax.Z); // 2 -// vertices[21] = float32_t3(pMin.X, pMax.Y, pMax.Z); // 6 -// vertices[22] = float32_t3(pMax.X, pMin.Y, pMax.Z); // 3 -// vertices[23] = float32_t3(pMax.X, pMax.Y, pMax.Z); // 7 -// -// return vertices; -//} +hlsl::float32_t4x4 DrawAABB::getTransformFromAABB(const hlsl::shapes::AABB<3, float>& aabb) +{ + const auto diagonal = aabb.getExtent(); + hlsl::float32_t4x4 transform; + transform[0][3] = aabb.minVx.x; + transform[1][3] = aabb.minVx.y; + transform[2][3] = aabb.minVx.z; + transform[3][3] = 1.f; + transform[0][0] = diagonal.x; + transform[1][1] = diagonal.y; + transform[2][2] = diagonal.z; + return transform; +} void DrawAABB::addAABB(const hlsl::shapes::AABB<3,float>& aabb, const hlsl::float32_t4& color) { @@ -424,17 +419,8 @@ void DrawAABB::addOBB(const hlsl::shapes::AABB<3, float>& aabb, const hlsl::floa { InstanceData instance; instance.color = color; - const auto diagonal = aabb.getExtent(); - - hlsl::float32_t4x4 instanceTransform; - instanceTransform[0][3] = aabb.minVx.x; - instanceTransform[1][3] = aabb.minVx.y; - instanceTransform[2][3] = aabb.minVx.z; - instanceTransform[3][3] = 1.f; - 
instanceTransform[0][0] = diagonal.x; - instanceTransform[1][1] = diagonal.y; - instanceTransform[2][2] = diagonal.z; + hlsl::float32_t4x4 instanceTransform = getTransformFromAABB(aabb); instance.transform = math::linalg::promoted_mul(transform, instanceTransform); m_instances.push_back(instance); } diff --git a/src/nbl/ext/DebugDraw/CMakeLists.txt b/src/nbl/ext/DebugDraw/CMakeLists.txt index 7e97cb74a4..b62d06f518 100644 --- a/src/nbl/ext/DebugDraw/CMakeLists.txt +++ b/src/nbl/ext/DebugDraw/CMakeLists.txt @@ -32,6 +32,7 @@ if(NBL_EMBED_BUILTIN_RESOURCES) set(_BR_TARGET_ extDebugDrawbuiltinResourceData) LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "common.hlsl") + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "single.vertex.hlsl") # (*) LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "aabb_instances.vertex.hlsl") # (*) LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "aabb_instances.fragment.hlsl") # (*) From 328aa3429528c6fd841cf3a70359139e601eaa36 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 20 Aug 2025 10:59:45 +0700 Subject: [PATCH 038/472] change batch render to take span of InstanceData --- include/nbl/ext/DebugDraw/CDrawAABB.h | 7 +------ src/nbl/ext/DebugDraw/CDrawAABB.cpp | 29 +++++---------------------- 2 files changed, 6 insertions(+), 30 deletions(-) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index dad33bea27..089e885887 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -58,14 +58,10 @@ class DrawAABB final : public core::IReferenceCounted // records draw command for single AABB, user has to set pipeline outside bool renderSingle(video::IGPUCommandBuffer* commandBuffer, const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t4& color, const hlsl::float32_t4x4& cameraMat); - bool render(video::IGPUCommandBuffer* commandBuffer, video::ISemaphore::SWaitInfo waitInfo, const hlsl::float32_t4x4& cameraMat); + bool render(video::IGPUCommandBuffer* commandBuffer, 
video::ISemaphore::SWaitInfo waitInfo, std::span aabbInstances, const hlsl::float32_t4x4& cameraMat); static hlsl::float32_t4x4 getTransformFromAABB(const hlsl::shapes::AABB<3, float>& aabb); - void addAABB(const hlsl::shapes::AABB<3,float>& aabb, const hlsl::float32_t4& color = { 1,0,0,1 }); - void addOBB(const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t4x4& transform, const hlsl::float32_t4& color = { 1,0,0,1 }); - void clearAABBs(); - protected: DrawAABB(SCreationParameters&& _params, core::smart_refctd_ptr singlePipeline, core::smart_refctd_ptr batchPipeline, core::smart_refctd_ptr indicesBuffer, core::smart_refctd_ptr verticesBuffer); @@ -77,7 +73,6 @@ class DrawAABB final : public core::IReferenceCounted static core::smart_refctd_ptr createIndicesBuffer(SCreationParameters& params); static core::smart_refctd_ptr createVerticesBuffer(SCreationParameters& params); - std::vector m_instances; core::smart_refctd_ptr m_indicesBuffer; core::smart_refctd_ptr m_verticesBuffer; diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index 53a6e3cebb..3c40f9306e 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -336,7 +336,7 @@ bool DrawAABB::renderSingle(IGPUCommandBuffer* commandBuffer, const hlsl::shapes return true; } -bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo waitInfo, const hlsl::float32_t4x4& cameraMat) +bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo waitInfo, std::span aabbInstances, const hlsl::float32_t4x4& cameraMat) { using offset_t = SCachedCreationParameters::streaming_buffer_t::size_type; constexpr auto MdiSizes = std::to_array({ sizeof(float32_t3), sizeof(InstanceData) }); @@ -355,9 +355,11 @@ bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo wa asset::SBufferBinding indexBinding = { .offset = 0, .buffer = m_indicesBuffer }; commandBuffer->bindIndexBuffer(indexBinding, 
asset::EIT_32BIT); - auto instances = m_instances; - for (auto& inst : instances) + std::vector instances(aabbInstances.size()); + for (uint32_t i = 0; i < aabbInstances.size(); i++) { + auto& inst = instances[i]; + inst = aabbInstances[i]; inst.transform = hlsl::mul(cameraMat, inst.transform); } @@ -409,25 +411,4 @@ hlsl::float32_t4x4 DrawAABB::getTransformFromAABB(const hlsl::shapes::AABB<3, fl return transform; } -void DrawAABB::addAABB(const hlsl::shapes::AABB<3,float>& aabb, const hlsl::float32_t4& color) -{ - const auto transform = hlsl::float32_t4x4(1); - addOBB(aabb, transform, color); -} - -void DrawAABB::addOBB(const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t4x4& transform, const hlsl::float32_t4& color) -{ - InstanceData instance; - instance.color = color; - - hlsl::float32_t4x4 instanceTransform = getTransformFromAABB(aabb); - instance.transform = math::linalg::promoted_mul(transform, instanceTransform); - m_instances.push_back(instance); -} - -void DrawAABB::clearAABBs() -{ - m_instances.clear(); -} - } From a14c9dca918353cf713a500b95eec92b4bd728eb Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 20 Aug 2025 11:44:35 +0700 Subject: [PATCH 039/472] latest example --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 8518c2b342..323c782226 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 8518c2b342217548d0c6797b26b5c5e20bc4df60 +Subproject commit 323c782226a402e0e4d21e902029a0602f616cff From 9a35c9f6a30aa7ceb4ab7868acae4790a3833299 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 20 Aug 2025 12:00:50 +0700 Subject: [PATCH 040/472] removed vertex buffer, use const vertex array in shader instead --- include/nbl/ext/DebugDraw/CDrawAABB.h | 4 +- .../builtin/hlsl/aabb_instances.vertex.hlsl | 14 +++++- .../ext/DebugDraw/builtin/hlsl/common.hlsl | 2 - .../DebugDraw/builtin/hlsl/single.vertex.hlsl | 13 +++++- 
src/nbl/ext/DebugDraw/CDrawAABB.cpp | 45 ++----------------- 5 files changed, 28 insertions(+), 50 deletions(-) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 089e885887..1efe973d10 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -64,17 +64,15 @@ class DrawAABB final : public core::IReferenceCounted protected: DrawAABB(SCreationParameters&& _params, core::smart_refctd_ptr singlePipeline, core::smart_refctd_ptr batchPipeline, - core::smart_refctd_ptr indicesBuffer, core::smart_refctd_ptr verticesBuffer); + core::smart_refctd_ptr indicesBuffer); ~DrawAABB() override; private: static core::smart_refctd_ptr createPipeline(SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout, const std::string& vsPath, const std::string& fsPath); static bool createStreamingBuffer(SCreationParameters& params); static core::smart_refctd_ptr createIndicesBuffer(SCreationParameters& params); - static core::smart_refctd_ptr createVerticesBuffer(SCreationParameters& params); core::smart_refctd_ptr m_indicesBuffer; - core::smart_refctd_ptr m_verticesBuffer; SCachedCreationParameters m_cachedCreationParams; diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl index 929ff2e60d..bb535a7216 100644 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl @@ -13,9 +13,19 @@ using namespace nbl::ext::debug_draw; [shader("vertex")] PSInput main() { - PSInput output; + const float32_t3 unitAABBVertices[8] = { + float32_t3(0.0, 0.0, 0.0), + float32_t3(1.0, 0.0, 0.0), + float32_t3(0.0, 0.0, 1.0), + float32_t3(1.0, 0.0, 1.0), + float32_t3(0.0, 1.0, 0.0), + float32_t3(1.0, 1.0, 0.0), + float32_t3(0.0, 1.0, 1.0), + float32_t3(1.0, 1.0, 1.0) + }; - float32_t3 vertex = (bda::__ptr::create(pc.pVertexBuffer) + 
glsl::gl_VertexIndex()).deref_restrict().load(); + PSInput output; + float32_t3 vertex = unitAABBVertices[glsl::gl_VertexIndex()]; InstanceData instance = vk::BufferPointer(pc.pInstanceBuffer + sizeof(InstanceData) * glsl::gl_InstanceIndex()).Get(); output.position = math::linalg::promoted_mul(instance.transform, vertex); diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl index 03a3bbfa49..a178d45465 100644 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl @@ -18,13 +18,11 @@ struct InstanceData struct SSinglePushConstants { - uint64_t pVertexBuffer; InstanceData instance; }; struct SPushConstants { - uint64_t pVertexBuffer; uint64_t pInstanceBuffer; }; diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl index e9b68a811c..e1a426dec8 100644 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl @@ -13,8 +13,19 @@ using namespace nbl::ext::debug_draw; [shader("vertex")] PSInput main() { + const float32_t3 unitAABBVertices[8] = { + float32_t3(0.0, 0.0, 0.0), + float32_t3(1.0, 0.0, 0.0), + float32_t3(0.0, 0.0, 1.0), + float32_t3(1.0, 0.0, 1.0), + float32_t3(0.0, 1.0, 0.0), + float32_t3(1.0, 1.0, 0.0), + float32_t3(0.0, 1.0, 1.0), + float32_t3(1.0, 1.0, 1.0) + }; + PSInput output; - float32_t3 vertex = (bda::__ptr::create(pc.pVertexBuffer) + glsl::gl_VertexIndex()).deref_restrict().load(); + float32_t3 vertex = unitAABBVertices[glsl::gl_VertexIndex()]; output.position = math::linalg::promoted_mul(pc.instance.transform, vertex); output.color = pc.instance.color; diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index 3c40f9306e..a11052aa76 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -44,20 +44,13 @@ 
core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) logger->log("Failed to create indices buffer!", ILogger::ELL_ERROR); return nullptr; } - auto verticesBuffer = createVerticesBuffer(params); - if (!verticesBuffer) - { - logger->log("Failed to create vertices buffer!", ILogger::ELL_ERROR); - return nullptr; - } - return core::smart_refctd_ptr(new DrawAABB(std::move(params), singlePipeline, batchPipeline, indicesBuffer, verticesBuffer)); + return core::smart_refctd_ptr(new DrawAABB(std::move(params), singlePipeline, batchPipeline, indicesBuffer)); } -DrawAABB::DrawAABB(SCreationParameters&& params, core::smart_refctd_ptr singlePipeline, smart_refctd_ptr batchPipeline, - smart_refctd_ptr indicesBuffer, smart_refctd_ptr verticesBuffer) +DrawAABB::DrawAABB(SCreationParameters&& params, core::smart_refctd_ptr singlePipeline, smart_refctd_ptr batchPipeline, smart_refctd_ptr indicesBuffer) : m_cachedCreationParams(std::move(params)), m_singlePipeline(std::move(singlePipeline)), m_batchPipeline(std::move(batchPipeline)), - m_indicesBuffer(std::move(indicesBuffer)), m_verticesBuffer(std::move(verticesBuffer)) + m_indicesBuffer(std::move(indicesBuffer)) { } @@ -271,36 +264,6 @@ smart_refctd_ptr DrawAABB::createIndicesBuffer(SCreationParameters& return indicesBuffer; } -smart_refctd_ptr DrawAABB::createVerticesBuffer(SCreationParameters& params) -{ - const auto unitAABB = core::aabbox3d({ 0, 0, 0 }, { 1, 1, 1 }); - float32_t3 pMin = { 0, 0, 0 }; - float32_t3 pMax = { 1, 1, 1 }; - - std::array unitAABBVertices; - unitAABBVertices[0] = float32_t3(pMin.x, pMin.y, pMin.z); - unitAABBVertices[1] = float32_t3(pMax.x, pMin.y, pMin.z); - unitAABBVertices[2] = float32_t3(pMin.x, pMin.y, pMax.z); - unitAABBVertices[3] = float32_t3(pMax.x, pMin.y, pMax.z); - unitAABBVertices[4] = float32_t3(pMin.x, pMax.y, pMin.z); - unitAABBVertices[5] = float32_t3(pMax.x, pMax.y, pMin.z); - unitAABBVertices[6] = float32_t3(pMin.x, pMax.y, pMax.z); - unitAABBVertices[7] = 
float32_t3(pMax.x, pMax.y, pMax.z); - - IGPUBuffer::SCreationParams bufparams; - bufparams.size = sizeof(float32_t3) * unitAABBVertices.size(); - bufparams.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; - - smart_refctd_ptr vertexBuffer; - params.utilities->createFilledDeviceLocalBufferOnDedMem( - SIntendedSubmitInfo{ .queue = params.transfer }, - std::move(bufparams), - unitAABBVertices.data() - ).move_into(vertexBuffer); - - return vertexBuffer; -} - core::smart_refctd_ptr DrawAABB::createPipelineLayoutFromPCRange(video::ILogicalDevice* device, const asset::SPushConstantRange& pcRange) { return device->createPipelineLayout({ &pcRange , 1 }, nullptr, nullptr, nullptr, nullptr); @@ -324,7 +287,6 @@ bool DrawAABB::renderSingle(IGPUCommandBuffer* commandBuffer, const hlsl::shapes commandBuffer->bindIndexBuffer(indexBinding, asset::EIT_32BIT); SSinglePushConstants pc; - pc.pVertexBuffer = m_verticesBuffer->getDeviceAddress(); hlsl::float32_t4x4 instanceTransform = getTransformFromAABB(aabb); pc.instance.transform = hlsl::mul(cameraMat, instanceTransform); @@ -385,7 +347,6 @@ bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo wa assert(!streaming->needsManualFlushOrInvalidate()); SPushConstants pc; - pc.pVertexBuffer = m_verticesBuffer->getDeviceAddress(); pc.pInstanceBuffer = m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + instancesByteOffset; commandBuffer->pushConstants(m_batchPipeline->getLayout(), ESS_VERTEX, 0, sizeof(SPushConstants), &pc); From c6bd10b3a98bf0c29c0bb5ce76e32f88e738d31f Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 20 Aug 2025 16:15:36 +0700 Subject: [PATCH 041/472] validate creation params, added draw modes at create time --- examples_tests | 2 +- include/nbl/ext/DebugDraw/CDrawAABB.h | 10 +++++ src/nbl/ext/DebugDraw/CDrawAABB.cpp | 64 ++++++++++++++++++++++++--- 3 files changed, 68 insertions(+), 8 deletions(-) 
diff --git a/examples_tests b/examples_tests index 323c782226..f75dc215f9 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 323c782226a402e0e4d21e902029a0602f616cff +Subproject commit f75dc215f94f7a30c5083433f78e8937e154da44 diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 1efe973d10..a8e3205f22 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -18,6 +18,13 @@ class DrawAABB final : public core::IReferenceCounted static constexpr inline uint32_t IndicesCount = 24u; static constexpr inline uint32_t VerticesCount = 8u; + enum DrawMode : uint16_t + { + ADM_DRAW_SINGLE = 0b01, + ADM_DRAW_BATCH = 0b10, + ADM_DRAW_BOTH = 0b11 + }; + struct SCachedCreationParameters { using streaming_buffer_t = video::StreamingTransientDataBufferST>; @@ -25,6 +32,8 @@ class DrawAABB final : public core::IReferenceCounted static constexpr inline auto RequiredAllocateFlags = core::bitflag(video::IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); static constexpr inline auto RequiredUsageFlags = core::bitflag(asset::IBuffer::EUF_STORAGE_BUFFER_BIT) | asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; + DrawMode drawMode = ADM_DRAW_BOTH; + core::smart_refctd_ptr utilities; //! 
optional, default MDI buffer allocated if not provided @@ -68,6 +77,7 @@ class DrawAABB final : public core::IReferenceCounted ~DrawAABB() override; private: + static bool validateCreationParameters(SCreationParameters& params); static core::smart_refctd_ptr createPipeline(SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout, const std::string& vsPath, const std::string& fsPath); static bool createStreamingBuffer(SCreationParameters& params); static core::smart_refctd_ptr createIndicesBuffer(SCreationParameters& params); diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index a11052aa76..f3f33d2733 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -19,17 +19,32 @@ core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) { auto* const logger = params.utilities->getLogger(); - auto singlePipeline = createPipeline(params, params.singlePipelineLayout.get(), "single.vertex.hlsl", "aabb_instances.fragment.hlsl"); - if (!singlePipeline) + if (!validateCreationParameters(params)) { - logger->log("Failed to create pipeline!", ILogger::ELL_ERROR); + logger->log("Failed creation parameters validation!", ILogger::ELL_ERROR); return nullptr; } - auto batchPipeline = createPipeline(params, params.batchPipelineLayout.get(), "aabb_instances.vertex.hlsl", "aabb_instances.fragment.hlsl"); - if (!batchPipeline) + + smart_refctd_ptr singlePipeline = nullptr; + if (params.drawMode & ADM_DRAW_SINGLE) { - logger->log("Failed to create pipeline!", ILogger::ELL_ERROR); - return nullptr; + singlePipeline = createPipeline(params, params.singlePipelineLayout.get(), "single.vertex.hlsl", "aabb_instances.fragment.hlsl"); + if (!singlePipeline) + { + logger->log("Failed to create pipeline!", ILogger::ELL_ERROR); + return nullptr; + } + } + + smart_refctd_ptr batchPipeline = nullptr; + if (params.drawMode & ADM_DRAW_BATCH) + { + batchPipeline = createPipeline(params, 
params.batchPipelineLayout.get(), "aabb_instances.vertex.hlsl", "aabb_instances.fragment.hlsl"); + if (!batchPipeline) + { + logger->log("Failed to create pipeline!", ILogger::ELL_ERROR); + return nullptr; + } } if (!createStreamingBuffer(params)) @@ -83,6 +98,29 @@ const smart_refctd_ptr DrawAABB::mount(smart_refctd_ptr l return smart_refctd_ptr(archive); } +bool DrawAABB::validateCreationParameters(SCreationParameters& creationParams) +{ + const auto validation = std::to_array + ({ + std::make_pair(bool(creationParams.assetManager), "Invalid `creationParams.assetManager` is nullptr!"), + std::make_pair(bool(creationParams.assetManager->getSystem()), "Invalid `creationParams.assetManager->getSystem()` is nullptr!"), + std::make_pair(bool(creationParams.utilities), "Invalid `creationParams.utilities` is nullptr!"), + std::make_pair(bool(creationParams.transfer), "Invalid `creationParams.transfer` is nullptr!"), + std::make_pair(bool(creationParams.renderpass), "Invalid `creationParams.renderpass` is nullptr!"), + (creationParams.assetManager && creationParams.utilities && creationParams.transfer && creationParams.renderpass) ? 
std::make_pair(bool(creationParams.utilities->getLogicalDevice()->getPhysicalDevice()->getQueueFamilyProperties()[creationParams.transfer->getFamilyIndex()].queueFlags.hasFlags(IQueue::FAMILY_FLAGS::TRANSFER_BIT)), "Invalid `creationParams.transfer` is not capable of transfer operations!") : std::make_pair(false, "Pass valid required DrawAABB::S_CREATION_PARAMETERS!") + }); + + system::logger_opt_ptr logger = creationParams.utilities->getLogger(); + for (const auto& [ok, error] : validation) + if (!ok) + { + logger.log(error, ILogger::ELL_ERROR); + return false; + } + + return true; +} + smart_refctd_ptr DrawAABB::createPipeline(SCreationParameters& params, const IGPUPipelineLayout* pipelineLayout, const std::string& vsPath, const std::string& fsPath) { auto system = smart_refctd_ptr(params.assetManager->getSystem()); @@ -281,6 +319,12 @@ core::smart_refctd_ptr DrawAABB::createDefaultPipelin bool DrawAABB::renderSingle(IGPUCommandBuffer* commandBuffer, const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t4& color, const hlsl::float32_t4x4& cameraMat) { + if (!(m_cachedCreationParams.drawMode & ADM_DRAW_SINGLE)) + { + m_cachedCreationParams.utilities->getLogger()->log("DrawAABB has not been enabled for draw single!", ILogger::ELL_ERROR); + return false; + } + commandBuffer->bindGraphicsPipeline(m_singlePipeline.get()); commandBuffer->setLineWidth(1.f); asset::SBufferBinding indexBinding = { .offset = 0, .buffer = m_indicesBuffer }; @@ -300,6 +344,12 @@ bool DrawAABB::renderSingle(IGPUCommandBuffer* commandBuffer, const hlsl::shapes bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo waitInfo, std::span aabbInstances, const hlsl::float32_t4x4& cameraMat) { + if (!(m_cachedCreationParams.drawMode & ADM_DRAW_BATCH)) + { + m_cachedCreationParams.utilities->getLogger()->log("DrawAABB has not been enabled for draw batches!", ILogger::ELL_ERROR); + return false; + } + using offset_t = 
SCachedCreationParameters::streaming_buffer_t::size_type; constexpr auto MdiSizes = std::to_array({ sizeof(float32_t3), sizeof(InstanceData) }); // shared nPoT alignment needs to be divisible by all smaller ones to satisfy an allocation from all From b63520841b9376096d61756561b94176974f9d3d Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 25 Aug 2025 14:25:42 +0700 Subject: [PATCH 042/472] Dummy --- .../asset/utils/CPolygonGeometryManipulator.h | 22 + include/nbl/builtin/hlsl/shapes/aabb.hlsl | 24 + .../utils/CPolygonGeometryManipulator.cpp | 490 ++++++++++++++++++ 3 files changed, 536 insertions(+) diff --git a/include/nbl/asset/utils/CPolygonGeometryManipulator.h b/include/nbl/asset/utils/CPolygonGeometryManipulator.h index 3aa3c25304..99ac6f440a 100644 --- a/include/nbl/asset/utils/CPolygonGeometryManipulator.h +++ b/include/nbl/asset/utils/CPolygonGeometryManipulator.h @@ -99,6 +99,28 @@ class NBL_API2 CPolygonGeometryManipulator EEM_QUATERNION, EEM_COUNT }; + + struct VertexCollection + { + using FetchFn = std::function; + FetchFn fetch; + size_t size; + + static auto fromSpan(std::span vertices) -> VertexCollection + { + return VertexCollection{ + .fetch = [data = vertices.data()](size_t vertexIndex)-> hlsl::float32_t3 + { + return data[vertexIndex]; + }, + .size = vertices.size() + }; + } + + hlsl::float32_t3 operator[](size_t index) const { return fetch(index); } + }; + static hlsl::shapes::OBB<3, hlsl::float32_t> calculateOBB(const VertexCollection& vertexCollection); + #if 0 // TODO: REDO //! Struct used to pass chosen comparison method and epsilon to functions performing error metrics. 
/** diff --git a/include/nbl/builtin/hlsl/shapes/aabb.hlsl b/include/nbl/builtin/hlsl/shapes/aabb.hlsl index d07b38df37..5c6435e3e7 100644 --- a/include/nbl/builtin/hlsl/shapes/aabb.hlsl +++ b/include/nbl/builtin/hlsl/shapes/aabb.hlsl @@ -60,6 +60,30 @@ struct AABB point_t maxVx; }; +template +struct OBB +{ + using scalar_t = Scalar; + using point_t = vector; + + static OBB createAxisAligned(point_t mid, point_t len) + { + OBB ret; + ret.mid = mid; + ret.ext = len; + for (auto dim_i = 0; dim_i < D; dim_i++) + { + ret.axes[dim_i] = point_t(); + ret.axes[dim_i][D] = 1; + } + return ret; + } + + point_t mid; + std::array axes; + point_t ext; +}; + namespace util { namespace impl diff --git a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp index 1e08c172ba..be9d6dec3c 100644 --- a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp +++ b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp @@ -20,6 +20,496 @@ namespace nbl::asset { + +template +struct Extremals +{ + std::array values; + + T* minPtr() + { + return values.data(); + } + + const T* minPtr() const + { + return values.data(); + } + + T* maxPtr() + { + return values.data() + CountV; + } + + const T* maxPtr() const + { + return values.data() + CountV; + } +}; + + +hlsl::shapes::OBB<> CPolygonGeometryManipulator::calculateOBB(const VertexCollection& vertices) +{ + constexpr size_t SAMPLE_DIR_COUNT = 7; // Number of sample directions + constexpr size_t SAMPLE_COUNT = SAMPLE_DIR_COUNT * 2; + + if (vertices.size <= 0) + { + return hlsl::shapes::OBB<>::createAxisAligned({}, {}); + } + + static auto getQualityValue = [](hlsl::float32_t3 len) -> hlsl::float32_t + { + return len.x * len.y + len.x * len.z + len.y * len.z; //half box area + }; + + using ExtremalVertices = Extremals; + using ExtremalProjections = Extremals; + using Axes = std::array; + using Edges = std::array; + + struct ExtremalSamples + { + ExtremalVertices vertices; + ExtremalProjections 
projections; + }; + + struct LargeBaseTriangle + { + hlsl::float32_t3 normal = {}; + Axes vertices = {}; + Edges edges = {}; + enum Flag + { + NORMAL, + SECOND_POINT_CLOSE, + THIRD_POINT_CLOSE + } flag; + }; + + static auto findExtremals_7FixedDirs = [](const VertexCollection& vertices)-> ExtremalSamples + { + ExtremalSamples result; + hlsl::float32_t proj; + + const auto firstVertex = vertices.fetch(0); + + auto* minProjections = result.projections.minPtr(); + auto* maxProjections = result.projections.maxPtr(); + + auto* minVertices = result.vertices.minPtr(); + auto* maxVertices = result.vertices.maxPtr(); + + // Slab 0: dir {1, 0, 0} + proj = firstVertex.x; + minProjections[0] = minProjections[0] = proj; + minVertices[0] = firstVertex; maxVertices[0] = firstVertex; + // Slab 1: dir {0, 1, 0} + proj = firstVertex.y; + minProjections[1] = maxProjections[1] = proj; + minVertices[1] = firstVertex; maxVertices[1] = firstVertex; + // Slab 2: dir {0, 0, 1} + proj = firstVertex.z; + minProjections[2] = maxProjections[2] = proj; + minVertices[2] = firstVertex; maxVertices[2] = firstVertex; + // Slab 3: dir {1, 1, 1} + proj = firstVertex.x + firstVertex.y + firstVertex.z; + minProjections[3] = maxProjections[3] = proj; + minVertices[3] = firstVertex; maxVertices[3] = firstVertex; + // Slab 4: dir {1, 1, -1} + proj = firstVertex.x + firstVertex.y - firstVertex.z; + minProjections[4] = maxProjections[4] = proj; + minVertices[4] = firstVertex; maxVertices[4] = firstVertex; + // Slab 5: dir {1, -1, 1} + proj = firstVertex.x - firstVertex.y + firstVertex.z; + minProjections[5] = maxProjections[5] = proj; + minVertices[5] = firstVertex; maxVertices[5] = firstVertex; + // Slab 6: dir {1, -1, -1} + proj = firstVertex.x - firstVertex.y - firstVertex.z; + minProjections[6] = maxProjections[6] = proj; + minVertices[6] = firstVertex; maxVertices[6] = firstVertex; + + for (size_t vertex_i = 1; vertex_i < vertices.size; vertex_i++) + { + const auto vertex = vertices.fetch(vertex_i); + 
// Slab 0: dir {1, 0, 0} + proj = vertices.fetch(vertex_i).x; + if (proj < minProjections[0]) { minProjections[0] = proj; minVertices[0] = vertices.fetch(vertex_i); } + if (proj > maxProjections[0]) { maxProjections[0] = proj; maxVertices[0] = vertices.fetch(vertex_i); } + // Slab 1: dir {0, 1, 0} + proj = vertices.fetch(vertex_i).y; + if (proj < minProjections[1]) { minProjections[1] = proj; minVertices[1] = vertices.fetch(vertex_i); } + if (proj > maxProjections[1]) { maxProjections[1] = proj; maxVertices[1] = vertices.fetch(vertex_i); } + // Slab 2: dir {0, 0, 1} + proj = vertices.fetch(vertex_i).z; + if (proj < minProjections[2]) { minProjections[2] = proj; minVertices[2] = vertices.fetch(vertex_i); } + if (proj > maxProjections[2]) { maxProjections[2] = proj; maxVertices[2] = vertices.fetch(vertex_i); } + // Slab 3: dir {1, 1, 1} + proj = vertices.fetch(vertex_i).x + vertices.fetch(vertex_i).y + vertices.fetch(vertex_i).z; + if (proj < minProjections[3]) { minProjections[3] = proj; minVertices[3] = vertices.fetch(vertex_i); } + if (proj > maxProjections[3]) { maxProjections[3] = proj; maxVertices[3] = vertices.fetch(vertex_i); } + // Slab 4: dir {1, 1, -1} + proj = vertices.fetch(vertex_i).x + vertices.fetch(vertex_i).y - vertices.fetch(vertex_i).z; + if (proj < minProjections[4]) { minProjections[4] = proj; minVertices[4] = vertices.fetch(vertex_i); } + if (proj > maxProjections[4]) { maxProjections[4] = proj; maxVertices[4] = vertices.fetch(vertex_i); } + // Slab 5: dir {1, -1, 1} + proj = vertices.fetch(vertex_i).x - vertices.fetch(vertex_i).y + vertices.fetch(vertex_i).z; + if (proj < minProjections[5]) { minProjections[5] = proj; minVertices[5] = vertices.fetch(vertex_i); } + if (proj > maxProjections[5]) { maxProjections[5] = proj; maxVertices[5] = vertices.fetch(vertex_i); } + // Slab 6: dir {1, -1, -1} + proj = vertices.fetch(vertex_i).x - vertices.fetch(vertex_i).y - vertices.fetch(vertex_i).z; + if (proj < minProjections[6]) { minProjections[6] = 
proj; minVertices[6] = vertices.fetch(vertex_i); } + if (proj > maxProjections[6]) { maxProjections[6] = proj; maxVertices[6] = vertices.fetch(vertex_i); } + } + + return result; + }; + + + static auto findFurthestPointPair = [](const ExtremalVertices& extremalVertices) -> std::pair + { + int indexFurthestPair = 0; + auto maxSqDist = hlsl::dot(extremalVertices.maxPtr()[0], extremalVertices.minPtr()[0]); + for (int k = 1; k < SAMPLE_DIR_COUNT; k++) + { + const auto sqDist = hlsl::dot(extremalVertices.maxPtr()[k], extremalVertices.minPtr()[k]); + if (sqDist > maxSqDist) { maxSqDist = sqDist; indexFurthestPair = k; } + } + return { + extremalVertices.minPtr()[indexFurthestPair], + extremalVertices.maxPtr()[indexFurthestPair] + }; + }; + + static auto sqDistPointInfiniteEdge = [](const hlsl::float32_t3& q, const hlsl::float32_t3& p0, const hlsl::float32_t3& v) -> hlsl::float32_t + { + const auto u0 = q - p0; + const auto t = dot(v, u0); + const auto sqLen_v = hlsl::dot(v, v); + return hlsl::dot(u0, u0) - (t * t) / sqLen_v; + }; + + static auto findFurthestPointFromInfiniteEdge = [](const hlsl::float32_t3& p0, const hlsl::float32_t3& e0, const VertexCollection& vertices) + { + auto maxSqDist = sqDistPointInfiniteEdge(vertices[0], p0, e0); + int maxIndex = 0; + for (size_t i = 1; i < vertices.size; i++) + { + const auto sqDist = sqDistPointInfiniteEdge(vertices[i], p0, e0); + if (sqDist > maxSqDist) + { maxSqDist = sqDist; + maxIndex = i; + } + } + + struct Result + { + hlsl::float32_t3 point; + hlsl::float32_t sqDist; + }; + return Result{ + vertices[maxIndex], + maxSqDist + }; + }; + + static auto findExtremalProjs_OneDir = [](const hlsl::float32_t3& normal, const VertexCollection& vertices) + { + const auto firstProj = hlsl::dot(vertices[0], normal); + auto tMinProj = firstProj, tMaxProj = firstProj; + + for (int i = 1; i < vertices.size; i++) + { + const auto proj = hlsl::dot(vertices[i], normal); + if (proj < tMinProj) { tMinProj = proj; } + if (proj > tMaxProj) { 
tMaxProj = proj; } + } + + struct Result + { + hlsl::float32_t minProj; + hlsl::float32_t maxProj; + }; + return Result{ tMinProj, tMaxProj }; + }; + + static auto findExtremalPoints_OneDir = [](const hlsl::float32_t3& normal, const VertexCollection& vertices) + { + const auto firstProj = dot(vertices[0], normal); + + auto tMinProj = firstProj, tMaxProj = firstProj; + auto tMinVert = vertices[0], tMaxVert = vertices[0]; + + for (int i = 1; i < vertices.size; i++) + { + const auto proj = hlsl::dot(vertices[i], normal); + if (proj < tMinProj) { tMinProj = proj; tMinVert = vertices[i]; } + if (proj > tMaxProj) { tMaxProj = proj; tMaxVert = vertices[i]; } + } + + struct Result + { + hlsl::float32_t minProj; + hlsl::float32_t maxProj; + hlsl::float32_t3 minVert; + hlsl::float32_t3 maxVert; + }; + return Result{ tMinProj, tMaxProj, tMinVert, tMaxVert }; + }; + + static auto findUpperLowerTetraPoints = []( + const hlsl::float32_t3& n, + const VertexCollection& vertices, + const hlsl::float32_t3& p0) + { + const auto eps = 0.000001f; + const auto extremalPoints = findExtremalPoints_OneDir(n, vertices); + const auto triProj = hlsl::dot(p0, n); + + const auto maxVert = extremalPoints.maxProj - eps > triProj ? std::optional(extremalPoints.maxVert) : std::nullopt; + const auto minVert = extremalPoints.minProj + eps < triProj ? 
std::optional(extremalPoints.minVert) : std::nullopt; + + struct Result + { + std::optional minVert; + std::optional maxVert; + }; + return Result{ + minVert, + maxVert + }; + }; + + static auto findBestObbAxesFromTriangleNormalAndEdgeVectors = []( + const VertexCollection& vertices, + const hlsl::float32_t3 normal, + const std::array edges, + Axes& bestAxes, + hlsl::float32_t& bestVal) + { + hlsl::float32_t3 dmax, dmin, dlen; + + // The operands are assumed to be orthogonal and unit normals + const auto yExtremeProjs = findExtremalProjs_OneDir(normal, vertices); + dmin.y = yExtremeProjs.minProj; + dmax.y = yExtremeProjs.maxProj; + dlen.y = dmax.y - dmin.y; + + for (const auto& edge : edges) + { + const auto binormal = hlsl::cross(edge, normal); + + const auto xExtremeProjs = findExtremalProjs_OneDir(edge, vertices); + dmin.x = xExtremeProjs.minProj; + dmax.x = xExtremeProjs.maxProj; + dlen.x = dmax.x - dmin.x; + + const auto zExtremeProjs = findExtremalProjs_OneDir(binormal, vertices); + dmin.z = zExtremeProjs.minProj; + dmax.z = zExtremeProjs.maxProj; + dlen.z = dmax.z - dmin.z; + + const auto quality = getQualityValue(dlen); + if (quality < bestVal) + { + bestVal = quality; + bestAxes = { + edge, + normal, + binormal + }; + } + } + + }; + + + static auto findBaseTriangle = [](const ExtremalVertices& extremalVertices, const VertexCollection& vertices)-> LargeBaseTriangle + { + hlsl::float32_t eps = 0.000001f; + + std::array baseTriangleVertices; + Edges edges; + + // Find the furthest point pair among the selected min and max point pairs + std::tie(baseTriangleVertices[0], baseTriangleVertices[1]) = findFurthestPointPair(extremalVertices); + + // Degenerate case 1: + // If the found furthest points are located very close, return OBB aligned with the initial AABB + if (hlsl::dot(baseTriangleVertices[0], baseTriangleVertices[1]) < eps) + { + return { + .vertices = baseTriangleVertices, + .flag = LargeBaseTriangle::SECOND_POINT_CLOSE + }; + } + + // Compute edge 
vector of the line segment p0, p1 + edges[0] = hlsl::normalize(baseTriangleVertices[0] - baseTriangleVertices[1]); + + // Find a third point furthest away from line given by p0, e0 to define the large base triangle + const auto furthestPointRes = findFurthestPointFromInfiniteEdge(vertices[0], edges[0], vertices); + + // Degenerate case 2: + // If the third point is located very close to the line, return an OBB aligned with the line + if (furthestPointRes.sqDist < eps) + { + return { + .vertices = baseTriangleVertices, + .edges = edges, + .flag = LargeBaseTriangle::THIRD_POINT_CLOSE + }; + } + + // Compute the two remaining edge vectors and the normal vector of the base triangle + edges[1] = hlsl::normalize(baseTriangleVertices[1] - baseTriangleVertices[2]); + edges[2] = hlsl::normalize(baseTriangleVertices[2] - baseTriangleVertices[0]); + const auto normal = hlsl::normalize(hlsl::cross(edges[1], edges[0])); + + return { + .normal = normal, + .vertices = baseTriangleVertices, + .edges = edges, + .flag = LargeBaseTriangle::NORMAL + }; + }; + + auto findImprovedObbAxesFromUpperAndLowerTetrasOfBaseTriangle = [](const VertexCollection& vertices, + const LargeBaseTriangle& baseTriangle, + Axes& bestAxes, hlsl::float32_t& bestVal) + { + hlsl::float32_t3 q0, q1; // Top and bottom vertices for lower and upper tetra constructions + hlsl::float32_t3 f0, f1, f2; // Edge vectors towards q0; + hlsl::float32_t3 g0, g1, g2; // Edge vectors towards q1; + hlsl::float32_t3 n0, n1, n2; // Unit normals of top tetra tris + hlsl::float32_t3 m0, m1, m2; // Unit normals of bottom tetra tris + + // Find furthest points above and below the plane of the base triangle for tetra constructions + // For each found valid point, search for the best OBB axes based on the 3 arising triangles + const auto upperLowerTetraVertices = findUpperLowerTetraPoints(baseTriangle.normal, vertices, baseTriangle.vertices[0]); + if (upperLowerTetraVertices.minVert) + { + f0 = normalize(q0 - 
baseTriangle.vertices[0]); + f1 = normalize(q0 - baseTriangle.vertices[1]); + f2 = normalize(q0 - baseTriangle.vertices[2]); + n0 = normalize(cross(f1, baseTriangle.edges[0])); + n1 = normalize(cross(f2, baseTriangle.edges[1])); + n2 = normalize(cross(f0, baseTriangle.edges[2])); + findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, n0, { baseTriangle.edges[0], f1, f0 }, bestAxes, bestVal); + findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, n1, { baseTriangle.edges[1], f2, f1 }, bestAxes, bestVal); + findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, n2, { baseTriangle.edges[2], f0, f2 }, bestAxes, bestVal); + } + if (upperLowerTetraVertices.maxVert) + { + g0 = normalize(q1 - baseTriangle.vertices[0]); + g1 = normalize(q1 - baseTriangle.vertices[1]); + g2 = normalize(q1 - baseTriangle.vertices[2]); + m0 = normalize(cross(g1, baseTriangle.edges[0])); + m1 = normalize(cross(g2, baseTriangle.edges[1])); + m2 = normalize(cross(g0, baseTriangle.edges[2])); + findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, m0, { baseTriangle.edges[0], g1, g0 }, bestAxes, bestVal); + findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, m1, { baseTriangle.edges[1], g2, g1 }, bestAxes, bestVal); + findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, m2, { baseTriangle.edges[2], g0, g2 }, bestAxes, bestVal); + } + }; + + static auto buildObbFromAxesAndLocalMinMax = []( + const Axes& axes, + const hlsl::float32_t3& localMin, + const hlsl::float32_t3& localMax) -> hlsl::shapes::OBB<3, hlsl::float32_t> + { + const auto localMid = 0.5f * (localMin + localMax); + auto globalMid = axes[0] * localMid.x; + globalMid += axes[1] * localMid.y; + globalMid += axes[2] * localMid.z; + return { + .mid = globalMid, + .axes = axes, + .ext = 0.5f * (localMax - localMin) + }; + }; + + static auto computeObb = [](const Axes& axes, const VertexCollection& vertices) + { + const auto extremalX = findExtremalProjs_OneDir(axes[0], vertices); + const auto extremalY = 
findExtremalProjs_OneDir(axes[1], vertices); + const auto extremalZ = findExtremalProjs_OneDir(axes[2], vertices); + const auto localMin = hlsl::float32_t3{ extremalX.minProj, extremalY.minProj, extremalZ.minProj }; + const auto localMax = hlsl::float32_t3{ extremalX.maxProj, extremalY.maxProj, extremalZ.maxProj }; + return buildObbFromAxesAndLocalMinMax(axes, localMin, localMax); + }; + + static auto computeLineAlignedObb = [](const hlsl::float32_t3& u, const VertexCollection& vertices) + { + // Given u, build any orthonormal base u, v, w + + // Make sure r is not equal to u + auto r = u; + if (fabs(u.x) > fabs(u.y) && fabs(u.x) > fabs(u.z)) { r.x = 0; } + else if (fabs(u.y) > fabs(u.z)) { r.y = 0; } + else { r.z = 0; } + + const auto sqLen = hlsl::dot(r, r); + if (sqLen < FLT_EPSILON) { r.x = r.y = r.z = 1; } + + const auto v = normalize(cross(u, r)); + const auto w = normalize(cross(u, v)); + return computeObb({ u, v, w }, vertices); + }; + + const auto extremals = findExtremals_7FixedDirs(vertices); + + const auto* minProj = extremals.projections.minPtr(); + const auto* maxProj = extremals.projections.maxPtr(); + const auto* minVert = extremals.vertices.minPtr(); + const auto* maxVert = extremals.vertices.maxPtr(); + + + // Determine which points to use in the iterations below + const auto selectedVertices = [&] + { + if (vertices.size < SAMPLE_COUNT) { return vertices; } + return VertexCollection::fromSpan(extremals.vertices.values); + }(); + + // Compute size of AABB (max and min projections of vertices are already computed as slabs 0-2) + auto alMid = hlsl::float32_t3((minProj[0] + maxProj[0]) * 0.5f, (minProj[1] + maxProj[1]) * 0.5f, (minProj[2] + maxProj[2]) * 0.5f); + auto alLen = hlsl::float32_t3(maxProj[0] - minProj[0], maxProj[1] - minProj[1], maxProj[2] - minProj[2]); + auto alVal = getQualityValue(alLen); + + const auto baseTriangle = findBaseTriangle(extremals.vertices, vertices); + if (baseTriangle.flag == LargeBaseTriangle::SECOND_POINT_CLOSE) + 
return hlsl::shapes::OBB<>::createAxisAligned(alMid, alLen); + if (baseTriangle.flag == LargeBaseTriangle::THIRD_POINT_CLOSE) + return computeLineAlignedObb(baseTriangle.edges[0], vertices); + + + Axes bestAxes = { + hlsl::float32_t3{1.f, 0.f, 0.f}, + {0.f, 1.f, 0.f}, + {0.f, 0.f, 1.f}, + }; + auto bestVal = alVal; + // Find best OBB axes based on the base triangle + findBestObbAxesFromTriangleNormalAndEdgeVectors(selectedVertices, baseTriangle.normal, baseTriangle.edges, bestAxes, bestVal); + + // Find improved OBB axes based on constructed di-tetrahedral shape raised from base triangle + findImprovedObbAxesFromUpperAndLowerTetrasOfBaseTriangle(selectedVertices, baseTriangle, bestAxes, bestVal); + + const auto obb = computeObb(bestAxes, vertices); + + // Check if the OBB extent is still smaller than the intial AABB + if (getQualityValue(2.f * obb.ext) < alVal) + { + return hlsl::shapes::OBB<>{ + .mid = alMid, + .axes = bestAxes, + .ext = alLen / 2.f, + }; + } + return hlsl::shapes::OBB<>::createAxisAligned(alMid, alLen); +} + #if 0 //! Flips the direction of surfaces. Changes backfacing triangles to frontfacing //! triangles and vice versa. 
From 12164ac7a66bb2033ec2ed64cf74abb9328eed83 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 6 Sep 2025 10:55:52 +0700 Subject: [PATCH 043/472] Implement getTransformOBB for DrawAABB extension --- include/nbl/ext/DebugDraw/CDrawAABB.h | 2 ++ src/nbl/ext/DebugDraw/CDrawAABB.cpp | 21 +++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index a8e3205f22..5129e62419 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -71,6 +71,8 @@ class DrawAABB final : public core::IReferenceCounted static hlsl::float32_t4x4 getTransformFromAABB(const hlsl::shapes::AABB<3, float>& aabb); + static hlsl::float32_t4x4 getTransformFromOBB(const hlsl::shapes::OBB<3, float>& aabb); + protected: DrawAABB(SCreationParameters&& _params, core::smart_refctd_ptr singlePipeline, core::smart_refctd_ptr batchPipeline, core::smart_refctd_ptr indicesBuffer); diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index f3f33d2733..4f72b5a5a2 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -422,4 +422,25 @@ hlsl::float32_t4x4 DrawAABB::getTransformFromAABB(const hlsl::shapes::AABB<3, fl return transform; } +hlsl::float32_t4x4 DrawAABB::getTransformFromOBB(const hlsl::shapes::OBB<3, float>& obb) +{ + const auto obbScale = obb.ext * 2.0f; + const auto obbMat = hlsl::transpose(float32_t4x4{ + hlsl::float32_t4(obb.axes[0] * obbScale.x, 0), + hlsl::float32_t4(obb.axes[1] * obbScale.y, 0), + hlsl::float32_t4(obb.axes[2] * obbScale.z, 0), + hlsl::float32_t4(obb.mid, 1) + }); + + const auto translateUnitCube = float32_t4x4{ + hlsl::float32_t4(1, 0, 0, -0.5f), + hlsl::float32_t4(0, 1, 0, -0.5f), + hlsl::float32_t4(0, 0, 1, -0.5f), + hlsl::float32_t4(0, 0, 0, 1), + }; + + const auto transform = mul(obbMat, translateUnitCube); + return transform; +} + } From 
5a30e8078d4fb6bbd8b53a6aad4e5b4a5fe1f8f1 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 6 Sep 2025 17:20:04 +0700 Subject: [PATCH 044/472] Move OBB Calculation into its own file --- .../asset/utils/CPolygonGeometryManipulator.h | 2 +- src/nbl/CMakeLists.txt | 1 + .../utils/CPolygonGeometryManipulator.cpp | 488 +----------------- 3 files changed, 5 insertions(+), 486 deletions(-) diff --git a/include/nbl/asset/utils/CPolygonGeometryManipulator.h b/include/nbl/asset/utils/CPolygonGeometryManipulator.h index 067ac2b0b2..cb62cacb76 100644 --- a/include/nbl/asset/utils/CPolygonGeometryManipulator.h +++ b/include/nbl/asset/utils/CPolygonGeometryManipulator.h @@ -16,7 +16,7 @@ namespace nbl::asset { //! An interface for easy manipulation of polygon geometries. -class CPolygonGeometryManipulator +class NBL_API2 CPolygonGeometryManipulator { public: static inline void recomputeContentHashes(ICPUPolygonGeometry* geo) diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 935beffe2c..b0c27fafb0 100755 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -180,6 +180,7 @@ set(NBL_ASSET_SOURCES # Meshes asset/utils/CForsythVertexCacheOptimizer.cpp asset/utils/CSmoothNormalGenerator.cpp + asset/utils/COBBGenerator.cpp asset/utils/CGeometryCreator.cpp asset/utils/CPolygonGeometryManipulator.cpp asset/utils/COverdrawPolygonGeometryOptimizer.cpp diff --git a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp index be9d6dec3c..7b2bdb3963 100644 --- a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp +++ b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp @@ -16,500 +16,18 @@ #include "nbl/asset/utils/CSmoothNormalGenerator.h" #include "nbl/asset/utils/CForsythVertexCacheOptimizer.h" #include "nbl/asset/utils/COverdrawPolygonGeometryOptimizer.h" +#include "nbl/asset/utils/COBBGenerator.h" namespace nbl::asset { -template -struct Extremals -{ - std::array values; - - T* minPtr() - { - return values.data(); 
- } - - const T* minPtr() const - { - return values.data(); - } - - T* maxPtr() - { - return values.data() + CountV; - } - - const T* maxPtr() const - { - return values.data() + CountV; - } -}; - - hlsl::shapes::OBB<> CPolygonGeometryManipulator::calculateOBB(const VertexCollection& vertices) { - constexpr size_t SAMPLE_DIR_COUNT = 7; // Number of sample directions - constexpr size_t SAMPLE_COUNT = SAMPLE_DIR_COUNT * 2; - - if (vertices.size <= 0) - { - return hlsl::shapes::OBB<>::createAxisAligned({}, {}); - } - - static auto getQualityValue = [](hlsl::float32_t3 len) -> hlsl::float32_t - { - return len.x * len.y + len.x * len.z + len.y * len.z; //half box area - }; - - using ExtremalVertices = Extremals; - using ExtremalProjections = Extremals; - using Axes = std::array; - using Edges = std::array; - - struct ExtremalSamples - { - ExtremalVertices vertices; - ExtremalProjections projections; - }; - - struct LargeBaseTriangle - { - hlsl::float32_t3 normal = {}; - Axes vertices = {}; - Edges edges = {}; - enum Flag - { - NORMAL, - SECOND_POINT_CLOSE, - THIRD_POINT_CLOSE - } flag; - }; - - static auto findExtremals_7FixedDirs = [](const VertexCollection& vertices)-> ExtremalSamples - { - ExtremalSamples result; - hlsl::float32_t proj; - - const auto firstVertex = vertices.fetch(0); - - auto* minProjections = result.projections.minPtr(); - auto* maxProjections = result.projections.maxPtr(); - - auto* minVertices = result.vertices.minPtr(); - auto* maxVertices = result.vertices.maxPtr(); - - // Slab 0: dir {1, 0, 0} - proj = firstVertex.x; - minProjections[0] = minProjections[0] = proj; - minVertices[0] = firstVertex; maxVertices[0] = firstVertex; - // Slab 1: dir {0, 1, 0} - proj = firstVertex.y; - minProjections[1] = maxProjections[1] = proj; - minVertices[1] = firstVertex; maxVertices[1] = firstVertex; - // Slab 2: dir {0, 0, 1} - proj = firstVertex.z; - minProjections[2] = maxProjections[2] = proj; - minVertices[2] = firstVertex; maxVertices[2] = firstVertex; - // 
Slab 3: dir {1, 1, 1} - proj = firstVertex.x + firstVertex.y + firstVertex.z; - minProjections[3] = maxProjections[3] = proj; - minVertices[3] = firstVertex; maxVertices[3] = firstVertex; - // Slab 4: dir {1, 1, -1} - proj = firstVertex.x + firstVertex.y - firstVertex.z; - minProjections[4] = maxProjections[4] = proj; - minVertices[4] = firstVertex; maxVertices[4] = firstVertex; - // Slab 5: dir {1, -1, 1} - proj = firstVertex.x - firstVertex.y + firstVertex.z; - minProjections[5] = maxProjections[5] = proj; - minVertices[5] = firstVertex; maxVertices[5] = firstVertex; - // Slab 6: dir {1, -1, -1} - proj = firstVertex.x - firstVertex.y - firstVertex.z; - minProjections[6] = maxProjections[6] = proj; - minVertices[6] = firstVertex; maxVertices[6] = firstVertex; - - for (size_t vertex_i = 1; vertex_i < vertices.size; vertex_i++) - { - const auto vertex = vertices.fetch(vertex_i); - // Slab 0: dir {1, 0, 0} - proj = vertices.fetch(vertex_i).x; - if (proj < minProjections[0]) { minProjections[0] = proj; minVertices[0] = vertices.fetch(vertex_i); } - if (proj > maxProjections[0]) { maxProjections[0] = proj; maxVertices[0] = vertices.fetch(vertex_i); } - // Slab 1: dir {0, 1, 0} - proj = vertices.fetch(vertex_i).y; - if (proj < minProjections[1]) { minProjections[1] = proj; minVertices[1] = vertices.fetch(vertex_i); } - if (proj > maxProjections[1]) { maxProjections[1] = proj; maxVertices[1] = vertices.fetch(vertex_i); } - // Slab 2: dir {0, 0, 1} - proj = vertices.fetch(vertex_i).z; - if (proj < minProjections[2]) { minProjections[2] = proj; minVertices[2] = vertices.fetch(vertex_i); } - if (proj > maxProjections[2]) { maxProjections[2] = proj; maxVertices[2] = vertices.fetch(vertex_i); } - // Slab 3: dir {1, 1, 1} - proj = vertices.fetch(vertex_i).x + vertices.fetch(vertex_i).y + vertices.fetch(vertex_i).z; - if (proj < minProjections[3]) { minProjections[3] = proj; minVertices[3] = vertices.fetch(vertex_i); } - if (proj > maxProjections[3]) { maxProjections[3] = proj; 
maxVertices[3] = vertices.fetch(vertex_i); } - // Slab 4: dir {1, 1, -1} - proj = vertices.fetch(vertex_i).x + vertices.fetch(vertex_i).y - vertices.fetch(vertex_i).z; - if (proj < minProjections[4]) { minProjections[4] = proj; minVertices[4] = vertices.fetch(vertex_i); } - if (proj > maxProjections[4]) { maxProjections[4] = proj; maxVertices[4] = vertices.fetch(vertex_i); } - // Slab 5: dir {1, -1, 1} - proj = vertices.fetch(vertex_i).x - vertices.fetch(vertex_i).y + vertices.fetch(vertex_i).z; - if (proj < minProjections[5]) { minProjections[5] = proj; minVertices[5] = vertices.fetch(vertex_i); } - if (proj > maxProjections[5]) { maxProjections[5] = proj; maxVertices[5] = vertices.fetch(vertex_i); } - // Slab 6: dir {1, -1, -1} - proj = vertices.fetch(vertex_i).x - vertices.fetch(vertex_i).y - vertices.fetch(vertex_i).z; - if (proj < minProjections[6]) { minProjections[6] = proj; minVertices[6] = vertices.fetch(vertex_i); } - if (proj > maxProjections[6]) { maxProjections[6] = proj; maxVertices[6] = vertices.fetch(vertex_i); } - } - - return result; - }; - - - static auto findFurthestPointPair = [](const ExtremalVertices& extremalVertices) -> std::pair - { - int indexFurthestPair = 0; - auto maxSqDist = hlsl::dot(extremalVertices.maxPtr()[0], extremalVertices.minPtr()[0]); - for (int k = 1; k < SAMPLE_DIR_COUNT; k++) - { - const auto sqDist = hlsl::dot(extremalVertices.maxPtr()[k], extremalVertices.minPtr()[k]); - if (sqDist > maxSqDist) { maxSqDist = sqDist; indexFurthestPair = k; } - } - return { - extremalVertices.minPtr()[indexFurthestPair], - extremalVertices.maxPtr()[indexFurthestPair] - }; - }; - - static auto sqDistPointInfiniteEdge = [](const hlsl::float32_t3& q, const hlsl::float32_t3& p0, const hlsl::float32_t3& v) -> hlsl::float32_t - { - const auto u0 = q - p0; - const auto t = dot(v, u0); - const auto sqLen_v = hlsl::dot(v, v); - return hlsl::dot(u0, u0) - (t * t) / sqLen_v; - }; - - static auto findFurthestPointFromInfiniteEdge = [](const 
hlsl::float32_t3& p0, const hlsl::float32_t3& e0, const VertexCollection& vertices) - { - auto maxSqDist = sqDistPointInfiniteEdge(vertices[0], p0, e0); - int maxIndex = 0; - for (size_t i = 1; i < vertices.size; i++) - { - const auto sqDist = sqDistPointInfiniteEdge(vertices[i], p0, e0); - if (sqDist > maxSqDist) - { maxSqDist = sqDist; - maxIndex = i; - } - } - - struct Result - { - hlsl::float32_t3 point; - hlsl::float32_t sqDist; - }; - return Result{ - vertices[maxIndex], - maxSqDist - }; - }; - - static auto findExtremalProjs_OneDir = [](const hlsl::float32_t3& normal, const VertexCollection& vertices) - { - const auto firstProj = hlsl::dot(vertices[0], normal); - auto tMinProj = firstProj, tMaxProj = firstProj; - - for (int i = 1; i < vertices.size; i++) - { - const auto proj = hlsl::dot(vertices[i], normal); - if (proj < tMinProj) { tMinProj = proj; } - if (proj > tMaxProj) { tMaxProj = proj; } - } - - struct Result - { - hlsl::float32_t minProj; - hlsl::float32_t maxProj; - }; - return Result{ tMinProj, tMaxProj }; - }; - - static auto findExtremalPoints_OneDir = [](const hlsl::float32_t3& normal, const VertexCollection& vertices) - { - const auto firstProj = dot(vertices[0], normal); - - auto tMinProj = firstProj, tMaxProj = firstProj; - auto tMinVert = vertices[0], tMaxVert = vertices[0]; - - for (int i = 1; i < vertices.size; i++) - { - const auto proj = hlsl::dot(vertices[i], normal); - if (proj < tMinProj) { tMinProj = proj; tMinVert = vertices[i]; } - if (proj > tMaxProj) { tMaxProj = proj; tMaxVert = vertices[i]; } - } - - struct Result - { - hlsl::float32_t minProj; - hlsl::float32_t maxProj; - hlsl::float32_t3 minVert; - hlsl::float32_t3 maxVert; - }; - return Result{ tMinProj, tMaxProj, tMinVert, tMaxVert }; - }; - - static auto findUpperLowerTetraPoints = []( - const hlsl::float32_t3& n, - const VertexCollection& vertices, - const hlsl::float32_t3& p0) - { - const auto eps = 0.000001f; - const auto extremalPoints = findExtremalPoints_OneDir(n, 
vertices); - const auto triProj = hlsl::dot(p0, n); - - const auto maxVert = extremalPoints.maxProj - eps > triProj ? std::optional(extremalPoints.maxVert) : std::nullopt; - const auto minVert = extremalPoints.minProj + eps < triProj ? std::optional(extremalPoints.minVert) : std::nullopt; - - struct Result - { - std::optional minVert; - std::optional maxVert; - }; - return Result{ - minVert, - maxVert - }; - }; - - static auto findBestObbAxesFromTriangleNormalAndEdgeVectors = []( - const VertexCollection& vertices, - const hlsl::float32_t3 normal, - const std::array edges, - Axes& bestAxes, - hlsl::float32_t& bestVal) - { - hlsl::float32_t3 dmax, dmin, dlen; - - // The operands are assumed to be orthogonal and unit normals - const auto yExtremeProjs = findExtremalProjs_OneDir(normal, vertices); - dmin.y = yExtremeProjs.minProj; - dmax.y = yExtremeProjs.maxProj; - dlen.y = dmax.y - dmin.y; - - for (const auto& edge : edges) - { - const auto binormal = hlsl::cross(edge, normal); - - const auto xExtremeProjs = findExtremalProjs_OneDir(edge, vertices); - dmin.x = xExtremeProjs.minProj; - dmax.x = xExtremeProjs.maxProj; - dlen.x = dmax.x - dmin.x; - - const auto zExtremeProjs = findExtremalProjs_OneDir(binormal, vertices); - dmin.z = zExtremeProjs.minProj; - dmax.z = zExtremeProjs.maxProj; - dlen.z = dmax.z - dmin.z; - - const auto quality = getQualityValue(dlen); - if (quality < bestVal) - { - bestVal = quality; - bestAxes = { - edge, - normal, - binormal - }; - } - } - - }; - - - static auto findBaseTriangle = [](const ExtremalVertices& extremalVertices, const VertexCollection& vertices)-> LargeBaseTriangle - { - hlsl::float32_t eps = 0.000001f; - - std::array baseTriangleVertices; - Edges edges; - - // Find the furthest point pair among the selected min and max point pairs - std::tie(baseTriangleVertices[0], baseTriangleVertices[1]) = findFurthestPointPair(extremalVertices); - - // Degenerate case 1: - // If the found furthest points are located very close, return 
OBB aligned with the initial AABB - if (hlsl::dot(baseTriangleVertices[0], baseTriangleVertices[1]) < eps) - { - return { - .vertices = baseTriangleVertices, - .flag = LargeBaseTriangle::SECOND_POINT_CLOSE - }; - } - - // Compute edge vector of the line segment p0, p1 - edges[0] = hlsl::normalize(baseTriangleVertices[0] - baseTriangleVertices[1]); - - // Find a third point furthest away from line given by p0, e0 to define the large base triangle - const auto furthestPointRes = findFurthestPointFromInfiniteEdge(vertices[0], edges[0], vertices); - - // Degenerate case 2: - // If the third point is located very close to the line, return an OBB aligned with the line - if (furthestPointRes.sqDist < eps) - { - return { - .vertices = baseTriangleVertices, - .edges = edges, - .flag = LargeBaseTriangle::THIRD_POINT_CLOSE - }; - } - - // Compute the two remaining edge vectors and the normal vector of the base triangle - edges[1] = hlsl::normalize(baseTriangleVertices[1] - baseTriangleVertices[2]); - edges[2] = hlsl::normalize(baseTriangleVertices[2] - baseTriangleVertices[0]); - const auto normal = hlsl::normalize(hlsl::cross(edges[1], edges[0])); - - return { - .normal = normal, - .vertices = baseTriangleVertices, - .edges = edges, - .flag = LargeBaseTriangle::NORMAL - }; - }; - - auto findImprovedObbAxesFromUpperAndLowerTetrasOfBaseTriangle = [](const VertexCollection& vertices, - const LargeBaseTriangle& baseTriangle, - Axes& bestAxes, hlsl::float32_t& bestVal) - { - hlsl::float32_t3 q0, q1; // Top and bottom vertices for lower and upper tetra constructions - hlsl::float32_t3 f0, f1, f2; // Edge vectors towards q0; - hlsl::float32_t3 g0, g1, g2; // Edge vectors towards q1; - hlsl::float32_t3 n0, n1, n2; // Unit normals of top tetra tris - hlsl::float32_t3 m0, m1, m2; // Unit normals of bottom tetra tris - - // Find furthest points above and below the plane of the base triangle for tetra constructions - // For each found valid point, search for the best OBB axes based on 
the 3 arising triangles - const auto upperLowerTetraVertices = findUpperLowerTetraPoints(baseTriangle.normal, vertices, baseTriangle.vertices[0]); - if (upperLowerTetraVertices.minVert) - { - f0 = normalize(q0 - baseTriangle.vertices[0]); - f1 = normalize(q0 - baseTriangle.vertices[1]); - f2 = normalize(q0 - baseTriangle.vertices[2]); - n0 = normalize(cross(f1, baseTriangle.edges[0])); - n1 = normalize(cross(f2, baseTriangle.edges[1])); - n2 = normalize(cross(f0, baseTriangle.edges[2])); - findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, n0, { baseTriangle.edges[0], f1, f0 }, bestAxes, bestVal); - findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, n1, { baseTriangle.edges[1], f2, f1 }, bestAxes, bestVal); - findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, n2, { baseTriangle.edges[2], f0, f2 }, bestAxes, bestVal); - } - if (upperLowerTetraVertices.maxVert) - { - g0 = normalize(q1 - baseTriangle.vertices[0]); - g1 = normalize(q1 - baseTriangle.vertices[1]); - g2 = normalize(q1 - baseTriangle.vertices[2]); - m0 = normalize(cross(g1, baseTriangle.edges[0])); - m1 = normalize(cross(g2, baseTriangle.edges[1])); - m2 = normalize(cross(g0, baseTriangle.edges[2])); - findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, m0, { baseTriangle.edges[0], g1, g0 }, bestAxes, bestVal); - findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, m1, { baseTriangle.edges[1], g2, g1 }, bestAxes, bestVal); - findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, m2, { baseTriangle.edges[2], g0, g2 }, bestAxes, bestVal); - } - }; - - static auto buildObbFromAxesAndLocalMinMax = []( - const Axes& axes, - const hlsl::float32_t3& localMin, - const hlsl::float32_t3& localMax) -> hlsl::shapes::OBB<3, hlsl::float32_t> - { - const auto localMid = 0.5f * (localMin + localMax); - auto globalMid = axes[0] * localMid.x; - globalMid += axes[1] * localMid.y; - globalMid += axes[2] * localMid.z; - return { - .mid = globalMid, - .axes = axes, - .ext = 0.5f * (localMax - 
localMin) - }; - }; - - static auto computeObb = [](const Axes& axes, const VertexCollection& vertices) - { - const auto extremalX = findExtremalProjs_OneDir(axes[0], vertices); - const auto extremalY = findExtremalProjs_OneDir(axes[1], vertices); - const auto extremalZ = findExtremalProjs_OneDir(axes[2], vertices); - const auto localMin = hlsl::float32_t3{ extremalX.minProj, extremalY.minProj, extremalZ.minProj }; - const auto localMax = hlsl::float32_t3{ extremalX.maxProj, extremalY.maxProj, extremalZ.maxProj }; - return buildObbFromAxesAndLocalMinMax(axes, localMin, localMax); - }; - - static auto computeLineAlignedObb = [](const hlsl::float32_t3& u, const VertexCollection& vertices) - { - // Given u, build any orthonormal base u, v, w - - // Make sure r is not equal to u - auto r = u; - if (fabs(u.x) > fabs(u.y) && fabs(u.x) > fabs(u.z)) { r.x = 0; } - else if (fabs(u.y) > fabs(u.z)) { r.y = 0; } - else { r.z = 0; } - - const auto sqLen = hlsl::dot(r, r); - if (sqLen < FLT_EPSILON) { r.x = r.y = r.z = 1; } - - const auto v = normalize(cross(u, r)); - const auto w = normalize(cross(u, v)); - return computeObb({ u, v, w }, vertices); - }; - - const auto extremals = findExtremals_7FixedDirs(vertices); - - const auto* minProj = extremals.projections.minPtr(); - const auto* maxProj = extremals.projections.maxPtr(); - const auto* minVert = extremals.vertices.minPtr(); - const auto* maxVert = extremals.vertices.maxPtr(); - - - // Determine which points to use in the iterations below - const auto selectedVertices = [&] - { - if (vertices.size < SAMPLE_COUNT) { return vertices; } - return VertexCollection::fromSpan(extremals.vertices.values); - }(); - - // Compute size of AABB (max and min projections of vertices are already computed as slabs 0-2) - auto alMid = hlsl::float32_t3((minProj[0] + maxProj[0]) * 0.5f, (minProj[1] + maxProj[1]) * 0.5f, (minProj[2] + maxProj[2]) * 0.5f); - auto alLen = hlsl::float32_t3(maxProj[0] - minProj[0], maxProj[1] - minProj[1], 
maxProj[2] - minProj[2]); - auto alVal = getQualityValue(alLen); - - const auto baseTriangle = findBaseTriangle(extremals.vertices, vertices); - if (baseTriangle.flag == LargeBaseTriangle::SECOND_POINT_CLOSE) - return hlsl::shapes::OBB<>::createAxisAligned(alMid, alLen); - if (baseTriangle.flag == LargeBaseTriangle::THIRD_POINT_CLOSE) - return computeLineAlignedObb(baseTriangle.edges[0], vertices); - - - Axes bestAxes = { - hlsl::float32_t3{1.f, 0.f, 0.f}, - {0.f, 1.f, 0.f}, - {0.f, 0.f, 1.f}, - }; - auto bestVal = alVal; - // Find best OBB axes based on the base triangle - findBestObbAxesFromTriangleNormalAndEdgeVectors(selectedVertices, baseTriangle.normal, baseTriangle.edges, bestAxes, bestVal); - - // Find improved OBB axes based on constructed di-tetrahedral shape raised from base triangle - findImprovedObbAxesFromUpperAndLowerTetrasOfBaseTriangle(selectedVertices, baseTriangle, bestAxes, bestVal); - - const auto obb = computeObb(bestAxes, vertices); - - // Check if the OBB extent is still smaller than the intial AABB - if (getQualityValue(2.f * obb.ext) < alVal) - { - return hlsl::shapes::OBB<>{ - .mid = alMid, - .axes = bestAxes, - .ext = alLen / 2.f, - }; - } - return hlsl::shapes::OBB<>::createAxisAligned(alMid, alLen); + return CObbGenerator::calculateOBB(vertices); } + #if 0 //! Flips the direction of surfaces. Changes backfacing triangles to frontfacing //! triangles and vice versa. 
From b6e9c035c2785ffa1352495167798bd7d67bff85 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 6 Sep 2025 17:20:16 +0700 Subject: [PATCH 045/472] Fix create axis aligned bounding box --- include/nbl/builtin/hlsl/shapes/aabb.hlsl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/shapes/aabb.hlsl b/include/nbl/builtin/hlsl/shapes/aabb.hlsl index 6c3a2d278b..1d5b772f63 100644 --- a/include/nbl/builtin/hlsl/shapes/aabb.hlsl +++ b/include/nbl/builtin/hlsl/shapes/aabb.hlsl @@ -70,11 +70,11 @@ struct OBB { OBB ret; ret.mid = mid; - ret.ext = len; + ret.ext = len * 0.5f; for (auto dim_i = 0; dim_i < D; dim_i++) { - ret.axes[dim_i] = point_t(); - ret.axes[dim_i][D] = 1; + ret.axes[dim_i] = point_t(0); + ret.axes[dim_i][dim_i] = 1; } return ret; } From e5ceb1b35175c2a3a7c6e2899e00a73c8a28e3d9 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 8 Sep 2025 10:56:03 +0700 Subject: [PATCH 046/472] enable debug draw by default --- CMakeLists.txt | 2 +- examples_tests | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3c2c1c21f8..25134b04e7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -182,7 +182,7 @@ option(NBL_FAST_MATH "Enable fast low-precision math" ON) option(NBL_BUILD_EXAMPLES "Enable building examples" ON) option(NBL_BUILD_MITSUBA_LOADER "Enable nbl::ext::MitsubaLoader?" OFF) # TODO: once it compies turn this ON by default! option(NBL_BUILD_IMGUI "Enable nbl::ext::ImGui?" ON) -option(NBL_BUILD_DEBUG_DRAW "Enable Nabla Debug Draw extension?" OFF) +option(NBL_BUILD_DEBUG_DRAW "Enable Nabla Debug Draw extension?" ON) option(NBL_BUILD_OPTIX "Enable nbl::ext::OptiX?" 
OFF) if(NBL_COMPILE_WITH_CUDA) diff --git a/examples_tests b/examples_tests index 347933d952..3b1016e58a 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 347933d9523618715f6373a97cba85f38e177985 +Subproject commit 3b1016e58ab5d9cb53bd9c37707dad356d30173d From 199b75ab55722a3be5784e9d29e37d6bc8572bef Mon Sep 17 00:00:00 2001 From: Mateusz Kielan Date: Sat, 13 Sep 2025 20:52:04 +0200 Subject: [PATCH 047/472] Update CONTRIBUTING.md small change to make sure `ditt` doesn't autodelete --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3f5a1d4595..24020210a7 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -3,7 +3,7 @@ _I would really love to write a nice proper CONTRIBUTING.md, so excuse me for wh We would really love for you to tackle one of our burning issues, which you can view by clicking this link: -https://github.com/buildaworldnet/IrrlichtBAW/issues?q=is%3Aissue+is%3Aopen+sort%3Aupdated-desc +https://github.com/Devsh-Graphics-Programming/Nabla/issues (sorted by the most recently updated) From bfa233fe68521c52f7484a7ebf04ecef8d099b80 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Sep 2025 11:13:54 +0700 Subject: [PATCH 048/472] fix embed builtin resource build --- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index f3f33d2733..f445398e0c 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -5,6 +5,10 @@ #include "nbl/ext/DebugDraw/CDrawAABB.h" #include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" +#ifdef NBL_EMBED_BUILTIN_RESOURCES +#include "nbl/ext/debug_draw/builtin/CArchive.h" +#endif + using namespace nbl; using namespace core; using namespace video; From 229457cc6a5c5d5bc2547262dcab12eb589af56a Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 16 Sep 2025 11:25:24 +0700 Subject: 
[PATCH 049/472] Fix Imgui to always render --- src/nbl/ext/ImGui/ImGui.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/nbl/ext/ImGui/ImGui.cpp b/src/nbl/ext/ImGui/ImGui.cpp index f477e96cdf..4c7c96953e 100644 --- a/src/nbl/ext/ImGui/ImGui.cpp +++ b/src/nbl/ext/ImGui/ImGui.cpp @@ -332,6 +332,7 @@ core::smart_refctd_ptr UI::createPipeline(SCreation rasterizationParams.faceCullingMode = EFCM_NONE; rasterizationParams.depthWriteEnable = false; rasterizationParams.depthBoundsTestEnable = false; + rasterizationParams.depthCompareOp = ECO_ALWAYS; rasterizationParams.viewportCount = creationParams.viewportCount; } From 3b67580f67281905c2c6d694ed405148931841e0 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 16 Sep 2025 21:21:17 +0200 Subject: [PATCH 050/472] resolve https://github.com/Devsh-Graphics-Programming/Nabla/pull/900#discussion_r2352585679 --- .../ext/DebugDraw/builtin/hlsl/aabb_instances.fragment.hlsl | 2 +- .../nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl | 2 +- include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl | 2 +- include/nbl/system/ISystem.h | 1 + src/nbl/ext/DebugDraw/CDrawAABB.cpp | 2 +- src/nbl/ext/DebugDraw/CMakeLists.txt | 4 ++-- 6 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.fragment.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.fragment.hlsl index 686e8934db..09a12f3d07 100644 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.fragment.hlsl +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.fragment.hlsl @@ -1,6 +1,6 @@ #pragma shader_stage(fragment) -#include "common.hlsl" +#include "nbl/ext/DebugDraw/builtin/hlsl/common.hlsl" using namespace nbl::ext::debug_draw; diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl index bb535a7216..5f67aa2f1e 100644 --- 
a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl @@ -3,7 +3,7 @@ #include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" #include "nbl/builtin/hlsl/glsl_compat/core.hlsl" #include "nbl/builtin/hlsl/bda/__ptr.hlsl" -#include "common.hlsl" +#include "nbl/ext/DebugDraw/builtin/hlsl/common.hlsl" using namespace nbl::hlsl; using namespace nbl::ext::debug_draw; diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl index e1a426dec8..64ca75d5ab 100644 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl @@ -3,7 +3,7 @@ #include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" #include "nbl/builtin/hlsl/glsl_compat/core.hlsl" #include "nbl/builtin/hlsl/bda/__ptr.hlsl" -#include "common.hlsl" +#include "nbl/ext/DebugDraw/builtin/hlsl/common.hlsl" using namespace nbl::hlsl; using namespace nbl::ext::debug_draw; diff --git a/include/nbl/system/ISystem.h b/include/nbl/system/ISystem.h index 4e02221d7c..65f0351582 100644 --- a/include/nbl/system/ISystem.h +++ b/include/nbl/system/ISystem.h @@ -70,6 +70,7 @@ class NBL_API2 ISystem : public core::IReferenceCounted // virtual inline bool isDirectory(const system::path& p) const { + // TODO: fix bug, input "nbl/ext/DebugDraw/builtin/hlsl" -> returs true when no such dir present in mounted stuff due to how it uses parent paths in loop (goes up up till matches "nbl" builtin archive and thinks it resolved the requested dir) if (isPathReadOnly(p)) return p.extension()==""; // TODO: this is a temporary decision until we figure out how to check if a file is directory in android APK else diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index f445398e0c..d231f21e3e 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -162,7 
+162,7 @@ smart_refctd_ptr DrawAABB::createPipeline(SCreationParamet return params.utilities->getLogicalDevice()->compileShader({ shaderSrc.get() }); }; - if (!system->isDirectory(path(NBL_ARCHIVE_ENTRY.data()))) + if (!system->exists(path(NBL_ARCHIVE_ENTRY) / "common.hlsl", {})) mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), NBL_ARCHIVE_ENTRY); auto vertexShader = compileShader(vsPath, IShader::E_SHADER_STAGE::ESS_VERTEX); diff --git a/src/nbl/ext/DebugDraw/CMakeLists.txt b/src/nbl/ext/DebugDraw/CMakeLists.txt index b62d06f518..4cb2ee54cf 100644 --- a/src/nbl/ext/DebugDraw/CMakeLists.txt +++ b/src/nbl/ext/DebugDraw/CMakeLists.txt @@ -20,8 +20,8 @@ nbl_create_ext_library_project( ) # this should be standard for all extensions -set(_ARCHIVE_ENTRY_KEY_ "DebugDraw/builtin/hlsl") # then each one has unique archive key -get_filename_component(_ARCHIVE_ABSOLUTE_ENTRY_PATH_ "${NBL_EXT_INTERNAL_INCLUDE_DIR}/nbl/ext" ABSOLUTE) +set(_ARCHIVE_ENTRY_KEY_ "nbl/ext/DebugDraw/builtin/hlsl") # then each one has unique archive key +get_filename_component(_ARCHIVE_ABSOLUTE_ENTRY_PATH_ "${NBL_EXT_INTERNAL_INCLUDE_DIR}" ABSOLUTE) get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) From fe4600c03a8d658776ed7f54f11baa942c4b9fd9 Mon Sep 17 00:00:00 2001 From: devsh Date: Sat, 27 Sep 2025 15:19:20 +0200 Subject: [PATCH 051/472] make mitsuba loader compile by default --- CMakeLists.txt | 2 +- CMakePresets.json | 2 +- examples_tests | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8d01197982..90be0328dd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -173,7 +173,7 @@ option(NBL_BUILD_DPL "Enable DPL (Dynamic Parallelism Library)" OFF) option(NBL_PCH "Enable pre-compiled header" ON) option(NBL_FAST_MATH "Enable fast low-precision math" OFF) # the reason OFF is by default 
now is the var controling it at build time was set AFTER BuildConfigOptions was generated - resulting in the feature being always OFF regardless the value xD - so just for sanity, keeping the same behaviour by default option(NBL_BUILD_EXAMPLES "Enable building examples" ON) -option(NBL_BUILD_MITSUBA_LOADER "Enable nbl::ext::MitsubaLoader?" OFF) # TODO: once it compies turn this ON by default! +option(NBL_BUILD_MITSUBA_LOADER "Enable nbl::ext::MitsubaLoader?" ON) option(NBL_BUILD_IMGUI "Enable nbl::ext::ImGui?" ON) option(NBL_BUILD_OPTIX "Enable nbl::ext::OptiX?" OFF) diff --git a/CMakePresets.json b/CMakePresets.json index e91c46d8e9..3117e607ac 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -15,7 +15,7 @@ "NBL_UPDATE_GIT_SUBMODULE": "OFF", "NBL_COMPILE_WITH_CUDA": "OFF", "NBL_BUILD_OPTIX": "OFF", - "NBL_BUILD_MITSUBA_LOADER": "OFF", + "NBL_BUILD_MITSUBA_LOADER": "ON", "NBL_BUILD_RADEON_RAYS": "OFF", "_NBL_COMPILE_WITH_OPEN_EXR_": "ON", "NBL_EXPLICIT_MODULE_LOAD_LOG": "ON", diff --git a/examples_tests b/examples_tests index fd8ebfeaca..850ca5d414 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit fd8ebfeacaf42f3cf63b1545cccce03809f9c8d6 +Subproject commit 850ca5d41412d8182a72fb88dd4e9e61df55e19b From 8b6b21077d466628b09fbaa0469a8db9074168c6 Mon Sep 17 00:00:00 2001 From: devsh Date: Sat, 27 Sep 2025 23:48:13 +0200 Subject: [PATCH 052/472] draft the scene asset --- include/nbl/asset/IAsset.h | 36 ++++++------- include/nbl/asset/ICPUMorphTargets.h | 4 +- include/nbl/asset/ICPUScene.h | 54 ++++++++++++++++++++ include/nbl/asset/IScene.h | 23 +++++++++ include/nbl/asset/interchange/ISceneLoader.h | 30 +++++++++++ 5 files changed, 127 insertions(+), 20 deletions(-) create mode 100644 include/nbl/asset/ICPUScene.h create mode 100644 include/nbl/asset/IScene.h create mode 100644 include/nbl/asset/interchange/ISceneLoader.h diff --git a/include/nbl/asset/IAsset.h b/include/nbl/asset/IAsset.h index a691fa6af6..7c6a33193d 100644 --- 
a/include/nbl/asset/IAsset.h +++ b/include/nbl/asset/IAsset.h @@ -156,24 +156,24 @@ class IAsset : virtual public core::IReferenceCounted //! inline bool isMutable() const {return m_mutable;} - inline void visitDependents(std::function visit) const - { - visitDependents_impl([&visit](const IAsset* dep)->bool - { - if (dep) - return visit(dep); - return true; - }); - } - - inline void visitDependents(std::function visit) - { - assert(isMutable()); - visitDependents([&](const IAsset* dependent) -> bool - { - return visit(const_cast(dependent)); - }); - } + inline void visitDependents(std::function visit) const + { + visitDependents_impl([&visit](const IAsset* dep)->bool + { + if (dep) + return visit(dep); + return true; + }); + } + + inline void visitDependents(std::function visit) + { + assert(isMutable()); + visitDependents([&](const IAsset* dependent) -> bool + { + return visit(const_cast(dependent)); + }); + } virtual bool valid() const = 0; diff --git a/include/nbl/asset/ICPUMorphTargets.h b/include/nbl/asset/ICPUMorphTargets.h index 545d2cd8a9..29924f9727 100644 --- a/include/nbl/asset/ICPUMorphTargets.h +++ b/include/nbl/asset/ICPUMorphTargets.h @@ -23,7 +23,7 @@ class NBL_API2 ICPUMorphTargets : public IAsset, public IMorphTargetsvalid()) @@ -55,7 +55,7 @@ class NBL_API2 ICPUMorphTargets : public IAsset, public IMorphTargets visit) const //override + inline void visitDependents_impl(std::function visit) const override { auto nonNullOnly = [&visit](const IAsset* dep)->bool { diff --git a/include/nbl/asset/ICPUScene.h b/include/nbl/asset/ICPUScene.h new file mode 100644 index 0000000000..4ea7a485b4 --- /dev/null +++ b/include/nbl/asset/ICPUScene.h @@ -0,0 +1,54 @@ +// Copyright (C) 2025-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_I_CPU_SCENE_H_INCLUDED_ +#define _NBL_ASSET_I_CPU_SCENE_H_INCLUDED_ + + +#include "nbl/asset/IScene.h" +// TODO: change to true IR later +#include "nbl/asset/material_compiler3/CFrontendIR.h" + + +namespace nbl::asset +{ +// +class NBL_API2 ICPUScene : public IAsset, public IScene +{ + using base_t = IScene; + + public: + inline ICPUScene() = default; + + constexpr static inline auto AssetType = ET_SCENE; + inline E_TYPE getAssetType() const override { return AssetType; } + + inline bool valid() const override + { + return true; + } + + inline core::smart_refctd_ptr clone(uint32_t _depth=~0u) const + { + const auto nextDepth = _depth ? (_depth-1):0; + auto retval = core::smart_refctd_ptr(); + return retval; + } + + protected: + // + inline void visitDependents_impl(std::function visit) const override + { + } + + + // suggested contents: + // - morph target list + // - material table + // - instance list (morph target, keyframed transforms, material table indexings, FUTURE: reference skeleton) + // - area light list (OBB decompositions, material table indexings) + // - envlight data +}; +} + +#endif \ No newline at end of file diff --git a/include/nbl/asset/IScene.h b/include/nbl/asset/IScene.h new file mode 100644 index 0000000000..69bf00ab3a --- /dev/null +++ b/include/nbl/asset/IScene.h @@ -0,0 +1,23 @@ +// Copyright (C) 2025-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_I_SCENE_H_INCLUDED_ +#define _NBL_ASSET_I_SCENE_H_INCLUDED_ + + +#include "nbl/asset/IMorphTargets.h" + + +namespace nbl::asset +{ +// This is incredibly temporary, lots of things are going to change +class NBL_API2 IScene : public virtual core::IReferenceCounted +{ + public: + + protected: + virtual ~IScene() = default; +}; +} + +#endif \ No newline at end of file diff --git a/include/nbl/asset/interchange/ISceneLoader.h b/include/nbl/asset/interchange/ISceneLoader.h new file mode 100644 index 0000000000..f61a2d3cea --- /dev/null +++ b/include/nbl/asset/interchange/ISceneLoader.h @@ -0,0 +1,30 @@ +// Copyright (C) 2025-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_I_SCENE_LOADER_H_INCLUDED_ +#define _NBL_ASSET_I_SCENE_LOADER_H_INCLUDED_ + + +#include "nbl/core/declarations.h" + +#include "nbl/asset/ICPUScene.h" +#include "nbl/asset/interchange/IAssetLoader.h" + + +namespace nbl::asset +{ + +class ISceneLoader : public IAssetLoader +{ + public: + virtual inline uint64_t getSupportedAssetTypesBitfield() const override {return IAsset::ET_SCENE;} + + protected: + inline ISceneLoader() {} + + private: +}; + +} + +#endif From ab9e7b8e397a649580149bf04e7d1fc82ccf241b Mon Sep 17 00:00:00 2001 From: devsh Date: Sat, 27 Sep 2025 23:48:36 +0200 Subject: [PATCH 053/472] start reworking the Mitsuba Loader into a Scene Loader --- .../nbl/ext/MitsubaLoader/CMitsubaLoader.h | 19 ++- .../nbl/ext/MitsubaLoader/CMitsubaMetadata.h | 110 +---------------- include/nbl/ext/MitsubaLoader/SContext.h | 112 +++--------------- 3 files changed, 30 insertions(+), 211 deletions(-) diff --git a/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h b/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h index e61ab3fa87..c844e8dde4 100644 --- 
a/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h +++ b/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h @@ -1,18 +1,15 @@ // Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_EXT_MISTUBA_LOADER_C_MITSUBA_LOADER_H_INCLUDED_ +#define _NBL_EXT_MISTUBA_LOADER_C_MITSUBA_LOADER_H_INCLUDED_ -#ifndef __C_MITSUBA_LOADER_H_INCLUDED__ -#define __C_MITSUBA_LOADER_H_INCLUDED__ #include "nbl/asset/asset.h" -#include "IFileSystem.h" -#include "nbl/asset/utils/ICPUVirtualTexture.h" - #include "nbl/ext/MitsubaLoader/CSerializedLoader.h" -#include "nbl/ext/MitsubaLoader/CMitsubaMetadata.h" -#include "nbl/ext/MitsubaLoader/CElementShape.h" +//#include "nbl/ext/MitsubaLoader/CMitsubaMetadata.h" +//#include "nbl/ext/MitsubaLoader/CElementShape.h" #include "nbl/ext/MitsubaLoader/SContext.h" @@ -23,8 +20,7 @@ namespace nbl::ext::MitsubaLoader class CElementBSDF; class CMitsubaMaterialCompilerFrontend; - -// TODO: we need a GLSL to C++ compatibility wrapper +#if 0 // TODO //#include "nbl/builtin/glsl/ext/MitsubaLoader/instance_data_struct.glsl" #define uint uint32_t #define uvec2 uint64_t @@ -52,7 +48,7 @@ struct nbl_glsl_ext_Mitsuba_Loader_instance_data_t using instance_data_t = nbl_glsl_ext_Mitsuba_Loader_instance_data_t; -class CMitsubaLoader : public asset::IRenderpassIndependentPipelineLoader +class CMitsubaLoader : public asset::ISceneLoader { friend class CMitsubaMaterialCompilerFrontend; public: @@ -67,8 +63,6 @@ class CMitsubaLoader : public asset::IRenderpassIndependentPipelineLoader //! 
Destructor virtual ~CMitsubaLoader() = default; - static core::smart_refctd_ptr createPipelineLayout(asset::IAssetManager* _manager, const asset::ICPUVirtualTexture* _vt); - // core::vector getMesh(SContext& ctx, uint32_t hierarchyLevel, CElementShape* shape); core::vector loadShapeGroup(SContext& ctx, uint32_t hierarchyLevel, const CElementShape::ShapeGroup* shapegroup, const core::matrix3x4SIMD& relTform); @@ -101,6 +95,7 @@ class CMitsubaLoader : public asset::IRenderpassIndependentPipelineLoader //! Loads an asset from an opened file, returns nullptr in case of failure. asset::SAssetBundle loadAsset(io::IReadFile* _file, const asset::IAssetLoader::SAssetLoadParams& _params, asset::IAssetLoader::IAssetLoaderOverride* _override = nullptr, uint32_t _hierarchyLevel = 0u) override; }; +#endif } #endif \ No newline at end of file diff --git a/include/nbl/ext/MitsubaLoader/CMitsubaMetadata.h b/include/nbl/ext/MitsubaLoader/CMitsubaMetadata.h index 087d59b772..b7c2a398cb 100644 --- a/include/nbl/ext/MitsubaLoader/CMitsubaMetadata.h +++ b/include/nbl/ext/MitsubaLoader/CMitsubaMetadata.h @@ -1,11 +1,10 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_EXT_MISTUBA_LOADER_C_MITSUBA_METADATA_H_INCLUDED_ +#define _NBL_EXT_MISTUBA_LOADER_C_MITSUBA_METADATA_H_INCLUDED_ -#ifndef __NBL_C_MITSUBA_METADATA_H_INCLUDED__ -#define __NBL_C_MITSUBA_METADATA_H_INCLUDED__ -#include "nbl/core/compile_config.h" #include "nbl/asset/metadata/IAssetMetadata.h" #include "nbl/asset/ICPUImage.h" @@ -15,15 +14,11 @@ #include "nbl/ext/MitsubaLoader/CElementSensor.h" #include "nbl/ext/MitsubaLoader/CElementShape.h" -namespace nbl -{ -namespace ext -{ -namespace MitsubaLoader + +namespace nbl::ext::MitsubaLoader { //! 
A class to derive mitsuba mesh loader metadata objects from - class CMitsubaMetadata : public asset::IAssetMetadata { public: @@ -32,55 +27,12 @@ class CMitsubaMetadata : public asset::IAssetMetadata public: std::string m_id; }; - class CDerivativeMap : public asset::IImageMetadata - { - public: - CDerivativeMap() : m_scale(1.f) {} - explicit CDerivativeMap(float scale) : m_scale(scale) {} - - float m_scale; - }; - class CRenderpassIndependentPipeline : public asset::IRenderpassIndependentPipelineMetadata - { - public: - CRenderpassIndependentPipeline() : IRenderpassIndependentPipelineMetadata(), m_ds0() {} - template - CRenderpassIndependentPipeline(core::smart_refctd_ptr&& _ds0, Args&&... args) : IRenderpassIndependentPipelineMetadata(std::forward(args)...), m_ds0(std::move(_ds0)) - { - } - - inline CRenderpassIndependentPipeline& operator=(CRenderpassIndependentPipeline&& other) - { - IRenderpassIndependentPipelineMetadata::operator=(std::move(other)); - std::swap(m_ds0, other.m_ds0); - return *this; - } - - core::smart_refctd_ptr m_ds0; - }; class CMesh : public asset::IMeshMetadata, public CID { public: - CMesh() : IMeshMetadata(), CID(), m_instanceAuxData(nullptr,nullptr), type(CElementShape::Type::INVALID) {} + CMesh() : IMeshMetadata(), CID(), type(CElementShape::Type::INVALID) {} ~CMesh() {} - struct SInstanceAuxilaryData - { - SInstanceAuxilaryData& operator=(SInstanceAuxilaryData&& other) - { - frontEmitter = std::move(other.frontEmitter); - backEmitter = std::move(other.backEmitter); - bsdf = std::move(other.bsdf); - return *this; - } - - CElementEmitter frontEmitter; // type is invalid if not used - CElementEmitter backEmitter; // type is invalid if not used - CMitsubaMaterialCompilerFrontend::front_and_back_t bsdf; - }; - - core::SRange m_instanceAuxData; - CElementShape::Type type; }; struct SGlobal @@ -92,14 +44,6 @@ class CMitsubaMetadata : public asset::IAssetMetadata CElementIntegrator m_integrator; core::vector m_sensors; - core::vector 
m_emitters; - core::smart_refctd_ptr m_VT; - core::smart_refctd_ptr m_ds0; - core::vector> m_envMapImages; - //has to go after #version and before required user-provided descriptors and functions - std::string m_materialCompilerGLSL_declarations; - //has to go after required user-provided descriptors and functions and before the rest of shader (especially entry point function) - std::string m_materialCompilerGLSL_source; } m_global; CMitsubaMetadata() : @@ -113,11 +57,6 @@ class CMitsubaMetadata : public asset::IAssetMetadata const char* getLoaderName() const override { return LoaderName; } //! - inline const CRenderpassIndependentPipeline* getAssetSpecificMetadata(const asset::ICPURenderpassIndependentPipeline* asset) const - { - const auto found = IAssetMetadata::getAssetSpecificMetadata(asset); - return static_cast(found); - } inline const CMesh* getAssetSpecificMetadata(const asset::ICPUMesh* asset) const { const auto found = IAssetMetadata::getAssetSpecificMetadata(asset); @@ -127,26 +66,9 @@ class CMitsubaMetadata : public asset::IAssetMetadata private: friend class CMitsubaLoader; - meta_container_t m_metaPplnStorage; - core::smart_refctd_dynamic_array m_semanticStorage; - CRenderpassIndependentPipeline* m_metaPplnStorageIt; - meta_container_t m_metaMeshStorage; - core::smart_refctd_dynamic_array m_metaMeshInstanceStorage; - core::smart_refctd_dynamic_array m_metaMeshInstanceAuxStorage; CMesh* m_meshStorageIt; - CMesh::SInstance* m_instanceStorageIt; - CMesh::SInstanceAuxilaryData* m_instanceAuxStorageIt; - - meta_container_t m_metaDerivMapStorage; - CDerivativeMap* m_metaDerivMapStorageIt; - inline void reservePplnStorage(uint32_t pplnCount, core::smart_refctd_dynamic_array&& _semanticStorage) - { - m_metaPplnStorage = IAssetMetadata::createContainer(pplnCount); - m_semanticStorage = std::move(_semanticStorage); - m_metaPplnStorageIt = m_metaPplnStorage->begin(); - } inline void reserveMeshStorage(uint32_t meshCount, uint32_t instanceCount) { 
m_metaMeshStorage = IAssetMetadata::createContainer(meshCount); @@ -156,17 +78,6 @@ class CMitsubaMetadata : public asset::IAssetMetadata m_instanceStorageIt = m_metaMeshInstanceStorage->begin(); m_instanceAuxStorageIt = m_metaMeshInstanceAuxStorage->begin(); } - inline void reserveDerivMapStorage(uint32_t count) - { - m_metaDerivMapStorage = IAssetMetadata::createContainer(count); - m_metaDerivMapStorageIt = m_metaDerivMapStorage->begin(); - } - inline void addPplnMeta(const asset::ICPURenderpassIndependentPipeline* ppln, core::smart_refctd_ptr&& _ds0) - { - *m_metaPplnStorageIt = CMitsubaMetadata::CRenderpassIndependentPipeline(std::move(_ds0),core::SRange(m_semanticStorage->begin(),m_semanticStorage->end())); - IAssetMetadata::insertAssetSpecificMetadata(ppln,m_metaPplnStorageIt); - m_metaPplnStorageIt++; - } template inline uint32_t addMeshMeta(const asset::ICPUMesh* mesh, std::string&& id, const CElementShape::Type type, InstanceIterator instancesBegin, InstanceIterator instancesEnd) { @@ -195,16 +106,7 @@ class CMitsubaMetadata : public asset::IAssetMetadata return meta->m_instances.size(); } - inline void addDerivMapMeta(const asset::ICPUImage* derivmap, float scale) - { - auto* meta = m_metaDerivMapStorageIt++; - meta->m_scale = scale; - IAssetMetadata::insertAssetSpecificMetadata(derivmap, meta); - } }; } -} -} - #endif diff --git a/include/nbl/ext/MitsubaLoader/SContext.h b/include/nbl/ext/MitsubaLoader/SContext.h index 687f97054d..572a927fba 100644 --- a/include/nbl/ext/MitsubaLoader/SContext.h +++ b/include/nbl/ext/MitsubaLoader/SContext.h @@ -1,48 +1,38 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_EXT_MISTUBA_LOADER_C_MITSUBA_LOADER_CONTEXT_H_INCLUDED_ +#define _NBL_EXT_MISTUBA_LOADER_C_MITSUBA_LOADER_CONTEXT_H_INCLUDED_ -#ifndef __C_MITSUBA_LOADER_CONTEXT_H_INCLUDED__ -#define __C_MITSUBA_LOADER_CONTEXT_H_INCLUDED__ - -#include "nbl/asset/ICPUMesh.h" -#include "nbl/asset/utils/IGeometryCreator.h" -#include "nbl/asset/material_compiler/CMaterialCompilerGLSLRasterBackend.h" +#include "nbl/asset/ICPUPolygonGeometry.h" +//#include "nbl/asset/utils/IGeometryCreator.h" #include "nbl/asset/interchange/CIESProfileLoader.h" -#include "nbl/ext/MitsubaLoader/CMitsubaMaterialCompilerFrontend.h" -#include "nbl/ext/MitsubaLoader/CElementShape.h" +//#include "nbl/ext/MitsubaLoader/CMitsubaMaterialCompilerFrontend.h" +//#include "nbl/ext/MitsubaLoader/CElementShape.h" -namespace nbl -{ -namespace ext -{ -namespace MitsubaLoader +namespace nbl::ext::MitsubaLoader { struct SContext { public: SContext( - const asset::IGeometryCreator* _geomCreator, - const asset::IMeshManipulator* _manipulator, +// const asset::IGeometryCreator* _geomCreator, +// const asset::IMeshManipulator* _manipulator, const asset::IAssetLoader::SAssetLoadContext& _params, asset::IAssetLoader::IAssetLoaderOverride* _override, - CMitsubaMetadata* _metadata +// CMitsubaMetadata* _metadata ); - const asset::IGeometryCreator* creator; - const asset::IMeshManipulator* manipulator; +// const asset::IGeometryCreator* creator; +// const asset::IMeshManipulator* manipulator; const asset::IAssetLoader::SAssetLoadContext inner; asset::IAssetLoader::IAssetLoaderOverride* override_; - CMitsubaMetadata* meta; - - _NBL_STATIC_INLINE_CONSTEXPR uint32_t VT_PAGE_SZ_LOG2 = 7u;//128 - _NBL_STATIC_INLINE_CONSTEXPR uint32_t VT_PHYSICAL_PAGE_TEX_TILES_PER_DIM_LOG2 = 4u;//16 - _NBL_STATIC_INLINE_CONSTEXPR uint32_t VT_PAGE_PADDING = 8u; - _NBL_STATIC_INLINE_CONSTEXPR uint32_t VT_MAX_ALLOCATABLE_TEX_SZ_LOG2 = 12u;//4096 +// CMitsubaMetadata* 
meta; +#if 0 // using group_ass_type = core::vector>; //core::map groupCache; @@ -171,21 +161,6 @@ struct SContext return params; } - inline core::smart_refctd_ptr getSampler(const asset::ICPUSampler::SParams& params) const - { - const std::string samplerKey = samplerCacheKey(params); - const asset::IAsset::E_TYPE types[2] = {asset::IAsset::ET_SAMPLER,asset::IAsset::ET_TERMINATING_ZERO}; - auto samplerBundle = override_->findCachedAsset(samplerKey,types,inner,0u); - if (samplerBundle.getContents().empty()) - { - auto sampler = core::make_smart_refctd_ptr(params); - override_->insertAssetIntoCache(asset::SAssetBundle(nullptr,{sampler}),samplerKey,inner,0); - return sampler; - } - else - return core::smart_refctd_ptr_static_cast(samplerBundle.getContents().begin()[0]); - } - //index of root node in IR using bsdf_type = const CMitsubaMaterialCompilerFrontend::front_and_back_t; //caches instr buffer instr-wise offset (.first) and instruction count (.second) for each bsdf node @@ -214,67 +189,14 @@ struct SContext }; core::unordered_multimap mapMesh2instanceData; - struct SPipelineCacheKey - { - asset::SVertexInputParams vtxParams; - asset::SPrimitiveAssemblyParams primParams; - - inline bool operator==(const SPipelineCacheKey& rhs) const - { - return memcmp(&vtxParams, &rhs.vtxParams, sizeof(vtxParams)) == 0 && memcmp(&primParams, &rhs.primParams, sizeof(primParams)) == 0; - } - - struct hash - { - inline size_t operator()(const SPipelineCacheKey& k) const - { - constexpr size_t BYTESZ = sizeof(k.vtxParams) + sizeof(k.primParams); - uint8_t mem[BYTESZ]{}; - uint8_t* ptr = mem; - memcpy(ptr, &k.vtxParams, sizeof(k.vtxParams)); - ptr += sizeof(k.vtxParams); - memcpy(ptr, &k.primParams, sizeof(k.primParams)); - ptr += sizeof(k.primParams); - - return std::hash{}(std::string_view(reinterpret_cast(mem), BYTESZ)); - } - }; - }; core::unordered_map, SPipelineCacheKey::hash> pipelineCache; - +#endif //material compiler core::smart_refctd_ptr ir; 
CMitsubaMaterialCompilerFrontend frontend; - asset::material_compiler::CMaterialCompilerGLSLRasterBackend::SContext backend_ctx; - asset::material_compiler::CMaterialCompilerGLSLRasterBackend backend; private: - // TODO: commonalize this to all loaders - static std::string samplerCacheKey(const asset::ICPUSampler::SParams& samplerParams) - { - std::string samplerCacheKey = "__Sampler"; - - if (samplerParams.MinFilter==asset::ISampler::ETF_LINEAR) - samplerCacheKey += "?trilinear"; - else - samplerCacheKey += "?nearest"; - - static const char* wrapModeName[] = - { - "?repeat", - "?clamp_to_edge", - "?clamp_to_border", - "?mirror", - "?mirror_clamp_to_edge", - "?mirror_clamp_to_border" - }; - samplerCacheKey += wrapModeName[samplerParams.TextureWrapU]; - samplerCacheKey += wrapModeName[samplerParams.TextureWrapV]; - - return samplerCacheKey; - } }; -}}} - +} #endif \ No newline at end of file From 36d99431215297015c714e8de10ad83a127cd037 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 8 Oct 2025 15:16:33 +0200 Subject: [PATCH 054/472] Get IES cpps to compile --- include/nbl/asset/asset.h | 2 +- src/nbl/CMakeLists.txt | 5 ++ .../asset/interchange/CIESProfileLoader.cpp | 47 +++++++++++++++---- src/nbl/asset/interchange/CIESProfileLoader.h | 23 +-------- src/nbl/asset/utils/CIESProfile.cpp | 6 ++- src/nbl/asset/utils/CIESProfile.h | 1 - src/nbl/asset/utils/CIESProfileParser.h | 2 +- 7 files changed, 51 insertions(+), 35 deletions(-) diff --git a/include/nbl/asset/asset.h b/include/nbl/asset/asset.h index fe70e81646..f08844a182 100644 --- a/include/nbl/asset/asset.h +++ b/include/nbl/asset/asset.h @@ -67,6 +67,6 @@ #include "nbl/asset/metadata/CMTLMetadata.h" #include "nbl/asset/metadata/CPLYMetadata.h" #include "nbl/asset/metadata/CSTLMetadata.h" -//#include "nbl/asset/metadata/CIESProfileMetadata.h" +#include "nbl/asset/metadata/CIESProfileMetadata.h" #endif diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 6bf9e9abdd..9105be3b41 100755 
--- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -221,6 +221,11 @@ set(NBL_ASSET_SOURCES asset/interchange/CImageWriterTGA.cpp asset/interchange/CImageWriterOpenEXR.cpp # TODO: Nahim asset/interchange/CGLIWriter.cpp + +# IES profile loaders + asset/interchange/CIESProfileLoader.cpp + asset/utils/CIESProfileParser.cpp + asset/utils/CIESProfile.cpp ) set(NBL_VIDEO_SOURCES # Utilities diff --git a/src/nbl/asset/interchange/CIESProfileLoader.cpp b/src/nbl/asset/interchange/CIESProfileLoader.cpp index eecdde3190..744756b607 100644 --- a/src/nbl/asset/interchange/CIESProfileLoader.cpp +++ b/src/nbl/asset/interchange/CIESProfileLoader.cpp @@ -3,24 +3,53 @@ using namespace nbl; using namespace asset; -asset::SAssetBundle -CIESProfileLoader::loadAsset(io::IReadFile* _file, - const asset::IAssetLoader::SAssetLoadParams& _params, - asset::IAssetLoader::IAssetLoaderOverride* _override, - uint32_t _hierarchyLevel) { - if (!_file) +bool CIESProfileLoader::isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr logger) const +{ + system::IFile::success_t success; + std::string versionBuffer(0x45, ' '); + const auto* fName = _file->getFileName().c_str(); + _file->read(success, versionBuffer.data(), 0, versionBuffer.size()); + + if (success) + { + for (const auto& it : CIESProfileParser::VALID_SIGNATURES) + if (versionBuffer.find(it.data()) != std::string::npos) + return true; + + logger.log("%s: Invalid IES signature for \"%s\" file!", system::ILogger::ELL_ERROR, __FUNCTION__, fName); + } + else + logger.log("%s: Failed to read \"%s\" file!", system::ILogger::ELL_ERROR, __FUNCTION__, fName); + + return false; +} + +asset::SAssetBundle CIESProfileLoader::loadAsset(system::IFile* _file, const asset::IAssetLoader::SAssetLoadParams& _params, asset::IAssetLoader::IAssetLoaderOverride* _override, uint32_t _hierarchyLevel) +{ + if (not _file) + { + _params.logger.log("%s: Nullptr system::IFile pointer!", system::ILogger::ELL_ERROR, __FUNCTION__); return {}; + 
} IAssetLoader::SAssetLoadContext loadContex(_params, _file); core::vector data(_file->getSize()); - _file->read(data.data(), _file->getSize()); + system::IFile::success_t success; + const auto* fName = _file->getFileName().c_str(); + _file->read(success, data.data(), 0, _file->getSize()); + + if (not success) + { + _params.logger.log("%s: Failed to read \"%s\" file!", system::ILogger::ELL_ERROR, __FUNCTION__, fName); + return {}; + } CIESProfileParser parser(data.data(), data.size()); CIESProfile profile; - if (!parser.parse(profile)) + if (not parser.parse(profile)) { - os::Printer::log("ERROR: Emission profile parsing error: " + std::string(parser.getErrorMsg()), ELL_ERROR); + _params.logger.log("%s: Failed to parse emission profile for \"%s\" file!", system::ILogger::ELL_ERROR, __FUNCTION__, fName); return {}; } diff --git a/src/nbl/asset/interchange/CIESProfileLoader.h b/src/nbl/asset/interchange/CIESProfileLoader.h index 64ef9688ee..5eb26c861b 100644 --- a/src/nbl/asset/interchange/CIESProfileLoader.h +++ b/src/nbl/asset/interchange/CIESProfileLoader.h @@ -5,8 +5,6 @@ #define __NBL_ASSET_C_IES_PROFILE_LOADER_H_INCLUDED__ #include "nbl/asset/ICPUImage.h" -#include "nbl/asset/ICPUShader.h" - #include "nbl/asset/IAssetManager.h" #include "nbl/asset/interchange/IAssetLoader.h" @@ -25,20 +23,7 @@ class CIESProfileLoader final : public asset::IAssetLoader \return True if file seems to be loadable. */ - bool isALoadableFileFormat(io::IReadFile *_file) const override - { - const size_t begginingOfFile = _file->getPos(); - _file->seek(0ull); - std::string versionBuffer(0x45, ' '); - _file->read(versionBuffer.data(), versionBuffer.size()); - _file->seek(begginingOfFile); - - for (const auto& it : CIESProfileParser::VALID_SIGNATURES) - if (versionBuffer.find(it.data()) != std::string::npos) - return true; - - return false; - } + bool isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr logger) const override; //! 
Returns an array of string literals terminated by nullptr const char **getAssociatedFileExtensions() const override @@ -55,11 +40,7 @@ class CIESProfileLoader final : public asset::IAssetLoader uint64_t getSupportedAssetTypesBitfield() const override { return asset::IAsset::ET_IMAGE_VIEW; } //! Loads an asset from an opened file, returns nullptr in case of failure. - asset::SAssetBundle - loadAsset(io::IReadFile* _file, - const asset::IAssetLoader::SAssetLoadParams& _params, - asset::IAssetLoader::IAssetLoaderOverride* _override = nullptr, - uint32_t _hierarchyLevel = 0u) override; + asset::SAssetBundle loadAsset(system::IFile* _file, const asset::IAssetLoader::SAssetLoadParams& _params, asset::IAssetLoader::IAssetLoaderOverride* _override = nullptr, uint32_t _hierarchyLevel = 0u) override; }; } // namespace nbl::asset #endif // __NBL_ASSET_C_IES_PROFILE_LOADER_H_INCLUDED__ diff --git a/src/nbl/asset/utils/CIESProfile.cpp b/src/nbl/asset/utils/CIESProfile.cpp index b507ab0d45..5f89019ad7 100644 --- a/src/nbl/asset/utils/CIESProfile.cpp +++ b/src/nbl/asset/utils/CIESProfile.cpp @@ -139,7 +139,9 @@ core::smart_refctd_ptr CIESProfile::createIESTexture(Execu region.bufferImageHeight = 0u; region.bufferOffset = 0u; - auto buffer = core::make_smart_refctd_ptr(texelBytesz * bufferRowLength * height); + asset::ICPUBuffer::SCreationParams bParams; + bParams.size = texelBytesz * bufferRowLength * height; + auto buffer = asset::ICPUBuffer::create(std::move(bParams)); if (!outImg->setBufferAndRegions(std::move(buffer), core::make_refctd_dynamic_array>(1ull, region))) return {}; @@ -189,7 +191,7 @@ core::smart_refctd_ptr CIESProfile::createIESTexture(Execu CBasicImageFilterCommon::clip_region_functor_t clip(state.subresource, state.outRange, creationParams.format); const auto& regions = outImg->getRegions(state.subresource.mipLevel); - CBasicImageFilterCommon::executePerRegion(std::forward(policy), outImg.get(), fill, regions.begin(), regions.end(), clip); + 
CBasicImageFilterCommon::executePerRegion(std::forward(policy), outImg.get(), fill, regions, clip); } ICPUImageView::SCreationParams viewParams = {}; diff --git a/src/nbl/asset/utils/CIESProfile.h b/src/nbl/asset/utils/CIESProfile.h index ea3d539613..479483c052 100644 --- a/src/nbl/asset/utils/CIESProfile.h +++ b/src/nbl/asset/utils/CIESProfile.h @@ -6,7 +6,6 @@ #define __NBL_ASSET_C_IES_PROFILE_H_INCLUDED__ #include "nbl/asset/metadata/CIESProfileMetadata.h" -#include "nbl/core/Types.h" #include namespace nbl diff --git a/src/nbl/asset/utils/CIESProfileParser.h b/src/nbl/asset/utils/CIESProfileParser.h index cc613efc29..c5b57dd77b 100644 --- a/src/nbl/asset/utils/CIESProfileParser.h +++ b/src/nbl/asset/utils/CIESProfileParser.h @@ -99,7 +99,7 @@ namespace nbl if (!this->errorMsg) this->errorMsg = errorMsg; - if constexpr (std::is_same_v) + if constexpr (std::is_same_v) return -1.0; else return 0; From 99c1b8a4038a9f52cb3d2fdd4dd123d3488107fd Mon Sep 17 00:00:00 2001 From: devsh Date: Thu, 9 Oct 2025 12:37:22 +0200 Subject: [PATCH 055/472] start the loader --- examples_tests | 2 +- include/nbl/asset/asset.h | 1 + include/nbl/asset/interchange/IAssetLoader.h | 4 +- include/nbl/asset/interchange/IImageLoader.h | 5 +- .../nbl/ext/MitsubaLoader/CMitsubaLoader.h | 32 +- .../CMitsubaMaterialCompilerFrontend.h | 11 +- .../nbl/ext/MitsubaLoader/CSerializedLoader.h | 2 +- include/nbl/ext/MitsubaLoader/ParserUtil.h | 46 ++- include/nbl/ext/MitsubaLoader/SContext.h | 10 +- src/nbl/asset/interchange/CIESProfileLoader.h | 13 +- src/nbl/ext/MitsubaLoader/CMakeLists.txt | 4 +- src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp | 280 +++--------------- 12 files changed, 103 insertions(+), 307 deletions(-) diff --git a/examples_tests b/examples_tests index 850ca5d414..38be9e2711 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 850ca5d41412d8182a72fb88dd4e9e61df55e19b +Subproject commit 38be9e27119e574fd9424bbbe3b955dfdd4616a4 diff --git 
a/include/nbl/asset/asset.h b/include/nbl/asset/asset.h index fe70e81646..71f689c6fc 100644 --- a/include/nbl/asset/asset.h +++ b/include/nbl/asset/asset.h @@ -61,6 +61,7 @@ #include "nbl/asset/interchange/IAssetLoader.h" #include "nbl/asset/interchange/IImageLoader.h" #include "nbl/asset/interchange/IGeometryLoader.h" +#include "nbl/asset/interchange/ISceneLoader.h" #include "nbl/asset/interchange/IAssetWriter.h" #include "nbl/asset/interchange/IImageWriter.h" #include "nbl/asset/metadata/COpenEXRMetadata.h" diff --git a/include/nbl/asset/interchange/IAssetLoader.h b/include/nbl/asset/interchange/IAssetLoader.h index 64ed4a7fd3..35bff3acab 100644 --- a/include/nbl/asset/interchange/IAssetLoader.h +++ b/include/nbl/asset/interchange/IAssetLoader.h @@ -86,8 +86,8 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted enum E_LOADER_PARAMETER_FLAGS : uint64_t { ELPF_NONE = 0, //!< default value, it doesn't do anything - ELPF_RIGHT_HANDED_MESHES = 0x1, //!< specifies that a mesh will be flipped in such a way that it'll look correctly in right-handed camera system - ELPF_DONT_COMPILE_GLSL = 0x2, //!< it states that GLSL won't be compiled to SPIR-V if it is loaded or generated + /*deprecated*/ELPF_RIGHT_HANDED_MESHES = 0x1, //!< specifies that a mesh will be flipped in such a way that it'll look correctly in right-handed camera system + /*deprecated*/ELPF_DONT_COMPILE_GLSL = 0x2, //!< it states that GLSL won't be compiled to SPIR-V if it is loaded or generated ELPF_LOAD_METADATA_ONLY = 0x4 //!< it forces the loader to not load the entire scene for performance in special cases to fetch metadata. }; diff --git a/include/nbl/asset/interchange/IImageLoader.h b/include/nbl/asset/interchange/IImageLoader.h index c7b6119ede..a1177e7d00 100644 --- a/include/nbl/asset/interchange/IImageLoader.h +++ b/include/nbl/asset/interchange/IImageLoader.h @@ -1,9 +1,8 @@ // Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. 
// This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h - -#ifndef __NBL_ASSET_I_IMAGE_LOADER_H_INCLUDED__ -#define __NBL_ASSET_I_IMAGE_LOADER_H_INCLUDED__ +#ifndef _NBL_ASSET_I_IMAGE_LOADER_H_INCLUDED_ +#define _NBL_ASSET_I_IMAGE_LOADER_H_INCLUDED_ #include "nbl/core/declarations.h" diff --git a/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h b/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h index c844e8dde4..09275963b3 100644 --- a/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h +++ b/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h @@ -46,17 +46,15 @@ struct nbl_glsl_ext_Mitsuba_Loader_instance_data_t #undef mat4x3 #undef nbl_glsl_MC_material_data_t using instance_data_t = nbl_glsl_ext_Mitsuba_Loader_instance_data_t; - +#endif class CMitsubaLoader : public asset::ISceneLoader { - friend class CMitsubaMaterialCompilerFrontend; +// friend class CMitsubaMaterialCompilerFrontend; public: //! Constructor - CMitsubaLoader(asset::IAssetManager* _manager, io::IFileSystem* _fs); - - void initialize() override; - + inline CMitsubaLoader() = default; +#if 0 protected: io::IFileSystem* m_filesystem; @@ -78,24 +76,18 @@ class CMitsubaLoader : public asset::ISceneLoader core::smart_refctd_ptr createDS0(const SContext& _ctx, asset::ICPUPipelineLayout* _layout, const asset::material_compiler::CMaterialCompilerGLSLBackendCommon::result_t& _compResult, Iter meshBegin, Iter meshEnd); public: - //! Check if the file might be loaded by this class - /** Check might look into the file. - \param file File handle to check. - \return True if file seems to be loadable. */ - bool isALoadableFileFormat(io::IReadFile* _file) const override; - - //! Returns an array of string literals terminated by nullptr - const char** getAssociatedFileExtensions() const override; +#endif + bool isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr logger=nullptr) const override; - //! 
Returns the assets loaded by the loader - /** Bits of the returned value correspond to each IAsset::E_TYPE - enumeration member, and the return value cannot be 0. */ - uint64_t getSupportedAssetTypesBitfield() const override { return asset::IAsset::ET_MESH/*|asset::IAsset::ET_SCENE|asset::IAsset::ET_IMPLEMENTATION_SPECIFIC_METADATA*/; } + inline const char** getAssociatedFileExtensions() const override + { + static const char* ext[]{ "xml", nullptr }; + return ext; + } //! Loads an asset from an opened file, returns nullptr in case of failure. - asset::SAssetBundle loadAsset(io::IReadFile* _file, const asset::IAssetLoader::SAssetLoadParams& _params, asset::IAssetLoader::IAssetLoaderOverride* _override = nullptr, uint32_t _hierarchyLevel = 0u) override; + asset::SAssetBundle loadAsset(system::IFile* _file, const asset::IAssetLoader::SAssetLoadParams& _params, asset::IAssetLoader::IAssetLoaderOverride* _override=nullptr, uint32_t _hierarchyLevel=0u) override; }; -#endif } #endif \ No newline at end of file diff --git a/include/nbl/ext/MitsubaLoader/CMitsubaMaterialCompilerFrontend.h b/include/nbl/ext/MitsubaLoader/CMitsubaMaterialCompilerFrontend.h index 42bad88655..5ef55d4e54 100644 --- a/include/nbl/ext/MitsubaLoader/CMitsubaMaterialCompilerFrontend.h +++ b/include/nbl/ext/MitsubaLoader/CMitsubaMaterialCompilerFrontend.h @@ -1,13 +1,11 @@ // Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _C_MITSUBA_MATERIAL_COMPILER_FRONTEND_H_INCLUDED_ +#define _C_MITSUBA_MATERIAL_COMPILER_FRONTEND_H_INCLUDED_ -#ifndef __C_MITSUBA_MATERIAL_COMPILER_FRONTEND_H_INCLUDED__ -#define __C_MITSUBA_MATERIAL_COMPILER_FRONTEND_H_INCLUDED__ -#include "nbl/core/Types.h" - -#include "nbl/asset/material_compiler/IR.h" +//#include "nbl/asset/material_compiler/IR.h" #include "nbl/ext/MitsubaLoader/CElementBSDF.h" #include "nbl/ext/MitsubaLoader/CElementEmitter.h" @@ -23,6 +21,7 @@ struct SContext; class CMitsubaMaterialCompilerFrontend { public: +#ifdef 0 using IRNode = asset::material_compiler::IR::INode; using EmitterNode = asset::material_compiler::IR::CEmitterNode; enum E_IMAGE_VIEW_SEMANTIC : uint8_t @@ -60,8 +59,8 @@ class CMitsubaMaterialCompilerFrontend tex_ass_type getErrorTexture(const E_IMAGE_VIEW_SEMANTIC semantic) const; IRNode* createIRNode(asset::material_compiler::IR* ir, const CElementBSDF* _bsdf); +#endif }; } - #endif \ No newline at end of file diff --git a/include/nbl/ext/MitsubaLoader/CSerializedLoader.h b/include/nbl/ext/MitsubaLoader/CSerializedLoader.h index 1ac08aba79..44bb0739c5 100644 --- a/include/nbl/ext/MitsubaLoader/CSerializedLoader.h +++ b/include/nbl/ext/MitsubaLoader/CSerializedLoader.h @@ -21,7 +21,7 @@ class CSerializedLoader final : public asset::IGeometryLoader public: inline CSerializedLoader() = default; - inline bool isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr logger = nullptr) const override + inline bool isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr logger=nullptr) const override { FileHeader header; diff --git a/include/nbl/ext/MitsubaLoader/ParserUtil.h b/include/nbl/ext/MitsubaLoader/ParserUtil.h index 65c8e1fb58..981884554e 100644 --- a/include/nbl/ext/MitsubaLoader/ParserUtil.h +++ b/include/nbl/ext/MitsubaLoader/ParserUtil.h @@ -1,50 +1,39 @@ // Copyright (C) 2018-2020 - DevSH Graphics 
Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_EXT_MISTUBA_LOADER_I_PARSER_UTIL_H_INCLUDED_ +#define _NBL_EXT_MISTUBA_LOADER_I_PARSER_UTIL_H_INCLUDED_ -#ifndef __I_PARSER_UTIL_H_INCLUDED__ -#define __I_PARSER_UTIL_H_INCLUDED__ - -//#include "nbl/core/core.h" - -//#include "IFileSystem.h" #include "nbl/asset/interchange/IAssetLoader.h" -#include "nbl/ext/MitsubaLoader/CElementFactory.h" -#include "nbl/ext/MitsubaLoader/CMitsubaMetadata.h" +//#include "nbl/ext/MitsubaLoader/CElementFactory.h" +//#include "nbl/ext/MitsubaLoader/CMitsubaMetadata.h" #include "expat/lib/expat.h" #include -namespace nbl -{ -namespace ext -{ -namespace MitsubaLoader +namespace nbl::ext::MitsubaLoader { - - - class ParserLog { -public: - static inline void setLogger(const system::logger_opt_ptr& logger) { ParserLog::logger = logger; }; + public: + static inline void setLogger(const system::logger_opt_ptr& _logger) {logger=_logger;} - /*prints this message: - Mitsuba loader error: - Invalid .xml file structure: message */ - static void invalidXMLFileStructure(const std::string& errorMessage); + /*prints this message: + Mitsuba loader error: + Invalid .xml file structure: message */ + static void invalidXMLFileStructure(const std::string& errorMessage); -private: - static system::logger_opt_ptr logger; + private: + static system::logger_opt_ptr logger; }; template -class ElementPool // : public std::tuple...> +class ElementPool // similar to : public std::tuple...> { core::SimpleBlockBasedAllocator,core::aligned_allocator> poolAllocator; public: @@ -68,7 +57,9 @@ class ParserManager XML_Parser parser; system::path currentXMLDir; }; + public: +#if 0 //! 
Constructor ParserManager(system::ISystem* _system, asset::IAssetLoader::IAssetLoaderOverride* _override) : m_system(_system), m_override(_override), m_sceneDeclCount(0), @@ -127,12 +118,9 @@ class ParserManager each element of index N is parent of the element of index N+1 the scene element is a parent of all elements of index 0 */ core::stack > elements; - +#endif friend class CElementFactory; }; } -} -} - #endif \ No newline at end of file diff --git a/include/nbl/ext/MitsubaLoader/SContext.h b/include/nbl/ext/MitsubaLoader/SContext.h index 572a927fba..c1d9c6d9b1 100644 --- a/include/nbl/ext/MitsubaLoader/SContext.h +++ b/include/nbl/ext/MitsubaLoader/SContext.h @@ -15,6 +15,8 @@ namespace nbl::ext::MitsubaLoader { +class CMitsubaMetadata; + struct SContext { public: @@ -23,14 +25,14 @@ struct SContext // const asset::IMeshManipulator* _manipulator, const asset::IAssetLoader::SAssetLoadContext& _params, asset::IAssetLoader::IAssetLoaderOverride* _override, -// CMitsubaMetadata* _metadata + CMitsubaMetadata* _metadata ); // const asset::IGeometryCreator* creator; // const asset::IMeshManipulator* manipulator; const asset::IAssetLoader::SAssetLoadContext inner; asset::IAssetLoader::IAssetLoaderOverride* override_; -// CMitsubaMetadata* meta; + CMitsubaMetadata* meta; #if 0 // @@ -192,8 +194,8 @@ struct SContext core::unordered_map, SPipelineCacheKey::hash> pipelineCache; #endif //material compiler - core::smart_refctd_ptr ir; - CMitsubaMaterialCompilerFrontend frontend; +// core::smart_refctd_ptr ir; +// CMitsubaMaterialCompilerFrontend frontend; private: }; diff --git a/src/nbl/asset/interchange/CIESProfileLoader.h b/src/nbl/asset/interchange/CIESProfileLoader.h index 64ef9688ee..d0b116a3c3 100644 --- a/src/nbl/asset/interchange/CIESProfileLoader.h +++ b/src/nbl/asset/interchange/CIESProfileLoader.h @@ -1,16 +1,17 @@ // Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h -#ifndef __NBL_ASSET_C_IES_PROFILE_LOADER_H_INCLUDED__ -#define __NBL_ASSET_C_IES_PROFILE_LOADER_H_INCLUDED__ +#ifndef _NBL_ASSET_C_IES_PROFILE_LOADER_H_INCLUDED_ +#define _NBL_ASSET_C_IES_PROFILE_LOADER_H_INCLUDED_ #include "nbl/asset/ICPUImage.h" -#include "nbl/asset/ICPUShader.h" #include "nbl/asset/IAssetManager.h" - #include "nbl/asset/interchange/IAssetLoader.h" -#include "nbl/asset/utils/CIESProfileParser.h" + +#if 0 // TODO: Arek + +#include "nbl/asset/utils/CIESProfileParser.h" // TODO: move to `src/asset/interchange` #include "nbl/asset/metadata/CIESProfileMetadata.h" namespace nbl::asset @@ -62,4 +63,6 @@ class CIESProfileLoader final : public asset::IAssetLoader uint32_t _hierarchyLevel = 0u) override; }; } // namespace nbl::asset +#endif // end TODO: Arek + #endif // __NBL_ASSET_C_IES_PROFILE_LOADER_H_INCLUDED__ diff --git a/src/nbl/ext/MitsubaLoader/CMakeLists.txt b/src/nbl/ext/MitsubaLoader/CMakeLists.txt index 6efa07ba23..f321324b60 100644 --- a/src/nbl/ext/MitsubaLoader/CMakeLists.txt +++ b/src/nbl/ext/MitsubaLoader/CMakeLists.txt @@ -21,7 +21,7 @@ set(NBL_EXT_MITSUBA_LOADER_H # ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CMitsubaSerializedMetadata.h # ${NBL_EXT_INTERNAL_INCLUDE_DIR}/ParserUtil.h ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CSerializedLoader.h -# ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CMitsubaLoader.h + ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CMitsubaLoader.h # ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CMitsubaMaterialCompilerFrontend.h ) @@ -40,7 +40,7 @@ set(NBL_EXT_MITSUBA_LOADER_SRC # CElementFactory.cpp # ParserUtil.cpp CSerializedLoader.cpp -# CMitsubaLoader.cpp + CMitsubaLoader.cpp # CMitsubaMaterialCompilerFrontend.cpp ) diff --git a/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp b/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp index 093a5b0624..cb6c9fc365 100644 --- a/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp +++ b/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp @@ -1,19 +1,19 @@ -#include 
"..\..\..\..\include\nbl\ext\MitsubaLoader\CMitsubaLoader.h" // Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h -#include "os.h" #include #include "nbl/ext/MitsubaLoader/CMitsubaLoader.h" #include "nbl/ext/MitsubaLoader/ParserUtil.h" +#if 0 #include "nbl/asset/utils/CDerivativeMapCreator.h" #include "nbl/ext/MitsubaLoader/CMitsubaSerializedMetadata.h" #include "nbl/ext/MitsubaLoader/CGLSLMitsubaLoaderBuiltinIncludeLoader.h" +#endif #if defined(_NBL_DEBUG) || defined(_NBL_RELWITHDEBINFO) @@ -24,57 +24,10 @@ namespace nbl { using namespace asset; -namespace ext -{ -namespace MitsubaLoader -{ - -_NBL_STATIC_INLINE_CONSTEXPR const char* DUMMY_VERTEX_SHADER = -R"(#version 430 core - -layout (location = 0) in vec3 vPosition; -layout (location = 2) in vec2 vUV; -layout (location = 3) in vec3 vNormal; - -layout (location = 0) out vec3 WorldPos; -layout (location = 1) flat out uint InstanceIndex; -layout (location = 2) out vec3 Normal; -layout (location = 3) out vec2 UV; - -#include -#include - -#ifndef _NBL_VERT_SET1_BINDINGS_DEFINED_ -#define _NBL_VERT_SET1_BINDINGS_DEFINED_ -layout (set = 1, binding = 0, row_major, std140) uniform UBO { - nbl_glsl_SBasicViewParameters params; -} CamData; -#endif //_NBL_VERT_SET1_BINDINGS_DEFINED_ - -#include - -layout (set = 0, binding = 5, row_major, std430) readonly restrict buffer InstDataBuffer { - nbl_glsl_ext_Mitsuba_Loader_instance_data_t data[]; -} InstData; - -void main() +namespace ext::MitsubaLoader { - mat4x3 tform = InstData.data[gl_InstanceIndex].tform; - mat4 mvp = nbl_glsl_pseudoMul4x4with4x3(CamData.params.MVP, tform); - gl_Position = nbl_glsl_pseudoMul4x4with3x1(mvp, vPosition); - WorldPos = nbl_glsl_pseudoMul3x4with3x1(tform, vPosition); - mat3 normalMat = 
mat3(InstData.data[gl_InstanceIndex].normalMatrixRow0,InstData.data[gl_InstanceIndex].normalMatrixRow1,InstData.data[gl_InstanceIndex].normalMatrixRow2); - Normal = transpose(normalMat)*normalize(vNormal); - UV = vUV; - InstanceIndex = gl_InstanceIndex; -} - -)"; -_NBL_STATIC_INLINE_CONSTEXPR const char* FRAGMENT_SHADER_PROLOGUE = -R"(#version 430 core -#extension GL_EXT_shader_integer_mix : require -)"; +#if 0 // old material compiler _NBL_STATIC_INLINE_CONSTEXPR const char* FRAGMENT_SHADER_INPUT_OUTPUT = R"( layout (location = 0) in vec3 WorldPos; @@ -166,62 +119,9 @@ void main() } #endif )"; - -_NBL_STATIC_INLINE_CONSTEXPR const char* VERTEX_SHADER_CACHE_KEY = "nbl/builtin/specialized_shader/loaders/mitsuba_xml/default"; - -_NBL_STATIC_INLINE_CONSTEXPR uint32_t PAGE_TAB_TEX_BINDING = 0u; -_NBL_STATIC_INLINE_CONSTEXPR uint32_t PHYS_PAGE_VIEWS_BINDING = 1u; -_NBL_STATIC_INLINE_CONSTEXPR uint32_t PRECOMPUTED_VT_DATA_BINDING = 2u; -_NBL_STATIC_INLINE_CONSTEXPR uint32_t INSTR_BUF_BINDING = 3u; -_NBL_STATIC_INLINE_CONSTEXPR uint32_t BSDF_BUF_BINDING = 4u; -_NBL_STATIC_INLINE_CONSTEXPR uint32_t INSTANCE_DATA_BINDING = 5u; -_NBL_STATIC_INLINE_CONSTEXPR uint32_t PREFETCH_INSTR_BUF_BINDING = 6u; -_NBL_STATIC_INLINE_CONSTEXPR uint32_t EMITTER_DATA_BUF_BINDING = 7u; -_NBL_STATIC_INLINE_CONSTEXPR uint32_t DS0_BINDING_COUNT_WO_VT = 6u; - -template -static void insertAssetIntoCache(core::smart_refctd_ptr& asset, const char* path, IAssetManager* _assetMgr) // TODO: @Crisspl this is duplicate code -{ - asset::SAssetBundle bundle(nullptr,{ asset }); - _assetMgr->changeAssetKey(bundle, path); - _assetMgr->insertAssetIntoCache(bundle); -} -// @Crisspl TODO this needs to use the IAssetLoaderOverride instead -template -static auto getBuiltinAsset(const char* _key, IAssetManager* _assetMgr) -> std::enable_if_t, core::smart_refctd_ptr> -{ - size_t storageSz = 1ull; - asset::SAssetBundle bundle; - const IAsset::E_TYPE types[]{ assetType, static_cast(0u) }; - - 
_assetMgr->findAssets(storageSz, &bundle, _key, types); - auto assets = bundle.getContents(); - if (assets.empty()) - return nullptr; - //assert(!assets.empty()); - - return core::smart_refctd_ptr_static_cast(assets.begin()[0]); -} - -static core::smart_refctd_ptr createSpecShader(const char* _glsl, asset::ISpecializedShader::E_SHADER_STAGE _stage) -{ - auto shader = core::make_smart_refctd_ptr(_glsl); - asset::ICPUSpecializedShader::SInfo info(nullptr, nullptr, "main", _stage); - auto specd = core::make_smart_refctd_ptr(std::move(shader), std::move(info)); - - return specd; -} -static core::smart_refctd_ptr createAndCacheVertexShader(asset::IAssetManager* _manager, const char* _glsl) -{ - auto vs = createSpecShader(_glsl, asset::ISpecializedShader::ESS_VERTEX); - - insertAssetIntoCache(vs, VERTEX_SHADER_CACHE_KEY, _manager); - - return vs; -} static core::smart_refctd_ptr createFragmentShader(const asset::material_compiler::CMaterialCompilerGLSLBackendCommon::result_t& _mcRes, size_t _VTstorageViewCount) { - std::string source = + std::string source = FRAGMENT_SHADER_PROLOGUE + _mcRes.fragmentShaderSource_declarations + FRAGMENT_SHADER_INPUT_OUTPUT + @@ -232,28 +132,9 @@ static core::smart_refctd_ptr createFragmentShader return createSpecShader(source.c_str(), asset::ISpecializedShader::ESS_FRAGMENT); } -static core::smart_refctd_ptr createPipeline(core::smart_refctd_ptr&& _layout, core::smart_refctd_ptr&& _vertshader, core::smart_refctd_ptr&& _fragshader) -{ - auto vs = std::move(_vertshader); - auto fs = std::move(_fragshader); - asset::ICPUSpecializedShader* shaders[2]{ vs.get(), fs.get() }; - - SRasterizationParams rasterParams; - rasterParams.faceCullingMode = asset::EFCM_NONE; - rasterParams.frontFaceIsCCW = 1; - auto pipeline = core::make_smart_refctd_ptr( - std::move(_layout), - shaders, shaders+2, - //all the params will be overriden with those loaded with meshes - SVertexInputParams(), - SBlendParams(), - SPrimitiveAssemblyParams(), - rasterParams - ); - 
- return pipeline; -} +#endif +#if 0 static core::smart_refctd_ptr createImageView(core::smart_refctd_ptr&& _img) // TODO: this should seriously be a utility somewhere { const auto& iparams = _img->getCreationParameters(); @@ -363,139 +244,68 @@ static core::smart_refctd_ptr createSingleChannelImage(const a return outImg; } - -core::smart_refctd_ptr CMitsubaLoader::createPipelineLayout(asset::IAssetManager* _manager, const asset::ICPUVirtualTexture* _vt) -{ - core::smart_refctd_ptr ds0layout; - { - auto sizes = _vt->getDSlayoutBindings(nullptr, nullptr); - auto bindings = core::make_refctd_dynamic_array>(sizes.first + DS0_BINDING_COUNT_WO_VT); - auto samplers = core::make_refctd_dynamic_array< core::smart_refctd_dynamic_array>>(sizes.second); - - _vt->getDSlayoutBindings(bindings->data(), samplers->data(), PAGE_TAB_TEX_BINDING, PHYS_PAGE_VIEWS_BINDING); - auto* b = bindings->data() + (bindings->size() - DS0_BINDING_COUNT_WO_VT); - b[0].binding = PRECOMPUTED_VT_DATA_BINDING; - b[0].count = 1u; - b[0].samplers = nullptr; - b[0].stageFlags = asset::ISpecializedShader::ESS_FRAGMENT; - b[0].type = asset::EDT_STORAGE_BUFFER; - - b[1].binding = INSTR_BUF_BINDING; - b[1].count = 1u; - b[1].samplers = nullptr; - b[1].stageFlags = asset::ISpecializedShader::ESS_FRAGMENT; - b[1].type = asset::EDT_STORAGE_BUFFER; - - b[2].binding = BSDF_BUF_BINDING; - b[2].count = 1u; - b[2].samplers = nullptr; - b[2].stageFlags = asset::ISpecializedShader::ESS_FRAGMENT; - b[2].type = asset::EDT_STORAGE_BUFFER; - - b[3].binding = INSTANCE_DATA_BINDING; - b[3].count = 1u; - b[3].samplers = nullptr; - b[3].stageFlags = static_cast(asset::ISpecializedShader::ESS_FRAGMENT | asset::ISpecializedShader::ESS_VERTEX); - b[3].type = asset::EDT_STORAGE_BUFFER; - - b[4].binding = PREFETCH_INSTR_BUF_BINDING; - b[4].count = 1u; - b[4].samplers = nullptr; - b[4].stageFlags = asset::ISpecializedShader::ESS_FRAGMENT; - b[4].type = asset::EDT_STORAGE_BUFFER; - - b[5].binding = EMITTER_DATA_BUF_BINDING; - 
b[5].count = 1u; - b[5].samplers = nullptr; - b[5].stageFlags = asset::ISpecializedShader::ESS_FRAGMENT; - b[5].type = asset::EDT_STORAGE_BUFFER; - - ds0layout = core::make_smart_refctd_ptr(bindings->data(), bindings->data() + bindings->size()); - } - auto ds1layout = getBuiltinAsset("nbl/builtin/descriptor_set_layout/basic_view_parameters", _manager); - - return core::make_smart_refctd_ptr(nullptr, nullptr, std::move(ds0layout), std::move(ds1layout), nullptr, nullptr); -} - -CMitsubaLoader::CMitsubaLoader(asset::IAssetManager* _manager, io::IFileSystem* _fs) : asset::IRenderpassIndependentPipelineLoader(_manager), m_filesystem(_fs) -{ -#ifdef _NBL_DEBUG - setDebugName("CMitsubaLoader"); #endif -} - -void CMitsubaLoader::initialize() -{ - IRenderpassIndependentPipelineLoader::initialize(); - auto* glslc = m_assetMgr->getGLSLCompiler(); - - glslc->getIncludeHandler()->addBuiltinIncludeLoader(core::make_smart_refctd_ptr(m_filesystem)); -} - -bool CMitsubaLoader::isALoadableFileFormat(io::IReadFile* _file) const +bool CMitsubaLoader::isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr logger) const { - constexpr uint32_t stackSize = 16u*1024u; + constexpr uint32_t stackSize = 16u<<10u; char tempBuff[stackSize+1]; tempBuff[stackSize] = 0; static const char* stringsToFind[] = { " 2u*maxStringSize, "WTF?"); + static_assert(stackSize>2u*maxStringSize); - const size_t prevPos = _file->getPos(); const auto fileSize = _file->getSize(); - if (fileSize < maxStringSize) + if (fileSizeseek(0); - _file->read(tempBuff, 3u); - bool utf16 = false; - if (tempBuff[0]==0xEFu && tempBuff[1]==0xBBu && tempBuff[2]==0xBFu) - utf16 = false; - else if (reinterpret_cast(tempBuff)[0]==0xFEFFu) + size_t pos = 3; + bool utf16; { - utf16 = true; - _file->seek(2); + system::IFile::success_t success; + _file->read(success,tempBuff,0,pos); + if (!success) + return false; + if (tempBuff[0] == 0xEFu && tempBuff[1] == 0xBBu && tempBuff[2] == 0xBFu) + utf16 = false; + else if 
(reinterpret_cast(tempBuff)[0] == 0xFEFFu) + { + utf16 = true; + pos = 2; + } + else + pos = 0; } - else - _file->seek(0); - while (true) + + while (posgetPos(); - if (pos >= fileSize) - break; - if (pos > maxStringSize) - _file->seek(_file->getPos()-maxStringSize); - _file->read(tempBuff,stackSize); + if (pos>maxStringSize) + pos -= maxStringSize; + system::ISystem::future_t bytesRead; + _file->read(bytesRead,tempBuff,pos,stackSize); + if (!bytesRead.wait()) + return false; + tempBuff[bytesRead.copy()] = '\0'; + // TODO: should we require all 3 are found? for (auto i=0u; i(tempBuff),stringsToFindW[i])!=nullptr):(strstr(tempBuff, stringsToFind[i])!=nullptr)) - { - _file->seek(prevPos); + if (utf16 ? (wcsstr(reinterpret_cast(tempBuff),stringsToFindW[i])!=nullptr):(strstr(tempBuff,stringsToFind[i])!=nullptr)) return true; - } } - _file->seek(prevPos); return false; } -const char** CMitsubaLoader::getAssociatedFileExtensions() const -{ - static const char* ext[]{ "xml", nullptr }; - return ext; -} - -asset::SAssetBundle CMitsubaLoader::loadAsset(io::IReadFile* _file, const asset::IAssetLoader::SAssetLoadParams& _params, asset::IAssetLoader::IAssetLoaderOverride* _override, uint32_t _hierarchyLevel) +asset::SAssetBundle CMitsubaLoader::loadAsset(system::IFile* _file, const asset::IAssetLoader::SAssetLoadParams& _params, asset::IAssetLoader::IAssetLoaderOverride* _override, uint32_t _hierarchyLevel) { - ParserManager parserManager(m_assetMgr->getFileSystem(),_override); - if (!parserManager.parse(_file)) +// ParserManager parserManager(m_assetMgr->getFileSystem(),_override); +// if (!parserManager.parse(_file)) return {}; - +#if 0 if (_params.loaderFlags & IAssetLoader::ELPF_LOAD_METADATA_ONLY) { - auto emptyMesh = core::make_smart_refctd_ptr(); - return SAssetBundle(std::move(parserManager.m_metadata),{ std::move(emptyMesh) }); + auto emptyScene = core::make_smart_refctd_ptr(); + return SAssetBundle(std::move(parserManager.m_metadata),{ std::move(emptyScene) }); } else 
{ @@ -626,8 +436,10 @@ asset::SAssetBundle CMitsubaLoader::loadAsset(io::IReadFile* _file, const asset: return asset::SAssetBundle(std::move(parserManager.m_metadata),std::move(meshSmartPtrArray)); } +#endif } +#if 0 core::vector CMitsubaLoader::getMesh(SContext& ctx, uint32_t hierarchyLevel, CElementShape* shape) { if (!shape) @@ -1399,7 +1211,7 @@ SContext::SContext( ); meta->m_global.m_VT = core::smart_refctd_ptr(backend_ctx.vt.getCPUVirtualTexture()); } +#endif -} } } \ No newline at end of file From 972fd5791272c596a73e421f7f984f5158e1ca77 Mon Sep 17 00:00:00 2001 From: devsh Date: Thu, 9 Oct 2025 16:39:09 +0200 Subject: [PATCH 056/472] Note that `nbl::ext::MitsubaLoader::ParserManager::ElementPool` probably leaks memory --- examples_tests | 2 +- .../nbl/ext/MitsubaLoader/CMitsubaLoader.h | 17 +-- .../nbl/ext/MitsubaLoader/CMitsubaMetadata.h | 30 ++-- include/nbl/ext/MitsubaLoader/ParserUtil.h | 77 +++++----- src/nbl/ext/MitsubaLoader/CMakeLists.txt | 2 +- src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp | 11 +- src/nbl/ext/MitsubaLoader/ParserUtil.cpp | 141 +++++++++--------- 7 files changed, 142 insertions(+), 138 deletions(-) diff --git a/examples_tests b/examples_tests index 38be9e2711..bfcff8a686 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 38be9e27119e574fd9424bbbe3b955dfdd4616a4 +Subproject commit bfcff8a686409dd7c0d55607bb8cb6bcc0e0b80a diff --git a/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h b/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h index 09275963b3..f43b88c8a4 100644 --- a/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h +++ b/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h @@ -48,19 +48,14 @@ struct nbl_glsl_ext_Mitsuba_Loader_instance_data_t using instance_data_t = nbl_glsl_ext_Mitsuba_Loader_instance_data_t; #endif -class CMitsubaLoader : public asset::ISceneLoader +class CMitsubaLoader final : public asset::ISceneLoader { // friend class CMitsubaMaterialCompilerFrontend; - public: - //! 
Constructor - inline CMitsubaLoader() = default; -#if 0 - protected: - io::IFileSystem* m_filesystem; + core::smart_refctd_ptr m_system; //! Destructor virtual ~CMitsubaLoader() = default; - +#if 0 // core::vector getMesh(SContext& ctx, uint32_t hierarchyLevel, CElementShape* shape); core::vector loadShapeGroup(SContext& ctx, uint32_t hierarchyLevel, const CElementShape::ShapeGroup* shapegroup, const core::matrix3x4SIMD& relTform); @@ -74,9 +69,11 @@ class CMitsubaLoader : public asset::ISceneLoader template core::smart_refctd_ptr createDS0(const SContext& _ctx, asset::ICPUPipelineLayout* _layout, const asset::material_compiler::CMaterialCompilerGLSLBackendCommon::result_t& _compResult, Iter meshBegin, Iter meshEnd); - - public: #endif + public: + //! Constructor + inline CMitsubaLoader(core::smart_refctd_ptr&& _system) : m_system(std::move(_system)) {} + bool isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr logger=nullptr) const override; inline const char** getAssociatedFileExtensions() const override diff --git a/include/nbl/ext/MitsubaLoader/CMitsubaMetadata.h b/include/nbl/ext/MitsubaLoader/CMitsubaMetadata.h index b7c2a398cb..bf24e9d1ff 100644 --- a/include/nbl/ext/MitsubaLoader/CMitsubaMetadata.h +++ b/include/nbl/ext/MitsubaLoader/CMitsubaMetadata.h @@ -8,11 +8,11 @@ #include "nbl/asset/metadata/IAssetMetadata.h" #include "nbl/asset/ICPUImage.h" -#include "nbl/ext/MitsubaLoader/SContext.h" -#include "nbl/ext/MitsubaLoader/CElementEmitter.h" -#include "nbl/ext/MitsubaLoader/CElementIntegrator.h" -#include "nbl/ext/MitsubaLoader/CElementSensor.h" -#include "nbl/ext/MitsubaLoader/CElementShape.h" +//#include "nbl/ext/MitsubaLoader/SContext.h" +//#include "nbl/ext/MitsubaLoader/CElementEmitter.h" +//#include "nbl/ext/MitsubaLoader/CElementIntegrator.h" +//#include "nbl/ext/MitsubaLoader/CElementSensor.h" +//#include "nbl/ext/MitsubaLoader/CElementShape.h" namespace nbl::ext::MitsubaLoader @@ -27,6 +27,7 @@ class CMitsubaMetadata : public 
asset::IAssetMetadata public: std::string m_id; }; +#if 0 class CMesh : public asset::IMeshMetadata, public CID { public: @@ -45,27 +46,25 @@ class CMitsubaMetadata : public asset::IAssetMetadata CElementIntegrator m_integrator; core::vector m_sensors; } m_global; - - CMitsubaMetadata() : - IAssetMetadata(), m_metaPplnStorage(), m_semanticStorage(), m_metaPplnStorageIt(nullptr), - m_metaMeshStorage(), m_metaMeshInstanceStorage(), m_metaMeshInstanceAuxStorage(), - m_meshStorageIt(nullptr), m_instanceStorageIt(nullptr), m_instanceAuxStorageIt(nullptr) +#endif + inline CMitsubaMetadata() : IAssetMetadata()/*, m_metaMeshStorage(), m_metaMeshInstanceStorage(), m_metaMeshInstanceAuxStorage(), + m_meshStorageIt(nullptr), m_instanceStorageIt(nullptr), m_instanceAuxStorageIt(nullptr)*/ { } - _NBL_STATIC_INLINE_CONSTEXPR const char* LoaderName = "ext::MitsubaLoader::CMitsubaLoader"; - const char* getLoaderName() const override { return LoaderName; } - + constexpr static inline const char* LoaderName = "ext::MitsubaLoader::CMitsubaLoader"; + const char* getLoaderName() const override {return LoaderName;} +#if 0 //! 
inline const CMesh* getAssetSpecificMetadata(const asset::ICPUMesh* asset) const { const auto found = IAssetMetadata::getAssetSpecificMetadata(asset); return static_cast(found); } - +#endif private: friend class CMitsubaLoader; - +#if 0 meta_container_t m_metaMeshStorage; CMesh* m_meshStorageIt; @@ -106,6 +105,7 @@ class CMitsubaMetadata : public asset::IAssetMetadata return meta->m_instances.size(); } +#endif }; } diff --git a/include/nbl/ext/MitsubaLoader/ParserUtil.h b/include/nbl/ext/MitsubaLoader/ParserUtil.h index 981884554e..575fa037b2 100644 --- a/include/nbl/ext/MitsubaLoader/ParserUtil.h +++ b/include/nbl/ext/MitsubaLoader/ParserUtil.h @@ -8,7 +8,7 @@ #include "nbl/asset/interchange/IAssetLoader.h" //#include "nbl/ext/MitsubaLoader/CElementFactory.h" -//#include "nbl/ext/MitsubaLoader/CMitsubaMetadata.h" +#include "nbl/ext/MitsubaLoader/CMitsubaMetadata.h" #include "expat/lib/expat.h" @@ -17,21 +17,9 @@ namespace nbl::ext::MitsubaLoader { -class ParserLog -{ - public: - static inline void setLogger(const system::logger_opt_ptr& _logger) {logger=_logger;} - - /*prints this message: - Mitsuba loader error: - Invalid .xml file structure: message */ - static void invalidXMLFileStructure(const std::string& errorMessage); - - private: - static system::logger_opt_ptr logger; -}; - +class IElement; +// TODO: replace with common Class for Material Compiler V3 Node Pool template class ElementPool // similar to : public std::tuple...> { @@ -51,56 +39,68 @@ class ElementPool // similar to : public std::tuple...> class ParserManager { protected: + // TODO: need per-file/per-parse contexts and per-load (one shapegroup, one metadata, one stack, etc. 
- basically the members of `ParserManager` now) struct Context { + /*prints this message: + Mitsuba loader error: + Invalid .xml file structure: message */ + void invalidXMLFileStructure(const std::string& errorMessage) const; + + // + inline void killParseWithError(const std::string& message) const + { + invalidXMLFileStructure(message); + XML_StopParser(parser,false); + } + + system::path currentXMLDir; + // ParserManager* manager; + system::logger_opt_ptr logger; + // XML_Parser parser; - system::path currentXMLDir; }; public: -#if 0 //! Constructor - ParserManager(system::ISystem* _system, asset::IAssetLoader::IAssetLoaderOverride* _override) : - m_system(_system), m_override(_override), m_sceneDeclCount(0), - m_metadata(core::make_smart_refctd_ptr()) - { - } + inline ParserManager(system::ISystem* _system, asset::IAssetLoader::IAssetLoaderOverride* _override) : + propertyElements({ + "float", "string", "boolean", "integer", + "rgb", "srgb", "spectrum", "blackbody", + "point", "vector", + "matrix", "rotate", "translate", "scale", "lookat" + }), m_system(_system), m_override(_override), m_metadata(core::make_smart_refctd_ptr()) {} // static void elementHandlerStart(void* _data, const char* _el, const char** _atts); static void elementHandlerEnd(void* _data, const char* _el); - // - inline void killParseWithError(const Context& ctx, const std::string& message) - { - _NBL_DEBUG_BREAK_IF(true); - ParserLog::invalidXMLFileStructure(message); - XML_StopParser(ctx.parser, false); - } - bool parse(system::IFile* _file, const system::logger_opt_ptr& _logger); void parseElement(const Context& ctx, const char* _el, const char** _atts); void onEnd(const Context& ctx, const char* _el); +#if 0 // core::vector > shapegroups; - // +#endif + // note that its shared between per-file contexts core::smart_refctd_ptr m_metadata; private: // void processProperty(const Context& ctx, const char* _el, const char** _atts); - // + const core::unordered_set propertyElements; + // TODO: 
re-architect this and move into context so the PArserManager can be persistent system::ISystem* m_system; asset::IAssetLoader::IAssetLoaderOverride* m_override; // - uint32_t m_sceneDeclCount; - // - ElementPool< + uint32_t m_sceneDeclCount = 0; + // TODO: This leaks memory all over the place because destructors are not ran! + ElementPool objects; + CElementEmitter*/ + > objects; // aliases and names core::unordered_map handles; - /*stack of currently processed elements each element of index N is parent of the element of index N+1 the scene element is a parent of all elements of index 0 */ core::stack > elements; -#endif + friend class CElementFactory; }; diff --git a/src/nbl/ext/MitsubaLoader/CMakeLists.txt b/src/nbl/ext/MitsubaLoader/CMakeLists.txt index f321324b60..e4fc746049 100644 --- a/src/nbl/ext/MitsubaLoader/CMakeLists.txt +++ b/src/nbl/ext/MitsubaLoader/CMakeLists.txt @@ -38,7 +38,7 @@ set(NBL_EXT_MITSUBA_LOADER_SRC # CElementTexture.cpp # CElementEmitter.cpp # CElementFactory.cpp -# ParserUtil.cpp + ParserUtil.cpp CSerializedLoader.cpp CMitsubaLoader.cpp # CMitsubaMaterialCompilerFrontend.cpp diff --git a/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp b/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp index cb6c9fc365..6f75d0c110 100644 --- a/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp +++ b/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp @@ -298,15 +298,16 @@ bool CMitsubaLoader::isALoadableFileFormat(system::IFile* _file, const system::l asset::SAssetBundle CMitsubaLoader::loadAsset(system::IFile* _file, const asset::IAssetLoader::SAssetLoadParams& _params, asset::IAssetLoader::IAssetLoaderOverride* _override, uint32_t _hierarchyLevel) { -// ParserManager parserManager(m_assetMgr->getFileSystem(),_override); -// if (!parserManager.parse(_file)) + ParserManager parserManager(m_system.get(),_override); + if (!parserManager.parse(_file,_params.logger)) return {}; -#if 0 - if (_params.loaderFlags & IAssetLoader::ELPF_LOAD_METADATA_ONLY) + + //if 
(_params.loaderFlags&IAssetLoader::ELPF_LOAD_METADATA_ONLY) { auto emptyScene = core::make_smart_refctd_ptr(); - return SAssetBundle(std::move(parserManager.m_metadata),{ std::move(emptyScene) }); + return SAssetBundle(std::move(parserManager.m_metadata),{std::move(emptyScene)}); } +#if 0 else { // diff --git a/src/nbl/ext/MitsubaLoader/ParserUtil.cpp b/src/nbl/ext/MitsubaLoader/ParserUtil.cpp index 785fe622dc..4d21ea1f77 100644 --- a/src/nbl/ext/MitsubaLoader/ParserUtil.cpp +++ b/src/nbl/ext/MitsubaLoader/ParserUtil.cpp @@ -2,6 +2,7 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h + #include "nbl/ext/MitsubaLoader/ParserUtil.h" #include "nbl/ext/MitsubaLoader/CElementFactory.h" @@ -9,21 +10,16 @@ #include -namespace nbl -{ -namespace ext -{ -namespace MitsubaLoader -{ -system::logger_opt_ptr ParserLog::logger = nullptr; +namespace nbl::ext::MitsubaLoader +{ +using namespace nbl::system; -void ParserLog::invalidXMLFileStructure(const std::string& errorMessage) +void ParserManager::Context::invalidXMLFileStructure(const std::string& errorMessage) const { - std::string message = "Mitsuba loader error - Invalid .xml file structure: \'" - + errorMessage + '\''; + std::string message = "Mitsuba loader error - Invalid .xml file structure: \'" + errorMessage + '\''; - //ParserLog::logger.log(message, system::ILogger::E_LOG_LEVEL::ELL_ERROR); + logger.log(message,ILogger::E_LOG_LEVEL::ELL_ERROR); _NBL_DEBUG_BREAK_IF(true); } @@ -42,48 +38,53 @@ void ParserManager::elementHandlerEnd(void* _data, const char* _el) } - -bool ParserManager::parse(system::IFile* _file, const system::logger_opt_ptr& _logger) +bool ParserManager::parse(IFile* _file, const logger_opt_ptr& _logger) { XML_Parser parser = XML_ParserCreate(nullptr); if (!parser) { - _logger.log("Could not create XML Parser!", system::ILogger::E_LOG_LEVEL::ELL_ERROR); + _logger.log("Could not create XML Parser!",ILogger::E_LOG_LEVEL::ELL_ERROR); 
return false; } - XML_SetElementHandler(parser, elementHandlerStart, elementHandlerEnd); + XML_SetElementHandler(parser,elementHandlerStart,elementHandlerEnd); //from now data (instance of ParserData struct) will be visible to expat handlers - Context ctx = {this,parser,_file->getFileName().parent_path()/""}; - XML_SetUserData(parser, &ctx); + Context ctx = {_file->getFileName().parent_path()/"",this,_logger,parser}; + XML_SetUserData(parser,&ctx); + const size_t size = _file->getSize(); + const char* buff = reinterpret_cast(const_cast(_file)->getMappedPointer()); + if (!buff) + { + buff = reinterpret_cast(_NBL_ALIGNED_MALLOC(size,4096u)); + IFile::success_t success; + _file->read(success,const_cast(buff),0u,size); + if (!success) + { + _logger.log("Could read the file into XML Parser Buffer!",ILogger::E_LOG_LEVEL::ELL_ERROR); + return false; + } + } + XML_Status parseStatus = XML_Parse(parser,buff,size,0); + if (_file->getMappedPointer()!=buff) + _NBL_ALIGNED_FREE(const_cast(buff)); - char* buff = (char*)_NBL_ALIGNED_MALLOC(_file->getSize(), 4096u); - - system::future future; - _file->read(future, (void*)buff, 0u, _file->getSize()); - future.get(); - - XML_Status parseStatus = XML_Parse(parser, buff, _file->getSize(), 0); - _NBL_ALIGNED_FREE(buff); XML_ParserFree(parser); switch (parseStatus) { case XML_STATUS_ERROR: { - _logger.log("Parse status: XML_STATUS_ERROR", system::ILogger::E_LOG_LEVEL::ELL_ERROR); + _logger.log("Parse status: XML_STATUS_ERROR",ILogger::E_LOG_LEVEL::ELL_ERROR); return false; } break; case XML_STATUS_OK: - #ifdef _NBL_DEBUG - _logger.log("Parse status: XML_STATUS_OK", system::ILogger::E_LOG_LEVEL::ELL_INFO); - #endif + _logger.log("Parse status: XML_STATUS_OK",ILogger::E_LOG_LEVEL::ELL_INFO); break; case XML_STATUS_SUSPENDED: { - _logger.log("Parse status: XML_STATUS_SUSPENDED", system::ILogger::E_LOG_LEVEL::ELL_INFO); + _logger.log("Parse status: XML_STATUS_SUSPENDED",ILogger::E_LOG_LEVEL::ELL_INFO); return false; } break; @@ -92,33 
+93,26 @@ bool ParserManager::parse(system::IFile* _file, const system::logger_opt_ptr& _l return true; } -static const core::unordered_set propertyElements = { - "float", "string", "boolean", "integer", - "rgb", "srgb", "spectrum", "blackbody", - "point", "vector", - "matrix", "rotate", "translate", "scale", "lookat" -}; - void ParserManager::parseElement(const Context& ctx, const char* _el, const char** _atts) { - if (core::strcmpi(_el, "scene") == 0) + if (core::strcmpi(_el, "scene")==0) { auto count = 0u; while (_atts && _atts[count]) { count++; } - if (count != 2u) + if (count!=2u) { - killParseWithError(ctx,"Wrong number of attributes for scene element"); + ctx.killParseWithError("Wrong number of attributes for scene element"); return; } - if (core::strcmpi(_atts[0], "version")) + if (core::strcmpi(_atts[0],"version")) { - ParserLog::invalidXMLFileStructure(std::string(_atts[0]) + " is not an attribute of scene element"); + ctx.invalidXMLFileStructure(std::string(_atts[0]) + " is not an attribute of scene element"); return; } - else if (core::strcmpi(_atts[1], "0.5.0")) + else if (core::strcmpi(_atts[1],"0.5.0")) { - ParserLog::invalidXMLFileStructure("Version " + std::string(_atts[1]) + " is unsupported"); + ctx.invalidXMLFileStructure("Version " + std::string(_atts[1]) + " is unsupported"); return; } m_sceneDeclCount++; @@ -127,26 +121,40 @@ void ParserManager::parseElement(const Context& ctx, const char* _el, const char if (m_sceneDeclCount==0u) { - killParseWithError(ctx,"there is no scene element"); + ctx.killParseWithError("there is no scene element"); return; } - if (core::strcmpi(_el, "include") == 0) + if (core::strcmpi(_el,"include")==0) { - system::ISystem::future_t> future; - bool validInput = m_system->createFile(future, ctx.currentXMLDir.string()+_atts[1], system::IFile::ECF_READ); - if (!validInput) // try global path - validInput = m_system->createFile(future, _atts[1], system::IFile::ECF_READ); - if (!validInput) + core::smart_refctd_ptr 
file; + auto tryOpen = [&](const system::path& path)->bool + { + for (auto i=0; i<2; i++) + { + ISystem::future_t> future; + auto flags = IFile::ECF_READ; + if (i==0) + flags |= IFile::ECF_MAPPABLE; + m_system->createFile(future,ctx.currentXMLDir/_atts[1],flags); + if (future.wait()) + future.acquire().move_into(file); + if (file) + return true; + } + return false; + }; + // first try as relative path, then as global + if (!tryOpen(ctx.currentXMLDir/_atts[1])) + if (!tryOpen(_atts[1])) { - ParserLog::invalidXMLFileStructure(std::string("Could not open include file: ") + _atts[1]); + ctx.invalidXMLFileStructure(std::string("Could not open include file: ")+_atts[1]); return; } - auto file = future.get(); - parse(file.get(), system::logger_opt_ptr(nullptr)); // TODO: fix + parse(file.get(),ctx.logger); return; } - +#if 0 if (propertyElements.find(_el)!=propertyElements.end()) { processProperty(ctx, _el, _atts); @@ -157,7 +165,7 @@ void ParserManager::parseElement(const Context& ctx, const char* _el, const char auto found = _map.find(_el); if (found==_map.end()) { - ParserLog::invalidXMLFileStructure(std::string("Could not process element ") + _el); + invalidXMLFileStructure(std::string("Could not process element ") + _el); elements.push({nullptr,""}); return; } @@ -171,37 +179,38 @@ void ParserManager::parseElement(const Context& ctx, const char* _el, const char elements.push(el); if (el.first && el.first->id.size()) handles[el.first->id] = el.first; +#endif } void ParserManager::processProperty(const Context& ctx, const char* _el, const char** _atts) { if (elements.empty()) { - killParseWithError(ctx,"cannot set a property with no element on the stack."); + ctx.killParseWithError("cannot set a property with no element on the stack."); return; } if (!elements.top().first) { - ParserLog::invalidXMLFileStructure("cannot set property on element that failed to be created."); + ctx.invalidXMLFileStructure("cannot set property on element that failed to be created."); 
return; } - auto optProperty = CPropertyElementManager::createPropertyData(_el, _atts); +#if 0 + auto optProperty = CPropertyElementManager::createPropertyData(_el,_atts); if (optProperty.first == false) { - ParserLog::invalidXMLFileStructure("could not create property data."); + invalidXMLFileStructure("could not create property data."); return; } elements.top().first->addProperty(std::move(optProperty.second)); - - return; +#endif } void ParserManager::onEnd(const Context& ctx, const char* _el) { - if (propertyElements.find(_el) != propertyElements.end()) + if (propertyElements.find(_el)!=propertyElements.end()) return; if (core::strcmpi(_el, "scene") == 0) @@ -209,11 +218,10 @@ void ParserManager::onEnd(const Context& ctx, const char* _el) m_sceneDeclCount--; return; } - +#if 0 if (elements.empty()) return; - auto element = elements.top(); elements.pop(); @@ -243,9 +251,8 @@ void ParserManager::onEnd(const Context& ctx, const char* _el) if (shape) shapegroups.emplace_back(shape,std::move(element.second)); } +#endif } -} -} } \ No newline at end of file From e3129939c8253ec04525bdb726578cfe61b754ac Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 10 Oct 2025 09:33:03 +0200 Subject: [PATCH 057/472] cap kind args --- cmake/common.cmake | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index 645837aaaa..b7df2690e2 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1210,7 +1210,7 @@ struct DeviceConfigCaps get_target_property(HEADER_RULE_GENERATED ${IMPL_TARGET} NBL_HEADER_GENERATED_RULE) if(NOT HEADER_RULE_GENERATED) - set(INCLUDE_DIR "$/${IMPL_TARGET}/.cmake/include") + set(INCLUDE_DIR "$/${IMPL_TARGET}/.cmake/include") set(INCLUDE_FILE "${INCLUDE_DIR}/$") set(INCLUDE_CONTENT $) @@ -1323,12 +1323,27 @@ namespace @IMPL_NAMESPACE@ { set(CAP_NAMES "") set(CAP_TYPES "") + set(CAP_KINDS "") if(HAS_CAPS) math(EXPR LAST_CAP "${CAPS_LENGTH} - 1") foreach(CAP_IDX 
RANGE 0 ${LAST_CAP}) + string(JSON CAP_KIND ERROR_VARIABLE CAP_TYPE_ERROR GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} kind) string(JSON CAP_NAME GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} name) string(JSON CAP_TYPE GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} type) + # -> TODO: improve validation, input should be string + if(CAP_TYPE_ERROR) + set(CAP_KIND limits) # I assume its limit by default (or when invalid value present, currently) + else() + if(NOT CAP_KIND MATCHES "^(limits|features)$") + ERROR_WHILE_PARSING_ITEM( + "Invalid CAP kind \"${CAP_KIND}\" for ${CAP_NAME}\n" + "Allowed kinds are: limits, features" + ) + endif() + endif() + # <- + if(NOT CAP_TYPE MATCHES "^(bool|uint16_t|uint32_t|uint64_t)$") ERROR_WHILE_PARSING_ITEM( "Invalid CAP type \"${CAP_TYPE}\" for ${CAP_NAME}\n" @@ -1366,6 +1381,7 @@ namespace @IMPL_NAMESPACE@ { set(CAP_VALUES_${CAP_IDX} "${VALUES}") list(APPEND CAP_NAMES "${CAP_NAME}") list(APPEND CAP_TYPES "${CAP_TYPE}") + list(APPEND CAP_KINDS "${CAP_KIND}") endforeach() endif() @@ -1405,12 +1421,16 @@ namespace @IMPL_NAMESPACE@ { ]=]) unset(RETVAL_EVAL) - foreach(CAP ${CAP_NAMES}) + list(LENGTH CAP_NAMES CAP_COUNT) + math(EXPR CAP_COUNT "${CAP_COUNT} - 1") + foreach(i RANGE ${CAP_COUNT}) + list(GET CAP_NAMES ${i} CAP) + list(GET CAP_KINDS ${i} KIND) string(CONFIGURE [=[ - retval += ".@CAP@_" + std::to_string(limits.@CAP@); + retval += ".@CAP@_" + std::to_string(@KIND@.@CAP@); ]=] RETVALUE_VIEW @ONLY) string(APPEND RETVAL_EVAL "${RETVALUE_VIEW}") - endforeach(CAP) + endforeach() string(CONFIGURE "${HEADER_ITEM_VIEW}" HEADER_ITEM_EVAL @ONLY) set_property(TARGET ${IMPL_TARGET} APPEND_STRING PROPERTY NBL_HEADER_CONTENT "${HEADER_ITEM_EVAL}") @@ -1461,6 +1481,7 @@ namespace @IMPL_NAMESPACE@ { list(GET CAP_NAMES ${CAP_INDEX} CURRENT_CAP) list(GET CAP_TYPES ${CAP_INDEX} CURRENT_TYPE) + list(GET CAP_KINDS ${CAP_INDEX} CURRENT_KIND) set(VAR_NAME "CAP_VALUES_${CAP_INDEX}") set(VALUES "${${VAR_NAME}}") From 
25c4bdcd157d0cfc418d36bddbd4a688ea07c4ad Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 10 Oct 2025 10:23:00 +0200 Subject: [PATCH 058/472] fix a bug after my NSC rules update --- cmake/common.cmake | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index b7df2690e2..92250a276b 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1422,15 +1422,18 @@ namespace @IMPL_NAMESPACE@ { ]=]) unset(RETVAL_EVAL) list(LENGTH CAP_NAMES CAP_COUNT) - math(EXPR CAP_COUNT "${CAP_COUNT} - 1") - foreach(i RANGE ${CAP_COUNT}) - list(GET CAP_NAMES ${i} CAP) - list(GET CAP_KINDS ${i} KIND) - string(CONFIGURE [=[ + if(CAP_COUNT GREATER 0) + math(EXPR LAST_CAP "${CAP_COUNT} - 1") + foreach(i RANGE ${LAST_CAP}) + list(GET CAP_NAMES ${i} CAP) + list(GET CAP_KINDS ${i} KIND) + string(CONFIGURE [=[ retval += ".@CAP@_" + std::to_string(@KIND@.@CAP@); -]=] RETVALUE_VIEW @ONLY) - string(APPEND RETVAL_EVAL "${RETVALUE_VIEW}") - endforeach() +]=] RETVALUE_VIEW @ONLY) + string(APPEND RETVAL_EVAL "${RETVALUE_VIEW}") + endforeach() + endif() + string(CONFIGURE "${HEADER_ITEM_VIEW}" HEADER_ITEM_EVAL @ONLY) set_property(TARGET ${IMPL_TARGET} APPEND_STRING PROPERTY NBL_HEADER_CONTENT "${HEADER_ITEM_EVAL}") From 3d18b585b6e3be2ea3e9c5b19527769738819c06 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 10 Oct 2025 13:36:38 +0200 Subject: [PATCH 059/472] update examples_tests submodule, we will first make CI pass with IES shaders --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 2b4db21239..c3b463a33f 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 2b4db2123918f380cc0a35f6889315a02f84ea73 +Subproject commit c3b463a33f864ddb9f9b4099b660b313cad6ab5c From 27de67f1a1a749e3f119170ef26be6fb48c65822 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 10 Oct 2025 15:31:58 +0200 Subject: 
[PATCH 060/472] update examples_tests submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index c3b463a33f..29f64a283b 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit c3b463a33f864ddb9f9b4099b660b313cad6ab5c +Subproject commit 29f64a283b2ccaef76f169b997a47647631d84c9 From 1ab734c0202dbcd9601739a7bd62900e268264cf Mon Sep 17 00:00:00 2001 From: devsh Date: Fri, 10 Oct 2025 18:05:01 +0200 Subject: [PATCH 061/472] remove global variables and rewrite `PropertyElement.h` and `PropertyElement.cpp` --- .../nbl/ext/MitsubaLoader/PropertyElement.h | 233 +++++++----- src/nbl/ext/MitsubaLoader/PropertyElement.cpp | 352 +++++++++--------- 2 files changed, 321 insertions(+), 264 deletions(-) diff --git a/include/nbl/ext/MitsubaLoader/PropertyElement.h b/include/nbl/ext/MitsubaLoader/PropertyElement.h index ac257bd4b3..f5a6e9266b 100644 --- a/include/nbl/ext/MitsubaLoader/PropertyElement.h +++ b/include/nbl/ext/MitsubaLoader/PropertyElement.h @@ -1,24 +1,28 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_EXT_MISTUBA_LOADER_PROPERTY_ELEMENT_H_INCLUDED_ +#define _NBL_EXT_MISTUBA_LOADER_PROPERTY_ELEMENT_H_INCLUDED_ -#ifndef __PROPERTY_ELEMENT_H_INCLUDED__ -#define __PROPERTY_ELEMENT_H_INCLUDED__ #include "nbl/core/declarations.h" -#include "matrix4SIMD.h" -#include +#include "nbl/builtin/hlsl/cpp_compat.hlsl" -namespace nbl -{ -namespace ext + +namespace nbl::ext::MitsubaLoader { -namespace MitsubaLoader +// maybe move somewhere +inline void invalidXMLFileStructure(system::logger_opt_ptr logger, const std::string& errorMessage) { + std::string message = "Mitsuba loader error - Invalid .xml file structure: \'" + errorMessage + '\''; + logger.log(message,system::ILogger::E_LOG_LEVEL::ELL_ERROR); + _NBL_DEBUG_BREAK_IF(true); +} struct SPropertyElementData { - enum Type + // TODO: enum class, and smaller type + enum Type : uint32_t { FLOAT, INTEGER, @@ -37,14 +41,12 @@ struct SPropertyElementData VECTOR, INVALID }; - - static const core::unordered_map StringToType; - _NBL_STATIC_INLINE_CONSTEXPR uint32_t MaxAttributes = 5u; - static const char* attributeStrings[Type::INVALID][MaxAttributes]; + // + constexpr static inline uint32_t MaxAttributes = 5u; inline SPropertyElementData() : type(Type::INVALID) { - std::fill(mvalue.pointer(), mvalue.pointer() + 16, 0.f); + memset(&fvalue,0,sizeof(mvalue)); } inline SPropertyElementData(const SPropertyElementData& other) : SPropertyElementData() { @@ -54,17 +56,11 @@ struct SPropertyElementData { operator=(std::move(other)); } - inline SPropertyElementData(const std::string& _type) : SPropertyElementData() - { - auto found = StringToType.find(_type); - if (found != StringToType.end()) - type = found->second; - } inline explicit SPropertyElementData(float value) : type(FLOAT) { fvalue = value; } inline explicit SPropertyElementData(int32_t value) : type(INTEGER) { ivalue = value; } inline explicit SPropertyElementData(bool value) : type(BOOLEAN) 
{ bvalue = value; } //explicit SPropertyElementData(const std::string& value) : type(STRING) { #error } - inline explicit SPropertyElementData(Type _type, const core::vectorSIMDf& value): type(INVALID) + inline explicit SPropertyElementData(Type _type, const hlsl::float32_t4& value): type(INVALID) { switch (_type) { @@ -80,9 +76,9 @@ struct SPropertyElementData break; }; } - ~SPropertyElementData() + inline ~SPropertyElementData() { - if (type == Type::STRING) + if (type==Type::STRING) _NBL_ALIGNED_FREE((void*)svalue); } @@ -129,7 +125,7 @@ struct SPropertyElementData mvalue = other.mvalue; break; default: - std::fill(mvalue.pointer(), mvalue.pointer()+16, 0.f); + memset(&fvalue,0,sizeof(mvalue)); break; } return *this; @@ -171,17 +167,19 @@ struct SPropertyElementData mvalue = other.mvalue; break; default: - std::fill(other.mvalue.pointer(), other.mvalue.pointer() + 16, 0.f); + memset(&fvalue,0,sizeof(mvalue)); break; } return *this; } - + // TODO: enum class on the template param + template + struct get_type; template - struct get_typename; + using get_type_t = typename get_type::type; template - const typename get_typename::type& getProperty() const; + const get_type_t& getProperty() const; inline uint8_t getVectorDimension() const { @@ -201,63 +199,85 @@ struct SPropertyElementData int32_t ivalue; bool bvalue; const char* svalue; - core::vectorSIMDf vvalue; // rgb, srgb, vector, point - core::matrix4SIMD mvalue; // matrix, translate, rotate, scale, lookat + hlsl::float32_t4 vvalue; // rgb, srgb, vector, point + hlsl::float32_t4x4 mvalue; // matrix, translate, rotate, scale, lookat }; }; struct SNamedPropertyElement : SPropertyElementData { - SNamedPropertyElement() : SPropertyElementData(), name("") + inline SNamedPropertyElement() : SPropertyElementData(), name("") { } - SNamedPropertyElement(const std::string& _type) : SNamedPropertyElement() - { - auto found = SPropertyElementData::StringToType.find(_type); - if (found != 
SPropertyElementData::StringToType.end()) - type = found->second; - } - SNamedPropertyElement(const SNamedPropertyElement& other) : SNamedPropertyElement() + inline SNamedPropertyElement(const SNamedPropertyElement& other) : SNamedPropertyElement() { SNamedPropertyElement::operator=(other); } - SNamedPropertyElement(SNamedPropertyElement&& other) : SNamedPropertyElement() + inline SNamedPropertyElement(SNamedPropertyElement&& other) : SNamedPropertyElement() { SNamedPropertyElement::operator=(std::move(other)); } - bool initialize(const char** _atts, const char** outputMatch) + inline bool initialize(const char** _atts, const char** outputMatch) { - if (type == Type::INVALID || !_atts) + if (type==Type::INVALID || !_atts) return false; - for (auto it = _atts; *it; it++) + constexpr const char* AttributeStrings[SPropertyElementData::Type::INVALID][SPropertyElementData::MaxAttributes] = { + {"value"}, // FLOAT + {"value"}, // INTEGER + {"value"}, // BOOLEAN + {"value"}, // STRING + {"value","intent"}, // RGB + {"value","intent"}, // SRGB + {"value","intent","filename"}, // SPECTRUM + {"temperature","scale"}, // BLACKBODY + {"value"}, // MATRIX + {"x","y","z"}, // TRANSLATE + {"angle","x","y","z"}, // ROTATE + {"value","x","y","z"}, // SCALE + {"origin","target","up"}, // LOOKAT + {"x","y","z"}, // POINT + {"x","y","z","w"} // VECTOR + }; + // TODO: some magical constexpr thing to count up + //constexpr size_t AttributeCount[SPropertyElementData::Type::INVALID][SPropertyElementData::MaxAttributes] = {}; + + for (auto it=_atts; *it; it++) { - if (core::strcmpi(*it, "name") == 0) + // found the name attribute + if (core::strcmpi(*it,"name") == 0) { + // value follows the attribute name it++; if (*it) { + // next attribute is the actual name, first is just the `name=` name = *it; continue; } - else + else // no name present e.g. 
`name=""` return false; } - for (auto i = 0u; i < SPropertyElementData::MaxAttributes; i++) + // now go through the expected attributes + for (auto i=0u; i struct SPropertyElementData::get_typename +template<> struct SPropertyElementData::get_type { using type = float; }; -template<> struct SPropertyElementData::get_typename +template<> struct SPropertyElementData::get_type { using type = int32_t; }; -template<> struct SPropertyElementData::get_typename +template<> struct SPropertyElementData::get_type { using type = bool; }; -template<> struct SPropertyElementData::get_typename +template<> struct SPropertyElementData::get_type { using type = const char*; }; -template<> struct SPropertyElementData::get_typename -{ using type = core::vectorSIMDf; }; -template<> struct SPropertyElementData::get_typename -{ using type = core::vectorSIMDf; }; -template<> struct SPropertyElementData::get_typename -{ using type = core::vectorSIMDf; }; -template<> struct SPropertyElementData::get_typename -{ using type = core::vectorSIMDf; }; -template<> struct SPropertyElementData::get_typename -{ using type = core::vectorSIMDf; }; -template<> struct SPropertyElementData::get_typename +template<> struct SPropertyElementData::get_type +{ using type = hlsl::float32_t4; }; +template<> struct SPropertyElementData::get_type +{ using type = hlsl::float32_t4; }; +template<> struct SPropertyElementData::get_type +{ using type = hlsl::float32_t4; }; +template<> struct SPropertyElementData::get_type +{ using type = hlsl::float32_t4; }; +template<> struct SPropertyElementData::get_type +{ using type = hlsl::float32_t4; }; +template<> struct SPropertyElementData::get_type { using type = void; }; -template<> struct SPropertyElementData::get_typename -{ using type = core::matrix4SIMD; }; -template<> struct SPropertyElementData::get_typename -{ using type = core::matrix4SIMD; }; -template<> struct SPropertyElementData::get_typename -{ using type = core::matrix4SIMD; }; -template<> struct 
SPropertyElementData::get_typename -{ using type = core::matrix4SIMD; }; -template<> struct SPropertyElementData::get_typename -{ using type = core::matrix4SIMD; }; -template<> struct SPropertyElementData::get_typename +template<> struct SPropertyElementData::get_type +{ using type = hlsl::float32_t4x4; }; +template<> struct SPropertyElementData::get_type +{ using type = hlsl::float32_t4x4; }; +template<> struct SPropertyElementData::get_type +{ using type = hlsl::float32_t4x4; }; +template<> struct SPropertyElementData::get_type +{ using type = hlsl::float32_t4x4; }; +template<> struct SPropertyElementData::get_type +{ using type = hlsl::float32_t4x4; }; +template<> struct SPropertyElementData::get_type { using type = void; }; +// TODO: rewrite rest to be less `::` verbose +template<> auto SPropertyElementData::getProperty() const -> const get_type_t& +{ return fvalue; } +template<> auto SPropertyElementData::getProperty() const -> const get_type_t& +{ return ivalue; } +template<> const SPropertyElementData::get_type_t& SPropertyElementData::getProperty() const +{ return bvalue; } +template<> const SPropertyElementData::get_type_t& SPropertyElementData::getProperty() const +{ return svalue; } +template<> const SPropertyElementData::get_type_t& SPropertyElementData::getProperty() const +{ return vvalue; } +template<> const SPropertyElementData::get_type_t& SPropertyElementData::getProperty() const +{ return vvalue; } +template<> const SPropertyElementData::get_type_t& SPropertyElementData::getProperty() const +{ return vvalue; } +template<> const SPropertyElementData::get_type_t& SPropertyElementData::getProperty() const +{ return vvalue; } +template<> const SPropertyElementData::get_type_t& SPropertyElementData::getProperty() const +{ return vvalue; } +template<> const SPropertyElementData::get_type_t& SPropertyElementData::getProperty() const +{ return mvalue; } +template<> const SPropertyElementData::get_type_t& SPropertyElementData::getProperty() const +{ 
return mvalue; } +template<> const SPropertyElementData::get_type_t& SPropertyElementData::getProperty() const +{ return mvalue; } +template<> const SPropertyElementData::get_type_t& SPropertyElementData::getProperty() const +{ return mvalue; } +template<> const SPropertyElementData::get_type_t& SPropertyElementData::getProperty() const +{ return mvalue; } -class CPropertyElementManager +class CPropertyElementManager final { - public: - static std::pair createPropertyData(const char* _el, const char** _atts); + const core::unordered_map StringToType; - static bool retrieveBooleanValue(const std::string& _data, bool& success); - static core::matrix4SIMD retrieveMatrix(const std::string& _data, bool& success); - static core::vectorSIMDf retrieveVector(const std::string& _data, bool& success); - static core::vectorSIMDf retrieveHex(const std::string& _data, bool& success); + static std::optional retrieveBooleanValue(const std::string_view& _data, system::logger_opt_ptr logger); + static hlsl::float32_t4x4 retrieveMatrix(const std::string_view& _data, system::logger_opt_ptr logger); + static hlsl::float32_t4 retrieveVector(const std::string_view& _data, system::logger_opt_ptr logger); + static hlsl::float32_t4 retrieveHex(const std::string_view& _data, system::logger_opt_ptr logger); + + public: + CPropertyElementManager(); +#if 0 + inline SPropertyElementData(const std::string& _type) : SPropertyElementData() + { + auto found = StringToType.find(_type); + if (found != StringToType.end()) + type = found->second; + } + SNamedPropertyElement(const std::string& _type) : SNamedPropertyElement() + { + auto found = SPropertyElementData::StringToType.find(_type); + if (found != SPropertyElementData::StringToType.end()) + type = found->second; + } +#endif + std::optional createPropertyData(const char* _el, const char** _atts, system::logger_opt_ptr logger) const; }; } -} -} - #endif \ No newline at end of file diff --git a/src/nbl/ext/MitsubaLoader/PropertyElement.cpp 
b/src/nbl/ext/MitsubaLoader/PropertyElement.cpp index 66f0e40343..d6144b6919 100644 --- a/src/nbl/ext/MitsubaLoader/PropertyElement.cpp +++ b/src/nbl/ext/MitsubaLoader/PropertyElement.cpp @@ -2,51 +2,23 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h -#include "quaternion.h" -#include "matrix3x4SIMD.h" -#include "matrix4SIMD.h" -#include "nbl/asset/format/decodePixels.h" + +//#include "quaternion.h" +//#include "matrix3x4SIMD.h" +//#include "matrix4SIMD.h" +//#include "nbl/asset/format/decodePixels.h" #include "nbl/ext/MitsubaLoader/PropertyElement.h" #include "nbl/ext/MitsubaLoader/ParserUtil.h" -namespace nbl -{ -namespace ext -{ -namespace MitsubaLoader -{ +#include "nbl/builtin/hlsl/math/linalg/transform.hlsl" +#include "glm/gtc/matrix_transform.hpp" -template<> const typename SPropertyElementData::get_typename::type& SPropertyElementData::getProperty() const -{ return fvalue; } -template<> const typename SPropertyElementData::get_typename::type& SPropertyElementData::getProperty() const -{ return ivalue; } -template<> const typename SPropertyElementData::get_typename::type& SPropertyElementData::getProperty() const -{ return bvalue; } -template<> const typename SPropertyElementData::get_typename::type& SPropertyElementData::getProperty() const -{ return svalue; } -template<> const typename SPropertyElementData::get_typename::type& SPropertyElementData::getProperty() const -{ return vvalue; } -template<> const typename SPropertyElementData::get_typename::type& SPropertyElementData::getProperty() const -{ return vvalue; } -template<> const typename SPropertyElementData::get_typename::type& SPropertyElementData::getProperty() const -{ return vvalue; } -template<> const typename SPropertyElementData::get_typename::type& SPropertyElementData::getProperty() const -{ return vvalue; } -template<> const typename SPropertyElementData::get_typename::type& SPropertyElementData::getProperty() const 
-{ return vvalue; } -template<> const typename SPropertyElementData::get_typename::type& SPropertyElementData::getProperty() const -{ return mvalue; } -template<> const typename SPropertyElementData::get_typename::type& SPropertyElementData::getProperty() const -{ return mvalue; } -template<> const typename SPropertyElementData::get_typename::type& SPropertyElementData::getProperty() const -{ return mvalue; } -template<> const typename SPropertyElementData::get_typename::type& SPropertyElementData::getProperty() const -{ return mvalue; } -template<> const typename SPropertyElementData::get_typename::type& SPropertyElementData::getProperty() const -{ return mvalue; } -const core::unordered_map SPropertyElementData::StringToType = { +namespace nbl::ext::MitsubaLoader +{ + +CPropertyElementManager::CPropertyElementManager() : StringToType({ {"float", SPropertyElementData::Type::FLOAT}, {"integer", SPropertyElementData::Type::INTEGER}, {"boolean", SPropertyElementData::Type::BOOLEAN}, @@ -62,38 +34,31 @@ const core::unordered_map CPropertyElementManager::createPropertyData(const char* _el, const char** _atts) +std::optional CPropertyElementManager::createPropertyData(const char* _el, const char** _atts, system::logger_opt_ptr logger) const { - SNamedPropertyElement result(_el); + SNamedPropertyElement result = {}; + auto found = StringToType.find(_el); + if (found!=StringToType.end()) + result.type = found->second; + // initialization returns strings from `_atts` which match expected attributes const char* desiredAttributes[SPropertyElementData::MaxAttributes] = { nullptr }; - if (!result.initialize(_atts, desiredAttributes)) + if (!result.initialize(_atts,desiredAttributes)) { - _NBL_DEBUG_BREAK_IF(true); - return std::make_pair(false, SNamedPropertyElement()); + invalidXMLFileStructure(logger,"Failed to Intialize Named Property Element."); + return {}; } - bool success = true; - #define FAIL_IF_ATTRIBUTE_NULL(N) if (!desiredAttributes[N]) {success = false; break;} + 
auto printFailure = [&](const uint8_t attrId)->void{invalidXMLFileStructure(logger,"invalid element, name:\'"+result.name+"\' value:\'"+desiredAttributes[attrId]+"\'");}; + + #define FAIL_IF_ATTRIBUTE_NULL(N) if (!desiredAttributes[N]) \ + { \ + invalidXMLFileStructure(logger,"Invalid element, name:\'"+result.name+"\' Attribute #"+std::to_string(N)+"not found"); \ + return {}; \ + } switch (result.type) { case SPropertyElementData::Type::FLOAT: @@ -106,35 +71,52 @@ std::pair CPropertyElementManager::createPropertyDa break; case SPropertyElementData::Type::BOOLEAN: FAIL_IF_ATTRIBUTE_NULL(0u) - result.bvalue = retrieveBooleanValue(desiredAttributes[0],success); + if (auto ret=retrieveBooleanValue(desiredAttributes[0],logger); ret.has_value()) + result.bvalue = ret.value(); + else + { + printFailure(0); + return {}; + } break; case SPropertyElementData::Type::STRING: FAIL_IF_ATTRIBUTE_NULL(0u) { auto len = strlen(desiredAttributes[0]); - auto* tmp = (char*)_NBL_ALIGNED_MALLOC(len + 1u, 64u); - strcpy(tmp, desiredAttributes[0]); tmp[len] = 0; + auto* tmp = (char*)_NBL_ALIGNED_MALLOC(len+1u,64u); + strcpy(tmp,desiredAttributes[0]); tmp[len]=0; result.svalue = tmp; } break; case SPropertyElementData::Type::RGB: FAIL_IF_ATTRIBUTE_NULL(0u) - result.vvalue = retrieveVector(desiredAttributes[0], success); + result.vvalue = retrieveVector(desiredAttributes[0],logger); + if (core::isnan(result.vvalue[0])) + { + printFailure(0); + return {}; + } break; case SPropertyElementData::Type::SRGB: FAIL_IF_ATTRIBUTE_NULL(0u) { - bool tryVec = true; - result.vvalue = retrieveVector(desiredAttributes[0], tryVec); - if (!tryVec) - result.vvalue = retrieveHex(desiredAttributes[0], success); + result.vvalue = retrieveVector(desiredAttributes[0],logger); + if (core::isnan(result.vvalue[0])) + { + result.vvalue = retrieveHex(desiredAttributes[0],logger); + if (core::isnan(result.vvalue[0])) + { + printFailure(0); + return {}; + } + } for (auto i=0; i<3u; i++) result.vvalue[i] = 
core::srgb2lin(result.vvalue[i]); result.type = SPropertyElementData::Type::RGB; // now its an RGB value } break; case SPropertyElementData::Type::VECTOR: - result.vvalue.set(core::nan(),core::nan(),core::nan(),core::nan()); + result.vvalue = hlsl::float32_t4(core::nan()); for (auto i=0u; i<4u; i++) { if (desiredAttributes[i]) @@ -144,31 +126,46 @@ std::pair CPropertyElementManager::createPropertyDa // once a component is missing, the rest need to be missing too for (auto j=i+1; j<4u; j++) if (desiredAttributes[j]) - success = false; + { + printFailure(0); + return {}; + } break; } } break; case SPropertyElementData::Type::POINT: - result.vvalue.set(0.f, 0.f, 0.f); + result.vvalue = hlsl::float32_t4(0.f,0.f,0.f,core::nan()); for (auto i=0u; i<3u; i++) { if (desiredAttributes[i]) result.vvalue[i] = atof(desiredAttributes[i]); else { - success = false; - break; + printFailure(0); + return {}; } } break; case SPropertyElementData::Type::SPECTRUM: - assert(!desiredAttributes[1]); // no intent, TODO - assert(!desiredAttributes[2]); // does not come from a file + if (desiredAttributes[1]||desiredAttributes[2]) { - std::string data(desiredAttributes[0]); - assert(data.find(':')==std::string::npos); // no hand specified wavelengths - result.vvalue = retrieveVector(data,success); // TODO: convert between mitsuba spectral buckets and Rec. 709 + invalidXMLFileStructure(logger,"Spectrum intent and loading from file unsupported!"); + return {}; + } + { + std::string_view data(desiredAttributes[0]); + if (data.find(':')!=std::string::npos) + { + invalidXMLFileStructure(logger,"Manually specified wavelengths for spectral curve knots are unsupported!"); + return {}; + } + result.vvalue = retrieveVector(data,logger); // TODO: convert between mitsuba spectral buckets and Rec. 
709 + if (core::isnan(result.vvalue[0])) + { + printFailure(0); + return {}; + } } break; case SPropertyElementData::Type::BLACKBODY: @@ -176,66 +173,78 @@ std::pair CPropertyElementManager::createPropertyDa break; case SPropertyElementData::Type::MATRIX: FAIL_IF_ATTRIBUTE_NULL(0u) - result.mvalue = retrieveMatrix(desiredAttributes[0],success); + result.mvalue = retrieveMatrix(desiredAttributes[0],logger); + if (core::isnan(result.mvalue[0][0])) + { + printFailure(0); + return {}; + } break; case SPropertyElementData::Type::TRANSLATE: - result.vvalue.set(0.f, 0.f, 0.f); + result.mvalue = hlsl::float32_t4x4(1.f); + // we're a bit more lax about what items we need present for (auto i=0u; i<3u; i++) if (desiredAttributes[i]) - result.vvalue[i] = atof(desiredAttributes[i]); - { - core::matrix3x4SIMD m; - m.setTranslation(result.vvalue); - result.mvalue = core::matrix4SIMD(m); - } + result.mvalue[i][3] = atof(desiredAttributes[i]); break; case SPropertyElementData::Type::ROTATE: FAIL_IF_ATTRIBUTE_NULL(0u) // have to have an angle - result.vvalue.set(0.f, 0.f, 0.f); - for (auto i=0u; i<3u; i++) - if (desiredAttributes[i+1]) - result.vvalue[i] = atof(desiredAttributes[i+1]); - if ((core::vectorSIMDf(0.f) == result.vvalue).all()) + result.mvalue = hlsl::float32_t4x4(1.f); { - success = false; - break; - } - result.vvalue = core::normalize(result.vvalue); - { - core::matrix3x4SIMD m; - m.setRotation(core::quaternion::fromAngleAxis(core::radians(atof(desiredAttributes[0])),result.vvalue)); - result.mvalue = core::matrix4SIMD(m); + auto axis = hlsl::float32_t3(0.f); + // again some laxness + for (auto i=0u; i<3u; i++) + if (desiredAttributes[i+1]) + axis[i] = atof(desiredAttributes[i+1]); + axis = hlsl::normalize(axis); + if (core::isnan(axis.x)) + { + invalidXMLFileStructure(logger,"Invalid element, name:\'"+result.name+"\' Axis can't be (0,0,0)"); + return {}; + } + using namespace nbl::hlsl::math;//::linalg; + result.mvalue = 
linalg::promote_affine<4,4>(linalg::rotation_mat(hlsl::radians(atof(desiredAttributes[0])),axis)); } break; case SPropertyElementData::Type::SCALE: - result.vvalue.set(1.f, 1.f, 1.f); + result.mvalue = hlsl::float32_t4x4(1.f); if (desiredAttributes[0]) { - float uniformScale = atof(desiredAttributes[0]); - result.vvalue.set(uniformScale, uniformScale, uniformScale); + const float uniformScale = atof(desiredAttributes[0]); + for (auto i=0u; i<3u; i++) + result.mvalue[i][i] = uniformScale; } else - for (auto i=0u; i<3u; i++) - if (desiredAttributes[i+1u]) - result.vvalue[i] = atof(desiredAttributes[i+1u]); { - core::matrix3x4SIMD m; - m.setScale(result.vvalue); - result.mvalue = core::matrix4SIMD(m); + for (auto i=0u; i<3u; i++) + if (desiredAttributes[i+1u]) + result.mvalue[i][i] = atof(desiredAttributes[i+1u]); } break; case SPropertyElementData::Type::LOOKAT: FAIL_IF_ATTRIBUTE_NULL(0u) FAIL_IF_ATTRIBUTE_NULL(1u) + result.mvalue = hlsl::float32_t4x4(1.f); { - core::vectorSIMDf origin,target,up; - origin = retrieveVector(desiredAttributes[0u], success); - target = retrieveVector(desiredAttributes[1u], success); + const hlsl::float32_t3 origin = retrieveVector(desiredAttributes[0u],logger).xyz; + if (core::isnan(origin.x)) + { + printFailure(0); + return {}; + } + const hlsl::float32_t3 target = retrieveVector(desiredAttributes[1u],logger).xyz; + if (core::isnan(target.x)) + { + printFailure(1); + return {}; + } + auto up = hlsl::float32_t3(core::nan()); if (desiredAttributes[2u]) - up = retrieveVector(desiredAttributes[2u],success); - else + up = retrieveVector(desiredAttributes[2u],logger).xyz; + if (core::isnan(up.x)) { - auto viewDirection = target - origin; + up = hlsl::float32_t3(0.f); + const auto viewDirection = target - origin; float maxDot = viewDirection[0]; uint32_t index = 0u; for (auto i = 1u; i < 3u; i++) @@ -247,74 +256,65 @@ std::pair CPropertyElementManager::createPropertyDa up[index] = 1.f; } // mitsuba understands look-at and right-handed camera 
little bit differently than I do - core::matrix4SIMD(core::matrix3x4SIMD::buildCameraLookAtMatrixLH(origin,target,up)).getInverseTransform(result.mvalue); + const auto actualLookAt = reinterpret_cast(glm::lookAtLH(origin,target,up)); + result.mvalue = hlsl::inverse(actualLookAt); } break; default: - success = false; - break; + invalidXMLFileStructure(logger,"Unsupported element type, name:\'"+result.name+"\'"); + return {}; } + #undef FAIL_IF_ATTRIBUTE_NULL - _NBL_DEBUG_BREAK_IF(!success); - if (success) - return std::make_pair(true, std::move(result)); - - ParserLog::invalidXMLFileStructure("invalid element, name:\'" + result.name + "\'"); // in the future print values - return std::make_pair(false, SNamedPropertyElement()); + return result; } -bool CPropertyElementManager::retrieveBooleanValue(const std::string& _data, bool& success) +std::optional CPropertyElementManager::retrieveBooleanValue(const std::string_view& _data, system::logger_opt_ptr logger) { - if (_data == "true") - { + if (_data=="true") return true; - } - else if (_data == "false") - { + else if (_data=="false") return false; - } else { - _NBL_DEBUG_BREAK_IF(true); - ParserLog::invalidXMLFileStructure("Invalid boolean specified."); - success = false; - return false; // so GCC doesn't moan + invalidXMLFileStructure(logger,"Invalid boolean specified."); + return {}; } } -core::matrix4SIMD CPropertyElementManager::retrieveMatrix(const std::string& _data, bool& success) +hlsl::float32_t4x4 CPropertyElementManager::retrieveMatrix(const std::string_view& _data, system::logger_opt_ptr logger) { - std::string str = _data; - std::replace(str.begin(), str.end(), ',', ' '); + std::string str(_data); + std::replace(str.begin(),str.end(),',',' '); - core::matrix4SIMD matrixData; + hlsl::float32_t4x4 matrixData; std::stringstream ss; ss << str; - for (auto i=0u; i<16u; i++) + for (auto r=0u; r<16u; r++) + for (auto c=0u; c<16u; c++) { float f = std::numeric_limits::quiet_NaN(); ss >> f; - if (isnan(f)) + if 
(core::isnan(f)) { - _NBL_DEBUG_BREAK_IF(true); - ParserLog::invalidXMLFileStructure("Invalid matrix specified."); - success = false; - return core::matrix4SIMD(); + invalidXMLFileStructure(logger,"Invalid matrix specified."); + matrixData[0][0] = f; + return matrixData; } - matrixData.pointer()[i] = f; + matrixData[r][c] = f; } return matrixData; } -core::vectorSIMDf CPropertyElementManager::retrieveVector(const std::string& _data, bool& success) +hlsl::float32_t4 CPropertyElementManager::retrieveVector(const std::string_view& _data, system::logger_opt_ptr logger) { - std::string str = _data; + std::string str(_data); std::replace(str.begin(), str.end(), ',', ' '); - float vectorData[4]; + hlsl::float32_t4 retval; std::stringstream ss; ss << str; @@ -323,55 +323,49 @@ core::vectorSIMDf CPropertyElementManager::retrieveVector(const std::string& _da float f = std::numeric_limits::quiet_NaN(); ss >> f; - vectorData[i] = f; + retval[i] = f; if (isnan(f)) { - if (i == 1) + if (i==1) // second not present { - vectorData[2] = vectorData[1] = vectorData[0]; - vectorData[3] = 0.0f; - break; + // make monochrome RGB or scalar XYZ + retval[2] = retval[1] = retval[0]; + retval[3] = 0.0f; } - else if (i == 3) + else if (i==3) // last not present { - vectorData[3] = 0.0f; - break; + // allow last coordinate to be 0 + retval[3] = 0.0f; } - success = false; - return core::vectorSIMDf(); + return retval; } } - return core::vectorSIMDf(vectorData); + return retval; } -core::vectorSIMDf CPropertyElementManager::retrieveHex(const std::string& _data, bool& success) +hlsl::float32_t4 CPropertyElementManager::retrieveHex(const std::string_view& _data, system::logger_opt_ptr logger) { - core::vectorSIMDf zero; auto ptr = _data.begin(); + const auto invalid = hlsl::float32_t4(std::numeric_limits::quiet_NaN()); + // not a hex if (_data.size()!=7u || *ptr!='#') - { - success = false; - return zero; - } + return invalid; - core::vectorSIMDf retval(0.f, 0.f, 0.f, 255.f); - for (auto i = 0; i 
< 3; i++) - for (auto j = 4; j >=0;j-=4) + hlsl::float32_t4 retval(0.f, 0.f, 0.f, 255.f); + for (auto i=0; i<3; i++) + for (auto j=4; j>=0;j-=4) { char c = *(++ptr); if (!isxdigit(c)) - { - success = false; - return zero; - } + return invalid; + // case insensitiveness int intval = (c >= 'A') ? (c - 'A' + 10) : (c - '0'); - retval[i] += float(intval < Date: Tue, 14 Oct 2025 17:14:54 +0200 Subject: [PATCH 062/472] update examples_tests submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 29f64a283b..59a996222d 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 29f64a283b2ccaef76f169b997a47647631d84c9 +Subproject commit 59a996222da2232348a1a9e31c4484161b340fb0 From 99c5f706eeac2ec1152e5e754f9abd112ec774c5 Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 14 Oct 2025 17:39:25 +0200 Subject: [PATCH 063/472] get rid of `CElementFactory`, rewrite ParserUtil to be threadsafe and DLL Delay Load compatible --- .../nbl/ext/MitsubaLoader/CElementFactory.h | 41 --- include/nbl/ext/MitsubaLoader/IElement.h | 18 +- include/nbl/ext/MitsubaLoader/ParserUtil.h | 164 ++++++----- .../nbl/ext/MitsubaLoader/PropertyElement.h | 2 +- src/nbl/ext/MitsubaLoader/CElementFactory.cpp | 80 ------ src/nbl/ext/MitsubaLoader/CMakeLists.txt | 34 ++- src/nbl/ext/MitsubaLoader/ParserUtil.cpp | 262 ++++++++++++------ 7 files changed, 288 insertions(+), 313 deletions(-) delete mode 100644 include/nbl/ext/MitsubaLoader/CElementFactory.h delete mode 100644 src/nbl/ext/MitsubaLoader/CElementFactory.cpp diff --git a/include/nbl/ext/MitsubaLoader/CElementFactory.h b/include/nbl/ext/MitsubaLoader/CElementFactory.h deleted file mode 100644 index 7543504b1d..0000000000 --- a/include/nbl/ext/MitsubaLoader/CElementFactory.h +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". 
-// For conditions of distribution and use, see copyright notice in nabla.h - -#ifndef __I_ELEMENT_FACTORY_H_INCLUDED__ -#define __I_ELEMENT_FACTORY_H_INCLUDED__ - -#include "nbl/ext/MitsubaLoader/CElementSensor.h" -#include "nbl/ext/MitsubaLoader/CElementIntegrator.h" -#include "nbl/ext/MitsubaLoader/CElementShape.h" - -namespace nbl -{ -namespace ext -{ -namespace MitsubaLoader -{ - -class ParserManager; - -class CElementFactory -{ - public: - using return_type = std::pair; - using element_creation_func = return_type(*)(const char**, ParserManager*); - const static core::unordered_map, core::CaseInsensitiveHash, core::CaseInsensitiveEquals> createElementTable; - - //constructs certain elements based on element's name and its attributes - template - static return_type createElement(const char** _atts, ParserManager* _util); - // - static return_type processAlias(const char** _atts, ParserManager* _util); - static return_type processRef(const char** _atts, ParserManager* _util); -}; - - -} -} -} - -#endif \ No newline at end of file diff --git a/include/nbl/ext/MitsubaLoader/IElement.h b/include/nbl/ext/MitsubaLoader/IElement.h index 8f6fa24ea7..421e44f0f1 100644 --- a/include/nbl/ext/MitsubaLoader/IElement.h +++ b/include/nbl/ext/MitsubaLoader/IElement.h @@ -1,22 +1,15 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_EXT_MISTUBA_LOADER_I_ELEMENT_H_INCLUDED_ +#define _NBL_EXT_MISTUBA_LOADER_I_ELEMENT_H_INCLUDED_ -#ifndef __I_ELEMENT_H_INCLUDED__ -#define __I_ELEMENT_H_INCLUDED__ #include "nbl/asset/interchange/IAssetLoader.h" #include "nbl/ext/MitsubaLoader/PropertyElement.h" - -namespace nbl -{ -namespace ext +namespace nbl::ext::MitsubaLoader { -namespace MitsubaLoader -{ - - class CMitsubaMetadata; class IElement @@ -114,7 +107,4 @@ class IElement }; } -} -} - #endif \ No newline at end of file diff --git a/include/nbl/ext/MitsubaLoader/ParserUtil.h b/include/nbl/ext/MitsubaLoader/ParserUtil.h index 575fa037b2..c0e8aba0cd 100644 --- a/include/nbl/ext/MitsubaLoader/ParserUtil.h +++ b/include/nbl/ext/MitsubaLoader/ParserUtil.h @@ -7,7 +7,8 @@ #include "nbl/asset/interchange/IAssetLoader.h" -//#include "nbl/ext/MitsubaLoader/CElementFactory.h" +#include "nbl/ext/MitsubaLoader/PropertyElement.h" +#include "nbl/ext/MitsubaLoader/CElementShape.h" #include "nbl/ext/MitsubaLoader/CMitsubaMetadata.h" #include "expat/lib/expat.h" @@ -36,89 +37,112 @@ class ElementPool // similar to : public std::tuple...> }; //struct, which will be passed to expat handlers as user data (first argument) see: XML_StartElementHandler or XML_EndElementHandler in expat.h -class ParserManager +class ParserManager final { - protected: - // TODO: need per-file/per-parse contexts and per-load (one shapegroup, one metadata, one stack, etc. - basically the members of `ParserManager` now) - struct Context + public: + //! 
Constructor + ParserManager(); + + // + static void elementHandlerStart(void* _data, const char* _el, const char** _atts); + static void elementHandlerEnd(void* _data, const char* _el); + + struct Params + { + system::logger_opt_ptr logger; + // for opening included XML files + system::ISystem* system; + asset::IAssetLoader::IAssetLoaderOverride* _override; + }; + struct Result + { + explicit inline operator bool() const {return bool(metadata);} + + // note that its shared between per-file contexts + core::smart_refctd_ptr metadata = nullptr; + // + core::vector > shapegroups = {}; + }; + Result parse(system::IFile* _file, const Params& _params) const; + + // Properties are simple XML nodes which are not `IElement` and neither children of an` IElement` + // If we match any ` propertyElements; + const CPropertyElementManager propertyElementManager; + + private: + struct SNamedElement { - /*prints this message: - Mitsuba loader error: - Invalid .xml file structure: message */ - void invalidXMLFileStructure(const std::string& errorMessage) const; + IElement* element = nullptr; + core::string name = {}; + }; + // the XMLs can include each other, so this stores the stuff across files + struct SessionContext + { + // prints this message: + // Mitsuba loader error: + // Invalid .xml file structure: message + inline void invalidXMLFileStructure(const std::string& errorMessage) const + { + ::nbl::ext::MitsubaLoader::invalidXMLFileStructure(params->logger,errorMessage); + } + // meant for parsing one file in an include chain + bool parse(system::IFile* _file); + Result* const result; + const Params* const params; + const ParserManager* const manager; + // + uint32_t sceneDeclCount = 0; + // TODO: This leaks memory all over the place because destructors are not ran! 
+ ElementPool objects = {}; + // aliases and names (in Mitsbua XML you can give nodes names and `ref` them) + core::unordered_map handles = {}; + // stack of currently processed elements, each element of index N is parent of the element of index N+1 + // the scene element is a parent of all elements of index 0 + core::stack elements = {}; + }; + // This is for a single XML File + struct XMLContext + { // inline void killParseWithError(const std::string& message) const { - invalidXMLFileStructure(message); + session->invalidXMLFileStructure(message); XML_StopParser(parser,false); } + void parseElement(const char* _el, const char** _atts); + void onEnd(const char* _el); - system::path currentXMLDir; + SessionContext* const session; // - ParserManager* manager; - system::logger_opt_ptr logger; + const system::path currentXMLDir; // XML_Parser parser; }; - - public: - //! Constructor - inline ParserManager(system::ISystem* _system, asset::IAssetLoader::IAssetLoaderOverride* _override) : - propertyElements({ - "float", "string", "boolean", "integer", - "rgb", "srgb", "spectrum", "blackbody", - "point", "vector", - "matrix", "rotate", "translate", "scale", "lookat" - }), m_system(_system), m_override(_override), m_metadata(core::make_smart_refctd_ptr()) {} - - // - static void elementHandlerStart(void* _data, const char* _el, const char** _atts); - static void elementHandlerEnd(void* _data, const char* _el); - - bool parse(system::IFile* _file, const system::logger_opt_ptr& _logger); - - void parseElement(const Context& ctx, const char* _el, const char** _atts); - - void onEnd(const Context& ctx, const char* _el); - -#if 0 - // - core::vector > shapegroups; -#endif - // note that its shared between per-file contexts - core::smart_refctd_ptr m_metadata; - - private: - // - void processProperty(const Context& ctx, const char* _el, const char** _atts); - - const core::unordered_set propertyElements; - // TODO: re-architect this and move into context so the PArserManager can 
be persistent - system::ISystem* m_system; - asset::IAssetLoader::IAssetLoaderOverride* m_override; + + struct SElementCreator + { + // we still push nullptr (failed creation) onto the stack, we only stop parse on catastrphic failure later on if a use of the element pops up + // this is why we don't need XMLCOntext for `killParseWithError` + using func_t = SNamedElement(*)(const char**/*attributes*/,SessionContext*); + func_t create; + bool retvalGoesOnStack; + }; + const core::unordered_map createElementTable; // - uint32_t m_sceneDeclCount = 0; - // TODO: This leaks memory all over the place because destructors are not ran! - ElementPool objects; - // aliases and names - core::unordered_map handles; - /*stack of currently processed elements - each element of index N is parent of the element of index N+1 - the scene element is a parent of all elements of index 0 */ - core::stack > elements; - - friend class CElementFactory; + static SNamedElement processAlias(const char** _atts, SessionContext* ctx); + static SNamedElement processRef(const char** _atts, SessionContext* ctx); }; } diff --git a/include/nbl/ext/MitsubaLoader/PropertyElement.h b/include/nbl/ext/MitsubaLoader/PropertyElement.h index f5a6e9266b..4453c59ad6 100644 --- a/include/nbl/ext/MitsubaLoader/PropertyElement.h +++ b/include/nbl/ext/MitsubaLoader/PropertyElement.h @@ -364,7 +364,7 @@ template<> const SPropertyElementData::get_type_t StringToType; diff --git a/src/nbl/ext/MitsubaLoader/CElementFactory.cpp b/src/nbl/ext/MitsubaLoader/CElementFactory.cpp deleted file mode 100644 index c7690089cd..0000000000 --- a/src/nbl/ext/MitsubaLoader/CElementFactory.cpp +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". 
-// For conditions of distribution and use, see copyright notice in nabla.h - -#include "nbl/ext/MitsubaLoader/CElementFactory.h" - -#include "nbl/ext/MitsubaLoader/ParserUtil.h" - -namespace nbl -{ -namespace ext -{ -namespace MitsubaLoader -{ - - -CElementFactory::return_type CElementFactory::processAlias(const char** _atts, ParserManager* _util) -{ - const char* id = nullptr; - const char* as = nullptr; - std::string name; - if (IElement::areAttributesInvalid(_atts, 4u)) - return CElementFactory::return_type(nullptr,std::move(name)); - - while (*_atts) - { - if (core::strcmpi(_atts[0], "id")==0) - id = _atts[1]; - else if (core::strcmpi(_atts[0], "as")==0) - as = _atts[1]; - else if (core::strcmpi(_atts[0], "name")==0) - name = _atts[1]; - _atts += 2; - } - - if (!id || !as) - return CElementFactory::return_type(nullptr,std::move(name)); - - auto* original = _util->handles[id]; - _util->handles[as] = original; - return CElementFactory::return_type(original,std::move(name)); -} - -CElementFactory::return_type CElementFactory::processRef(const char** _atts, ParserManager* _util) -{ - const char* id; - std::string name; - if (!IElement::getIDAndName(id,name,_atts)) - { - os::Printer::log("[ERROR] Malformed `` element!", ELL_ERROR); - return CElementFactory::return_type(nullptr, std::move(name)); - } - auto* original = _util->handles[id]; - if (!original) - os::Printer::log(std::string("[ERROR] Used a `` element but referenced element not defined in preceeding XML!", ELL_ERROR); - return CElementFactory::return_type(original, std::move(name)); -} - - -const core::unordered_map, core::CaseInsensitiveHash, core::CaseInsensitiveEquals> CElementFactory::createElementTable = -{ - {"integrator", {CElementFactory::createElement,true}}, - {"sensor", {CElementFactory::createElement,true}}, - {"film", {CElementFactory::createElement,true}}, - {"rfilter", {CElementFactory::createElement,true}}, - {"sampler", {CElementFactory::createElement,true}}, - {"shape", 
{CElementFactory::createElement,true}}, - {"transform", {CElementFactory::createElement,true}}, - //{"animation", {CElementFactory::createElement,true}}, - {"bsdf", {CElementFactory::createElement,true}}, - {"texture", {CElementFactory::createElement,true}}, - {"emitter", {CElementFactory::createElement,true}}, - {"emissionprofile", {CElementFactory::createElement,true}}, - {"alias", {CElementFactory::processAlias,true}}, - {"ref", {CElementFactory::processRef,true}} -}; - -} -} -} \ No newline at end of file diff --git a/src/nbl/ext/MitsubaLoader/CMakeLists.txt b/src/nbl/ext/MitsubaLoader/CMakeLists.txt index e4fc746049..d4c361f84d 100644 --- a/src/nbl/ext/MitsubaLoader/CMakeLists.txt +++ b/src/nbl/ext/MitsubaLoader/CMakeLists.txt @@ -4,29 +4,28 @@ set(NBL_EXT_INTERNAL_INCLUDE_DIR "${NBL_ROOT_PATH}/include/nbl/ext/MitsubaLoader set(NBL_EXT_MITSUBA_LOADER_H -# ${NBL_EXT_INTERNAL_INCLUDE_DIR}/PropertyElement.h -# ${NBL_EXT_INTERNAL_INCLUDE_DIR}/IElement.h -# ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CMitsubaMetadata.h -# ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CElementIntegrator.h -# ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CElementSensor.h -# ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CElementFilm.h -# ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CElementRFilter.h -# ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CElementSampler.h -# ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CElementTransform.h -# ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CElementShape.h -# ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CElementBSDF.h -# ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CElementTexture.h -# ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CElementEmitter.h -# ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CElementFactory.h -# ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CMitsubaSerializedMetadata.h -# ${NBL_EXT_INTERNAL_INCLUDE_DIR}/ParserUtil.h + ${NBL_EXT_INTERNAL_INCLUDE_DIR}/PropertyElement.h + ${NBL_EXT_INTERNAL_INCLUDE_DIR}/IElement.h + ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CMitsubaMetadata.h + ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CElementIntegrator.h + ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CElementSensor.h + 
${NBL_EXT_INTERNAL_INCLUDE_DIR}/CElementFilm.h + ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CElementRFilter.h + ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CElementSampler.h + ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CElementTransform.h + ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CElementShape.h + ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CElementBSDF.h + ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CElementTexture.h + ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CElementEmitter.h + ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CMitsubaSerializedMetadata.h + ${NBL_EXT_INTERNAL_INCLUDE_DIR}/ParserUtil.h ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CSerializedLoader.h ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CMitsubaLoader.h # ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CMitsubaMaterialCompilerFrontend.h ) set(NBL_EXT_MITSUBA_LOADER_SRC -# PropertyElement.cpp + PropertyElement.cpp # CElementIntegrator.cpp # CElementSensor.cpp # CElementFilm.cpp @@ -37,7 +36,6 @@ set(NBL_EXT_MITSUBA_LOADER_SRC # CElementBSDF.cpp # CElementTexture.cpp # CElementEmitter.cpp -# CElementFactory.cpp ParserUtil.cpp CSerializedLoader.cpp CMitsubaLoader.cpp diff --git a/src/nbl/ext/MitsubaLoader/ParserUtil.cpp b/src/nbl/ext/MitsubaLoader/ParserUtil.cpp index 4d21ea1f77..3eedeb6ae6 100644 --- a/src/nbl/ext/MitsubaLoader/ParserUtil.cpp +++ b/src/nbl/ext/MitsubaLoader/ParserUtil.cpp @@ -4,7 +4,7 @@ #include "nbl/ext/MitsubaLoader/ParserUtil.h" -#include "nbl/ext/MitsubaLoader/CElementFactory.h" +// TODO: all of the element types #include "expat/lib/expat.h" @@ -15,42 +15,42 @@ namespace nbl::ext::MitsubaLoader { using namespace nbl::system; -void ParserManager::Context::invalidXMLFileStructure(const std::string& errorMessage) const +auto ParserManager::parse(IFile* _file, const Params& _params) const -> Result { - std::string message = "Mitsuba loader error - Invalid .xml file structure: \'" + errorMessage + '\''; - - logger.log(message,ILogger::E_LOG_LEVEL::ELL_ERROR); - _NBL_DEBUG_BREAK_IF(true); -} - -void ParserManager::elementHandlerStart(void* _data, const char* _el, const char** _atts) -{ - auto ctx = 
*reinterpret_cast(_data); - - ctx.manager->parseElement(ctx, _el, _atts); + Result result = { + .metadata = core::make_smart_refctd_ptr() + }; + SessionContext ctx = { + .result = &result, + .params = &_params, + .manager = this + }; + + if (!ctx.parse(_file)) + return {}; + + return result; } -void ParserManager::elementHandlerEnd(void* _data, const char* _el) +bool ParserManager::SessionContext::parse(IFile* _file) { - auto ctx = *reinterpret_cast(_data); - - ctx.manager->onEnd(ctx,_el); -} - + auto logger = params->logger; -bool ParserManager::parse(IFile* _file, const logger_opt_ptr& _logger) -{ XML_Parser parser = XML_ParserCreate(nullptr); if (!parser) { - _logger.log("Could not create XML Parser!",ILogger::E_LOG_LEVEL::ELL_ERROR); + logger.log("Could not create XML Parser!",ILogger::E_LOG_LEVEL::ELL_ERROR); return false; } XML_SetElementHandler(parser,elementHandlerStart,elementHandlerEnd); //from now data (instance of ParserData struct) will be visible to expat handlers - Context ctx = {_file->getFileName().parent_path()/"",this,_logger,parser}; + XMLContext ctx = { + .session = this, + .currentXMLDir = _file->getFileName().parent_path()/"", + .parser = parser + }; XML_SetUserData(parser,&ctx); const size_t size = _file->getSize(); @@ -62,7 +62,7 @@ bool ParserManager::parse(IFile* _file, const logger_opt_ptr& _logger) _file->read(success,const_cast(buff),0u,size); if (!success) { - _logger.log("Could read the file into XML Parser Buffer!",ILogger::E_LOG_LEVEL::ELL_ERROR); + logger.log("Could read the file into XML Parser Buffer!",ILogger::E_LOG_LEVEL::ELL_ERROR); return false; } } @@ -75,16 +75,16 @@ bool ParserManager::parse(IFile* _file, const logger_opt_ptr& _logger) { case XML_STATUS_ERROR: { - _logger.log("Parse status: XML_STATUS_ERROR",ILogger::E_LOG_LEVEL::ELL_ERROR); + logger.log("Parse status: XML_STATUS_ERROR",ILogger::E_LOG_LEVEL::ELL_ERROR); return false; } break; case XML_STATUS_OK: - _logger.log("Parse status: 
XML_STATUS_OK",ILogger::E_LOG_LEVEL::ELL_INFO); + logger.log("Parse status: XML_STATUS_OK",ILogger::E_LOG_LEVEL::ELL_INFO); break; case XML_STATUS_SUSPENDED: { - _logger.log("Parse status: XML_STATUS_SUSPENDED",ILogger::E_LOG_LEVEL::ELL_INFO); + logger.log("Parse status: XML_STATUS_SUSPENDED",ILogger::E_LOG_LEVEL::ELL_INFO); return false; } break; @@ -93,38 +93,46 @@ bool ParserManager::parse(IFile* _file, const logger_opt_ptr& _logger) return true; } -void ParserManager::parseElement(const Context& ctx, const char* _el, const char** _atts) +void ParserManager::elementHandlerStart(void* _data, const char* _el, const char** _atts) +{ + auto& ctx = *reinterpret_cast(_data); + + ctx.parseElement(_el,_atts); +} + +void ParserManager::XMLContext::parseElement(const char* _el, const char** _atts) { - if (core::strcmpi(_el, "scene")==0) + if (core::strcmpi(_el,"scene")==0) { auto count = 0u; while (_atts && _atts[count]) { count++; } if (count!=2u) { - ctx.killParseWithError("Wrong number of attributes for scene element"); + killParseWithError("Wrong number of attributes for scene element"); return; } if (core::strcmpi(_atts[0],"version")) { - ctx.invalidXMLFileStructure(std::string(_atts[0]) + " is not an attribute of scene element"); + session->invalidXMLFileStructure(core::string(_atts[0]) + " is not an attribute of scene element"); return; } else if (core::strcmpi(_atts[1],"0.5.0")) { - ctx.invalidXMLFileStructure("Version " + std::string(_atts[1]) + " is unsupported"); + session->invalidXMLFileStructure("Version " + core::string(_atts[1]) + " is unsupported"); return; } - m_sceneDeclCount++; + session->sceneDeclCount++; return; } - if (m_sceneDeclCount==0u) + if (session->sceneDeclCount==0u) { - ctx.killParseWithError("there is no scene element"); + killParseWithError("there is no scene element"); return; } + const ParserManager* manager = session->manager; if (core::strcmpi(_el,"include")==0) { core::smart_refctd_ptr file; @@ -136,7 +144,7 @@ void 
ParserManager::parseElement(const Context& ctx, const char* _el, const char auto flags = IFile::ECF_READ; if (i==0) flags |= IFile::ECF_MAPPABLE; - m_system->createFile(future,ctx.currentXMLDir/_atts[1],flags); + session->params->system->createFile(future,currentXMLDir/_atts[1],flags); if (future.wait()) future.acquire().move_into(file); if (file) @@ -145,114 +153,190 @@ void ParserManager::parseElement(const Context& ctx, const char* _el, const char return false; }; // first try as relative path, then as global - if (!tryOpen(ctx.currentXMLDir/_atts[1])) + if (!tryOpen(currentXMLDir/_atts[1])) if (!tryOpen(_atts[1])) { - ctx.invalidXMLFileStructure(std::string("Could not open include file: ")+_atts[1]); + session->invalidXMLFileStructure(std::string("Could not open include file: ")+_atts[1]); return; } - parse(file.get(),ctx.logger); + if (!session->parse(file.get())) + killParseWithError(core::string("Could not parse include file: ")+_atts[1]); return; } -#if 0 + + const auto& propertyElements = manager->propertyElements; if (propertyElements.find(_el)!=propertyElements.end()) { - processProperty(ctx, _el, _atts); + auto& elements = session->elements; + if (elements.empty()) + { + killParseWithError("cannot set a property with no element on the stack."); + return; + } + if (!elements.top().element) + { + session->invalidXMLFileStructure("cannot set property on element that failed to be created."); + return; + } + + auto optProperty = manager->propertyElementManager.createPropertyData(_el,_atts,session->params->logger); + if (!optProperty.has_value()) + { + session->invalidXMLFileStructure("could not create property data."); + return; + } + + elements.top().element->addProperty(std::move(optProperty.value())); return; } - const auto& _map = CElementFactory::createElementTable; + // TODO: don't have this table be a global + const auto& _map = manager->createElementTable; auto found = _map.find(_el); if (found==_map.end()) { - 
invalidXMLFileStructure(std::string("Could not process element ") + _el); - elements.push({nullptr,""}); + session->invalidXMLFileStructure(std::string("Could not process element ")+_el); + session->elements.push({nullptr,""}); return; } - auto el = found->second.first(_atts, this); - bool goesOnStack = found->second.second; - if (!goesOnStack) + auto created = found->second.create(_atts,session); + // we still push nullptr (failed creation) onto the stack, we only stop parse on catastrphic failure + if (!found->second.retvalGoesOnStack) return; - - - elements.push(el); - if (el.first && el.first->id.size()) - handles[el.first->id] = el.first; -#endif + if (created.element && created.name.size()) + session->handles[created.name] = created.element; + session->elements.push(std::move(created)); } -void ParserManager::processProperty(const Context& ctx, const char* _el, const char** _atts) +void ParserManager::elementHandlerEnd(void* _data, const char* _el) { - if (elements.empty()) - { - ctx.killParseWithError("cannot set a property with no element on the stack."); - return; - } - if (!elements.top().first) - { - ctx.invalidXMLFileStructure("cannot set property on element that failed to be created."); - return; - } - -#if 0 - auto optProperty = CPropertyElementManager::createPropertyData(_el,_atts); + auto& ctx = *reinterpret_cast(_data); - if (optProperty.first == false) - { - invalidXMLFileStructure("could not create property data."); - return; - } - - elements.top().first->addProperty(std::move(optProperty.second)); -#endif + ctx.onEnd(_el); } -void ParserManager::onEnd(const Context& ctx, const char* _el) +void ParserManager::XMLContext::onEnd(const char* _el) { + const auto& propertyElements = session->manager->propertyElements; if (propertyElements.find(_el)!=propertyElements.end()) return; - if (core::strcmpi(_el, "scene") == 0) + if (core::strcmpi(_el,"scene")==0) { - m_sceneDeclCount--; + session->sceneDeclCount--; return; } -#if 0 + + auto& elements = 
session->elements; if (elements.empty()) return; auto element = elements.top(); elements.pop(); - if (element.first && !element.first->onEndTag(m_override,m_metadata.get())) + auto& result = *session->result; + if (element.element && !element.element->onEndTag(session->params->_override,result.metadata.get())) { - killParseWithError(ctx,element.first->getLogName() + " could not onEndTag"); + killParseWithError(element.element->getLogName()+" could not onEndTag"); return; } if (!elements.empty()) { - IElement* parent = elements.top().first; - if (parent && !parent->processChildData(element.first, element.second)) + IElement* parent = elements.top().element; + if (parent && !parent->processChildData(element.element,element.name)) { - if (element.first) - killParseWithError(ctx,element.first->getLogName() + " could not processChildData with name: " + element.second); + if (element.element) + killParseWithError(element.element->getLogName()+" could not processChildData with name: "+element.name); else - killParseWithError(ctx,"Failed to add a nullptr child with name: " + element.second); + killParseWithError("Failed to add a nullptr child with name: "+element.name); } return; } - if (element.first && element.first->getType()==IElement::Type::SHAPE) + if (element.element && element.element->getType()==IElement::Type::SHAPE) { - auto shape = static_cast(element.first); + auto shape = static_cast(element.element); if (shape) - shapegroups.emplace_back(shape,std::move(element.second)); + result.shapegroups.emplace_back(shape,std::move(element.name)); } +} + +// +ParserManager::ParserManager() : propertyElements({ + "float", "string", "boolean", "integer", + "rgb", "srgb", "spectrum", "blackbody", + "point", "vector", + "matrix", "rotate", "translate", "scale", "lookat" +}), propertyElementManager(), createElementTable({ +#if 0 // TODO + {"integrator", {CElementFactory::createElement,.retvalGoesOnStack=true}}, + {"sensor", 
{CElementFactory::createElement,.retvalGoesOnStack=true}}, + {"film", {CElementFactory::createElement,.retvalGoesOnStack=true}}, + {"rfilter", {CElementFactory::createElement,.retvalGoesOnStack=true}}, + {"sampler", {CElementFactory::createElement,.retvalGoesOnStack=true}}, + {"shape", {CElementFactory::createElement,.retvalGoesOnStack=true}}, + {"transform", {CElementFactory::createElement,.retvalGoesOnStack=true}}, + //{"animation", {CElementFactory::createElement,.retvalGoesOnStack=true}}, + {"bsdf", {CElementFactory::createElement,.retvalGoesOnStack=true}}, + {"texture", {CElementFactory::createElement,.retvalGoesOnStack=true}}, + {"emitter", {CElementFactory::createElement,.retvalGoesOnStack=true}}, + {"emissionprofile", {CElementFactory::createElement,.retvalGoesOnStack=true}}, #endif + {"alias", {.create=processAlias,.retvalGoesOnStack=true}}, + {"ref", {.create=processRef,.retvalGoesOnStack=true}} +}){} + +auto ParserManager::processAlias(const char** _atts, SessionContext* ctx) -> SElementCreator +{ + const char* id = nullptr; + const char* as = nullptr; + if (IElement::areAttributesInvalid(_atts,4u)) + { + ctx->invalidXMLFileStructure("Invalid attributes for "); + return {}; + } + + core::string name; + while (*_atts) + { + if (core::strcmpi(_atts[0], "id")==0) + id = _atts[1]; + else if (core::strcmpi(_atts[0], "as")==0) + as = _atts[1]; + else if (core::strcmpi(_atts[0], "name")==0) + name = _atts[1]; + _atts += 2; + } + // not finding the alias doesn't kill XML parse + if (!id || !as) + { + ctx->invalidXMLFileStructure("Alias ID and what we're aliasing is not found"); + return {nullptr,std::move(name)}; + } + + auto& handles = ctx->handles; + auto* original = handles[id]; + handles[as] = original; + return {original,std::move(name)}; } +auto ParserManager::processRef(const char** _atts, SessionContext* ctx) -> SElementCreator +{ + const char* id; + std::string name; + if (!IElement::getIDAndName(id,name,_atts)) + { + 
ctx->invalidXMLFileStructure("Malformed `` element!"); + return {nullptr,std::move(name)}; + } + + auto* original = ctx->handles[id]; + if (!original) + ctx->invalidXMLFileStructure("Used a `` element but referenced element not defined in preceeding XML!"); + return {original, std::move(name)}; +} } \ No newline at end of file From 3446e19b94a7fe2b3e1b102fcba271c6c2285a2b Mon Sep 17 00:00:00 2001 From: devsh Date: Wed, 15 Oct 2025 09:09:55 +0200 Subject: [PATCH 064/472] add good header guards --- include/nbl/ext/MitsubaLoader/CElementBSDF.h | 18 ++---- .../MitsubaLoader/CElementEmissionProfile.h | 14 +---- .../nbl/ext/MitsubaLoader/CElementEmitter.h | 24 +++---- include/nbl/ext/MitsubaLoader/CElementFilm.h | 16 ++--- .../ext/MitsubaLoader/CElementIntegrator.h | 15 ++--- .../nbl/ext/MitsubaLoader/CElementRFilter.h | 17 ++--- .../nbl/ext/MitsubaLoader/CElementSampler.h | 15 ++--- .../nbl/ext/MitsubaLoader/CElementSensor.h | 13 +--- include/nbl/ext/MitsubaLoader/CElementShape.h | 15 ++--- .../nbl/ext/MitsubaLoader/CElementTexture.h | 18 ++---- .../nbl/ext/MitsubaLoader/CElementTransform.h | 15 ++--- .../nbl/ext/MitsubaLoader/CMitsubaLoader.h | 6 +- include/nbl/ext/MitsubaLoader/IElement.h | 1 + include/nbl/ext/MitsubaLoader/ParserUtil.h | 14 ++--- .../nbl/ext/MitsubaLoader/PropertyElement.h | 28 ++++----- src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp | 62 ++++++++----------- src/nbl/ext/MitsubaLoader/ParserUtil.cpp | 14 +++-- 17 files changed, 108 insertions(+), 197 deletions(-) diff --git a/include/nbl/ext/MitsubaLoader/CElementBSDF.h b/include/nbl/ext/MitsubaLoader/CElementBSDF.h index 2b424e9a20..b1e8183d4b 100644 --- a/include/nbl/ext/MitsubaLoader/CElementBSDF.h +++ b/include/nbl/ext/MitsubaLoader/CElementBSDF.h @@ -1,19 +1,15 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_EXT_MISTUBA_LOADER_C_ELEMENT_BSDF_H_INCLUDED_ +#define _NBL_EXT_MISTUBA_LOADER_C_ELEMENT_BSDF_H_INCLUDED_ -#ifndef __C_ELEMENT_BSDF_H_INCLUDED__ -#define __C_ELEMENT_BSDF_H_INCLUDED__ #include "nbl/ext/MitsubaLoader/CElementTexture.h" -namespace nbl -{ -namespace ext -{ -namespace MitsubaLoader -{ +namespace nbl::ext::MitsubaLoader +{ class CElementBSDF : public IElement { @@ -422,9 +418,5 @@ class CElementBSDF : public IElement }; - -} } -} - #endif \ No newline at end of file diff --git a/include/nbl/ext/MitsubaLoader/CElementEmissionProfile.h b/include/nbl/ext/MitsubaLoader/CElementEmissionProfile.h index 972cf3915e..025b48f3d1 100644 --- a/include/nbl/ext/MitsubaLoader/CElementEmissionProfile.h +++ b/include/nbl/ext/MitsubaLoader/CElementEmissionProfile.h @@ -1,20 +1,15 @@ // Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_EXT_MISTUBA_LOADER_C_ELEMENT_EMISSION_PROFILE_H_INCLUDED_ +#define _NBL_EXT_MISTUBA_LOADER_C_ELEMENT_EMISSION_PROFILE_H_INCLUDED_ -#ifndef __C_ELEMENT_EMISSION_PROFILE_H_INCLUDED__ -#define __C_ELEMENT_EMISSION_PROFILE_H_INCLUDED__ -#include "vectorSIMD.h" #include "nbl/ext/MitsubaLoader/CElementTexture.h" #include "nbl/ext/MitsubaLoader/CElementTransform.h" -namespace nbl -{ -namespace ext -{ -namespace MitsubaLoader +namespace nbl::ext::MitsubaLoader { struct CElementEmissionProfile : public IElement { @@ -71,7 +66,4 @@ struct CElementEmissionProfile : public IElement { }; } -} -} - #endif \ No newline at end of file diff --git a/include/nbl/ext/MitsubaLoader/CElementEmitter.h b/include/nbl/ext/MitsubaLoader/CElementEmitter.h index 87afdc860d..27e12b3718 100644 --- a/include/nbl/ext/MitsubaLoader/CElementEmitter.h +++ b/include/nbl/ext/MitsubaLoader/CElementEmitter.h @@ -1,24 +1,19 @@ -// Copyright (C) 
2018-2020 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_EXT_MISTUBA_LOADER_C_ELEMENT_EMITTER_H_INCLUDED_ +#define _NBL_EXT_MISTUBA_LOADER_C_ELEMENT_EMITTER_H_INCLUDED_ -#ifndef __C_ELEMENT_EMITTER_H_INCLUDED__ -#define __C_ELEMENT_EMITTER_H_INCLUDED__ -#include - -#include "vectorSIMD.h" #include "nbl/ext/MitsubaLoader/CElementTexture.h" #include "nbl/ext/MitsubaLoader/CElementEmissionProfile.h" -namespace nbl -{ -namespace ext -{ -namespace MitsubaLoader +#include + + +namespace nbl::ext::MitsubaLoader { - class CElementEmitter : public IElement { public: @@ -302,10 +297,5 @@ class CElementEmitter : public IElement }; }; - - } -} -} - #endif \ No newline at end of file diff --git a/include/nbl/ext/MitsubaLoader/CElementFilm.h b/include/nbl/ext/MitsubaLoader/CElementFilm.h index afe929180c..9e9c9f08fd 100644 --- a/include/nbl/ext/MitsubaLoader/CElementFilm.h +++ b/include/nbl/ext/MitsubaLoader/CElementFilm.h @@ -1,21 +1,16 @@ // Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_EXT_MISTUBA_LOADER_C_ELEMENT_FILM_H_INCLUDED_ +#define _NBL_EXT_MISTUBA_LOADER_C_ELEMENT_FILM_H_INCLUDED_ -#ifndef __C_ELEMENT_FILM_H_INCLUDED__ -#define __C_ELEMENT_FILM_H_INCLUDED__ #include "nbl/macros.h" - #include "nbl/ext/MitsubaLoader/CElementRFilter.h" -namespace nbl -{ -namespace ext -{ -namespace MitsubaLoader -{ +namespace nbl::ext::MitsubaLoader +{ class CElementFilm : public IElement { @@ -149,7 +144,4 @@ class CElementFilm : public IElement } -} -} - #endif \ No newline at end of file diff --git a/include/nbl/ext/MitsubaLoader/CElementIntegrator.h b/include/nbl/ext/MitsubaLoader/CElementIntegrator.h index 4a171f717b..df7aeac3fd 100644 --- a/include/nbl/ext/MitsubaLoader/CElementIntegrator.h +++ b/include/nbl/ext/MitsubaLoader/CElementIntegrator.h @@ -1,17 +1,14 @@ // Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_EXT_MISTUBA_LOADER_C_ELEMENT_INTEGRATOR_H_INCLUDED_ +#define _NBL_EXT_MISTUBA_LOADER_C_ELEMENT_INTEGRATOR_H_INCLUDED_ -#ifndef __C_ELEMENT_INTEGRATOR_H_INCLUDED__ -#define __C_ELEMENT_INTEGRATOR_H_INCLUDED__ #include "nbl/ext/MitsubaLoader/IElement.h" -namespace nbl -{ -namespace ext -{ -namespace MitsubaLoader + +namespace nbl::ext::MitsubaLoader { @@ -334,9 +331,5 @@ class CElementIntegrator : public IElement }; - } -} -} - #endif \ No newline at end of file diff --git a/include/nbl/ext/MitsubaLoader/CElementRFilter.h b/include/nbl/ext/MitsubaLoader/CElementRFilter.h index aa2da6dee3..0905e40a2f 100644 --- a/include/nbl/ext/MitsubaLoader/CElementRFilter.h +++ b/include/nbl/ext/MitsubaLoader/CElementRFilter.h @@ -1,19 +1,15 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_EXT_MISTUBA_LOADER_C_ELEMENT_R_FILTER_H_INCLUDED_ +#define _NBL_EXT_MISTUBA_LOADER_C_ELEMENT_R_FILTER_H_INCLUDED_ -#ifndef __C_ELEMENT_R_FILTER_H_INCLUDED__ -#define __C_ELEMENT_R_FILTER_H_INCLUDED__ #include "nbl/ext/MitsubaLoader/PropertyElement.h" - #include "nbl/ext/MitsubaLoader/IElement.h" -namespace nbl -{ -namespace ext -{ -namespace MitsubaLoader + +namespace nbl::ext::MitsubaLoader { @@ -70,7 +66,4 @@ class CElementRFilter : public IElement } -} -} - #endif \ No newline at end of file diff --git a/include/nbl/ext/MitsubaLoader/CElementSampler.h b/include/nbl/ext/MitsubaLoader/CElementSampler.h index 621623770d..2df888cce6 100644 --- a/include/nbl/ext/MitsubaLoader/CElementSampler.h +++ b/include/nbl/ext/MitsubaLoader/CElementSampler.h @@ -1,19 +1,15 @@ // Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_EXT_MISTUBA_LOADER_C_ELEMENT_SAMPLER_H_INCLUDED_ +#define _NBL_EXT_MISTUBA_LOADER_C_ELEMENT_SAMPLER_H_INCLUDED_ -#ifndef __C_ELEMENT_SAMPLER_H_INCLUDED__ -#define __C_ELEMENT_SAMPLER_H_INCLUDED__ #include "nbl/ext/MitsubaLoader/IElement.h" -namespace nbl -{ -namespace ext -{ -namespace MitsubaLoader -{ +namespace nbl::ext::MitsubaLoader +{ class CGlobalMitsubaMetadata; class CElementSampler : public IElement @@ -50,7 +46,4 @@ class CElementSampler : public IElement } -} -} - #endif \ No newline at end of file diff --git a/include/nbl/ext/MitsubaLoader/CElementSensor.h b/include/nbl/ext/MitsubaLoader/CElementSensor.h index c8214dd64c..f8b69cc1c5 100644 --- a/include/nbl/ext/MitsubaLoader/CElementSensor.h +++ b/include/nbl/ext/MitsubaLoader/CElementSensor.h @@ -1,9 +1,9 @@ // Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_EXT_MISTUBA_LOADER_C_ELEMENT_SENSOR_H_INCLUDED_ +#define _NBL_EXT_MISTUBA_LOADER_C_ELEMENT_SENSOR_H_INCLUDED_ -#ifndef __C_ELEMENT_SENSOR_H_INCLUDED__ -#define __C_ELEMENT_SENSOR_H_INCLUDED__ #include "nbl/ext/MitsubaLoader/IElement.h" #include "nbl/ext/MitsubaLoader/CElementTransform.h" @@ -11,11 +11,7 @@ #include "nbl/ext/MitsubaLoader/CElementSampler.h" -namespace nbl -{ -namespace ext -{ -namespace MitsubaLoader +namespace nbl::ext::MitsubaLoader { @@ -226,7 +222,4 @@ class CElementSensor : public IElement } -} -} - #endif \ No newline at end of file diff --git a/include/nbl/ext/MitsubaLoader/CElementShape.h b/include/nbl/ext/MitsubaLoader/CElementShape.h index 205023afea..41e7fdbc1c 100644 --- a/include/nbl/ext/MitsubaLoader/CElementShape.h +++ b/include/nbl/ext/MitsubaLoader/CElementShape.h @@ -1,9 +1,9 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_EXT_MISTUBA_LOADER_C_ELEMENT_SHAPE_H_INCLUDED_ +#define _NBL_EXT_MISTUBA_LOADER_C_ELEMENT_SHAPE_H_INCLUDED_ -#ifndef __C_ELEMENT_SHAPE_H_INCLUDED__ -#define __C_ELEMENT_SHAPE_H_INCLUDED__ #include "nbl/ext/MitsubaLoader/IElement.h" #include "nbl/ext/MitsubaLoader/CElementTransform.h" @@ -11,11 +11,7 @@ #include "nbl/ext/MitsubaLoader/CElementEmitter.h" -namespace nbl -{ -namespace ext -{ -namespace MitsubaLoader +namespace nbl::ext::MitsubaLoader { @@ -281,7 +277,4 @@ class CElementShape : public IElement } -} -} - #endif \ No newline at end of file diff --git a/include/nbl/ext/MitsubaLoader/CElementTexture.h b/include/nbl/ext/MitsubaLoader/CElementTexture.h index 1f3dc3ad7a..3bcd92001a 100644 --- a/include/nbl/ext/MitsubaLoader/CElementTexture.h +++ b/include/nbl/ext/MitsubaLoader/CElementTexture.h @@ -1,20 +1,16 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_EXT_MISTUBA_LOADER_C_ELEMENT_TEXTURE_H_INCLUDED_ +#define _NBL_EXT_MISTUBA_LOADER_C_ELEMENT_TEXTURE_H_INCLUDED_ -#ifndef __C_ELEMENT_TEXTURE_H_INCLUDED__ -#define __C_ELEMENT_TEXTURE_H_INCLUDED__ #include "nbl/ext/MitsubaLoader/PropertyElement.h" #include "nbl/ext/MitsubaLoader/IElement.h" -namespace nbl -{ -namespace ext -{ -namespace MitsubaLoader -{ +namespace nbl::ext::MitsubaLoader +{ class CElementTexture : public IElement { @@ -256,9 +252,5 @@ class CElementTexture : public IElement }; - -} } -} - #endif \ No newline at end of file diff --git a/include/nbl/ext/MitsubaLoader/CElementTransform.h b/include/nbl/ext/MitsubaLoader/CElementTransform.h index d518f69e6c..c1ca0203ea 100644 --- a/include/nbl/ext/MitsubaLoader/CElementTransform.h +++ b/include/nbl/ext/MitsubaLoader/CElementTransform.h @@ -1,18 +1,14 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_EXT_MISTUBA_LOADER_C_ELEMENT_TRANSFORM_H_INCLUDED_ +#define _NBL_EXT_MISTUBA_LOADER_C_ELEMENT_TRANSFORM_H_INCLUDED_ -#ifndef __C_ELEMENT_TRANSFORM_H_INCLUDED__ -#define __C_ELEMENT_TRANSFORM_H_INCLUDED__ #include "nbl/ext/MitsubaLoader/IElement.h" -namespace nbl -{ -namespace ext -{ -namespace MitsubaLoader +namespace nbl::ext::MitsubaLoader { @@ -39,7 +35,4 @@ class CElementTransform : public IElement }; } -} -} - #endif \ No newline at end of file diff --git a/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h b/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h index f43b88c8a4..95b2f45c41 100644 --- a/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h +++ b/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h @@ -8,7 +8,7 @@ #include "nbl/asset/asset.h" #include "nbl/ext/MitsubaLoader/CSerializedLoader.h" -//#include "nbl/ext/MitsubaLoader/CMitsubaMetadata.h" +#include "nbl/ext/MitsubaLoader/ParserUtil.h" //#include "nbl/ext/MitsubaLoader/CElementShape.h" #include "nbl/ext/MitsubaLoader/SContext.h" @@ -51,6 +51,8 @@ using instance_data_t = nbl_glsl_ext_Mitsuba_Loader_instance_data_t; class CMitsubaLoader final : public asset::ISceneLoader { // friend class CMitsubaMaterialCompilerFrontend; + + const ParserManager m_parser; core::smart_refctd_ptr m_system; //! Destructor @@ -72,7 +74,7 @@ class CMitsubaLoader final : public asset::ISceneLoader #endif public: //! 
Constructor - inline CMitsubaLoader(core::smart_refctd_ptr&& _system) : m_system(std::move(_system)) {} + inline CMitsubaLoader(core::smart_refctd_ptr&& _system) : m_parser(), m_system(std::move(_system)) {} bool isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr logger=nullptr) const override; diff --git a/include/nbl/ext/MitsubaLoader/IElement.h b/include/nbl/ext/MitsubaLoader/IElement.h index 421e44f0f1..c3a4eb6e20 100644 --- a/include/nbl/ext/MitsubaLoader/IElement.h +++ b/include/nbl/ext/MitsubaLoader/IElement.h @@ -8,6 +8,7 @@ #include "nbl/asset/interchange/IAssetLoader.h" #include "nbl/ext/MitsubaLoader/PropertyElement.h" + namespace nbl::ext::MitsubaLoader { class CMitsubaMetadata; diff --git a/include/nbl/ext/MitsubaLoader/ParserUtil.h b/include/nbl/ext/MitsubaLoader/ParserUtil.h index c0e8aba0cd..a0dc8a9c27 100644 --- a/include/nbl/ext/MitsubaLoader/ParserUtil.h +++ b/include/nbl/ext/MitsubaLoader/ParserUtil.h @@ -7,14 +7,16 @@ #include "nbl/asset/interchange/IAssetLoader.h" +#include "nbl/ext/MitsubaLoader/CMitsubaMetadata.h" #include "nbl/ext/MitsubaLoader/PropertyElement.h" #include "nbl/ext/MitsubaLoader/CElementShape.h" -#include "nbl/ext/MitsubaLoader/CMitsubaMetadata.h" - -#include "expat/lib/expat.h" #include +// don't leak expat headers +struct XML_ParserStruct; +typedef struct XML_ParserStruct* XML_Parser; + namespace nbl::ext::MitsubaLoader { @@ -116,11 +118,7 @@ class ParserManager final struct XMLContext { // - inline void killParseWithError(const std::string& message) const - { - session->invalidXMLFileStructure(message); - XML_StopParser(parser,false); - } + void killParseWithError(const std::string& message) const; void parseElement(const char* _el, const char** _atts); void onEnd(const char* _el); diff --git a/include/nbl/ext/MitsubaLoader/PropertyElement.h b/include/nbl/ext/MitsubaLoader/PropertyElement.h index 4453c59ad6..8ecdce7fdd 100644 --- a/include/nbl/ext/MitsubaLoader/PropertyElement.h +++ 
b/include/nbl/ext/MitsubaLoader/PropertyElement.h @@ -334,33 +334,33 @@ template<> struct SPropertyElementData::get_type struct SPropertyElementData::get_type { using type = void; }; // TODO: rewrite rest to be less `::` verbose -template<> auto SPropertyElementData::getProperty() const -> const get_type_t& +template<> inline auto SPropertyElementData::getProperty() const -> const get_type_t& { return fvalue; } -template<> auto SPropertyElementData::getProperty() const -> const get_type_t& +template<> inline auto SPropertyElementData::getProperty() const -> const get_type_t& { return ivalue; } -template<> const SPropertyElementData::get_type_t& SPropertyElementData::getProperty() const +template<> inline auto SPropertyElementData::getProperty() const -> const get_type_t& { return bvalue; } -template<> const SPropertyElementData::get_type_t& SPropertyElementData::getProperty() const +template<> inline auto SPropertyElementData::getProperty() const -> const get_type_t& { return svalue; } -template<> const SPropertyElementData::get_type_t& SPropertyElementData::getProperty() const +template<> inline auto SPropertyElementData::getProperty() const -> const get_type_t& { return vvalue; } -template<> const SPropertyElementData::get_type_t& SPropertyElementData::getProperty() const +template<> inline auto SPropertyElementData::getProperty() const -> const get_type_t& { return vvalue; } -template<> const SPropertyElementData::get_type_t& SPropertyElementData::getProperty() const +template<> inline auto SPropertyElementData::getProperty() const -> const get_type_t& { return vvalue; } -template<> const SPropertyElementData::get_type_t& SPropertyElementData::getProperty() const +template<> inline auto SPropertyElementData::getProperty() const -> const get_type_t& { return vvalue; } -template<> const SPropertyElementData::get_type_t& SPropertyElementData::getProperty() const +template<> inline auto SPropertyElementData::getProperty() const -> const get_type_t& { return vvalue; 
} -template<> const SPropertyElementData::get_type_t& SPropertyElementData::getProperty() const +template<> inline auto SPropertyElementData::getProperty() const -> const get_type_t& { return mvalue; } -template<> const SPropertyElementData::get_type_t& SPropertyElementData::getProperty() const +template<> inline auto SPropertyElementData::getProperty() const -> const get_type_t& { return mvalue; } -template<> const SPropertyElementData::get_type_t& SPropertyElementData::getProperty() const +template<> inline auto SPropertyElementData::getProperty() const -> const get_type_t& { return mvalue; } -template<> const SPropertyElementData::get_type_t& SPropertyElementData::getProperty() const +template<> inline auto SPropertyElementData::getProperty() const -> const get_type_t& { return mvalue; } -template<> const SPropertyElementData::get_type_t& SPropertyElementData::getProperty() const +template<> inline auto SPropertyElementData::getProperty() const -> const get_type_t& { return mvalue; } diff --git a/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp b/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp index 6f75d0c110..df9d8c776e 100644 --- a/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp +++ b/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp @@ -135,22 +135,22 @@ static core::smart_refctd_ptr createFragmentShader #endif #if 0 -static core::smart_refctd_ptr createImageView(core::smart_refctd_ptr&& _img) // TODO: this should seriously be a utility somewhere +static core::smart_refctd_ptr createImageView(core::smart_refctd_ptr&& _img) // TODO: this should seriously be a utility somewhere { const auto& iparams = _img->getCreationParameters(); - asset::ICPUImageView::SCreationParams params; + ICPUImageView::SCreationParams params; params.format = iparams.format; params.subresourceRange.baseArrayLayer = 0u; params.subresourceRange.layerCount = iparams.arrayLayers; assert(params.subresourceRange.layerCount == 1u); params.subresourceRange.baseMipLevel = 0u; 
params.subresourceRange.levelCount = iparams.mipLevels; - params.viewType = asset::IImageView::ET_2D; - params.flags = static_cast::E_CREATE_FLAGS>(0); + params.viewType = IImageView::ET_2D; + params.flags = static_cast::E_CREATE_FLAGS>(0); params.image = std::move(_img); - return asset::ICPUImageView::create(std::move(params)); + return ICPUImageView::create(std::move(params)); } static core::smart_refctd_ptr createDerivMap(SContext& ctx, asset::ICPUImage* _heightMap, const ICPUSampler::SParams& _samplerParams, bool fromNormalMap) { @@ -296,36 +296,30 @@ bool CMitsubaLoader::isALoadableFileFormat(system::IFile* _file, const system::l return false; } -asset::SAssetBundle CMitsubaLoader::loadAsset(system::IFile* _file, const asset::IAssetLoader::SAssetLoadParams& _params, asset::IAssetLoader::IAssetLoaderOverride* _override, uint32_t _hierarchyLevel) +SAssetBundle CMitsubaLoader::loadAsset(system::IFile* _file, const IAssetLoader::SAssetLoadParams& _params, IAssetLoader::IAssetLoaderOverride* _override, uint32_t _hierarchyLevel) { - ParserManager parserManager(m_system.get(),_override); - if (!parserManager.parse(_file,_params.logger)) + auto result = m_parser.parse(_file,{.logger=_params.logger,.system=m_system.get(),._override=_override}); + if (!result) return {}; - //if (_params.loaderFlags&IAssetLoader::ELPF_LOAD_METADATA_ONLY) + auto scene = core::make_smart_refctd_ptr(); + if (_params.loaderFlags&IAssetLoader::ELPF_LOAD_METADATA_ONLY) { - auto emptyScene = core::make_smart_refctd_ptr(); - return SAssetBundle(std::move(parserManager.m_metadata),{std::move(emptyScene)}); + return SAssetBundle(std::move(result.metadata),{std::move(scene)}); } -#if 0 else { - // - auto currentDir = io::IFileSystem::getFileDir(_file->getFileName()) + "/"; +#if 0 SContext ctx( - m_assetMgr->getGeometryCreator(), - m_assetMgr->getMeshManipulator(), - asset::IAssetLoader::SAssetLoadContext{ - asset::IAssetLoader::SAssetLoadParams(_params.decryptionKeyLen, _params.decryptionKey, 
_params.cacheFlags, currentDir.c_str()), +// m_assetMgr->getGeometryCreator(), +// m_assetMgr->getMeshManipulator(), + IAssetLoader::SAssetLoadContext{ + IAssetLoader::SAssetLoadParams(_params.decryptionKeyLen,_params.decryptionKey,_params.cacheFlags,_params.logger,_file->getFileName().parent_path()), _file }, _override, parserManager.m_metadata.get() ); - if (!getBuiltinAsset(VERTEX_SHADER_CACHE_KEY, m_assetMgr)) - { - createAndCacheVertexShader(m_assetMgr, DUMMY_VERTEX_SHADER); - } core::map,std::pair> meshes; for (auto& shapepair : parserManager.shapegroups) @@ -356,7 +350,9 @@ asset::SAssetBundle CMitsubaLoader::loadAsset(system::IFile* _file, const asset: for (auto mb : mesh.first.get()->getMeshBuffers()) mb->setInstanceCount(instanceCount); } +#endif +#if 0 // TODO: put IR and stuff in metadata so that we can recompile the materials after load auto compResult = ctx.backend.compile(&ctx.backend_ctx, ctx.ir.get(), decltype(ctx.backend)::EGST_PRESENT_WITH_AOV_EXTRACTION); ctx.backend_ctx.vt.commitAll(); @@ -434,10 +430,9 @@ asset::SAssetBundle CMitsubaLoader::loadAsset(system::IFile* _file, const asset: parserManager.m_metadata->m_global.m_envMapImages.push_back(core::smart_refctd_ptr_static_cast(*contentRange.begin())); } } - - return asset::SAssetBundle(std::move(parserManager.m_metadata),std::move(meshSmartPtrArray)); - } #endif + return asset::SAssetBundle(std::move(result.metadata),{std::move(scene)}); + } } #if 0 @@ -1192,27 +1187,20 @@ inline core::smart_refctd_ptr CMitsubaLoader::createDS return ds0; } +#endif using namespace std::string_literals; SContext::SContext( - const asset::IGeometryCreator* _geomCreator, - const asset::IMeshManipulator* _manipulator, +// const asset::IGeometryCreator* _geomCreator, +// const asset::IMeshManipulator* _manipulator, const asset::IAssetLoader::SAssetLoadContext& _ctx, asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* _metadata -) : creator(_geomCreator), manipulator(_manipulator), inner(_ctx), 
override_(_override), meta(_metadata), - ir(core::make_smart_refctd_ptr()), frontend(this) +) : /*creator(_geomCreator), manipulator(_manipulator),*/ inner(_ctx), override_(_override), meta(_metadata) +//,ir(core::make_smart_refctd_ptr()), frontend(this) { - backend_ctx.vt = core::make_smart_refctd_ptr( - [](asset::E_FORMAT_CLASS) -> uint32_t { return VT_PHYSICAL_PAGE_TEX_TILES_PER_DIM_LOG2; }, // 16x16 tiles per layer for all dynamically created storages - VT_PAGE_SZ_LOG2, - VT_PAGE_PADDING, - VT_MAX_ALLOCATABLE_TEX_SZ_LOG2 - ); - meta->m_global.m_VT = core::smart_refctd_ptr(backend_ctx.vt.getCPUVirtualTexture()); } -#endif } } \ No newline at end of file diff --git a/src/nbl/ext/MitsubaLoader/ParserUtil.cpp b/src/nbl/ext/MitsubaLoader/ParserUtil.cpp index 3eedeb6ae6..4dce78c475 100644 --- a/src/nbl/ext/MitsubaLoader/ParserUtil.cpp +++ b/src/nbl/ext/MitsubaLoader/ParserUtil.cpp @@ -100,6 +100,12 @@ void ParserManager::elementHandlerStart(void* _data, const char* _el, const char ctx.parseElement(_el,_atts); } +void ParserManager::XMLContext::killParseWithError(const std::string& message) const +{ + session->invalidXMLFileStructure(message); + XML_StopParser(parser,false); +} + void ParserManager::XMLContext::parseElement(const char* _el, const char** _atts) { if (core::strcmpi(_el,"scene")==0) @@ -289,7 +295,7 @@ ParserManager::ParserManager() : propertyElements({ {"ref", {.create=processRef,.retvalGoesOnStack=true}} }){} -auto ParserManager::processAlias(const char** _atts, SessionContext* ctx) -> SElementCreator +auto ParserManager::processAlias(const char** _atts, SessionContext* ctx) -> SNamedElement { const char* id = nullptr; const char* as = nullptr; @@ -323,7 +329,7 @@ auto ParserManager::processAlias(const char** _atts, SessionContext* ctx) -> SEl return {original,std::move(name)}; } -auto ParserManager::processRef(const char** _atts, SessionContext* ctx) -> SElementCreator +auto ParserManager::processRef(const char** _atts, SessionContext* ctx) -> 
SNamedElement { const char* id; std::string name; @@ -335,8 +341,8 @@ auto ParserManager::processRef(const char** _atts, SessionContext* ctx) -> SElem auto* original = ctx->handles[id]; if (!original) - ctx->invalidXMLFileStructure("Used a `` element but referenced element not defined in preceeding XML!"); - return {original, std::move(name)}; + ctx->invalidXMLFileStructure(core::string("Used a `` element but referenced element not defined in preceeding XML!"); + return {original,std::move(name)}; } } \ No newline at end of file From 36c6aa6a312715a830acde85abe95d952c2b6574 Mon Sep 17 00:00:00 2001 From: devsh Date: Wed, 15 Oct 2025 09:21:03 +0200 Subject: [PATCH 065/472] really need that new PCH and lib breakdown --- include/nbl/asset/interchange/IAssetLoader.h | 2 ++ include/nbl/system/declarations.h | 1 + src/nbl/ext/MitsubaLoader/ParserUtil.cpp | 1 + 3 files changed, 4 insertions(+) diff --git a/include/nbl/asset/interchange/IAssetLoader.h b/include/nbl/asset/interchange/IAssetLoader.h index 35bff3acab..3658f67026 100644 --- a/include/nbl/asset/interchange/IAssetLoader.h +++ b/include/nbl/asset/interchange/IAssetLoader.h @@ -4,6 +4,7 @@ #ifndef _NBL_ASSET_I_ASSET_LOADER_H_INCLUDED_ #define _NBL_ASSET_I_ASSET_LOADER_H_INCLUDED_ + #include "nbl/system/declarations.h" #include "nbl/system/ISystem.h" @@ -11,6 +12,7 @@ #include "nbl/asset/interchange/SAssetBundle.h" + namespace nbl::asset { diff --git a/include/nbl/system/declarations.h b/include/nbl/system/declarations.h index 2e66498a61..ebc5a890ae 100644 --- a/include/nbl/system/declarations.h +++ b/include/nbl/system/declarations.h @@ -5,6 +5,7 @@ #define _NBL_SYSTEM_DECLARATIONS_H_INCLUDED_ #include "nbl/core/declarations.h" +#include "nbl/core/definitions.h" // basic stuff #include "nbl/system/DynamicLibraryFunctionPointer.h" diff --git a/src/nbl/ext/MitsubaLoader/ParserUtil.cpp b/src/nbl/ext/MitsubaLoader/ParserUtil.cpp index 4dce78c475..d34a2524fd 100644 --- a/src/nbl/ext/MitsubaLoader/ParserUtil.cpp +++ 
b/src/nbl/ext/MitsubaLoader/ParserUtil.cpp @@ -17,6 +17,7 @@ using namespace nbl::system; auto ParserManager::parse(IFile* _file, const Params& _params) const -> Result { +// CMitsubaMetadata* obj = new CMitsubaMetadata(); Result result = { .metadata = core::make_smart_refctd_ptr() }; From 9ed5aee21aa409601a9670e69c1b3cc7a158c735 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 15 Oct 2025 13:39:30 +0200 Subject: [PATCH 066/472] ah IES was not added to asset manager, also correct some aspectMask subresource bitflags to not trigger asserts, update examples_tests submodule --- examples_tests | 2 +- src/nbl/asset/IAssetManager.cpp | 4 ++-- src/nbl/asset/utils/CIESProfile.cpp | 5 +++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/examples_tests b/examples_tests index 59a996222d..02924fbac8 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 59a996222da2232348a1a9e31c4484161b340fb0 +Subproject commit 02924fbac84775596dd553b210016f5fb46834fc diff --git a/src/nbl/asset/IAssetManager.cpp b/src/nbl/asset/IAssetManager.cpp index dc67ed8d01..5f48170c37 100644 --- a/src/nbl/asset/IAssetManager.cpp +++ b/src/nbl/asset/IAssetManager.cpp @@ -84,7 +84,7 @@ #endif #include "nbl/asset/interchange/CBufferLoaderBIN.h" -//#include "nbl/asset/interchange/CIESProfileLoader.h" +#include "nbl/asset/interchange/CIESProfileLoader.h" #include "nbl/asset/utils/CGeometryCreator.h" @@ -181,7 +181,7 @@ void IAssetManager::addLoadersAndWriters() #ifdef _NBL_COMPILE_WITH_GLI_WRITER_ addAssetWriter(core::make_smart_refctd_ptr(core::smart_refctd_ptr(m_system))); #endif -// addAssetLoader(core::make_smart_refctd_ptr()); +addAssetLoader(core::make_smart_refctd_ptr()); for (auto& loader : m_loaders.vector) loader->initialize(); diff --git a/src/nbl/asset/utils/CIESProfile.cpp b/src/nbl/asset/utils/CIESProfile.cpp index 5f89019ad7..332e35fb87 100644 --- a/src/nbl/asset/utils/CIESProfile.cpp +++ b/src/nbl/asset/utils/CIESProfile.cpp @@ -136,6 
+136,7 @@ core::smart_refctd_ptr CIESProfile::createIESTexture(Execu region.imageSubresource.baseArrayLayer = 0u; region.imageSubresource.layerCount = 1u; region.imageSubresource.mipLevel = 0u; + region.imageSubresource.aspectMask = core::bitflag(asset::IImage::EAF_COLOR_BIT); region.bufferImageHeight = 0u; region.bufferOffset = 0u; @@ -152,7 +153,7 @@ core::smart_refctd_ptr CIESProfile::createIESTexture(Execu CFillImageFilter::state_type state; state.outImage = outImg.get(); - state.subresource.aspectMask = static_cast(0); + state.subresource.aspectMask = core::bitflag(asset::IImage::EAF_COLOR_BIT); state.subresource.baseArrayLayer = 0u; state.subresource.layerCount = 1u; state.outRange.extent = creationParams.extent; @@ -199,7 +200,7 @@ core::smart_refctd_ptr CIESProfile::createIESTexture(Execu viewParams.flags = static_cast(0); viewParams.viewType = IImageView::ET_2D; viewParams.format = viewParams.image->getCreationParameters().format; - viewParams.subresourceRange.aspectMask = static_cast(0); + viewParams.subresourceRange.aspectMask = core::bitflag(asset::IImage::EAF_COLOR_BIT); viewParams.subresourceRange.levelCount = viewParams.image->getCreationParameters().mipLevels; viewParams.subresourceRange.layerCount = 1u; From f850ce7881f3a852a584cf4c46ba60c7b8a8ac21 Mon Sep 17 00:00:00 2001 From: devsh Date: Wed, 15 Oct 2025 14:13:12 +0200 Subject: [PATCH 067/472] enable a few elements and get stuff parsing (with unhandled element errors) --- include/nbl/core/hash/blake.h | 3 + include/nbl/ext/MitsubaLoader/CElementBSDF.h | 2 +- .../MitsubaLoader/CElementEmissionProfile.h | 28 ++++----- .../nbl/ext/MitsubaLoader/CElementEmitter.h | 2 +- include/nbl/ext/MitsubaLoader/CElementFilm.h | 25 ++++---- .../ext/MitsubaLoader/CElementIntegrator.h | 14 ++--- .../nbl/ext/MitsubaLoader/CElementRFilter.h | 14 ++--- .../nbl/ext/MitsubaLoader/CElementSampler.h | 12 ++-- .../nbl/ext/MitsubaLoader/CElementSensor.h | 21 +++---- include/nbl/ext/MitsubaLoader/CElementShape.h | 23 
+++---- .../nbl/ext/MitsubaLoader/CElementTexture.h | 2 +- .../nbl/ext/MitsubaLoader/CElementTransform.h | 15 +++-- include/nbl/ext/MitsubaLoader/IElement.h | 7 ++- include/nbl/ext/MitsubaLoader/ParserUtil.h | 3 + .../MitsubaLoader/CElementEmissionProfile.cpp | 62 ++++++++++--------- .../ext/MitsubaLoader/CElementIntegrator.cpp | 22 +++---- src/nbl/ext/MitsubaLoader/CElementSampler.cpp | 13 +--- src/nbl/ext/MitsubaLoader/CElementSensor.cpp | 34 +++++----- .../ext/MitsubaLoader/CElementTransform.cpp | 32 ++++------ src/nbl/ext/MitsubaLoader/CMakeLists.txt | 4 +- src/nbl/ext/MitsubaLoader/ParserUtil.cpp | 41 +++++++----- 21 files changed, 186 insertions(+), 193 deletions(-) diff --git a/include/nbl/core/hash/blake.h b/include/nbl/core/hash/blake.h index 801b867766..fb91c9969f 100644 --- a/include/nbl/core/hash/blake.h +++ b/include/nbl/core/hash/blake.h @@ -4,10 +4,13 @@ #ifndef _NBL_CORE_HASH_BLAKE3_H_INCLUDED_ #define _NBL_CORE_HASH_BLAKE3_H_INCLUDED_ + +#include "nbl/config/BuildConfigOptions.h" #include "blake3.h" #include + namespace nbl::core { struct blake3_hash_t final diff --git a/include/nbl/ext/MitsubaLoader/CElementBSDF.h b/include/nbl/ext/MitsubaLoader/CElementBSDF.h index b1e8183d4b..e11b2421a8 100644 --- a/include/nbl/ext/MitsubaLoader/CElementBSDF.h +++ b/include/nbl/ext/MitsubaLoader/CElementBSDF.h @@ -369,7 +369,7 @@ class CElementBSDF : public IElement return *this; } - bool addProperty(SNamedPropertyElement&& _property) override; + bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) override; bool onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* globalMetadata) override; IElement::Type getType() const override { return IElement::Type::BSDF; } std::string getLogName() const override { return "bsdf"; } diff --git a/include/nbl/ext/MitsubaLoader/CElementEmissionProfile.h b/include/nbl/ext/MitsubaLoader/CElementEmissionProfile.h index 025b48f3d1..12491a9a70 100644 --- 
a/include/nbl/ext/MitsubaLoader/CElementEmissionProfile.h +++ b/include/nbl/ext/MitsubaLoader/CElementEmissionProfile.h @@ -12,15 +12,15 @@ namespace nbl::ext::MitsubaLoader { -struct CElementEmissionProfile : public IElement { - - CElementEmissionProfile(const char* id) : IElement(id), normalization(EN_NONE), flatten(0.0) /*no blending by default*/ {} - CElementEmissionProfile() : IElement(""), normalization(EN_NONE) {} - CElementEmissionProfile(const CElementEmissionProfile& other) : IElement("") +struct CElementEmissionProfile final : public IElement +{ + inline CElementEmissionProfile(const char* id) : IElement(id), normalization(EN_NONE), flatten(0.0) /*no blending by default*/ {} + inline CElementEmissionProfile() : IElement(""), normalization(EN_NONE) {} + inline CElementEmissionProfile(const CElementEmissionProfile& other) : IElement("") { operator=(other); } - CElementEmissionProfile(CElementEmissionProfile&& other) : IElement("") + inline CElementEmissionProfile(CElementEmissionProfile&& other) : IElement("") { operator=(std::move(other)); } @@ -39,22 +39,22 @@ struct CElementEmissionProfile : public IElement { return *this; } - virtual ~CElementEmissionProfile() + inline ~CElementEmissionProfile() { } - bool addProperty(SNamedPropertyElement&& _property) override; - bool onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* globalMetadata) override { + bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) override; + inline bool onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* globalMetadata) override { return true; } bool processChildData(IElement* _child, const std::string& name) override; - IElement::Type getType() const override { return IElement::Type::EMISSION_PROFILE; } - std::string getLogName() const override { return "emissionprofile "; } + inline IElement::Type getType() const override { return IElement::Type::EMISSION_PROFILE; } + inline std::string 
getLogName() const override { return "emissionprofile "; } - enum E_NORMALIZE + enum E_NORMALIZE : uint8_t { EN_UNIT_MAX, //! normalize the intensity by dividing out the maximum intensity - EN_UNIT_AVERAGE_OVER_IMPLIED_DOMAIN, //! normlize by energy - integrate the profile over the hemisphere as well as the solid angles where the profile has emission above 0. + EN_UNIT_AVERAGE_OVER_IMPLIED_DOMAIN, //! normalize by energy - integrate the profile over the hemisphere as well as the solid angles where the profile has emission above 0. EN_UNIT_AVERAGE_OVER_FULL_DOMAIN, //! similar to UNIT_AVERAGE_OVER_IMPLIED_DOMAIN but in this case we presume the soild angle of the domain is (CIESProfile::vAngles.front()-CIESProfile::vAngles.back())*4.f EN_NONE //! no normalization @@ -62,7 +62,7 @@ struct CElementEmissionProfile : public IElement { std::string filename; E_NORMALIZE normalization; - float flatten; + float flatten; // TODO: why is this named this way? }; } diff --git a/include/nbl/ext/MitsubaLoader/CElementEmitter.h b/include/nbl/ext/MitsubaLoader/CElementEmitter.h index 27e12b3718..eb80657c54 100644 --- a/include/nbl/ext/MitsubaLoader/CElementEmitter.h +++ b/include/nbl/ext/MitsubaLoader/CElementEmitter.h @@ -200,7 +200,7 @@ class CElementEmitter : public IElement return *this; } - bool addProperty(SNamedPropertyElement&& _property) override; + bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) override; bool onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* globalMetadata) override; IElement::Type getType() const override { return IElement::Type::EMITTER; } std::string getLogName() const override { return "emitter"; } diff --git a/include/nbl/ext/MitsubaLoader/CElementFilm.h b/include/nbl/ext/MitsubaLoader/CElementFilm.h index 9e9c9f08fd..a16ff5d7ac 100644 --- a/include/nbl/ext/MitsubaLoader/CElementFilm.h +++ b/include/nbl/ext/MitsubaLoader/CElementFilm.h @@ -5,17 +5,16 @@ #define 
_NBL_EXT_MISTUBA_LOADER_C_ELEMENT_FILM_H_INCLUDED_ -#include "nbl/macros.h" #include "nbl/ext/MitsubaLoader/CElementRFilter.h" namespace nbl::ext::MitsubaLoader { -class CElementFilm : public IElement +class CElementFilm final : public IElement { public: - enum Type + enum Type : uint8_t { INVALID, HDR_FILM, @@ -23,7 +22,7 @@ class CElementFilm : public IElement LDR_FILM, MFILM }; - enum PixelFormat + enum PixelFormat : uint8_t { LUMINANCE, LUMINANCE_ALPHA, @@ -34,7 +33,7 @@ class CElementFilm : public IElement SPECTRUM, SPECTRUM_ALPHA }; - enum FileFormat + enum FileFormat : uint8_t { OPENEXR, RGBE, @@ -45,7 +44,7 @@ class CElementFilm : public IElement MATHEMATICA, NUMPY }; - enum ComponentFormat + enum ComponentFormat : uint8_t { FLOAT16, FLOAT32, @@ -79,11 +78,11 @@ class CElementFilm : public IElement variable[4] = 0; } int32_t digits; - _NBL_STATIC_INLINE_CONSTEXPR size_t MaxVarNameLen = 63; // matlab + constexpr static inline size_t MaxVarNameLen = 63; // matlab char variable[MaxVarNameLen+1]; }; - CElementFilm(const char* id) : IElement(id), type(Type::HDR_FILM), + inline CElementFilm(const char* id) : IElement(id), type(Type::HDR_FILM), width(768), height(576), cropOffsetX(0), cropOffsetY(0), cropWidth(INT_MAX), cropHeight(INT_MAX), fileFormat(OPENEXR), pixelFormat(RGB), componentFormat(FLOAT16), banner(true), highQualityEdges(false), rfilter("") @@ -94,10 +93,10 @@ class CElementFilm : public IElement { } - bool addProperty(SNamedPropertyElement&& _property) override; + bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) override; bool onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* globalMetadata) override; - IElement::Type getType() const override { return IElement::Type::FILM; } - std::string getLogName() const override { return "film"; } + inline IElement::Type getType() const override { return IElement::Type::FILM; } + inline std::string getLogName() const override { return "film"; } 
inline bool processChildData(IElement* _child, const std::string& name) override { @@ -129,7 +128,7 @@ class CElementFilm : public IElement M mfilm; }; - _NBL_STATIC_INLINE_CONSTEXPR size_t MaxPathLen = 256; + constexpr static inline size_t MaxPathLen = 256; char outputFilePath[MaxPathLen+1] = {0}; char denoiserBloomFilePath[MaxPathLen+1] = {0}; int32_t cascadeCount = 1; @@ -137,7 +136,7 @@ class CElementFilm : public IElement float cascadeLuminanceStart = core::nan(); float denoiserBloomScale = 0.0f; float denoiserBloomIntensity = 0.0f; - _NBL_STATIC_INLINE_CONSTEXPR size_t MaxTonemapperArgsLen = 128; + constexpr static inline size_t MaxTonemapperArgsLen = 128; char denoiserTonemapperArgs[MaxTonemapperArgsLen+1] = {0}; float envmapRegularizationFactor = 0.5f; // 1.0f means based envmap luminance, 0.0f means uniform }; diff --git a/include/nbl/ext/MitsubaLoader/CElementIntegrator.h b/include/nbl/ext/MitsubaLoader/CElementIntegrator.h index df7aeac3fd..72201f6cbb 100644 --- a/include/nbl/ext/MitsubaLoader/CElementIntegrator.h +++ b/include/nbl/ext/MitsubaLoader/CElementIntegrator.h @@ -12,7 +12,7 @@ namespace nbl::ext::MitsubaLoader { -class CElementIntegrator : public IElement +class CElementIntegrator final : public IElement { public: enum Type @@ -191,10 +191,10 @@ class CElementIntegrator : public IElement { }; - CElementIntegrator(const char* id) : IElement(id), type(Type::INVALID) + inline CElementIntegrator(const char* id) : IElement(id), type(Type::INVALID) { } - virtual ~CElementIntegrator() + inline ~CElementIntegrator() { } @@ -264,12 +264,12 @@ class CElementIntegrator : public IElement return *this; } - bool addProperty(SNamedPropertyElement&& _property) override; + bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) override; bool onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* globalMetadata) override; - IElement::Type getType() const override { return IElement::Type::INTEGRATOR; } - 
std::string getLogName() const override { return "integrator"; } + inline IElement::Type getType() const override { return IElement::Type::INTEGRATOR; } + inline std::string getLogName() const override { return "integrator"; } - bool processChildData(IElement* _child, const std::string& name) override + inline bool processChildData(IElement* _child, const std::string& name) override { if (!_child) return true; diff --git a/include/nbl/ext/MitsubaLoader/CElementRFilter.h b/include/nbl/ext/MitsubaLoader/CElementRFilter.h index 0905e40a2f..631dc4e82c 100644 --- a/include/nbl/ext/MitsubaLoader/CElementRFilter.h +++ b/include/nbl/ext/MitsubaLoader/CElementRFilter.h @@ -13,10 +13,10 @@ namespace nbl::ext::MitsubaLoader { -class CElementRFilter : public IElement +class CElementRFilter final : public IElement { public: - enum Type + enum Type : uint8_t { INVALID, BOX, @@ -40,16 +40,16 @@ class CElementRFilter : public IElement int32_t lobes = 3; }; - CElementRFilter(const char* id) : IElement(id), type(GAUSSIAN) + inline CElementRFilter(const char* id) : IElement(id), type(GAUSSIAN) { gaussian = Gaussian(); } - virtual ~CElementRFilter() {} + inline ~CElementRFilter() {} - bool addProperty(SNamedPropertyElement&& _property) override; + bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) override; bool onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* globalMetadata) override; - IElement::Type getType() const override { return IElement::Type::RFILTER; } - std::string getLogName() const override { return "rfilter"; } + inline IElement::Type getType() const override { return IElement::Type::RFILTER; } + inline std::string getLogName() const override { return "rfilter"; } // make these public Type type; diff --git a/include/nbl/ext/MitsubaLoader/CElementSampler.h b/include/nbl/ext/MitsubaLoader/CElementSampler.h index 2df888cce6..313e649b2e 100644 --- a/include/nbl/ext/MitsubaLoader/CElementSampler.h +++ 
b/include/nbl/ext/MitsubaLoader/CElementSampler.h @@ -15,7 +15,7 @@ class CGlobalMitsubaMetadata; class CElementSampler : public IElement { public: - enum Type + enum Type : uint8_t { INVALID, INDEPENDENT, @@ -26,13 +26,13 @@ class CElementSampler : public IElement SOBOL }; - CElementSampler(const char* id) : IElement(id), type(INVALID), sampleCount(4) {} - virtual ~CElementSampler() {} + inline CElementSampler(const char* id) : IElement(id), type(INVALID), sampleCount(4) {} + inline ~CElementSampler() {} - bool addProperty(SNamedPropertyElement&& _property) override; + bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) override; bool onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* globalMetadata) override; - IElement::Type getType() const override { return IElement::Type::SAMPLER; } - std::string getLogName() const override { return "sampler"; } + inline IElement::Type getType() const override { return IElement::Type::SAMPLER; } + inline std::string getLogName() const override { return "sampler"; } // make these public Type type; diff --git a/include/nbl/ext/MitsubaLoader/CElementSensor.h b/include/nbl/ext/MitsubaLoader/CElementSensor.h index f8b69cc1c5..fa8bc07509 100644 --- a/include/nbl/ext/MitsubaLoader/CElementSensor.h +++ b/include/nbl/ext/MitsubaLoader/CElementSensor.h @@ -14,8 +14,7 @@ namespace nbl::ext::MitsubaLoader { - -class CElementSensor : public IElement +class CElementSensor final : public IElement { public: enum Type @@ -35,8 +34,8 @@ class CElementSensor : public IElement struct ShutterSensor { - core::vectorSIMDf up = core::vectorSIMDf(0,1,0); - core::vectorSIMDf clipPlanes[MaxClipPlanes] = {}; + hlsl::float32_t3 up = hlsl::float32_t3(0,1,0); + hlsl::float32_t3 clipPlanes[MaxClipPlanes] = {}; float moveSpeed = core::nan(); float zoomSpeed = core::nan(); float rotateSpeed = core::nan(); @@ -101,14 +100,14 @@ class CElementSensor : public IElement kc; };*/ - CElementSensor(const 
char* id) : IElement(id), type(Type::INVALID), /*toWorldType(IElement::Type::TRANSFORM),*/ transform(), film(""), sampler("") + inline CElementSensor(const char* id) : IElement(id), type(Type::INVALID), /*toWorldType(IElement::Type::TRANSFORM),*/ transform(), film(""), sampler("") { } - CElementSensor(const CElementSensor& other) : IElement(""), transform(), film(""), sampler("") + inline CElementSensor(const CElementSensor& other) : IElement(""), transform(), film(""), sampler("") { operator=(other); } - virtual ~CElementSensor() + inline ~CElementSensor() { } @@ -151,12 +150,12 @@ class CElementSensor : public IElement return *this; } - bool addProperty(SNamedPropertyElement&& _property) override; + bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) override; bool onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* globalMetadata) override; - IElement::Type getType() const override { return IElement::Type::SENSOR; } - std::string getLogName() const override { return "sensor"; } + inline IElement::Type getType() const override { return IElement::Type::SENSOR; } + inline std::string getLogName() const override { return "sensor"; } - bool processChildData(IElement* _child, const std::string& name) override + inline bool processChildData(IElement* _child, const std::string& name) override { if (!_child) return true; diff --git a/include/nbl/ext/MitsubaLoader/CElementShape.h b/include/nbl/ext/MitsubaLoader/CElementShape.h index 41e7fdbc1c..39e163a5c4 100644 --- a/include/nbl/ext/MitsubaLoader/CElementShape.h +++ b/include/nbl/ext/MitsubaLoader/CElementShape.h @@ -15,7 +15,7 @@ namespace nbl::ext::MitsubaLoader { -class CElementShape : public IElement +class CElementShape final : public IElement { public: enum Type @@ -99,18 +99,18 @@ class CElementShape : public IElement CElementTexture* texture; };*/ - CElementShape(const char* id) : IElement(id), type(Type::INVALID), 
/*toWorldType(IElement::Type::TRANSFORM),*/ transform(), bsdf(nullptr), emitter(nullptr) + inline CElementShape(const char* id) : IElement(id), type(Type::INVALID), /*toWorldType(IElement::Type::TRANSFORM),*/ transform(), bsdf(nullptr), emitter(nullptr) { } - CElementShape(const CElementShape& other) : IElement(""), transform(), bsdf(nullptr), emitter(nullptr) + inline CElementShape(const CElementShape& other) : IElement(""), transform(), bsdf(nullptr), emitter(nullptr) { operator=(other); } - CElementShape(CElementShape&& other) : IElement(""), transform(), bsdf(nullptr), emitter(nullptr) + inline CElementShape(CElementShape&& other) : IElement(""), transform(), bsdf(nullptr), emitter(nullptr) { operator=(std::move(other)); } - virtual ~CElementShape() + inline ~CElementShape() { } @@ -215,20 +215,21 @@ class CElementShape : public IElement return *this; } - bool addProperty(SNamedPropertyElement&& _property) override; + bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) override; bool onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* globalMetadata) override; - IElement::Type getType() const override { return IElement::Type::SHAPE; } - std::string getLogName() const override { return "shape"; } + inline IElement::Type getType() const override { return IElement::Type::SHAPE; } + inline std::string getLogName() const override { return "shape"; } - inline core::matrix3x4SIMD getAbsoluteTransform() const + inline hlsl::float32_t3x4 getAbsoluteTransform() const { - auto local = transform.matrix.extractSub3x4(); + // explicit truncation + auto local = hlsl::float32_t3x4(transform.matrix); // TODO restore at some point (and make it actually work??) 
// note: INSTANCE can only contain SHAPEGROUP and the latter doesnt have its own transform //if (type==CElementShape::INSTANCE && instance.parent) - // return core::concatenateBFollowedByA(local,instance.parent->getAbsoluteTransform()); + // return mul(instance.parent->getAbsoluteTransform(),local); return local; } diff --git a/include/nbl/ext/MitsubaLoader/CElementTexture.h b/include/nbl/ext/MitsubaLoader/CElementTexture.h index 3bcd92001a..8a4f2b4c87 100644 --- a/include/nbl/ext/MitsubaLoader/CElementTexture.h +++ b/include/nbl/ext/MitsubaLoader/CElementTexture.h @@ -235,7 +235,7 @@ class CElementTexture : public IElement return *this; } - bool addProperty(SNamedPropertyElement&& _property) override; + bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) override; bool onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* globalMetadata) override; IElement::Type getType() const override { return IElement::Type::TEXTURE; } std::string getLogName() const override { return "texture"; } diff --git a/include/nbl/ext/MitsubaLoader/CElementTransform.h b/include/nbl/ext/MitsubaLoader/CElementTransform.h index c1ca0203ea..701296664b 100644 --- a/include/nbl/ext/MitsubaLoader/CElementTransform.h +++ b/include/nbl/ext/MitsubaLoader/CElementTransform.h @@ -11,17 +11,16 @@ namespace nbl::ext::MitsubaLoader { - -class CElementTransform : public IElement +class CElementTransform final : public IElement { public: - CElementTransform() : IElement(""), matrix() {} - virtual ~CElementTransform() {} + inline CElementTransform() : IElement(""), matrix() {} + inline ~CElementTransform() {} - bool addProperty(SNamedPropertyElement&& _property) override; + bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) override; bool onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* globalMetadata) override { return true; } - IElement::Type getType() const override { return 
IElement::Type::TRANSFORM; } - std::string getLogName() const override { return "transform"; } + inline IElement::Type getType() const override { return IElement::Type::TRANSFORM; } + inline std::string getLogName() const override { return "transform"; } /* inline CElementTransform& operator=(const CElementTransform& other) { @@ -31,7 +30,7 @@ class CElementTransform : public IElement } */ - core::matrix4SIMD matrix; + hlsl::float32_t4x4 matrix; }; } diff --git a/include/nbl/ext/MitsubaLoader/IElement.h b/include/nbl/ext/MitsubaLoader/IElement.h index c3a4eb6e20..9128e1b274 100644 --- a/include/nbl/ext/MitsubaLoader/IElement.h +++ b/include/nbl/ext/MitsubaLoader/IElement.h @@ -6,6 +6,7 @@ #include "nbl/asset/interchange/IAssetLoader.h" + #include "nbl/ext/MitsubaLoader/PropertyElement.h" @@ -47,7 +48,7 @@ class IElement virtual IElement::Type getType() const = 0; virtual std::string getLogName() const = 0; - virtual bool addProperty(SNamedPropertyElement&& _property) = 0; + virtual bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) = 0; virtual bool onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* globalMetadata) = 0; //! 
default implementation for elements that doesnt have any children virtual bool processChildData(IElement* _child, const std::string& name) @@ -55,7 +56,7 @@ class IElement return !_child; } // - static inline bool getTypeIDAndNameStrings(std::add_lvalue_reference::type outType, std::add_lvalue_reference::type outID, std::string& name, const char** _atts) + static inline bool getTypeIDAndNameStrings(std::add_lvalue_reference_t outType, std::add_lvalue_reference_t outID, std::string& name, const char** _atts) { outType = nullptr; outID = nullptr; @@ -75,7 +76,7 @@ class IElement } return outType; } - static inline bool getIDAndName(std::add_lvalue_reference::type id, std::string& name, const char** _atts) + static inline bool getIDAndName(std::add_lvalue_reference_t id, std::string& name, const char** _atts) { const char* thrownAwayType; getTypeIDAndNameStrings(thrownAwayType,id,name,_atts); diff --git a/include/nbl/ext/MitsubaLoader/ParserUtil.h b/include/nbl/ext/MitsubaLoader/ParserUtil.h index a0dc8a9c27..11d2d30170 100644 --- a/include/nbl/ext/MitsubaLoader/ParserUtil.h +++ b/include/nbl/ext/MitsubaLoader/ParserUtil.h @@ -139,6 +139,9 @@ class ParserManager final }; const core::unordered_map createElementTable; // + template requires std::is_base_of_v + static SNamedElement createElement(const char** _atts, SessionContext* ctx); + // static SNamedElement processAlias(const char** _atts, SessionContext* ctx); static SNamedElement processRef(const char** _atts, SessionContext* ctx); }; diff --git a/src/nbl/ext/MitsubaLoader/CElementEmissionProfile.cpp b/src/nbl/ext/MitsubaLoader/CElementEmissionProfile.cpp index 2bc7fc727a..11e68d619d 100644 --- a/src/nbl/ext/MitsubaLoader/CElementEmissionProfile.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementEmissionProfile.cpp @@ -1,72 +1,78 @@ -// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h - -#include "nbl/ext/MitsubaLoader/ParserUtil.h" -#include "nbl/ext/MitsubaLoader/CElementFactory.h" #include "nbl/ext/MitsubaLoader/CElementEmissionProfile.h" +#include "nbl/ext/MitsubaLoader/ParserUtil.h" #include -namespace nbl -{ -namespace ext -{ -namespace MitsubaLoader + +namespace nbl::ext::MitsubaLoader { template<> -CElementFactory::return_type CElementFactory::createElement(const char** _atts, ParserManager* _util) +auto ParserManager::createElement(const char** _atts, SessionContext* ctx) -> SNamedElement { const char* type; const char* id; std::string name; if (!IElement::getTypeIDAndNameStrings(type, id, name, _atts)) - return CElementFactory::return_type(nullptr, ""); + return {}; - CElementEmissionProfile* obj = _util->objects.construct(id); + CElementEmissionProfile* obj = ctx->objects.construct(id); if (!obj) - return CElementFactory::return_type(nullptr, ""); + return {}; - return CElementFactory::return_type(obj, std::move(name)); + return {obj,std::move(name)}; } -bool CElementEmissionProfile::addProperty(SNamedPropertyElement&& _property) { - if (_property.name == "filename") { - if (_property.type != SPropertyElementData::Type::STRING) { +bool CElementEmissionProfile::addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) +{ + if (_property.name=="filename") + { + if (_property.type!=SPropertyElementData::Type::STRING) + { + invalidXMLFileStructure(logger,"'s `filename` must be a string type, instead it's: "+_property.type); return false; } filename = _property.getProperty(); return true; } - else if (_property.name == "normalization") { - if (_property.type != SPropertyElementData::Type::STRING) + else if (_property.name=="normalization") + { + if (_property.type!=SPropertyElementData::Type::STRING) + { + invalidXMLFileStructure(logger,"'s `normalization` must be a string type, instead it's: "+_property.type); return false; + } const auto 
normalizeS = std::string(_property.getProperty()); - if (normalizeS == "UNIT_MAX") + if (normalizeS=="UNIT_MAX") normalization = EN_UNIT_MAX; - else if(normalizeS == "UNIT_AVERAGE_OVER_IMPLIED_DOMAIN") + else if(normalizeS=="UNIT_AVERAGE_OVER_IMPLIED_DOMAIN") normalization = EN_UNIT_AVERAGE_OVER_IMPLIED_DOMAIN; - else if(normalizeS == "UNIT_AVERAGE_OVER_FULL_DOMAIN") + else if(normalizeS=="UNIT_AVERAGE_OVER_FULL_DOMAIN") normalization = EN_UNIT_AVERAGE_OVER_FULL_DOMAIN; else + { + invalidXMLFileStructure(logger,"'s `normalization` is unrecognized: "+ normalizeS); normalization = EN_NONE; + } return true; } - else if (_property.name == "flatten") + else if (_property.name=="flatten") { - if (_property.type != SPropertyElementData::Type::FLOAT) + if (_property.type!=SPropertyElementData::Type::FLOAT) return false; flatten = _property.getProperty(); - return true; } - else { - ParserLog::invalidXMLFileStructure("No emission profile can have such property set with name: " + _property.name); + else + { + invalidXMLFileStructure(logger,"No emission profile can have such property set with name: "+_property.name); return false; } } @@ -78,6 +84,4 @@ bool CElementEmissionProfile::processChildData(IElement* _child, const std::stri return false; } -} -} } \ No newline at end of file diff --git a/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp b/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp index 7843cd30eb..dbff6ed1c9 100644 --- a/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp @@ -1,30 +1,24 @@ // Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h - +#include "nbl/ext/MitsubaLoader/CElementIntegrator.h" #include "nbl/ext/MitsubaLoader/ParserUtil.h" -#include "nbl/ext/MitsubaLoader/CElementFactory.h" - #include "nbl/ext/MitsubaLoader/CMitsubaMetadata.h" #include -namespace nbl -{ -namespace ext -{ -namespace MitsubaLoader -{ +namespace nbl::ext::MitsubaLoader +{ template<> -CElementFactory::return_type CElementFactory::createElement(const char** _atts, ParserManager* _util) +auto ParserManager::createElement(const char** _atts, SessionContext* ctx) -> SNamedElement { const char* type; const char* id; std::string name; if (!IElement::getTypeIDAndNameStrings(type, id, name, _atts)) - return CElementFactory::return_type(nullptr,""); + return {}; static const core::unordered_map StringToType = { @@ -53,12 +47,12 @@ CElementFactory::return_type CElementFactory::createElement( { ParserLog::invalidXMLFileStructure("unknown type"); _NBL_DEBUG_BREAK_IF(false); - return CElementFactory::return_type(nullptr, ""); + return {}; } CElementIntegrator* obj = _util->objects.construct(id); if (!obj) - return CElementFactory::return_type(nullptr, ""); + return {}; obj->type = found->second; // defaults @@ -430,6 +424,4 @@ bool CElementIntegrator::onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _ov return true; } -} -} } \ No newline at end of file diff --git a/src/nbl/ext/MitsubaLoader/CElementSampler.cpp b/src/nbl/ext/MitsubaLoader/CElementSampler.cpp index 01306d2201..aff64d6b24 100644 --- a/src/nbl/ext/MitsubaLoader/CElementSampler.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementSampler.cpp @@ -1,17 +1,12 @@ // Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h - +#include "nbl/ext/MitsubaLoader/CElementSampler.h" #include "nbl/ext/MitsubaLoader/ParserUtil.h" -#include "nbl/ext/MitsubaLoader/CElementFactory.h" -namespace nbl -{ -namespace ext -{ -namespace MitsubaLoader -{ +namespace nbl::ext::MitsubaLoader +{ template<> CElementFactory::return_type CElementFactory::createElement(const char** _atts, ParserManager* _util) @@ -133,6 +128,4 @@ bool CElementSampler::onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _overr return true; } -} -} } \ No newline at end of file diff --git a/src/nbl/ext/MitsubaLoader/CElementSensor.cpp b/src/nbl/ext/MitsubaLoader/CElementSensor.cpp index 06d8c53737..d2f336cad2 100644 --- a/src/nbl/ext/MitsubaLoader/CElementSensor.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementSensor.cpp @@ -2,28 +2,27 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h + +#include "nbl/ext/MitsubaLoader/CElementSensor.h" #include "nbl/ext/MitsubaLoader/ParserUtil.h" -#include "nbl/ext/MitsubaLoader/CElementFactory.h" #include -namespace nbl -{ -namespace ext -{ -namespace MitsubaLoader + +namespace nbl::ext::MitsubaLoader { - + template<> -CElementFactory::return_type CElementFactory::createElement(const char** _atts, ParserManager* _util) +auto ParserManager::createElement(const char** _atts, SessionContext* ctx) -> SNamedElement { const char* type; const char* id; std::string name; if (!IElement::getTypeIDAndNameStrings(type, id, name, _atts)) - return CElementFactory::return_type(nullptr,""); + return {}; - static const core::unordered_map StringToType = + // TODO: initialize this separately + static const core::unordered_map StringToType = { {"perspective", CElementSensor::Type::PERSPECTIVE}, {"thinlens", CElementSensor::Type::THINLENS}, @@ -39,14 +38,13 @@ CElementFactory::return_type CElementFactory::createElement(cons auto found = StringToType.find(type); if 
(found==StringToType.end()) { - ParserLog::invalidXMLFileStructure("unknown type"); - _NBL_DEBUG_BREAK_IF(false); - return CElementFactory::return_type(nullptr, ""); + ctx->invalidXMLFileStructure("unknown type"); + return {}; } - CElementSensor* obj = _util->objects.construct(id); + CElementSensor* obj = ctx->objects.construct(id); if (!obj) - return CElementFactory::return_type(nullptr, ""); + return {}; obj->type = found->second; // defaults @@ -79,7 +77,7 @@ CElementFactory::return_type CElementFactory::createElement(cons default: break; } - return CElementFactory::return_type(obj, std::move(name)); + return {obj,std::move(name)}; } bool CElementSensor::addProperty(SNamedPropertyElement&& _property) @@ -242,7 +240,7 @@ bool CElementSensor::addProperty(SNamedPropertyElement&& _property) bool CElementSensor::onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* meta) { - if (type == Type::INVALID) + if (type==Type::INVALID) { ParserLog::invalidXMLFileStructure(getLogName() + ": type not specified"); _NBL_DEBUG_BREAK_IF(true); @@ -257,6 +255,4 @@ bool CElementSensor::onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _overri return true; } -} -} } \ No newline at end of file diff --git a/src/nbl/ext/MitsubaLoader/CElementTransform.cpp b/src/nbl/ext/MitsubaLoader/CElementTransform.cpp index 7d7f2922f1..6520cd8c90 100644 --- a/src/nbl/ext/MitsubaLoader/CElementTransform.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementTransform.cpp @@ -1,30 +1,25 @@ // Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h - +#include "nbl/ext/MitsubaLoader/CElementTransform.h" #include "nbl/ext/MitsubaLoader/ParserUtil.h" -#include "nbl/ext/MitsubaLoader/CElementFactory.h" -namespace nbl -{ -namespace ext -{ -namespace MitsubaLoader -{ +namespace nbl::ext::MitsubaLoader +{ template<> -CElementFactory::return_type CElementFactory::createElement(const char** _atts, ParserManager* _util) +auto ParserManager::createElement(const char** _atts, SessionContext* ctx) -> SNamedElement { - if (IElement::invalidAttributeCount(_atts, 2u)) - return CElementFactory::return_type(nullptr,""); - if (core::strcmpi(_atts[0], "name")) - return CElementFactory::return_type(nullptr,""); + if (IElement::invalidAttributeCount(_atts,2u)) + return {}; + if (core::strcmpi(_atts[0],"name")) + return {}; - return CElementFactory::return_type(_util->objects.construct(),_atts[1]); + return {ctx->objects.construct(),_atts[1]}; } -bool CElementTransform::addProperty(SNamedPropertyElement&& _property) +bool CElementTransform::addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) { switch (_property.type) { @@ -37,12 +32,11 @@ bool CElementTransform::addProperty(SNamedPropertyElement&& _property) case SNamedPropertyElement::Type::SCALE: [[fallthrough]]; case SNamedPropertyElement::Type::LOOKAT: - matrix = core::concatenateBFollowedByA(_property.mvalue, matrix); + matrix = hlsl::mul(matrix,_property.mvalue); break; default: { - ParserLog::invalidXMLFileStructure("The transform element does not take child property: "+_property.type); - _NBL_DEBUG_BREAK_IF(true); + invalidXMLFileStructure(logger,"The transform element does not take child property: "+_property.type); return false; } break; @@ -51,6 +45,4 @@ bool CElementTransform::addProperty(SNamedPropertyElement&& _property) return true; } -} -} } \ No newline at end of file diff --git a/src/nbl/ext/MitsubaLoader/CMakeLists.txt b/src/nbl/ext/MitsubaLoader/CMakeLists.txt 
index d4c361f84d..2adc31d7b6 100644 --- a/src/nbl/ext/MitsubaLoader/CMakeLists.txt +++ b/src/nbl/ext/MitsubaLoader/CMakeLists.txt @@ -17,6 +17,7 @@ set(NBL_EXT_MITSUBA_LOADER_H ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CElementBSDF.h ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CElementTexture.h ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CElementEmitter.h + ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CElementEmissionProfile.h ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CMitsubaSerializedMetadata.h ${NBL_EXT_INTERNAL_INCLUDE_DIR}/ParserUtil.h ${NBL_EXT_INTERNAL_INCLUDE_DIR}/CSerializedLoader.h @@ -31,11 +32,12 @@ set(NBL_EXT_MITSUBA_LOADER_SRC # CElementFilm.cpp # CElementRFilter.cpp # CElementSampler.cpp -# CElementTransform.cpp + CElementTransform.cpp # CElementShape.cpp # CElementBSDF.cpp # CElementTexture.cpp # CElementEmitter.cpp + CElementEmissionProfile.cpp ParserUtil.cpp CSerializedLoader.cpp CMitsubaLoader.cpp diff --git a/src/nbl/ext/MitsubaLoader/ParserUtil.cpp b/src/nbl/ext/MitsubaLoader/ParserUtil.cpp index d34a2524fd..5dbf6a7e93 100644 --- a/src/nbl/ext/MitsubaLoader/ParserUtil.cpp +++ b/src/nbl/ext/MitsubaLoader/ParserUtil.cpp @@ -4,7 +4,18 @@ #include "nbl/ext/MitsubaLoader/ParserUtil.h" -// TODO: all of the element types +#include "nbl/ext/MitsubaLoader/CElementIntegrator.h" +#include "nbl/ext/MitsubaLoader/CElementSensor.h" +#include "nbl/ext/MitsubaLoader/CElementFilm.h" +#include "nbl/ext/MitsubaLoader/CElementRFilter.h" +#include "nbl/ext/MitsubaLoader/CElementSampler.h" +//#include "nbl/ext/MitsubaLoader/CElementShape.h" +#include "nbl/ext/MitsubaLoader/CElementTransform.h" +//#include "nbl/ext/MitsubaLoader/CElementAnimation.h" +//#include "nbl/ext/MitsubaLoader/CElementBSDF.h" +//#include "nbl/ext/MitsubaLoader/CElementTexture.h" +//#include "nbl/ext/MitsubaLoader/CElementEmitter.h" +#include "nbl/ext/MitsubaLoader/CElementEmissionProfile.h" #include "expat/lib/expat.h" @@ -193,7 +204,7 @@ void ParserManager::XMLContext::parseElement(const char* _el, const char** _atts return; } - 
elements.top().element->addProperty(std::move(optProperty.value())); + elements.top().element->addProperty(std::move(optProperty.value()),session->params->logger); return; } @@ -278,20 +289,18 @@ ParserManager::ParserManager() : propertyElements({ "point", "vector", "matrix", "rotate", "translate", "scale", "lookat" }), propertyElementManager(), createElementTable({ -#if 0 // TODO - {"integrator", {CElementFactory::createElement,.retvalGoesOnStack=true}}, - {"sensor", {CElementFactory::createElement,.retvalGoesOnStack=true}}, - {"film", {CElementFactory::createElement,.retvalGoesOnStack=true}}, - {"rfilter", {CElementFactory::createElement,.retvalGoesOnStack=true}}, - {"sampler", {CElementFactory::createElement,.retvalGoesOnStack=true}}, - {"shape", {CElementFactory::createElement,.retvalGoesOnStack=true}}, - {"transform", {CElementFactory::createElement,.retvalGoesOnStack=true}}, - //{"animation", {CElementFactory::createElement,.retvalGoesOnStack=true}}, - {"bsdf", {CElementFactory::createElement,.retvalGoesOnStack=true}}, - {"texture", {CElementFactory::createElement,.retvalGoesOnStack=true}}, - {"emitter", {CElementFactory::createElement,.retvalGoesOnStack=true}}, - {"emissionprofile", {CElementFactory::createElement,.retvalGoesOnStack=true}}, -#endif +// {"integrator", {.create=createElement,.retvalGoesOnStack=true}}, +// {"sensor", {.create=createElement,.retvalGoesOnStack=true}}, +// {"film", {.create=createElement,.retvalGoesOnStack=true}}, +// {"rfilter", {.create=createElement,.retvalGoesOnStack=true}}, +// {"sampler", {.create=createElement,.retvalGoesOnStack=true}}, +// {"shape", {.create=createElement,.retvalGoesOnStack=true}}, + {"transform", {.create=createElement,.retvalGoesOnStack=true}}, +// {"animation", {.create=createElement,.retvalGoesOnStack=true}}, +// {"bsdf", {.create=createElement,.retvalGoesOnStack=true}}, +// {"texture", {.create=createElement,.retvalGoesOnStack=true}}, +// {"emitter", {.create=createElement,.retvalGoesOnStack=true}}, + 
{"emissionprofile", {.create=createElement,.retvalGoesOnStack=true}}, {"alias", {.create=processAlias,.retvalGoesOnStack=true}}, {"ref", {.create=processRef,.retvalGoesOnStack=true}} }){} From 35931bd29a3fe66c1a76d644400cddd34a7d4718 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 15 Oct 2025 15:51:26 +0200 Subject: [PATCH 068/472] update examples_tests submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 02924fbac8..7e4399a48b 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 02924fbac84775596dd553b210016f5fb46834fc +Subproject commit 7e4399a48b6f47892b8406ff7853489653a6acaf From 1eb3eb988b7c878509c7a27bb007061af6445c93 Mon Sep 17 00:00:00 2001 From: devsh Date: Wed, 15 Oct 2025 17:14:15 +0200 Subject: [PATCH 069/472] get some parsing going --- include/nbl/ext/MitsubaLoader/CElementBSDF.h | 22 +-- .../MitsubaLoader/CElementEmissionProfile.h | 6 +- .../nbl/ext/MitsubaLoader/CElementEmitter.h | 4 +- include/nbl/ext/MitsubaLoader/CElementFilm.h | 33 ++++- .../ext/MitsubaLoader/CElementIntegrator.h | 95 +++++++++---- .../nbl/ext/MitsubaLoader/CElementRFilter.h | 49 ++++++- .../nbl/ext/MitsubaLoader/CElementSampler.h | 42 +++++- .../nbl/ext/MitsubaLoader/CElementSensor.h | 66 ++++++--- include/nbl/ext/MitsubaLoader/CElementShape.h | 2 +- .../nbl/ext/MitsubaLoader/CElementTexture.h | 9 +- .../nbl/ext/MitsubaLoader/CElementTransform.h | 2 +- .../nbl/ext/MitsubaLoader/CMitsubaMetadata.h | 13 +- include/nbl/ext/MitsubaLoader/IElement.h | 19 ++- include/nbl/ext/MitsubaLoader/ParserUtil.h | 4 +- .../nbl/ext/MitsubaLoader/PropertyElement.h | 1 + src/nbl/ext/MitsubaLoader/CElementBSDF.cpp | 7 +- .../MitsubaLoader/CElementEmissionProfile.cpp | 16 --- src/nbl/ext/MitsubaLoader/CElementFilm.cpp | 108 ++++----------- .../ext/MitsubaLoader/CElementIntegrator.cpp | 131 ++---------------- src/nbl/ext/MitsubaLoader/CElementRFilter.cpp | 92 ++---------- 
src/nbl/ext/MitsubaLoader/CElementSampler.cpp | 86 ++---------- src/nbl/ext/MitsubaLoader/CElementSensor.cpp | 88 ++---------- src/nbl/ext/MitsubaLoader/CElementShape.cpp | 9 +- src/nbl/ext/MitsubaLoader/CElementTexture.cpp | 21 +-- .../ext/MitsubaLoader/CElementTransform.cpp | 11 -- src/nbl/ext/MitsubaLoader/CMakeLists.txt | 10 +- src/nbl/ext/MitsubaLoader/ElementMacros.h | 119 ++++++++++++++++ src/nbl/ext/MitsubaLoader/ParserUtil.cpp | 106 ++++++++++++-- 28 files changed, 574 insertions(+), 597 deletions(-) create mode 100644 src/nbl/ext/MitsubaLoader/ElementMacros.h diff --git a/include/nbl/ext/MitsubaLoader/CElementBSDF.h b/include/nbl/ext/MitsubaLoader/CElementBSDF.h index e11b2421a8..1d4a823e30 100644 --- a/include/nbl/ext/MitsubaLoader/CElementBSDF.h +++ b/include/nbl/ext/MitsubaLoader/CElementBSDF.h @@ -370,7 +370,7 @@ class CElementBSDF : public IElement } bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) override; - bool onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* globalMetadata) override; + bool onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) override; IElement::Type getType() const override { return IElement::Type::BSDF; } std::string getLogName() const override { return "bsdf"; } @@ -380,16 +380,16 @@ class CElementBSDF : public IElement { switch (type) { - case COATING: [[fallthrough]]; - case ROUGHCOATING: [[fallthrough]]; - case TWO_SIDED: [[fallthrough]]; - case MASK: [[fallthrough]]; - case BLEND_BSDF: [[fallthrough]]; - case MIXTURE_BSDF: [[fallthrough]]; - case BUMPMAP: - return true; - default: - return false; + case COATING: [[fallthrough]]; + case ROUGHCOATING: [[fallthrough]]; + case TWO_SIDED: [[fallthrough]]; + case MASK: [[fallthrough]]; + case BLEND_BSDF: [[fallthrough]]; + case MIXTURE_BSDF: [[fallthrough]]; + case BUMPMAP: + return true; + default: + return false; } } diff --git a/include/nbl/ext/MitsubaLoader/CElementEmissionProfile.h 
b/include/nbl/ext/MitsubaLoader/CElementEmissionProfile.h index 12491a9a70..a4b1d22485 100644 --- a/include/nbl/ext/MitsubaLoader/CElementEmissionProfile.h +++ b/include/nbl/ext/MitsubaLoader/CElementEmissionProfile.h @@ -44,9 +44,7 @@ struct CElementEmissionProfile final : public IElement } bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) override; - inline bool onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* globalMetadata) override { - return true; - } + inline bool onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) override {return true;} bool processChildData(IElement* _child, const std::string& name) override; inline IElement::Type getType() const override { return IElement::Type::EMISSION_PROFILE; } inline std::string getLogName() const override { return "emissionprofile "; } @@ -60,7 +58,7 @@ struct CElementEmissionProfile final : public IElement }; - std::string filename; + std::string filename; // TODO: test destructor runs E_NORMALIZE normalization; float flatten; // TODO: why is this named this way? 
}; diff --git a/include/nbl/ext/MitsubaLoader/CElementEmitter.h b/include/nbl/ext/MitsubaLoader/CElementEmitter.h index eb80657c54..389ec31fd5 100644 --- a/include/nbl/ext/MitsubaLoader/CElementEmitter.h +++ b/include/nbl/ext/MitsubaLoader/CElementEmitter.h @@ -87,7 +87,7 @@ class CElementEmitter : public IElement };*/ struct EnvMap : SampledEmitter { - SPropertyElementData filename; + SPropertyElementData filename; // TODO: make sure destructor runs float scale = 1.f; float gamma = NAN; //bool cache = false; @@ -201,7 +201,7 @@ class CElementEmitter : public IElement } bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) override; - bool onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* globalMetadata) override; + bool onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) override; IElement::Type getType() const override { return IElement::Type::EMITTER; } std::string getLogName() const override { return "emitter"; } diff --git a/include/nbl/ext/MitsubaLoader/CElementFilm.h b/include/nbl/ext/MitsubaLoader/CElementFilm.h index a16ff5d7ac..dcd7402132 100644 --- a/include/nbl/ext/MitsubaLoader/CElementFilm.h +++ b/include/nbl/ext/MitsubaLoader/CElementFilm.h @@ -22,6 +22,16 @@ class CElementFilm final : public IElement LDR_FILM, MFILM }; + static inline core::unordered_map compStringToTypeMap() + { + return { + {"hdrfilm", Type::HDR_FILM}, + {"tiledhdrfilm",Type::TILED_HDR}, + {"ldrfilm", Type::LDR_FILM}, + {"mfilm", Type::MFILM} + }; + } + enum PixelFormat : uint8_t { LUMINANCE, @@ -93,8 +103,29 @@ class CElementFilm final : public IElement { } + inline void initialize() + { + switch (type) + { + case CElementFilm::Type::LDR_FILM: + fileFormat = CElementFilm::FileFormat::PNG; + //componentFormat = UINT8; + ldrfilm = CElementFilm::LDR(); + break; + case CElementFilm::Type::MFILM: + width = 1; + height = 1; + fileFormat = CElementFilm::FileFormat::MATLAB; + pixelFormat = 
CElementFilm::PixelFormat::LUMINANCE; + mfilm = CElementFilm::M(); + break; + default: + break; + } + } + bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) override; - bool onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* globalMetadata) override; + bool onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) override; inline IElement::Type getType() const override { return IElement::Type::FILM; } inline std::string getLogName() const override { return "film"; } diff --git a/include/nbl/ext/MitsubaLoader/CElementIntegrator.h b/include/nbl/ext/MitsubaLoader/CElementIntegrator.h index 72201f6cbb..d683204439 100644 --- a/include/nbl/ext/MitsubaLoader/CElementIntegrator.h +++ b/include/nbl/ext/MitsubaLoader/CElementIntegrator.h @@ -15,9 +15,8 @@ namespace nbl::ext::MitsubaLoader class CElementIntegrator final : public IElement { public: - enum Type + enum Type : uint8_t { - INVALID, AO, DIRECT, PATH, @@ -35,8 +34,33 @@ class CElementIntegrator final : public IElement VPL, IRR_CACHE, MULTI_CHANNEL, - FIELD_EXTRACT + FIELD_EXTRACT, + INVALID }; + static inline core::unordered_map compStringToTypeMap() + { + return { + {"ao", Type::AO}, + {"direct", Type::DIRECT}, + {"path", Type::PATH}, + {"volpath_simple", Type::VOL_PATH_SIMPLE}, + {"volpath", Type::VOL_PATH}, + {"bdpt", Type::BDPT}, + {"photonmapper", Type::PHOTONMAPPER}, + {"ppm", Type::PPM}, + {"sppm", Type::SPPM}, + {"pssmlt", Type::PSSMLT}, + {"mlt", Type::MLT}, + {"erpt", Type::ERPT}, + {"ptracer", Type::ADJ_P_TRACER}, + {"adaptive", Type::ADAPTIVE}, + {"vpl", Type::VPL}, + {"irrcache", Type::IRR_CACHE}, + {"multichannel", Type::MULTI_CHANNEL}, + {"field", Type::FIELD_EXTRACT} + }; + } + struct AmbientOcclusion { int32_t shadingSamples = 1; @@ -49,8 +73,8 @@ class CElementIntegrator final : public IElement }; struct DirectIllumination : EmitterHideableBase { - int32_t emitterSamples = 0xdeadbeefu; - int32_t bsdfSamples = 
0xdeadbeefu; + int32_t emitterSamples = static_cast(0xdeadbeefu); + int32_t bsdfSamples = static_cast(0xdeadbeefu); bool strictNormals = false; }; struct MonteCarloTracingBase @@ -161,7 +185,7 @@ class CElementIntegrator final : public IElement } Type field; - SPropertyElementData undefined; + SPropertyElementData undefined; // TODO: test destructor runs }; struct MetaIntegrator { @@ -198,74 +222,89 @@ class CElementIntegrator final : public IElement { } - inline CElementIntegrator& operator=(const CElementIntegrator& other) + template + inline void visit(Visitor&& visitor) { - IElement::operator=(other); - type = other.type; switch (type) { case CElementIntegrator::Type::AO: - ao = other.ao; + visitor(ao); break; case CElementIntegrator::Type::DIRECT: - direct = other.direct; + visitor(direct); break; case CElementIntegrator::Type::PATH: - path = other.path; + visitor(path); break; case CElementIntegrator::Type::VOL_PATH_SIMPLE: - volpath_simple = other.volpath_simple; + visitor(volpath_simple); break; case CElementIntegrator::Type::VOL_PATH: - volpath = other.volpath; + visitor(volpath); break; case CElementIntegrator::Type::BDPT: - bdpt = other.bdpt; + visitor(bdpt); break; case CElementIntegrator::Type::PHOTONMAPPER: - photonmapper = other.photonmapper; + visitor(photonmapper); break; case CElementIntegrator::Type::PPM: - ppm = other.ppm; + visitor(ppm); break; case CElementIntegrator::Type::SPPM: - sppm = other.sppm; + visitor(sppm); break; case CElementIntegrator::Type::PSSMLT: - pssmlt = other.pssmlt; + visitor(pssmlt); break; case CElementIntegrator::Type::MLT: - mlt = other.mlt; + visitor(mlt); break; case CElementIntegrator::Type::ERPT: - erpt = other.erpt; + visitor(erpt); break; case CElementIntegrator::Type::ADJ_P_TRACER: - ptracer = other.ptracer; + visitor(ptracer); break; case CElementIntegrator::Type::ADAPTIVE: - adaptive = other.adaptive; + visitor(adaptive); break; case CElementIntegrator::Type::VPL: - vpl = other.vpl; + visitor(vpl); break; case 
CElementIntegrator::Type::IRR_CACHE: - irrcache = other.irrcache; + visitor(irrcache); break; case CElementIntegrator::Type::MULTI_CHANNEL: - multichannel = other.multichannel; + visitor(multichannel); break; case CElementIntegrator::Type::FIELD_EXTRACT: - field = other.field; + visitor(field); break; default: break; } + } + template + inline void visit(Visitor&& visitor) const + { + const_cast(this)->visit([&](T& var)->void + { + visitor(const_cast(var)); + } + ); + } + + inline CElementIntegrator& operator=(const CElementIntegrator& other) + { + IElement::operator=(other); + type = other.type; + IElement::copyVariant(this,&other); return *this; } bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) override; - bool onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* globalMetadata) override; + bool onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) override; inline IElement::Type getType() const override { return IElement::Type::INTEGRATOR; } inline std::string getLogName() const override { return "integrator"; } diff --git a/include/nbl/ext/MitsubaLoader/CElementRFilter.h b/include/nbl/ext/MitsubaLoader/CElementRFilter.h index 631dc4e82c..0ca2426d73 100644 --- a/include/nbl/ext/MitsubaLoader/CElementRFilter.h +++ b/include/nbl/ext/MitsubaLoader/CElementRFilter.h @@ -26,6 +26,18 @@ class CElementRFilter final : public IElement CATMULLROM, LANCZOS }; + static inline core::unordered_map compStringToTypeMap() + { + return { + std::make_pair("box", Type::BOX), + std::make_pair("tent", Type::TENT), + std::make_pair("gaussian", Type::GAUSSIAN), + std::make_pair("mitchell", Type::MITCHELL), + std::make_pair("catmullrom", Type::CATMULLROM), + std::make_pair("lanczos", Type::LANCZOS) + }; + } + struct Gaussian { float sigma = NAN; // can't look at mitsuba source to figure out the default it uses @@ -46,8 +58,43 @@ class CElementRFilter final : public IElement } inline ~CElementRFilter() {} + 
template + inline void visit(Visitor&& visitor) + { + switch (type) + { + case Type::BOX: + [[fallthrough]]; + case Type::TENT: + break; + case Type::GAUSSIAN: + visit(gaussian); + break; + case Type::MITCHELL: + visit(mitchell); + break; + case Type::CATMULLROM: + visit(catmullrom); + break; + case Type::LANCZOS: + visit(lanczos); + break; + default: + break; + } + } + template + inline void visit(Visitor&& visitor) const + { + const_cast(this)->visit([&](T& var)->void + { + visitor(const_cast(var)); + } + ); + } + bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) override; - bool onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* globalMetadata) override; + bool onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) override; inline IElement::Type getType() const override { return IElement::Type::RFILTER; } inline std::string getLogName() const override { return "rfilter"; } diff --git a/include/nbl/ext/MitsubaLoader/CElementSampler.h b/include/nbl/ext/MitsubaLoader/CElementSampler.h index 313e649b2e..02e3ae6f6b 100644 --- a/include/nbl/ext/MitsubaLoader/CElementSampler.h +++ b/include/nbl/ext/MitsubaLoader/CElementSampler.h @@ -17,29 +17,65 @@ class CElementSampler : public IElement public: enum Type : uint8_t { - INVALID, INDEPENDENT, STRATIFIED, LDSAMPLER, HALTON, HAMMERSLEY, - SOBOL + SOBOL, + INVALID }; + static inline core::unordered_map compStringToTypeMap() + { + return { + {"independent", Type::INDEPENDENT}, + {"stratified", Type::STRATIFIED}, + {"ldsampler", Type::LDSAMPLER}, + {"halton", Type::HALTON}, + {"hammersley", Type::HAMMERSLEY}, + {"sobol", Type::SOBOL} + }; + } inline CElementSampler(const char* id) : IElement(id), type(INVALID), sampleCount(4) {} inline ~CElementSampler() {} + inline void initialize() + { + sampleCount = 4; + switch (type) + { + case CElementSampler::Type::STRATIFIED: + [[fallthrough]]; + case CElementSampler::Type::LDSAMPLER: + dimension = 4; 
+ break; + case CElementSampler::Type::HALTON: + [[fallthrough]]; + case CElementSampler::Type::HAMMERSLEY: + scramble = -1; + break; + case CElementSampler::Type::SOBOL: + scramble = 0; + break; + default: + break; + } + } + bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) override; - bool onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* globalMetadata) override; + bool onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) override; inline IElement::Type getType() const override { return IElement::Type::SAMPLER; } inline std::string getLogName() const override { return "sampler"; } // make these public + // TODO: these should be bitfields of a uint64_t, or pack into 8 bytes somehow Type type; int32_t sampleCount; union { int32_t dimension; + // TODO: document scramble seed? int32_t scramble; }; }; diff --git a/include/nbl/ext/MitsubaLoader/CElementSensor.h b/include/nbl/ext/MitsubaLoader/CElementSensor.h index fa8bc07509..655a50b3e8 100644 --- a/include/nbl/ext/MitsubaLoader/CElementSensor.h +++ b/include/nbl/ext/MitsubaLoader/CElementSensor.h @@ -17,9 +17,8 @@ namespace nbl::ext::MitsubaLoader class CElementSensor final : public IElement { public: - enum Type + enum Type : uint8_t { - INVALID, PERSPECTIVE, THINLENS, ORTHOGRAPHIC, @@ -28,8 +27,24 @@ class CElementSensor final : public IElement IRRADIANCEMETER, RADIANCEMETER, FLUENCEMETER, - PERSPECTIVE_RDIST + PERSPECTIVE_RDIST, + INVALID }; + static inline core::unordered_map compStringToTypeMap() + { + return { + {"perspective", Type::PERSPECTIVE}, + {"thinlens", Type::THINLENS}, + {"orthographic", Type::ORTHOGRAPHIC}, + {"telecentric", Type::TELECENTRIC}, + {"spherical", Type::SPHERICAL}, + {"irradiancemeter", Type::IRRADIANCEMETER}, + {"radiancemeter", Type::RADIANCEMETER}, + {"fluencemeter", Type::FLUENCEMETER}/*, + {"perspective_rdist", PERSPECTIVE_RDIST}*/ + }; + } + constexpr static inline uint8_t MaxClipPlanes = 6u; 
struct ShutterSensor @@ -111,47 +126,62 @@ class CElementSensor final : public IElement { } - inline CElementSensor& operator=(const CElementSensor& other) + template + inline void visit(Visitor&& visitor) { - IElement::operator=(other); - type = other.type; - transform = other.transform; switch (type) { case CElementSensor::Type::PERSPECTIVE: - perspective = other.perspective; + visitor(perspective); break; case CElementSensor::Type::THINLENS: - thinlens = other.thinlens; + visitor(thinlens); break; case CElementSensor::Type::ORTHOGRAPHIC: - orthographic = other.orthographic; + visitor(orthographic); break; case CElementSensor::Type::TELECENTRIC: - telecentric = other.telecentric; + visitor(telecentric); break; case CElementSensor::Type::SPHERICAL: - spherical = other.spherical; + visitor(spherical); break; case CElementSensor::Type::IRRADIANCEMETER: - irradiancemeter = other.irradiancemeter; + visitor(irradiancemeter); break; case CElementSensor::Type::RADIANCEMETER: - radiancemeter = other.radiancemeter; + visitor(radiancemeter); break; case CElementSensor::Type::FLUENCEMETER: - fluencemeter = other.fluencemeter; + visitor(fluencemeter); break; default: break; } + } + template + inline void visit(Visitor&& visitor) const + { + const_cast(this)->visit([&](T& var)->void + { + visitor(const_cast(var)); + } + ); + } + + inline CElementSensor& operator=(const CElementSensor& other) + { + IElement::operator=(other); + type = other.type; + transform = other.transform; + IElement::copyVariant(this,&other); film = other.film; sampler = other.sampler; return *this; } bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) override; - bool onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* globalMetadata) override; + bool onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) override; inline IElement::Type getType() const override { return IElement::Type::SENSOR; } inline std::string getLogName() 
const override { return "sensor"; } @@ -181,12 +211,12 @@ class CElementSensor final : public IElement break;*/ case IElement::Type::FILM: film = *static_cast(_child); - if (film.type != CElementFilm::Type::INVALID) + if (film.type!=CElementFilm::Type::INVALID) return true; break; case IElement::Type::SAMPLER: sampler = *static_cast(_child); - if (sampler.type != CElementSampler::Type::INVALID) + if (sampler.type!=CElementSampler::Type::INVALID) return true; break; } diff --git a/include/nbl/ext/MitsubaLoader/CElementShape.h b/include/nbl/ext/MitsubaLoader/CElementShape.h index 39e163a5c4..08dfa75783 100644 --- a/include/nbl/ext/MitsubaLoader/CElementShape.h +++ b/include/nbl/ext/MitsubaLoader/CElementShape.h @@ -216,7 +216,7 @@ class CElementShape final : public IElement } bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) override; - bool onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* globalMetadata) override; + bool onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) override; inline IElement::Type getType() const override { return IElement::Type::SHAPE; } inline std::string getLogName() const override { return "shape"; } diff --git a/include/nbl/ext/MitsubaLoader/CElementTexture.h b/include/nbl/ext/MitsubaLoader/CElementTexture.h index 8a4f2b4c87..ec2ff2d1bf 100644 --- a/include/nbl/ext/MitsubaLoader/CElementTexture.h +++ b/include/nbl/ext/MitsubaLoader/CElementTexture.h @@ -133,7 +133,7 @@ class CElementTexture : public IElement Z*/ }; - SPropertyElementData filename; + SPropertyElementData filename; // TODO: make sure destructor runs WRAP_MODE wrapModeU = REPEAT; WRAP_MODE wrapModeV = REPEAT; float gamma = NAN; @@ -236,9 +236,10 @@ class CElementTexture : public IElement } bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) override; - bool onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* globalMetadata) 
override; - IElement::Type getType() const override { return IElement::Type::TEXTURE; } - std::string getLogName() const override { return "texture"; } + bool onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) override; + + inline IElement::Type getType() const override { return IElement::Type::TEXTURE; } + inline std::string getLogName() const override { return "texture"; } bool processChildData(IElement* _child, const std::string& name) override; diff --git a/include/nbl/ext/MitsubaLoader/CElementTransform.h b/include/nbl/ext/MitsubaLoader/CElementTransform.h index 701296664b..292a081bba 100644 --- a/include/nbl/ext/MitsubaLoader/CElementTransform.h +++ b/include/nbl/ext/MitsubaLoader/CElementTransform.h @@ -18,7 +18,7 @@ class CElementTransform final : public IElement inline ~CElementTransform() {} bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) override; - bool onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* globalMetadata) override { return true; } + inline bool onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) override {return true;} inline IElement::Type getType() const override { return IElement::Type::TRANSFORM; } inline std::string getLogName() const override { return "transform"; } /* diff --git a/include/nbl/ext/MitsubaLoader/CMitsubaMetadata.h b/include/nbl/ext/MitsubaLoader/CMitsubaMetadata.h index bf24e9d1ff..6f24951c50 100644 --- a/include/nbl/ext/MitsubaLoader/CMitsubaMetadata.h +++ b/include/nbl/ext/MitsubaLoader/CMitsubaMetadata.h @@ -10,8 +10,8 @@ //#include "nbl/ext/MitsubaLoader/SContext.h" //#include "nbl/ext/MitsubaLoader/CElementEmitter.h" -//#include "nbl/ext/MitsubaLoader/CElementIntegrator.h" -//#include "nbl/ext/MitsubaLoader/CElementSensor.h" +#include "nbl/ext/MitsubaLoader/CElementIntegrator.h" +#include "nbl/ext/MitsubaLoader/CElementSensor.h" //#include "nbl/ext/MitsubaLoader/CElementShape.h" @@ -36,17 +36,16 @@ class 
CMitsubaMetadata : public asset::IAssetMetadata CElementShape::Type type; }; +#endif struct SGlobal { public: - SGlobal() : m_integrator("invalid") {}// TODO - - inline uint32_t getVTStorageViewCount() const { return m_VT->getFloatViews().size(); } + inline SGlobal() : m_integrator("invalid") {}// TODO CElementIntegrator m_integrator; core::vector m_sensors; } m_global; -#endif + inline CMitsubaMetadata() : IAssetMetadata()/*, m_metaMeshStorage(), m_metaMeshInstanceStorage(), m_metaMeshInstanceAuxStorage(), m_meshStorageIt(nullptr), m_instanceStorageIt(nullptr), m_instanceAuxStorageIt(nullptr)*/ { @@ -63,7 +62,7 @@ class CMitsubaMetadata : public asset::IAssetMetadata } #endif private: - friend class CMitsubaLoader; +// friend class CMitsubaLoader; #if 0 meta_container_t m_metaMeshStorage; CMesh* m_meshStorageIt; diff --git a/include/nbl/ext/MitsubaLoader/IElement.h b/include/nbl/ext/MitsubaLoader/IElement.h index 9128e1b274..b0f2d34c3b 100644 --- a/include/nbl/ext/MitsubaLoader/IElement.h +++ b/include/nbl/ext/MitsubaLoader/IElement.h @@ -49,12 +49,13 @@ class IElement virtual std::string getLogName() const = 0; virtual bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) = 0; - virtual bool onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* globalMetadata) = 0; + virtual bool onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) = 0; //! 
default implementation for elements that doesnt have any children virtual bool processChildData(IElement* _child, const std::string& name) { return !_child; } + // static inline bool getTypeIDAndNameStrings(std::add_lvalue_reference_t outType, std::add_lvalue_reference_t outID, std::string& name, const char** _atts) { @@ -106,6 +107,22 @@ class IElement return _atts[attrCount]; } + + // + template + static inline void copyVariant(Derived* to, const Derived* from) + { + to->visit([from](auto& selfEl)->void + { + from->visit([&selfEl](const auto& otherEl)->void + { + if constexpr (std::is_same_v,std::decay_t>) + selfEl = otherEl; + } + ); + } + ); + } }; } diff --git a/include/nbl/ext/MitsubaLoader/ParserUtil.h b/include/nbl/ext/MitsubaLoader/ParserUtil.h index 11d2d30170..997d95f68a 100644 --- a/include/nbl/ext/MitsubaLoader/ParserUtil.h +++ b/include/nbl/ext/MitsubaLoader/ParserUtil.h @@ -139,8 +139,8 @@ class ParserManager final }; const core::unordered_map createElementTable; // - template requires std::is_base_of_v - static SNamedElement createElement(const char** _atts, SessionContext* ctx); + template + struct CreateElement; // static SNamedElement processAlias(const char** _atts, SessionContext* ctx); static SNamedElement processRef(const char** _atts, SessionContext* ctx); diff --git a/include/nbl/ext/MitsubaLoader/PropertyElement.h b/include/nbl/ext/MitsubaLoader/PropertyElement.h index 8ecdce7fdd..320e12712e 100644 --- a/include/nbl/ext/MitsubaLoader/PropertyElement.h +++ b/include/nbl/ext/MitsubaLoader/PropertyElement.h @@ -14,6 +14,7 @@ namespace nbl::ext::MitsubaLoader // maybe move somewhere inline void invalidXMLFileStructure(system::logger_opt_ptr logger, const std::string& errorMessage) { + // TODO: print the line in the XML or something std::string message = "Mitsuba loader error - Invalid .xml file structure: \'" + errorMessage + '\''; logger.log(message,system::ILogger::E_LOG_LEVEL::ELL_ERROR); _NBL_DEBUG_BREAK_IF(true); diff --git 
a/src/nbl/ext/MitsubaLoader/CElementBSDF.cpp b/src/nbl/ext/MitsubaLoader/CElementBSDF.cpp index a9620c18a2..93187d723f 100644 --- a/src/nbl/ext/MitsubaLoader/CElementBSDF.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementBSDF.cpp @@ -807,12 +807,7 @@ bool CElementBSDF::processChildData(IElement* _child, const std::string& name) bool CElementBSDF::onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* metadata) { - if (type == Type::INVALID) - { - ParserLog::invalidXMLFileStructure(getLogName() + ": type not specified"); - _NBL_DEBUG_BREAK_IF(true); - return true; - } + NBL_EXT_MITSUBA_LOADER_ELEMENT_INVALID_TYPE_CHECK(true); // TODO: Validation { diff --git a/src/nbl/ext/MitsubaLoader/CElementEmissionProfile.cpp b/src/nbl/ext/MitsubaLoader/CElementEmissionProfile.cpp index 11e68d619d..0f360ccba0 100644 --- a/src/nbl/ext/MitsubaLoader/CElementEmissionProfile.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementEmissionProfile.cpp @@ -10,22 +10,6 @@ namespace nbl::ext::MitsubaLoader { -template<> -auto ParserManager::createElement(const char** _atts, SessionContext* ctx) -> SNamedElement -{ - const char* type; - const char* id; - std::string name; - if (!IElement::getTypeIDAndNameStrings(type, id, name, _atts)) - return {}; - - CElementEmissionProfile* obj = ctx->objects.construct(id); - if (!obj) - return {}; - - return {obj,std::move(name)}; -} - bool CElementEmissionProfile::addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) { if (_property.name=="filename") diff --git a/src/nbl/ext/MitsubaLoader/CElementFilm.cpp b/src/nbl/ext/MitsubaLoader/CElementFilm.cpp index ba22c58fb9..7f17cbe64d 100644 --- a/src/nbl/ext/MitsubaLoader/CElementFilm.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementFilm.cpp @@ -1,75 +1,18 @@ // Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h - +#include "nbl/ext/MitsubaLoader/CElementFilm.h" #include "nbl/ext/MitsubaLoader/ParserUtil.h" -#include "nbl/ext/MitsubaLoader/CElementFactory.h" #include -namespace nbl -{ -namespace ext -{ -namespace MitsubaLoader -{ - - -template<> -CElementFactory::return_type CElementFactory::createElement(const char** _atts, ParserManager* _util) +namespace nbl::ext::MitsubaLoader { - const char* type; - const char* id; - std::string name; - if (!IElement::getTypeIDAndNameStrings(type, id, name, _atts)) - return CElementFactory::return_type(nullptr, ""); - - static const core::unordered_map StringToType = - { - {"hdrfilm", CElementFilm::Type::HDR_FILM}, - {"tiledhdrfilm",CElementFilm::Type::TILED_HDR}, - {"ldrfilm", CElementFilm::Type::LDR_FILM}, - {"mfilm", CElementFilm::Type::MFILM} - }; - - auto found = StringToType.find(type); - if (found==StringToType.end()) - { - ParserLog::invalidXMLFileStructure("unknown type"); - _NBL_DEBUG_BREAK_IF(false); - return CElementFactory::return_type(nullptr, ""); - } - CElementFilm* obj = _util->objects.construct(id); - if (!obj) - return CElementFactory::return_type(nullptr, ""); - - obj->type = found->second; - // defaults - switch (obj->type) - { - case CElementFilm::Type::LDR_FILM: - obj->fileFormat = CElementFilm::FileFormat::PNG; - //obj->componentFormat = UINT8; - obj->ldrfilm = CElementFilm::LDR(); - break; - case CElementFilm::Type::MFILM: - obj->width = 1; - obj->height = 1; - obj->fileFormat = CElementFilm::FileFormat::MATLAB; - obj->pixelFormat = CElementFilm::PixelFormat::LUMINANCE; - obj->mfilm = CElementFilm::M(); - break; - default: - break; - } - return CElementFactory::return_type(obj, std::move(name)); -} - - -bool CElementFilm::addProperty(SNamedPropertyElement&& _property) +bool CElementFilm::addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) { +#if 0 bool error = type==Type::INVALID; #define 
SET_PROPERTY(MEMBER,PROPERTY_TYPE) [&]() -> void { \ if (_property.type!=PROPERTY_TYPE) { \ @@ -311,16 +254,19 @@ bool CElementFilm::addProperty(SNamedPropertyElement&& _property) auto found = SetPropertyMap.find(_property.name); if (found == SetPropertyMap.end()) { - _NBL_DEBUG_BREAK_IF(true); - ParserLog::invalidXMLFileStructure("No Film can have such property set with name: " + _property.name+"\nRemember we don't support \"render-time annotations\""); + + invalidXMLFileStructure(logger,"No Film can have such property set with name: " + _property.name+"\nRemember we don't support \"render-time annotations\""); return false; } found->second(); return !error; +#endif + assert(false); + return false; } -bool CElementFilm::onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* metadata) +bool CElementFilm::onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) { cropOffsetX = std::max(cropOffsetX,0); cropOffsetY = std::max(cropOffsetY,0); @@ -339,8 +285,8 @@ bool CElementFilm::onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override case PFM: break; default: - ParserLog::invalidXMLFileStructure(getLogName() + ": film type does not support this file format"); - _NBL_DEBUG_BREAK_IF(true); + invalidXMLFileStructure(logger,getLogName() + ": film type does not support this file format"); + return false; }; break; @@ -350,8 +296,8 @@ bool CElementFilm::onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override case OPENEXR: break; default: - ParserLog::invalidXMLFileStructure(getLogName() + ": film type does not support this file format"); - _NBL_DEBUG_BREAK_IF(true); + invalidXMLFileStructure(logger,getLogName() + ": film type does not support this file format"); + return false; }; break; @@ -363,8 +309,8 @@ bool CElementFilm::onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override case JPEG: break; default: - ParserLog::invalidXMLFileStructure(getLogName() + ": film type does not support this file format"); - 
_NBL_DEBUG_BREAK_IF(true); + invalidXMLFileStructure(logger,getLogName() + ": film type does not support this file format"); + return false; }; switch (pixelFormat) @@ -378,8 +324,8 @@ bool CElementFilm::onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override case XYZ: [[fallthrough]]; case XYZA: - ParserLog::invalidXMLFileStructure(getLogName() + ": film type does not support this pixel format"); - _NBL_DEBUG_BREAK_IF(true); + invalidXMLFileStructure(logger,getLogName() + ": film type does not support this pixel format"); + return false; break; default: @@ -396,8 +342,8 @@ bool CElementFilm::onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override case NUMPY: break; default: - ParserLog::invalidXMLFileStructure(getLogName() + ": film type does not support this file format"); - _NBL_DEBUG_BREAK_IF(true); + invalidXMLFileStructure(logger,getLogName() + ": film type does not support this file format"); + return false; }; switch (pixelFormat) @@ -405,8 +351,8 @@ bool CElementFilm::onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override case XYZ: [[fallthrough]]; case XYZA: - ParserLog::invalidXMLFileStructure(getLogName() + ": film type does not support this pixel format"); - _NBL_DEBUG_BREAK_IF(true); + invalidXMLFileStructure(logger,getLogName() + ": film type does not support this pixel format"); + return false; break; default: @@ -417,14 +363,14 @@ bool CElementFilm::onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override case FLOAT32: break; default: - ParserLog::invalidXMLFileStructure(getLogName() + ": film type does not support this component format"); - _NBL_DEBUG_BREAK_IF(true); + invalidXMLFileStructure(logger,getLogName() + ": film type does not support this component format"); + return false; }; break; default: - ParserLog::invalidXMLFileStructure(getLogName() + ": type not specified"); - _NBL_DEBUG_BREAK_IF(true); + invalidXMLFileStructure(logger,getLogName() + ": type not specified"); + return false; } @@ -432,6 +378,4 @@ bool 
CElementFilm::onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override } -} -} } \ No newline at end of file diff --git a/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp b/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp index dbff6ed1c9..964bbe4211 100644 --- a/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp @@ -2,125 +2,20 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h #include "nbl/ext/MitsubaLoader/CElementIntegrator.h" -#include "nbl/ext/MitsubaLoader/ParserUtil.h" #include "nbl/ext/MitsubaLoader/CMitsubaMetadata.h" +#include "nbl/ext/MitsubaLoader/ElementMacros.h" + #include namespace nbl::ext::MitsubaLoader { -template<> -auto ParserManager::createElement(const char** _atts, SessionContext* ctx) -> SNamedElement -{ - const char* type; - const char* id; - std::string name; - if (!IElement::getTypeIDAndNameStrings(type, id, name, _atts)) - return {}; - - static const core::unordered_map StringToType = - { - {"ao", CElementIntegrator::Type::AO}, - {"direct", CElementIntegrator::Type::DIRECT}, - {"path", CElementIntegrator::Type::PATH}, - {"volpath_simple", CElementIntegrator::Type::VOL_PATH_SIMPLE}, - {"volpath", CElementIntegrator::Type::VOL_PATH}, - {"bdpt", CElementIntegrator::Type::BDPT}, - {"photonmapper", CElementIntegrator::Type::PHOTONMAPPER}, - {"ppm", CElementIntegrator::Type::PPM}, - {"sppm", CElementIntegrator::Type::SPPM}, - {"pssmlt", CElementIntegrator::Type::PSSMLT}, - {"mlt", CElementIntegrator::Type::MLT}, - {"erpt", CElementIntegrator::Type::ERPT}, - {"ptracer", CElementIntegrator::Type::ADJ_P_TRACER}, - {"adaptive", CElementIntegrator::Type::ADAPTIVE}, - {"vpl", CElementIntegrator::Type::VPL}, - {"irrcache", CElementIntegrator::Type::IRR_CACHE}, - {"multichannel", CElementIntegrator::Type::MULTI_CHANNEL}, - {"field", CElementIntegrator::Type::FIELD_EXTRACT} - }; - - auto found = StringToType.find(type); - 
if (found==StringToType.end()) - { - ParserLog::invalidXMLFileStructure("unknown type"); - _NBL_DEBUG_BREAK_IF(false); - return {}; - } - - CElementIntegrator* obj = _util->objects.construct(id); - if (!obj) - return {}; - - obj->type = found->second; - // defaults - switch (obj->type) - { - case CElementIntegrator::Type::AO: - obj->ao = CElementIntegrator::AmbientOcclusion(); - break; - case CElementIntegrator::Type::DIRECT: - obj->direct = CElementIntegrator::DirectIllumination(); - break; - case CElementIntegrator::Type::PATH: - obj->path = CElementIntegrator::PathTracing(); - break; - case CElementIntegrator::Type::VOL_PATH_SIMPLE: - obj->volpath_simple = CElementIntegrator::SimpleVolumetricPathTracing(); - break; - case CElementIntegrator::Type::VOL_PATH: - obj->volpath = CElementIntegrator::ExtendedVolumetricPathTracing(); - break; - case CElementIntegrator::Type::BDPT: - obj->bdpt = CElementIntegrator::BiDirectionalPathTracing(); - break; - case CElementIntegrator::Type::PHOTONMAPPER: - obj->photonmapper = CElementIntegrator::PhotonMapping(); - break; - case CElementIntegrator::Type::PPM: - obj->ppm = CElementIntegrator::ProgressivePhotonMapping(); - break; - case CElementIntegrator::Type::SPPM: - obj->sppm = CElementIntegrator::StochasticProgressivePhotonMapping(); - break; - case CElementIntegrator::Type::PSSMLT: - obj->pssmlt = CElementIntegrator::PrimarySampleSpaceMetropolisLightTransport(); - break; - case CElementIntegrator::Type::MLT: - obj->mlt = CElementIntegrator::PathSpaceMetropolisLightTransport(); - break; - case CElementIntegrator::Type::ERPT: - obj->erpt = CElementIntegrator::EnergyRedistributionPathTracing(); - break; - case CElementIntegrator::Type::ADJ_P_TRACER: - obj->ptracer = CElementIntegrator::AdjointParticleTracing(); - break; - case CElementIntegrator::Type::ADAPTIVE: - obj->adaptive = CElementIntegrator::AdaptiveIntegrator(); - break; - case CElementIntegrator::Type::VPL: - obj->vpl = CElementIntegrator::VirtualPointLights(); - 
break; - case CElementIntegrator::Type::IRR_CACHE: - obj->irrcache = CElementIntegrator::IrradianceCacheIntegrator(); - break; - case CElementIntegrator::Type::MULTI_CHANNEL: - obj->multichannel = CElementIntegrator::MultiChannelIntegrator(); - break; - case CElementIntegrator::Type::FIELD_EXTRACT: - obj->field = CElementIntegrator::FieldExtraction(); - break; - default: - break; - } - return CElementFactory::return_type(obj, std::move(name)); -} - -bool CElementIntegrator::addProperty(SNamedPropertyElement&& _property) +bool CElementIntegrator::addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) { bool error = false; +#if 0 auto dispatch = [&](auto func) -> void { switch (type) @@ -391,23 +286,20 @@ bool CElementIntegrator::addProperty(SNamedPropertyElement&& _property) auto found = SetPropertyMap.find(_property.name); if (found==SetPropertyMap.end()) { - _NBL_DEBUG_BREAK_IF(true); - ParserLog::invalidXMLFileStructure("No Integrator can have such property set with name: "+_property.name); + invalidXMLFileStructure("No Integrator can have such property set with name: "+_property.name); return false; } found->second(); return !error; +#endif + assert(false); + return false; } -bool CElementIntegrator::onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* metadata) +bool CElementIntegrator::onEndTag(CMitsubaMetadata* metadata, system::logger_opt_ptr logger) { - if (type == Type::INVALID) - { - ParserLog::invalidXMLFileStructure(getLogName() + ": type not specified"); - _NBL_DEBUG_BREAK_IF(true); - return true; - } + NBL_EXT_MITSUBA_LOADER_ELEMENT_INVALID_TYPE_CHECK(true); // TODO: Validation { @@ -415,8 +307,7 @@ bool CElementIntegrator::onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _ov if (metadata->m_global.m_integrator.type!=Type::INVALID) { - ParserLog::invalidXMLFileStructure(getLogName() + ": already specified an integrator"); - _NBL_DEBUG_BREAK_IF(true); + invalidXMLFileStructure(logger,"already 
specified an integrator, NOT overwriting."); return true; } metadata->m_global.m_integrator = *this; diff --git a/src/nbl/ext/MitsubaLoader/CElementRFilter.cpp b/src/nbl/ext/MitsubaLoader/CElementRFilter.cpp index 160fd5aadb..de5bdf2255 100644 --- a/src/nbl/ext/MitsubaLoader/CElementRFilter.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementRFilter.cpp @@ -1,92 +1,30 @@ // Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h - #include "nbl/core/string/stringutil.h" +#include "nbl/ext/MitsubaLoader/CElementRFilter.h" #include "nbl/ext/MitsubaLoader/ParserUtil.h" -#include "nbl/ext/MitsubaLoader/CElementFactory.h" -namespace nbl -{ -namespace ext -{ -namespace MitsubaLoader -{ +#include "nbl/ext/MitsubaLoader/ElementMacros.h" -template<> -CElementFactory::return_type CElementFactory::createElement(const char** _atts, ParserManager* _util) +namespace nbl::ext::MitsubaLoader { - const char* type; - const char* id; - std::string name; - if (!IElement::getTypeIDAndNameStrings(type, id, name, _atts)) - return CElementFactory::return_type(nullptr, ""); - - static const core::unordered_map StringToType = - { - std::make_pair("box", CElementRFilter::Type::BOX), - std::make_pair("tent", CElementRFilter::Type::TENT), - std::make_pair("gaussian", CElementRFilter::Type::GAUSSIAN), - std::make_pair("mitchell", CElementRFilter::Type::MITCHELL), - std::make_pair("catmullrom", CElementRFilter::Type::CATMULLROM), - std::make_pair("lanczos", CElementRFilter::Type::LANCZOS) - }; - - auto found = StringToType.find(type); - if (found==StringToType.end()) - { - ParserLog::invalidXMLFileStructure("unknown type"); - _NBL_DEBUG_BREAK_IF(false); - return CElementFactory::return_type(nullptr, ""); - } - - CElementRFilter* obj = _util->objects.construct(id); - if (!obj) - return CElementFactory::return_type(nullptr, ""); - - obj->type = found->second; - //validation - 
switch (obj->type) - { - case CElementRFilter::Type::BOX: - [[fallthrough]]; - case CElementRFilter::Type::TENT: - break; - case CElementRFilter::Type::GAUSSIAN: - obj->gaussian = CElementRFilter::Gaussian(); - break; - case CElementRFilter::Type::MITCHELL: - obj->mitchell = CElementRFilter::MitchellNetravali(); - break; - case CElementRFilter::Type::CATMULLROM: - obj->catmullrom = CElementRFilter::MitchellNetravali(); - break; - case CElementRFilter::Type::LANCZOS: - obj->lanczos = CElementRFilter::LanczosSinc(); - break; - default: - break; - } - return CElementFactory::return_type(obj, std::move(name)); -} -bool CElementRFilter::addProperty(SNamedPropertyElement&& _property) +bool CElementRFilter::addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) { if (_property.type == SNamedPropertyElement::Type::INTEGER) { if (core::strcmpi(_property.name,std::string("lobes"))) { - ParserLog::invalidXMLFileStructure("\"lobes\" must be an integer property"); - _NBL_DEBUG_BREAK_IF(true); + invalidXMLFileStructure(logger,"\"lobes\" must be an integer property"); return false; } lanczos.lobes = _property.ivalue; return true; } - else - if (_property.type == SNamedPropertyElement::Type::FLOAT) + else if (_property.type == SNamedPropertyElement::Type::FLOAT) { if (core::strcmpi(_property.name,std::string("b"))==0) { @@ -109,31 +47,21 @@ bool CElementRFilter::addProperty(SNamedPropertyElement&& _property) return true; } else - ParserLog::invalidXMLFileStructure("unsupported rfilter property called: "+_property.name); + invalidXMLFileStructure(logger,"unsupported rfilter property called: "+_property.name); } else - { - ParserLog::invalidXMLFileStructure("this reconstruction filter type does not take this parameter type for parameter: " + _property.name); - _NBL_DEBUG_BREAK_IF(true); - } + invalidXMLFileStructure(logger,"this reconstruction filter type does not take this parameter type for parameter: " + _property.name); return false; } -bool 
CElementRFilter::onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* globalMetadata) +bool CElementRFilter::onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) { - if (type == Type::INVALID) - { - ParserLog::invalidXMLFileStructure(getLogName() + ": type not specified"); - _NBL_DEBUG_BREAK_IF(true); - return true; - } + NBL_EXT_MITSUBA_LOADER_ELEMENT_INVALID_TYPE_CHECK(true); // TODO: Validation return true; } -} -} } \ No newline at end of file diff --git a/src/nbl/ext/MitsubaLoader/CElementSampler.cpp b/src/nbl/ext/MitsubaLoader/CElementSampler.cpp index aff64d6b24..3dbb2db9f7 100644 --- a/src/nbl/ext/MitsubaLoader/CElementSampler.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementSampler.cpp @@ -2,71 +2,16 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h #include "nbl/ext/MitsubaLoader/CElementSampler.h" -#include "nbl/ext/MitsubaLoader/ParserUtil.h" +#include "nbl/ext/MitsubaLoader/ElementMacros.h" namespace nbl::ext::MitsubaLoader { -template<> -CElementFactory::return_type CElementFactory::createElement(const char** _atts, ParserManager* _util) -{ - const char* type; - const char* id; - std::string name; - if (!IElement::getTypeIDAndNameStrings(type, id, name, _atts)) - return CElementFactory::return_type(nullptr, ""); - - static const core::unordered_map StringToType = - { - std::make_pair("independent", CElementSampler::Type::INDEPENDENT), - std::make_pair("stratified", CElementSampler::Type::STRATIFIED), - std::make_pair("ldsampler", CElementSampler::Type::LDSAMPLER), - std::make_pair("halton", CElementSampler::Type::HALTON), - std::make_pair("hammersley", CElementSampler::Type::HAMMERSLEY), - std::make_pair("sobol", CElementSampler::Type::SOBOL) - }; - - auto found = StringToType.find(type); - if (found==StringToType.end()) - { - ParserLog::invalidXMLFileStructure("unknown type"); - _NBL_DEBUG_BREAK_IF(false); - return 
CElementFactory::return_type(nullptr, ""); - } - CElementSampler* obj = _util->objects.construct(id); - if (!obj) - return CElementFactory::return_type(nullptr, ""); - - obj->type = found->second; - obj->sampleCount = 4; - //validation - switch (obj->type) - { - case CElementSampler::Type::STRATIFIED: - [[fallthrough]]; - case CElementSampler::Type::LDSAMPLER: - obj->dimension = 4; - break; - case CElementSampler::Type::HALTON: - [[fallthrough]]; - case CElementSampler::Type::HAMMERSLEY: - obj->scramble = -1; - break; - case CElementSampler::Type::SOBOL: - obj->scramble = 0; - break; - default: - break; - } - return CElementFactory::return_type(obj, std::move(name)); -} - -bool CElementSampler::addProperty(SNamedPropertyElement&& _property) +bool CElementSampler::addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) { - if (_property.type == SNamedPropertyElement::Type::INTEGER && - _property.name == "sampleCount") + if (_property.type==SNamedPropertyElement::Type::INTEGER && _property.name=="sampleCount") { sampleCount = _property.ivalue; switch (type) @@ -81,47 +26,36 @@ bool CElementSampler::addProperty(SNamedPropertyElement&& _property) break; } } - else - if (_property.type == SNamedPropertyElement::Type::INTEGER && - _property.name == "dimension") + else if (_property.type == SNamedPropertyElement::Type::INTEGER && _property.name == "dimension") { dimension = _property.ivalue; if (type == Type::INDEPENDENT || type == Type::HALTON || type == Type::HAMMERSLEY) { - ParserLog::invalidXMLFileStructure("this sampler type does not take these parameters"); - _NBL_DEBUG_BREAK_IF(true); + invalidXMLFileStructure(logger,"this sampler type ("+std::to_string(type)+") does not take these parameters"); return false; } } - else - if (_property.type == SNamedPropertyElement::Type::INTEGER && - _property.name == "scramble") + else if (_property.type == SNamedPropertyElement::Type::INTEGER && _property.name == "scramble") { scramble = _property.ivalue; 
if (type==Type::INDEPENDENT || type==Type::STRATIFIED || type == Type::LDSAMPLER) { - ParserLog::invalidXMLFileStructure("this sampler type does not take these parameters"); - _NBL_DEBUG_BREAK_IF(true); + invalidXMLFileStructure(logger,"this sampler type ("+std::to_string(type)+") does not take these parameters"); return false; } } else { - _NBL_DEBUG_BREAK_IF(true); + invalidXMLFileStructure(logger,"unknown property named `"+_property.name+"` of type "+std::to_string(_property.type)); return false; } return true; } -bool CElementSampler::onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* globalMetadata) +bool CElementSampler::onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) { - if (type == Type::INVALID) - { - ParserLog::invalidXMLFileStructure(getLogName() + ": type not specified"); - _NBL_DEBUG_BREAK_IF(true); - return true; - } + NBL_EXT_MITSUBA_LOADER_ELEMENT_INVALID_TYPE_CHECK(true); // TODO: Validation diff --git a/src/nbl/ext/MitsubaLoader/CElementSensor.cpp b/src/nbl/ext/MitsubaLoader/CElementSensor.cpp index d2f336cad2..ccc0cfb40e 100644 --- a/src/nbl/ext/MitsubaLoader/CElementSensor.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementSensor.cpp @@ -5,6 +5,7 @@ #include "nbl/ext/MitsubaLoader/CElementSensor.h" #include "nbl/ext/MitsubaLoader/ParserUtil.h" +#include "nbl/ext/MitsubaLoader/ElementMacros.h" #include @@ -12,77 +13,10 @@ namespace nbl::ext::MitsubaLoader { -template<> -auto ParserManager::createElement(const char** _atts, SessionContext* ctx) -> SNamedElement -{ - const char* type; - const char* id; - std::string name; - if (!IElement::getTypeIDAndNameStrings(type, id, name, _atts)) - return {}; - - // TODO: initialize this separately - static const core::unordered_map StringToType = - { - {"perspective", CElementSensor::Type::PERSPECTIVE}, - {"thinlens", CElementSensor::Type::THINLENS}, - {"orthographic", CElementSensor::Type::ORTHOGRAPHIC}, - {"telecentric", CElementSensor::Type::TELECENTRIC}, - 
{"spherical", CElementSensor::Type::SPHERICAL}, - {"irradiancemeter", CElementSensor::Type::IRRADIANCEMETER}, - {"radiancemeter", CElementSensor::Type::RADIANCEMETER}, - {"fluencemeter", CElementSensor::Type::FLUENCEMETER}/*, - {"perspective_rdist", CElementSensor::PERSPECTIVE_RDIST}*/ - }; - - auto found = StringToType.find(type); - if (found==StringToType.end()) - { - ctx->invalidXMLFileStructure("unknown type"); - return {}; - } - - CElementSensor* obj = ctx->objects.construct(id); - if (!obj) - return {}; - - obj->type = found->second; - // defaults - switch (obj->type) - { - case CElementSensor::Type::PERSPECTIVE: - obj->perspective = CElementSensor::PerspectivePinhole(); - break; - case CElementSensor::Type::THINLENS: - obj->thinlens = CElementSensor::PerspectiveThinLens(); - break; - case CElementSensor::Type::ORTHOGRAPHIC: - obj->orthographic = CElementSensor::Orthographic(); - break; - case CElementSensor::Type::TELECENTRIC: - obj->telecentric = CElementSensor::TelecentricLens(); - break; - case CElementSensor::Type::SPHERICAL: - obj->spherical = CElementSensor::SphericalCamera(); - break; - case CElementSensor::Type::IRRADIANCEMETER: - obj->irradiancemeter = CElementSensor::IrradianceMeter(); - break; - case CElementSensor::Type::RADIANCEMETER: - obj->radiancemeter = CElementSensor::RadianceMeter(); - break; - case CElementSensor::Type::FLUENCEMETER: - obj->fluencemeter = CElementSensor::FluenceMeter(); - break; - default: - break; - } - return {obj,std::move(name)}; -} - -bool CElementSensor::addProperty(SNamedPropertyElement&& _property) +bool CElementSensor::addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) { bool error = false; +#if 0 auto dispatch = [&](auto func) -> void { switch (type) @@ -221,8 +155,8 @@ bool CElementSensor::addProperty(SNamedPropertyElement&& _property) {"nearClip", setNearClip}, {"farClip", setFarClip}, {"focusDistance", setFocusDistance}, - {"apertureRadius", setApertureRadius}/*, - {"kc", setKc}*/ + 
{"apertureRadius", setApertureRadius} +//, {"kc", setKc} }; @@ -236,16 +170,14 @@ bool CElementSensor::addProperty(SNamedPropertyElement&& _property) found->second(); return !error; +#endif + assert(false); + return false; } -bool CElementSensor::onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* meta) +bool CElementSensor::onEndTag(CMitsubaMetadata* meta, system::logger_opt_ptr logger) { - if (type==Type::INVALID) - { - ParserLog::invalidXMLFileStructure(getLogName() + ": type not specified"); - _NBL_DEBUG_BREAK_IF(true); - return true; - } + NBL_EXT_MITSUBA_LOADER_ELEMENT_INVALID_TYPE_CHECK(true); // TODO: some validation diff --git a/src/nbl/ext/MitsubaLoader/CElementShape.cpp b/src/nbl/ext/MitsubaLoader/CElementShape.cpp index 8e6c468821..0af5b97172 100644 --- a/src/nbl/ext/MitsubaLoader/CElementShape.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementShape.cpp @@ -274,14 +274,9 @@ bool CElementShape::processChildData(IElement* _child, const std::string& name) return false; } -bool CElementShape::onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* globalMetadata) +bool CElementShape::onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) { - if (type == Type::INVALID) - { - ParserLog::invalidXMLFileStructure(getLogName() + ": type not specified"); - _NBL_DEBUG_BREAK_IF(true); - return true; - } + NBL_EXT_MITSUBA_LOADER_ELEMENT_INVALID_TYPE_CHECK(true); // TODO: some validation diff --git a/src/nbl/ext/MitsubaLoader/CElementTexture.cpp b/src/nbl/ext/MitsubaLoader/CElementTexture.cpp index 1ae16b8c07..410ab8508f 100644 --- a/src/nbl/ext/MitsubaLoader/CElementTexture.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementTexture.cpp @@ -1,17 +1,13 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h - #include "nbl/ext/MitsubaLoader/ParserUtil.h" #include "nbl/ext/MitsubaLoader/CElementFactory.h" #include -namespace nbl -{ -namespace ext -{ -namespace MitsubaLoader + +namespace nbl::ext::MitsubaLoader { @@ -284,14 +280,9 @@ bool CElementTexture::processChildData(IElement* _child, const std::string& name return true; } -bool CElementTexture::onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* globalMetadata) +bool CElementTexture::onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) { - if (type == Type::INVALID) - { - ParserLog::invalidXMLFileStructure(getLogName() + ": type not specified"); - _NBL_DEBUG_BREAK_IF(true); - return true; - } + NBL_EXT_MITSUBA_LOADER_ELEMENT_INVALID_TYPE_CHECK(true); // TODO: Validation { @@ -300,6 +291,4 @@ bool CElementTexture::onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _overr return true; } -} -} } \ No newline at end of file diff --git a/src/nbl/ext/MitsubaLoader/CElementTransform.cpp b/src/nbl/ext/MitsubaLoader/CElementTransform.cpp index 6520cd8c90..60496ad714 100644 --- a/src/nbl/ext/MitsubaLoader/CElementTransform.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementTransform.cpp @@ -7,17 +7,6 @@ namespace nbl::ext::MitsubaLoader { - -template<> -auto ParserManager::createElement(const char** _atts, SessionContext* ctx) -> SNamedElement -{ - if (IElement::invalidAttributeCount(_atts,2u)) - return {}; - if (core::strcmpi(_atts[0],"name")) - return {}; - - return {ctx->objects.construct(),_atts[1]}; -} bool CElementTransform::addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) { diff --git a/src/nbl/ext/MitsubaLoader/CMakeLists.txt b/src/nbl/ext/MitsubaLoader/CMakeLists.txt index 2adc31d7b6..7bedf035b8 100644 --- a/src/nbl/ext/MitsubaLoader/CMakeLists.txt +++ b/src/nbl/ext/MitsubaLoader/CMakeLists.txt @@ -27,11 +27,11 @@ set(NBL_EXT_MITSUBA_LOADER_H 
set(NBL_EXT_MITSUBA_LOADER_SRC PropertyElement.cpp -# CElementIntegrator.cpp -# CElementSensor.cpp -# CElementFilm.cpp -# CElementRFilter.cpp -# CElementSampler.cpp + CElementIntegrator.cpp + CElementSensor.cpp + CElementFilm.cpp + CElementRFilter.cpp + CElementSampler.cpp CElementTransform.cpp # CElementShape.cpp # CElementBSDF.cpp diff --git a/src/nbl/ext/MitsubaLoader/ElementMacros.h b/src/nbl/ext/MitsubaLoader/ElementMacros.h new file mode 100644 index 0000000000..4f87e8ebc1 --- /dev/null +++ b/src/nbl/ext/MitsubaLoader/ElementMacros.h @@ -0,0 +1,119 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#include "nbl/ext/MitsubaLoader/IElement.h" +#include "nbl/ext/MitsubaLoader/ParserUtil.h" + +/* +template<> +CElementFactory::return_type CElementFactory::createElement(const char** _atts, ParserManager* _util) +{ + const char* type; + const char* id; + std::string name; + if (!IElement::getTypeIDAndNameStrings(type, id, name, _atts)) + return CElementFactory::return_type(nullptr, ""); + + static const core::unordered_map StringToType = + { + {"independent", CElementSampler::Type::INDEPENDENT}, + {"stratified", CElementSampler::Type::STRATIFIED}, + {"ldsampler", CElementSampler::Type::LDSAMPLER}, + {"halton", CElementSampler::Type::HALTON}, + {"hammersley", CElementSampler::Type::HAMMERSLEY}, + {"sobol", CElementSampler::Type::SOBOL} + }; + + auto found = StringToType.find(type); + if (found==StringToType.end()) + { + ParserLog::invalidXMLFileStructure("unknown type"); + _NBL_DEBUG_BREAK_IF(false); + return CElementFactory::return_type(nullptr, ""); + } + + CElementSampler* obj = _util->objects.construct(id); + if (!obj) + return CElementFactory::return_type(nullptr, ""); + + obj->type = found->second; + obj->sampleCount = 4; + //validation + switch (obj->type) + { + case CElementSampler::Type::STRATIFIED: + [[fallthrough]]; 
+ case CElementSampler::Type::LDSAMPLER: + obj->dimension = 4; + break; + case CElementSampler::Type::HALTON: + [[fallthrough]]; + case CElementSampler::Type::HAMMERSLEY: + obj->scramble = -1; + break; + case CElementSampler::Type::SOBOL: + obj->scramble = 0; + break; + default: + break; + } + return CElementFactory::return_type(obj, std::move(name)); +} + +bool CElementSampler::addProperty(SNamedPropertyElement&& _property) +{ + if (_property.type == SNamedPropertyElement::Type::INTEGER && + _property.name == "sampleCount") + { + sampleCount = _property.ivalue; + switch (type) + { + case Type::STRATIFIED: + sampleCount = ceilf(sqrtf(sampleCount)); + break; + case Type::LDSAMPLER: + //sampleCount = core::roundUpToPoT(sampleCount); + break; + default: + break; + } + } + else + if (_property.type == SNamedPropertyElement::Type::INTEGER && + _property.name == "dimension") + { + dimension = _property.ivalue; + if (type == Type::INDEPENDENT || type == Type::HALTON || type == Type::HAMMERSLEY) + { + ParserLog::invalidXMLFileStructure("this sampler type does not take these parameters"); + _NBL_DEBUG_BREAK_IF(true); + return false; + } + } + else + if (_property.type == SNamedPropertyElement::Type::INTEGER && + _property.name == "scramble") + { + scramble = _property.ivalue; + if (type==Type::INDEPENDENT || type==Type::STRATIFIED || type == Type::LDSAMPLER) + { + ParserLog::invalidXMLFileStructure("this sampler type does not take these parameters"); + _NBL_DEBUG_BREAK_IF(true); + return false; + } + } + else + { + _NBL_DEBUG_BREAK_IF(true); + return false; + } + + return true; +} +*/ + +#define NBL_EXT_MITSUBA_LOADER_ELEMENT_INVALID_TYPE_CHECK(NON_FATAL) if (type==Type::INVALID) \ +{ \ + invalidXMLFileStructure(logger,getLogName()+": type not specified"); \ + return NON_FATAL; \ +} \ No newline at end of file diff --git a/src/nbl/ext/MitsubaLoader/ParserUtil.cpp b/src/nbl/ext/MitsubaLoader/ParserUtil.cpp index 5dbf6a7e93..5f01edf95a 100644 --- 
a/src/nbl/ext/MitsubaLoader/ParserUtil.cpp +++ b/src/nbl/ext/MitsubaLoader/ParserUtil.cpp @@ -254,7 +254,7 @@ void ParserManager::XMLContext::onEnd(const char* _el) elements.pop(); auto& result = *session->result; - if (element.element && !element.element->onEndTag(session->params->_override,result.metadata.get())) + if (element.element && !element.element->onEndTag(result.metadata.get(),session->params->logger)) { killParseWithError(element.element->getLogName()+" could not onEndTag"); return; @@ -282,6 +282,84 @@ void ParserManager::XMLContext::onEnd(const char* _el) } } +// special specs +template<> +struct ParserManager::CreateElement +{ + static inline SNamedElement __call(const char** _atts, SessionContext* ctx) + { + if (IElement::invalidAttributeCount(_atts,2u)) + return {}; + if (core::strcmpi(_atts[0],"name")) + return {}; + + return {ctx->objects.construct(),_atts[1]}; + }; +}; +template<> +struct ParserManager::CreateElement +{ + static inline SNamedElement __call(const char** _atts, SessionContext* ctx) + { + const char* type; + const char* id; + std::string name; + if (!IElement::getTypeIDAndNameStrings(type, id, name, _atts)) + return {}; + + CElementEmissionProfile* obj = ctx->objects.construct(id); + if (!obj) + return {}; + + return { obj,std::move(name) }; + }; +}; + +// default spec +template +concept HasTypeMap = requires() { + { T::compStringToTypeMap() } -> std::same_as>; +}; +template +concept HasVisit = requires() { + { std::declval().visit([](auto& selfV)->void {}) } -> std::same_as; +}; +template requires HasTypeMap +struct ParserManager::CreateElement +{ + static inline SNamedElement __call(const char** _atts, SessionContext* ctx) + { + const char* type; + const char* id; + std::string name; + if (!IElement::getTypeIDAndNameStrings(type,id,name,_atts)) + return {}; + + static const auto StringToTypeMap = Element::compStringToTypeMap(); // TODO: make a const member cause of DLL delay load + auto found = StringToTypeMap.find(type); + if 
(found==StringToTypeMap.end()) + { + ctx->invalidXMLFileStructure("unknown type"); + return {}; + } + + Element* obj = ctx->objects.construct(id); + if (!obj) + return {}; + + obj->type = found->second; + if constexpr (HasVisit) + obj->visit([](auto& selfV)->void + { + selfV = {}; + } + ); + else + obj->initialize(); + return {obj,std::move(name)}; + } +}; + // ParserManager::ParserManager() : propertyElements({ "float", "string", "boolean", "integer", @@ -289,21 +367,21 @@ ParserManager::ParserManager() : propertyElements({ "point", "vector", "matrix", "rotate", "translate", "scale", "lookat" }), propertyElementManager(), createElementTable({ -// {"integrator", {.create=createElement,.retvalGoesOnStack=true}}, -// {"sensor", {.create=createElement,.retvalGoesOnStack=true}}, -// {"film", {.create=createElement,.retvalGoesOnStack=true}}, -// {"rfilter", {.create=createElement,.retvalGoesOnStack=true}}, -// {"sampler", {.create=createElement,.retvalGoesOnStack=true}}, -// {"shape", {.create=createElement,.retvalGoesOnStack=true}}, - {"transform", {.create=createElement,.retvalGoesOnStack=true}}, -// {"animation", {.create=createElement,.retvalGoesOnStack=true}}, -// {"bsdf", {.create=createElement,.retvalGoesOnStack=true}}, -// {"texture", {.create=createElement,.retvalGoesOnStack=true}}, -// {"emitter", {.create=createElement,.retvalGoesOnStack=true}}, - {"emissionprofile", {.create=createElement,.retvalGoesOnStack=true}}, + {"integrator", {.create=ParserManager::CreateElement::__call,.retvalGoesOnStack=true}}, + {"sensor", {.create=ParserManager::CreateElement::__call,.retvalGoesOnStack=true}}, + {"film", {.create=ParserManager::CreateElement::__call,.retvalGoesOnStack=true}}, + {"rfilter", {.create=ParserManager::CreateElement::__call,.retvalGoesOnStack=true}}, + {"sampler", {.create=ParserManager::CreateElement::__call,.retvalGoesOnStack=true}}, +// {"shape", {.create=ParserManager::CreateElement::__call,.retvalGoesOnStack=true}}, + {"transform", 
{.create=ParserManager::CreateElement::__call,.retvalGoesOnStack=true}}, +// {"animation", {.create=ParserManager::CreateElement::__call,.retvalGoesOnStack=true}}, +// {"bsdf", {.create=ParserManager::CreateElement::__call,.retvalGoesOnStack=true}}, +// {"texture", {.create=ParserManager::CreateElement::__call,.retvalGoesOnStack=true}}, +// {"emitter", {.create=ParserManager::CreateElement::__call,.retvalGoesOnStack=true}}, + {"emissionprofile", {.create=ParserManager::CreateElement::__call,.retvalGoesOnStack=true}}, {"alias", {.create=processAlias,.retvalGoesOnStack=true}}, {"ref", {.create=processRef,.retvalGoesOnStack=true}} -}){} +}) {} auto ParserManager::processAlias(const char** _atts, SessionContext* ctx) -> SNamedElement { From 9745682ae0e430b4be9cacc4d48b454e94cf94ab Mon Sep 17 00:00:00 2001 From: devsh Date: Thu, 16 Oct 2025 15:32:27 +0200 Subject: [PATCH 070/472] I am very sick in the head. --- include/nbl/core/algorithm/utility.h | 24 +++ .../nbl/ext/MitsubaLoader/CElementSensor.h | 55 +++++-- include/nbl/ext/MitsubaLoader/IElement.h | 85 +++++++++- include/nbl/ext/MitsubaLoader/ParserUtil.h | 28 ++-- .../ext/MitsubaLoader/CElementIntegrator.cpp | 81 ++-------- src/nbl/ext/MitsubaLoader/CElementSensor.cpp | 149 +++++++----------- src/nbl/ext/MitsubaLoader/ElementMacros.h | 3 + src/nbl/ext/MitsubaLoader/ParserUtil.cpp | 37 ++++- 8 files changed, 270 insertions(+), 192 deletions(-) diff --git a/include/nbl/core/algorithm/utility.h b/include/nbl/core/algorithm/utility.h index 136f1047cc..2ad3920839 100644 --- a/include/nbl/core/algorithm/utility.h +++ b/include/nbl/core/algorithm/utility.h @@ -18,6 +18,30 @@ struct type_list_size> : std::integral_constant inline constexpr size_t type_list_size_v = type_list_size::value; +template class, typename TypeList> +struct filter; +template class Pred, typename... T> +struct filter> +{ + using type = type_list<>; +}; + +template class Pred, typename T, typename... 
Ts> +struct filter> +{ + template + struct Cons; + template + struct Cons> + { + using type = type_list; + }; + + using type = std::conditional_t::value,typename Cons>::type>::type,typename filter>::type>; +}; +template class Pred, typename TypeList> +using filter_t = filter::type; + template class ListLikeOutT, template class X, typename ListLike> struct list_transform { diff --git a/include/nbl/ext/MitsubaLoader/CElementSensor.h b/include/nbl/ext/MitsubaLoader/CElementSensor.h index 655a50b3e8..d40b19bc5e 100644 --- a/include/nbl/ext/MitsubaLoader/CElementSensor.h +++ b/include/nbl/ext/MitsubaLoader/CElementSensor.h @@ -30,27 +30,13 @@ class CElementSensor final : public IElement PERSPECTIVE_RDIST, INVALID }; - static inline core::unordered_map compStringToTypeMap() - { - return { - {"perspective", Type::PERSPECTIVE}, - {"thinlens", Type::THINLENS}, - {"orthographic", Type::ORTHOGRAPHIC}, - {"telecentric", Type::TELECENTRIC}, - {"spherical", Type::SPHERICAL}, - {"irradiancemeter", Type::IRRADIANCEMETER}, - {"radiancemeter", Type::RADIANCEMETER}, - {"fluencemeter", Type::FLUENCEMETER}/*, - {"perspective_rdist", PERSPECTIVE_RDIST}*/ - }; - } constexpr static inline uint8_t MaxClipPlanes = 6u; struct ShutterSensor { hlsl::float32_t3 up = hlsl::float32_t3(0,1,0); - hlsl::float32_t3 clipPlanes[MaxClipPlanes] = {}; + hlsl::float32_t4 clipPlanes[MaxClipPlanes] = {}; float moveSpeed = core::nan(); float zoomSpeed = core::nan(); float rotateSpeed = core::nan(); @@ -64,6 +50,8 @@ class CElementSensor final : public IElement }; struct PerspectivePinhole : CameraBase { + constexpr static inline Type VariantType = Type::PERSPECTIVE; + enum class FOVAxis { INVALID, @@ -86,6 +74,7 @@ class CElementSensor final : public IElement }; struct Orthographic : CameraBase { + constexpr static inline Type VariantType = Type::ORTHOGRAPHIC; }; struct DepthOfFieldBase { @@ -94,27 +83,59 @@ class CElementSensor final : public IElement }; struct PerspectiveThinLens : PerspectivePinhole, 
DepthOfFieldBase { + constexpr static inline Type VariantType = Type::THINLENS; }; struct TelecentricLens : Orthographic, DepthOfFieldBase { + constexpr static inline Type VariantType = Type::TELECENTRIC; }; struct SphericalCamera : CameraBase { + constexpr static inline Type VariantType = Type::SPHERICAL; }; struct IrradianceMeter : ShutterSensor { + constexpr static inline Type VariantType = Type::IRRADIANCEMETER; }; struct RadianceMeter : ShutterSensor { + constexpr static inline Type VariantType = Type::RADIANCEMETER; }; struct FluenceMeter : ShutterSensor { + constexpr static inline Type VariantType = Type::FLUENCEMETER; };/* struct PerspectivePinholeRadialDistortion : PerspectivePinhole { kc; };*/ + using variant_list_t = core::type_list< + PerspectivePinhole, + PerspectiveThinLens, + Orthographic, + TelecentricLens, + SphericalCamera, + IrradianceMeter, + RadianceMeter, + FluenceMeter + >; + static inline core::unordered_map compStringToTypeMap() + { + return { + {"perspective", Type::PERSPECTIVE}, + {"thinlens", Type::THINLENS}, + {"orthographic", Type::ORTHOGRAPHIC}, + {"telecentric", Type::TELECENTRIC}, + {"spherical", Type::SPHERICAL}, + {"irradiancemeter", Type::IRRADIANCEMETER}, + {"radiancemeter", Type::RADIANCEMETER}, + {"fluencemeter", Type::FLUENCEMETER}/*, + {"perspective_rdist", PERSPECTIVE_RDIST}*/ + }; + } + static AddPropertyMap compAddPropertyMap(); + inline CElementSensor(const char* id) : IElement(id), type(Type::INVALID), /*toWorldType(IElement::Type::TRANSFORM),*/ transform(), film(""), sampler("") { } @@ -182,7 +203,9 @@ class CElementSensor final : public IElement bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) override; bool onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) override; - inline IElement::Type getType() const override { return IElement::Type::SENSOR; } + + constexpr static inline auto ElementType = IElement::Type::SENSOR; + inline IElement::Type getType() const 
override { return ElementType; } inline std::string getLogName() const override { return "sensor"; } inline bool processChildData(IElement* _child, const std::string& name) override diff --git a/include/nbl/ext/MitsubaLoader/IElement.h b/include/nbl/ext/MitsubaLoader/IElement.h index b0f2d34c3b..20780a21c4 100644 --- a/include/nbl/ext/MitsubaLoader/IElement.h +++ b/include/nbl/ext/MitsubaLoader/IElement.h @@ -5,6 +5,7 @@ #define _NBL_EXT_MISTUBA_LOADER_I_ELEMENT_H_INCLUDED_ +#include "nbl/core/algorithm/utility.h" #include "nbl/asset/interchange/IAssetLoader.h" #include "nbl/ext/MitsubaLoader/PropertyElement.h" @@ -14,6 +15,21 @@ namespace nbl::ext::MitsubaLoader { class CMitsubaMetadata; +namespace impl +{ +template class Pred, typename... Args> +struct ToUnaryPred +{ + template + struct type : bool_constant::value> {}; +}; + +template +struct mpl_of_passing; +template +using mpl_of_passing_t = mpl_of_passing::type; +} + class IElement { public: @@ -108,7 +124,12 @@ class IElement return _atts[attrCount]; } - // + // if we used `variant` instead of union we could default implement this + //template + //static inline void defaultVisit(Derived* this) + //{ + // generated switch / visit of `Variant` + //} template static inline void copyVariant(Derived* to, const Derived* from) { @@ -123,7 +144,69 @@ class IElement } ); } + + // could move it to `nbl/builtin/hlsl/mpl` + template + struct mpl_array + { + constexpr static inline Type data[] = { values... 
}; + }; + // + template + struct AddPropertyCallback + { + using element_t = Derived; + // TODO: list or map of supported variants (if `visit` is present) + using func_t = bool(*)(Derived*,SNamedPropertyElement&&,const system::logger_opt_ptr); + + inline bool operator()(Derived* d, SNamedPropertyElement&& p, const system::logger_opt_ptr l) const {return func(d,std::move(p),l);} + + func_t func; + // will usually point at + std::span allowedVariantTypes = {}; + }; + template + using PropertyNameCallbackMap = core::unordered_map,core::CaseInsensitiveHash,core::CaseInsensitiveEquals>; + template + class AddPropertyMap + { + template + inline void registerCallback(const SNamedPropertyElement::Type type, std::string&& propertyName, AddPropertyCallback cb) + { + if constexpr (sizeof...(types)) + cb.allowedVariantTypes = mpl_array::data; + registerCallback(type,std::move(propertyName),cb); + } + + public: + inline void registerCallback(const SNamedPropertyElement::Type type, std::string&& propertyName, const AddPropertyCallback& cb) + { + auto [nameIt,inserted] = byPropertyType[type].emplace(std::move(propertyName),cb); + assert(inserted); + } + template class Pred, typename... 
Args> + inline void registerCallback(const SNamedPropertyElement::Type type, std::string&& propertyName, AddPropertyCallback::func_t cb) + { + AddPropertyCallback callback = {.func=cb}; + using UnaryPred = impl::ToUnaryPred; + using passing_types = core::filter_t; + if constexpr (core::type_list_size_v) + callback.allowedVariantTypes = impl::mpl_of_passing_t::data; + registerCallback(type,std::move(propertyName),callback); + } + + std::array,SNamedPropertyElement::Type::INVALID> byPropertyType = {}; + }; +}; + +namespace impl +{ +template +struct mpl_of_passing> +{ + using type = IElement::mpl_array; }; +} } #endif \ No newline at end of file diff --git a/include/nbl/ext/MitsubaLoader/ParserUtil.h b/include/nbl/ext/MitsubaLoader/ParserUtil.h index 997d95f68a..5787cc6c62 100644 --- a/include/nbl/ext/MitsubaLoader/ParserUtil.h +++ b/include/nbl/ext/MitsubaLoader/ParserUtil.h @@ -9,6 +9,7 @@ #include "nbl/ext/MitsubaLoader/CMitsubaMetadata.h" #include "nbl/ext/MitsubaLoader/PropertyElement.h" +#include "nbl/ext/MitsubaLoader/CElementSensor.h" #include "nbl/ext/MitsubaLoader/CElementShape.h" #include @@ -72,7 +73,22 @@ class ParserManager final const core::unordered_set propertyElements; const CPropertyElementManager propertyElementManager; + using supported_elements_t = core::type_list< +// CElementIntegrator, + CElementSensor +// CElementFilm, +// CElementRFilter, +// CElementSampler, +/// CElementShape, +/// CElementBSDF, +/// CElementTexture, +/// CElementEmitter, +// CElementEmissionProfile + >; + private: + const core::tuple_transform_t addPropertyMaps; + struct SNamedElement { IElement* element = nullptr; @@ -97,17 +113,7 @@ class ParserManager final // uint32_t sceneDeclCount = 0; // TODO: This leaks memory all over the place because destructors are not ran! 
- ElementPool objects = {}; + ElementPool<> objects = {}; // aliases and names (in Mitsbua XML you can give nodes names and `ref` them) core::unordered_map handles = {}; // stack of currently processed elements, each element of index N is parent of the element of index N+1 diff --git a/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp b/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp index 964bbe4211..04227927be 100644 --- a/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp @@ -6,6 +6,7 @@ #include "nbl/ext/MitsubaLoader/ElementMacros.h" +#include "nbl/type_traits.h" // legacy stuff for `is_any_of` #include @@ -14,74 +15,12 @@ namespace nbl::ext::MitsubaLoader bool CElementIntegrator::addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) { + if (type>=Type::INVALID) + return false; bool error = false; #if 0 - auto dispatch = [&](auto func) -> void - { - switch (type) - { - case CElementIntegrator::Type::AO: - func(ao); - break; - case CElementIntegrator::Type::DIRECT: - func(direct); - break; - case CElementIntegrator::Type::PATH: - func(path); - break; - case CElementIntegrator::Type::VOL_PATH_SIMPLE: - func(volpath_simple); - break; - case CElementIntegrator::Type::VOL_PATH: - func(volpath); - break; - case CElementIntegrator::Type::BDPT: - func(bdpt); - break; - case CElementIntegrator::Type::PHOTONMAPPER: - func(photonmapper); - break; - case CElementIntegrator::Type::PPM: - func(ppm); - break; - case CElementIntegrator::Type::SPPM: - func(sppm); - break; - case CElementIntegrator::Type::PSSMLT: - func(pssmlt); - break; - case CElementIntegrator::Type::MLT: - func(mlt); - break; - case CElementIntegrator::Type::ERPT: - func(erpt); - break; - case CElementIntegrator::Type::ADJ_P_TRACER: - func(ptracer); - break; - case CElementIntegrator::Type::ADAPTIVE: - func(adaptive); - break; - case CElementIntegrator::Type::VPL: - func(vpl); - break; - case CElementIntegrator::Type::IRR_CACHE: - 
func(irrcache); - break; - case CElementIntegrator::Type::MULTI_CHANNEL: - func(multichannel); - break; - case CElementIntegrator::Type::FIELD_EXTRACT: - func(field); - break; - default: - error = true; - break; - } - }; - #define SET_PROPERTY_TEMPLATE(MEMBER,PROPERTY_TYPE, ... ) [&]() -> void { \ - dispatch([&](auto& state) -> void { \ + visit([&](auto& state) -> void { \ if constexpr (is_any_of::type,__VA_ARGS__>::value) \ { \ if (_property.type!=PROPERTY_TYPE) { \ @@ -98,7 +37,7 @@ bool CElementIntegrator::addProperty(SNamedPropertyElement&& _property, system:: auto processBSDFSamples = SET_PROPERTY_TEMPLATE(bsdfSamples,SNamedPropertyElement::Type::INTEGER,DirectIllumination); auto processShadingSamples = [&]() -> void { - dispatch([&](auto& state) -> void { + visit([&](auto& state) -> void { using state_type = std::remove_reference::type; if constexpr (std::is_same::value) @@ -165,7 +104,7 @@ bool CElementIntegrator::addProperty(SNamedPropertyElement&& _property, system:: auto processClamping = SET_PROPERTY_TEMPLATE(clamping,SNamedPropertyElement::Type::FLOAT,VirtualPointLights); auto processField = [&]() -> void { - dispatch([&](auto& state) -> void + visit([&](auto& state) -> void { using state_type = std::remove_reference::type; if constexpr (std::is_same::value) @@ -197,7 +136,7 @@ bool CElementIntegrator::addProperty(SNamedPropertyElement&& _property, system:: }; auto processUndefined = [&]() -> void { - dispatch([&](auto& state) -> void { + visit([&](auto& state) -> void { using state_type = std::remove_reference::type; if constexpr (std::is_same::value) @@ -286,15 +225,13 @@ bool CElementIntegrator::addProperty(SNamedPropertyElement&& _property, system:: auto found = SetPropertyMap.find(_property.name); if (found==SetPropertyMap.end()) { - invalidXMLFileStructure("No Integrator can have such property set with name: "+_property.name); + invalidXMLFileStructure(logger,"No Integrator can have such property set with name: "+_property.name); return false; } 
found->second(); - return !error; #endif - assert(false); - return false; + return !error; } bool CElementIntegrator::onEndTag(CMitsubaMetadata* metadata, system::logger_opt_ptr logger) diff --git a/src/nbl/ext/MitsubaLoader/CElementSensor.cpp b/src/nbl/ext/MitsubaLoader/CElementSensor.cpp index ccc0cfb40e..86724403b4 100644 --- a/src/nbl/ext/MitsubaLoader/CElementSensor.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementSensor.cpp @@ -9,47 +9,74 @@ #include +template +struct derived_from : std::is_base_of {}; namespace nbl::ext::MitsubaLoader { +auto CElementSensor::compAddPropertyMap() -> AddPropertyMap +{ + using this_t = CElementSensor; + AddPropertyMap retval; + +// auto setUp = SET_PROPERTY_TEMPLATE(up, SNamedPropertyElement::Type::VECTOR, ShutterSensor); + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY(VECTOR,"clipPlane") + { + if (_property.getVectorDimension()!=4) + { + return false; + } + constexpr std::string_view Name = "clipPlane"; + const std::string_view sv(_property.name); + if (sv.length()!=Name.length()+1 || sv.find(Name)!=0) + { + return false; + } + const auto index = std::atoi(sv.data()+Name.length()); + if (index>MaxClipPlanes) + { + return false; + } + // everyone inherits from this + _this->perspective.clipPlanes[index] = _property.vvalue; + return true; + } + }); + +// auto setShiftX = SET_PROPERTY_TEMPLATE(shiftX, SNamedPropertyElement::Type::FLOAT, PerspectivePinhole); +// auto setShiftY = SET_PROPERTY_TEMPLATE(shiftY, SNamedPropertyElement::Type::FLOAT, PerspectivePinhole); +// auto setFov = SET_PROPERTY_TEMPLATE(fov, SNamedPropertyElement::Type::FLOAT, PerspectivePinhole); + retval.template registerCallback(SNamedPropertyElement::Type::STRING,"fovAxis",[](CElementSensor* _this, SNamedPropertyElement&& _property, const system::logger_opt_ptr logger)->bool + { + auto& state = _this->perspective; + static const core::unordered_map StringToType = + { + {"x", PerspectivePinhole::FOVAxis::X}, + {"y", PerspectivePinhole::FOVAxis::Y}, + 
{"diagonal",PerspectivePinhole::FOVAxis::DIAGONAL}, + {"smaller", PerspectivePinhole::FOVAxis::SMALLER}, + {"larger", PerspectivePinhole::FOVAxis::LARGER} + }; + auto found = StringToType.find(_property.svalue); + if (found!=StringToType.end()) + state.fovAxis = found->second; + else + state.fovAxis = PerspectivePinhole::FOVAxis::INVALID; + return true; + } + ); + + return retval; +} + bool CElementSensor::addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) { + if (type >= Type::INVALID) + return false; bool error = false; + #if 0 - auto dispatch = [&](auto func) -> void - { - switch (type) - { - case CElementSensor::Type::PERSPECTIVE: - func(perspective); - break; - case CElementSensor::Type::THINLENS: - func(thinlens); - break; - case CElementSensor::Type::ORTHOGRAPHIC: - func(orthographic); - break; - case CElementSensor::Type::TELECENTRIC: - func(telecentric); - break; - case CElementSensor::Type::SPHERICAL: - func(spherical); - break; - case CElementSensor::Type::IRRADIANCEMETER: - func(irradiancemeter); - break; - case CElementSensor::Type::RADIANCEMETER: - func(radiancemeter); - break; - case CElementSensor::Type::FLUENCEMETER: - func(fluencemeter); - break; - default: - error = true; - break; - } - }; #define SET_PROPERTY_TEMPLATE(MEMBER,PROPERTY_TYPE,BASE) [&]() -> void { \ dispatch([&](auto& state) -> void { \ @@ -65,62 +92,6 @@ bool CElementSensor::addProperty(SNamedPropertyElement&& _property, system::logg } auto setUp = SET_PROPERTY_TEMPLATE(up,SNamedPropertyElement::Type::VECTOR,ShutterSensor); - auto setClipPlane = [&]() -> void - { - dispatch([&](auto& state) -> void - { - if (_property.type!=SNamedPropertyElement::Type::VECTOR || _property.getVectorDimension()==4) - { - error = true; - return; - } - constexpr std::string_view Name = "clipPlane"; - const std::string_view sv(_property.name); - if (sv.length()!=Name.length()+1 || sv.find(Name)!=0) - { - error = true; - return; - } - const auto index = 
std::atoi(sv.data()+Name.length()); - if (index>MaxClipPlanes) - { - error = true; - return; - } - state.clipPlanes[index] = _property.vvalue; - }); - }; - auto setShiftX = SET_PROPERTY_TEMPLATE(shiftX,SNamedPropertyElement::Type::FLOAT,PerspectivePinhole); - auto setShiftY = SET_PROPERTY_TEMPLATE(shiftY,SNamedPropertyElement::Type::FLOAT,PerspectivePinhole); - auto setFov = SET_PROPERTY_TEMPLATE(fov,SNamedPropertyElement::Type::FLOAT,PerspectivePinhole); - auto setFovAxis = [&]() -> void - { - dispatch([&](auto& state) -> void - { - using state_type = std::remove_reference::type; - if constexpr (std::is_base_of::value) - { - if (_property.type!=SNamedPropertyElement::Type::STRING) - { - error = true; - return; - } - static const core::unordered_map StringToType = - { - {"x", PerspectivePinhole::FOVAxis::X}, - {"y", PerspectivePinhole::FOVAxis::Y}, - {"diagonal",PerspectivePinhole::FOVAxis::DIAGONAL}, - {"smaller", PerspectivePinhole::FOVAxis::SMALLER}, - {"larger", PerspectivePinhole::FOVAxis::LARGER} - }; - auto found = StringToType.find(_property.svalue); - if (found!=StringToType.end()) - state.fovAxis = found->second; - else - state.fovAxis = PerspectivePinhole::FOVAxis::INVALID; - } - }); - }; auto setShutterOpen = SET_PROPERTY_TEMPLATE(shutterOpen,SNamedPropertyElement::Type::FLOAT,ShutterSensor); auto setShutterClose = SET_PROPERTY_TEMPLATE(shutterClose,SNamedPropertyElement::Type::FLOAT,ShutterSensor); auto setMoveSpeed = SET_PROPERTY_TEMPLATE(moveSpeed,SNamedPropertyElement::Type::FLOAT,ShutterSensor); diff --git a/src/nbl/ext/MitsubaLoader/ElementMacros.h b/src/nbl/ext/MitsubaLoader/ElementMacros.h index 4f87e8ebc1..9492032ca1 100644 --- a/src/nbl/ext/MitsubaLoader/ElementMacros.h +++ b/src/nbl/ext/MitsubaLoader/ElementMacros.h @@ -4,6 +4,9 @@ #include "nbl/ext/MitsubaLoader/IElement.h" #include "nbl/ext/MitsubaLoader/ParserUtil.h" + +#define NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY(PROP_TYPE,NAME) 
retval.registerCallback(SNamedPropertyElement::Type::PROP_TYPE,NAME,{.func=[](this_t* _this, SNamedPropertyElement&& _property, const system::logger_opt_ptr logger)->bool + /* template<> CElementFactory::return_type CElementFactory::createElement(const char** _atts, ParserManager* _util) diff --git a/src/nbl/ext/MitsubaLoader/ParserUtil.cpp b/src/nbl/ext/MitsubaLoader/ParserUtil.cpp index 5f01edf95a..468fc92ce6 100644 --- a/src/nbl/ext/MitsubaLoader/ParserUtil.cpp +++ b/src/nbl/ext/MitsubaLoader/ParserUtil.cpp @@ -191,7 +191,8 @@ void ParserManager::XMLContext::parseElement(const char* _el, const char** _atts killParseWithError("cannot set a property with no element on the stack."); return; } - if (!elements.top().element) + auto* element = elements.top().element; + if (!element) { session->invalidXMLFileStructure("cannot set property on element that failed to be created."); return; @@ -204,7 +205,35 @@ void ParserManager::XMLContext::parseElement(const char* _el, const char** _atts return; } - elements.top().element->addProperty(std::move(optProperty.value()),session->params->logger); + bool unsupportedElement = true; + auto run = [&](const auto& map)->void + { + using element_t = std::remove_cvref_t::mapped_type::mapped_type::element_t; + if (element_t::ElementType==element->getType()) + { + unsupportedElement = false; + auto& property = optProperty.value(); + auto typeIt = map.find(property.type); + if (typeIt==map.end()) + { + session->invalidXMLFileStructure("There's no property supported by ElementType (TODO) with PropertyType (TODO)"); + return; + } + auto nameIt = typeIt->second.find(property.name); + if (nameIt==typeIt->second.end()) + { + session->invalidXMLFileStructure("There's no Property named (TODO) of Type (TODO) supported by ElementType (TODO)"); + return; + } + // TODO: visit variant type checks + nameIt->second(static_cast(element),std::move(property),session->params->logger); + } + }; + std::apply([&run](const auto&... 
maps)->void + { + (run(maps), ...); + },manager->addPropertyMaps + ); return; } @@ -381,7 +410,9 @@ ParserManager::ParserManager() : propertyElements({ {"emissionprofile", {.create=ParserManager::CreateElement::__call,.retvalGoesOnStack=true}}, {"alias", {.create=processAlias,.retvalGoesOnStack=true}}, {"ref", {.create=processRef,.retvalGoesOnStack=true}} -}) {} +}), addPropertyMaps({ + CElementSensor::compAddPropertyMap() +}) { } auto ParserManager::processAlias(const char** _atts, SessionContext* ctx) -> SNamedElement { From 35d4a0a1261b1752c7c3f225c4ff3fa353a6306c Mon Sep 17 00:00:00 2001 From: devsh Date: Thu, 16 Oct 2025 15:48:17 +0200 Subject: [PATCH 071/472] now get it to compile --- include/nbl/ext/MitsubaLoader/IElement.h | 2 ++ src/nbl/ext/MitsubaLoader/ParserUtil.cpp | 26 +++++++++++++++++------- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/include/nbl/ext/MitsubaLoader/IElement.h b/include/nbl/ext/MitsubaLoader/IElement.h index 20780a21c4..68c2958fb2 100644 --- a/include/nbl/ext/MitsubaLoader/IElement.h +++ b/include/nbl/ext/MitsubaLoader/IElement.h @@ -179,6 +179,8 @@ class IElement } public: + using element_type = Derived; + inline void registerCallback(const SNamedPropertyElement::Type type, std::string&& propertyName, const AddPropertyCallback& cb) { auto [nameIt,inserted] = byPropertyType[type].emplace(std::move(propertyName),cb); diff --git a/src/nbl/ext/MitsubaLoader/ParserUtil.cpp b/src/nbl/ext/MitsubaLoader/ParserUtil.cpp index 468fc92ce6..0ba2bad8d0 100644 --- a/src/nbl/ext/MitsubaLoader/ParserUtil.cpp +++ b/src/nbl/ext/MitsubaLoader/ParserUtil.cpp @@ -208,25 +208,32 @@ void ParserManager::XMLContext::parseElement(const char* _el, const char** _atts bool unsupportedElement = true; auto run = [&](const auto& map)->void { - using element_t = std::remove_cvref_t::mapped_type::mapped_type::element_t; + using element_t = std::remove_cvref_t::element_type; if (element_t::ElementType==element->getType()) { unsupportedElement = 
false; auto& property = optProperty.value(); - auto typeIt = map.find(property.type); - if (typeIt==map.end()) + const auto& typeMap = map.byPropertyType[property.type]; + if (typeMap.empty()) { session->invalidXMLFileStructure("There's no property supported by ElementType (TODO) with PropertyType (TODO)"); return; } - auto nameIt = typeIt->second.find(property.name); - if (nameIt==typeIt->second.end()) + auto nameIt = typeMap.find(property.name); + if (nameIt==typeMap.end()) { session->invalidXMLFileStructure("There's no Property named (TODO) of Type (TODO) supported by ElementType (TODO)"); return; } - // TODO: visit variant type checks - nameIt->second(static_cast(element),std::move(property),session->params->logger); + const auto& callback = nameIt->second; + auto* typedElement = static_cast(element); + if constexpr (!std::is_same_v) + if (std::find(callback.allowedVariantTypes.begin(),callback.allowedVariantTypes.end(),typedElement->type)==callback.allowedVariantTypes.end()) + { + session->invalidXMLFileStructure("There's no Property named (TODO) of Type (TODO) not supported on ElementType (TODO) of Variant (TODO)"); + return; + } + callback(typedElement,std::move(property),session->params->logger); } }; std::apply([&run](const auto&... maps)->void @@ -234,6 +241,11 @@ void ParserManager::XMLContext::parseElement(const char* _el, const char** _atts (run(maps), ...); },manager->addPropertyMaps ); + if (unsupportedElement) + { + session->invalidXMLFileStructure("Current Element Type doesn't have a AddPropertyMap at all (no property adding supported)!"); + return; + } return; } From 51dc3aa1a08d458bc18c2498fd1dbcf27d824d79 Mon Sep 17 00:00:00 2001 From: devsh Date: Thu, 16 Oct 2025 16:19:25 +0200 Subject: [PATCH 072/472] This is a neat rewrite! 
--- src/nbl/ext/MitsubaLoader/CElementSensor.cpp | 148 ++++++------------- src/nbl/ext/MitsubaLoader/ElementMacros.h | 25 +++- 2 files changed, 71 insertions(+), 102 deletions(-) diff --git a/src/nbl/ext/MitsubaLoader/CElementSensor.cpp b/src/nbl/ext/MitsubaLoader/CElementSensor.cpp index 86724403b4..6982870fc6 100644 --- a/src/nbl/ext/MitsubaLoader/CElementSensor.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementSensor.cpp @@ -9,8 +9,6 @@ #include -template -struct derived_from : std::is_base_of {}; namespace nbl::ext::MitsubaLoader { @@ -20,36 +18,14 @@ auto CElementSensor::compAddPropertyMap() -> AddPropertyMap using this_t = CElementSensor; AddPropertyMap retval; -// auto setUp = SET_PROPERTY_TEMPLATE(up, SNamedPropertyElement::Type::VECTOR, ShutterSensor); - NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY(VECTOR,"clipPlane") - { - if (_property.getVectorDimension()!=4) - { - return false; - } - constexpr std::string_view Name = "clipPlane"; - const std::string_view sv(_property.name); - if (sv.length()!=Name.length()+1 || sv.find(Name)!=0) - { - return false; - } - const auto index = std::atoi(sv.data()+Name.length()); - if (index>MaxClipPlanes) - { - return false; - } - // everyone inherits from this - _this->perspective.clipPlanes[index] = _property.vvalue; - return true; - } - }); - -// auto setShiftX = SET_PROPERTY_TEMPLATE(shiftX, SNamedPropertyElement::Type::FLOAT, PerspectivePinhole); -// auto setShiftY = SET_PROPERTY_TEMPLATE(shiftY, SNamedPropertyElement::Type::FLOAT, PerspectivePinhole); -// auto setFov = SET_PROPERTY_TEMPLATE(fov, SNamedPropertyElement::Type::FLOAT, PerspectivePinhole); - retval.template registerCallback(SNamedPropertyElement::Type::STRING,"fovAxis",[](CElementSensor* _this, SNamedPropertyElement&& _property, const system::logger_opt_ptr logger)->bool + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(up,VECTOR,derived_from,ShutterSensor); + 
NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(shiftX,FLOAT,derived_from,PerspectivePinhole); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(shiftY,FLOAT,derived_from,PerspectivePinhole); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(fov,FLOAT,derived_from,PerspectivePinhole); + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED("fovAxis",STRING,derived_from,PerspectivePinhole) { auto& state = _this->perspective; + // TODO: check if this gives problem with delay loads static const core::unordered_map StringToType = { {"x", PerspectivePinhole::FOVAxis::X}, @@ -67,81 +43,51 @@ auto CElementSensor::compAddPropertyMap() -> AddPropertyMap } ); - return retval; -} + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(up,VECTOR,derived_from,ShutterSensor); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(shutterOpen,FLOAT,derived_from,ShutterSensor); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(shutterClose,FLOAT,derived_from,ShutterSensor); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(moveSpeed,FLOAT,derived_from,ShutterSensor); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(zoomSpeed,FLOAT,derived_from,ShutterSensor); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(rotateSpeed,FLOAT,derived_from,ShutterSensor); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(nearClip,FLOAT,derived_from,CameraBase); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(farClip,FLOAT,derived_from,CameraBase); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(focusDistance,FLOAT,derived_from,DepthOfFieldBase); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(apertureRadius,FLOAT,derived_from,DepthOfFieldBase); + + // special + auto setClipPlane = [](this_t* 
_this, SNamedPropertyElement&& _property, const system::logger_opt_ptr logger)->bool + { + if (_property.getVectorDimension()!=4) + { + return false; + } + constexpr std::string_view Name = "clipPlane"; + const std::string_view sv(_property.name); + if (sv.length()!=Name.length()+1 || sv.find(Name)!=0) + { + return false; + } + const auto index = std::atoi(sv.data()+Name.length()); + if (index>MaxClipPlanes) + { + return false; + } + // everyone inherits from this + _this->perspective.clipPlanes[index] = _property.vvalue; + return true; + }; + for (auto i=0; i= Type::INVALID) - return false; - bool error = false; - -#if 0 - -#define SET_PROPERTY_TEMPLATE(MEMBER,PROPERTY_TYPE,BASE) [&]() -> void { \ - dispatch([&](auto& state) -> void { \ - if constexpr (std::is_base_of::type >::value) \ - { \ - if (_property.type!=PROPERTY_TYPE) { \ - error = true; \ - return; \ - } \ - state. ## MEMBER = _property.getProperty(); \ - } \ - }); \ - } - - auto setUp = SET_PROPERTY_TEMPLATE(up,SNamedPropertyElement::Type::VECTOR,ShutterSensor); - auto setShutterOpen = SET_PROPERTY_TEMPLATE(shutterOpen,SNamedPropertyElement::Type::FLOAT,ShutterSensor); - auto setShutterClose = SET_PROPERTY_TEMPLATE(shutterClose,SNamedPropertyElement::Type::FLOAT,ShutterSensor); - auto setMoveSpeed = SET_PROPERTY_TEMPLATE(moveSpeed,SNamedPropertyElement::Type::FLOAT,ShutterSensor); - auto setZoomSpeed = SET_PROPERTY_TEMPLATE(zoomSpeed,SNamedPropertyElement::Type::FLOAT,ShutterSensor); - auto setRotateSpeed = SET_PROPERTY_TEMPLATE(rotateSpeed,SNamedPropertyElement::Type::FLOAT,ShutterSensor); - auto setNearClip = SET_PROPERTY_TEMPLATE(nearClip,SNamedPropertyElement::Type::FLOAT,CameraBase); - auto setFarClip = SET_PROPERTY_TEMPLATE(farClip,SNamedPropertyElement::Type::FLOAT,CameraBase); - auto setFocusDistance = SET_PROPERTY_TEMPLATE(focusDistance,SNamedPropertyElement::Type::FLOAT,DepthOfFieldBase); - auto setApertureRadius = 
SET_PROPERTY_TEMPLATE(apertureRadius,SNamedPropertyElement::Type::FLOAT,DepthOfFieldBase); + // TODOs: //auto setKc = SET_PROPERTY_TEMPLATE(apertureRadius,SNamedPropertyElement::Type::STRING,PerspectivePinholeRadialDistortion); + //{"focalLength", noIdeaHowToProcessValue}, - const core::unordered_map, core::CaseInsensitiveHash, core::CaseInsensitiveEquals> SetPropertyMap = - { - //{"focalLength", noIdeaHowToProcessValue}, - {"up", setUp}, - {"clipPlane0", setClipPlane}, - {"clipPlane1", setClipPlane}, - {"clipPlane2", setClipPlane}, - {"clipPlane3", setClipPlane}, - {"clipPlane4", setClipPlane}, - {"clipPlane5", setClipPlane}, - // UPDATE WHENEVER `MaxClipPlanes` changes! - {"shiftX", setShiftX}, - {"shiftY", setShiftY}, - {"fov", setFov}, - {"fovAxis", setFovAxis}, - {"shutterOpen", setShutterOpen}, - {"shuttterClose", setShutterClose}, - {"moveSpeed", setMoveSpeed}, - {"zoomSpeed", setZoomSpeed}, - {"rotateSpeed", setRotateSpeed}, - {"nearClip", setNearClip}, - {"farClip", setFarClip}, - {"focusDistance", setFocusDistance}, - {"apertureRadius", setApertureRadius} -//, {"kc", setKc} - }; - + return retval; +} - auto found = SetPropertyMap.find(_property.name); - if (found==SetPropertyMap.end()) - { - _NBL_DEBUG_BREAK_IF(true); - ParserLog::invalidXMLFileStructure("No Integrator can have such property set with name: "+_property.name); - return false; - } - - found->second(); - return !error; -#endif +bool CElementSensor::addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) +{ assert(false); return false; } diff --git a/src/nbl/ext/MitsubaLoader/ElementMacros.h b/src/nbl/ext/MitsubaLoader/ElementMacros.h index 9492032ca1..7a210c3b0a 100644 --- a/src/nbl/ext/MitsubaLoader/ElementMacros.h +++ b/src/nbl/ext/MitsubaLoader/ElementMacros.h @@ -5,7 +5,30 @@ #include "nbl/ext/MitsubaLoader/ParserUtil.h" -#define NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY(PROP_TYPE,NAME) 
retval.registerCallback(SNamedPropertyElement::Type::PROP_TYPE,NAME,{.func=[](this_t* _this, SNamedPropertyElement&& _property, const system::logger_opt_ptr logger)->bool +#define NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY(NAME,PROP_TYPE) retval.registerCallback(SNamedPropertyElement::Type::PROP_TYPE,NAME,{\ + .func=[](this_t* _this, SNamedPropertyElement&& _property, const system::logger_opt_ptr logger)->bool + +#define NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_PROPERTY(NAME,PROP_TYPE) NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY(#NAME,PROP_TYPE) {\ + _this->NAME = _property.getProperty(); \ + return true;}}) + + +#define NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED(NAME,PROP_TYPE,CONSTRAINT,...) retval.template registerCallback( \ + SNamedPropertyElement::Type::PROP_TYPE,NAME,[](this_t* _this, SNamedPropertyElement&& _property, const system::logger_opt_ptr logger)->bool + +#define NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(NAME,PROP_TYPE,CONSTRAINT,...) NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED(#NAME,PROP_TYPE,CONSTRAINT,__VA_ARGS__) {\ + _this->visit([&_property](auto& state)->void{ \ + if constexpr (CONSTRAINT,__VA_ARGS__>::value) \ + state. 
## NAME = _property.getProperty(); \ + }); return true;}) + + +// just to reverse `is_base_of` +namespace nbl::ext::MitsubaLoader +{ +template +struct derived_from : std::is_base_of {}; +} /* template<> From a892678a067c1c19a23011cdcc4c4323cf0ee5c1 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 16 Oct 2025 18:19:14 +0200 Subject: [PATCH 073/472] update examples_tests submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 7e4399a48b..1500ce014c 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 7e4399a48b6f47892b8406ff7853489653a6acaf +Subproject commit 1500ce014cceb14df35cd5009d8b126b34ef706f From 5e236e60ac71552b461e1971636f962c2b748a9d Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 19 Oct 2025 12:58:40 +0200 Subject: [PATCH 074/472] update examples_tests submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 1500ce014c..7d50d65b36 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 1500ce014cceb14df35cd5009d8b126b34ef706f +Subproject commit 7d50d65b36124ded561018ac8e86de41141c96a3 From 7b2716811c4e282dff61500a3e2fa7f9c4cc1118 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 20 Oct 2025 10:38:39 +0200 Subject: [PATCH 075/472] update examples_tests submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 7d50d65b36..c46f8f8755 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 7d50d65b36124ded561018ac8e86de41141c96a3 +Subproject commit c46f8f8755a26d5e3d1ba9587769902e4025248a From 77e9e573a0dcf380f0e4cd4bbbdd60b73ba83a22 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 20 Oct 2025 16:13:23 +0200 Subject: [PATCH 076/472] update examples_tests submodule --- examples_tests | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index c46f8f8755..ceada5e891 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit c46f8f8755a26d5e3d1ba9587769902e4025248a +Subproject commit ceada5e891a70b1fcc8d456c4bd50ef3106ee181 From 699e841a3841693088aa3d515bd3addef72589ce Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 21 Oct 2025 14:13:27 +0200 Subject: [PATCH 077/472] update ImGUI extension to allow hijacking precompiled SPIRV shaders (experimental, no real validation), update examples_tests submodule --- examples_tests | 2 +- include/nbl/ext/ImGui/ImGui.h | 8 ++++++ .../nbl/ext/ImGui/builtin/hlsl/fragment.hlsl | 1 + .../nbl/ext/ImGui/builtin/hlsl/vertex.hlsl | 1 + src/nbl/ext/ImGui/ImGui.cpp | 28 ++++++++++++------- 5 files changed, 29 insertions(+), 11 deletions(-) diff --git a/examples_tests b/examples_tests index ceada5e891..349a850d4e 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit ceada5e891a70b1fcc8d456c4bd50ef3106ee181 +Subproject commit 349a850d4eb51619c33228c676eb1c84ac0f4a54 diff --git a/include/nbl/ext/ImGui/ImGui.h b/include/nbl/ext/ImGui/ImGui.h index 5f3c1d5f08..16f080d102 100644 --- a/include/nbl/ext/ImGui/ImGui.h +++ b/include/nbl/ext/ImGui/ImGui.h @@ -99,6 +99,14 @@ class UI final : public core::IReferenceCounted //! optional, no cache used if not provided core::smart_refctd_ptr pipelineCache = nullptr; + + struct PrecompiledShaders + { + core::smart_refctd_ptr vertex = nullptr, fragment = nullptr; + }; + + //! optional, precompiled spirv shaders (experimental) + std::optional spirv = std::nullopt; }; //! 
parameters which may change every frame, used with the .update call to interact with ImGuiIO; we require a very *required* minimum - if you need to cover more IO options simply get the IO with ImGui::GetIO() to customize them (they all have default values you can change before calling the .update) diff --git a/include/nbl/ext/ImGui/builtin/hlsl/fragment.hlsl b/include/nbl/ext/ImGui/builtin/hlsl/fragment.hlsl index 26e2b461a3..44ef6a0cb1 100644 --- a/include/nbl/ext/ImGui/builtin/hlsl/fragment.hlsl +++ b/include/nbl/ext/ImGui/builtin/hlsl/fragment.hlsl @@ -39,6 +39,7 @@ using namespace nbl::ext::imgui; to request per object data with BDA */ +[shader("pixel")] float4 PSMain(PSInput input) : SV_Target0 { // BDA for requesting object data diff --git a/include/nbl/ext/ImGui/builtin/hlsl/vertex.hlsl b/include/nbl/ext/ImGui/builtin/hlsl/vertex.hlsl index 1651060c58..78dbe10ac7 100644 --- a/include/nbl/ext/ImGui/builtin/hlsl/vertex.hlsl +++ b/include/nbl/ext/ImGui/builtin/hlsl/vertex.hlsl @@ -18,6 +18,7 @@ struct VSInput to request per object data with BDA */ +[shader("vertex")] PSInput VSMain(VSInput input, uint drawID : SV_InstanceID) { PSInput output; diff --git a/src/nbl/ext/ImGui/ImGui.cpp b/src/nbl/ext/ImGui/ImGui.cpp index f477e96cdf..fbb33f022f 100644 --- a/src/nbl/ext/ImGui/ImGui.cpp +++ b/src/nbl/ext/ImGui/ImGui.cpp @@ -154,6 +154,14 @@ core::smart_refctd_ptr UI::createPipeline(SCreation smart_refctd_ptr vertex, fragment; } shaders; + if (creationParams.spirv.has_value()) + { + // TODO: since prebuild is experimental currently I don't validate anything + auto& spirv = creationParams.spirv.value(); + shaders.vertex = spirv.vertex; + shaders.fragment = spirv.fragment; + } + else { //! 
proxy the system, we will touch it gently auto system = smart_refctd_ptr(creationParams.assetManager->getSystem()); @@ -269,18 +277,18 @@ core::smart_refctd_ptr UI::createPipeline(SCreation shaders.vertex = createShader.template operator() < NBL_CORE_UNIQUE_STRING_LITERAL_TYPE("vertex.hlsl"), IShader::E_SHADER_STAGE::ESS_VERTEX > (); shaders.fragment = createShader.template operator() < NBL_CORE_UNIQUE_STRING_LITERAL_TYPE("fragment.hlsl"), IShader::E_SHADER_STAGE::ESS_FRAGMENT > (); + } - if (!shaders.vertex) - { - creationParams.utilities->getLogger()->log("Failed to compile vertex shader!", ILogger::ELL_ERROR); - return nullptr; - } + if (!shaders.vertex) + { + creationParams.utilities->getLogger()->log("Failed to create vertex shader!", ILogger::ELL_ERROR); + return nullptr; + } - if (!shaders.fragment) - { - creationParams.utilities->getLogger()->log("Failed to compile fragment shader!", ILogger::ELL_ERROR); - return nullptr; - } + if (!shaders.fragment) + { + creationParams.utilities->getLogger()->log("Failed to create fragment shader!", ILogger::ELL_ERROR); + return nullptr; } SVertexInputParams vertexInputParams{}; From 0e1d4c2381c00e053d530817061b8428147ed96b Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 21 Oct 2025 23:35:44 +0200 Subject: [PATCH 078/472] try to pick up where I left off --- include/nbl/ext/MitsubaLoader/CElementBSDF.h | 11 ++- .../MitsubaLoader/CElementEmissionProfile.h | 11 +-- .../nbl/ext/MitsubaLoader/CElementEmitter.h | 16 ++-- include/nbl/ext/MitsubaLoader/CElementFilm.h | 7 +- .../ext/MitsubaLoader/CElementIntegrator.h | 9 +- .../nbl/ext/MitsubaLoader/CElementRFilter.h | 34 +++++--- .../nbl/ext/MitsubaLoader/CElementSampler.h | 6 +- .../nbl/ext/MitsubaLoader/CElementSensor.h | 1 - include/nbl/ext/MitsubaLoader/CElementShape.h | 7 +- .../nbl/ext/MitsubaLoader/CElementTexture.h | 16 ++-- .../nbl/ext/MitsubaLoader/CElementTransform.h | 7 +- include/nbl/ext/MitsubaLoader/IElement.h | 1 - include/nbl/ext/MitsubaLoader/ParserUtil.h | 21 
+++-- src/nbl/ext/MitsubaLoader/CElementSampler.cpp | 87 +++++++++++-------- src/nbl/ext/MitsubaLoader/CElementSensor.cpp | 9 -- .../ext/MitsubaLoader/CElementTransform.cpp | 38 ++++---- src/nbl/ext/MitsubaLoader/ParserUtil.cpp | 3 + 17 files changed, 164 insertions(+), 120 deletions(-) diff --git a/include/nbl/ext/MitsubaLoader/CElementBSDF.h b/include/nbl/ext/MitsubaLoader/CElementBSDF.h index 1d4a823e30..7f5e73c13f 100644 --- a/include/nbl/ext/MitsubaLoader/CElementBSDF.h +++ b/include/nbl/ext/MitsubaLoader/CElementBSDF.h @@ -289,6 +289,10 @@ class CElementBSDF : public IElement CElementTexture::SpectrumOrTexture diffuseReflectance = 0.5f; }; + // + static AddPropertyMap compAddPropertyMap(); + + // inline CElementBSDF(const char* id) : IElement(id), type(Type::INVALID) { } @@ -369,14 +373,15 @@ class CElementBSDF : public IElement return *this; } - bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) override; bool onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) override; - IElement::Type getType() const override { return IElement::Type::BSDF; } + + constexpr static inline auto ElementType = IElement::Type::BSDF; + inline IElement::Type getType() const override { return ElementType; } std::string getLogName() const override { return "bsdf"; } bool processChildData(IElement* _child, const std::string& name) override; - bool isMeta() const + inline bool isMeta() const { switch (type) { diff --git a/include/nbl/ext/MitsubaLoader/CElementEmissionProfile.h b/include/nbl/ext/MitsubaLoader/CElementEmissionProfile.h index a4b1d22485..331abf2873 100644 --- a/include/nbl/ext/MitsubaLoader/CElementEmissionProfile.h +++ b/include/nbl/ext/MitsubaLoader/CElementEmissionProfile.h @@ -14,6 +14,8 @@ namespace nbl::ext::MitsubaLoader struct CElementEmissionProfile final : public IElement { + static AddPropertyMap compAddPropertyMap(); + inline CElementEmissionProfile(const char* id) : IElement(id), normalization(EN_NONE), 
flatten(0.0) /*no blending by default*/ {} inline CElementEmissionProfile() : IElement(""), normalization(EN_NONE) {} inline CElementEmissionProfile(const CElementEmissionProfile& other) : IElement("") @@ -24,6 +26,7 @@ struct CElementEmissionProfile final : public IElement { operator=(std::move(other)); } + inline ~CElementEmissionProfile() {} inline CElementEmissionProfile& operator=(const CElementEmissionProfile& other) { @@ -39,14 +42,12 @@ struct CElementEmissionProfile final : public IElement return *this; } - inline ~CElementEmissionProfile() - { - } - bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) override; inline bool onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) override {return true;} bool processChildData(IElement* _child, const std::string& name) override; - inline IElement::Type getType() const override { return IElement::Type::EMISSION_PROFILE; } + + constexpr static inline auto ElementType = IElement::Type::EMISSION_PROFILE; + inline IElement::Type getType() const override { return ElementType; } inline std::string getLogName() const override { return "emissionprofile "; } enum E_NORMALIZE : uint8_t diff --git a/include/nbl/ext/MitsubaLoader/CElementEmitter.h b/include/nbl/ext/MitsubaLoader/CElementEmitter.h index 389ec31fd5..5de6861d7f 100644 --- a/include/nbl/ext/MitsubaLoader/CElementEmitter.h +++ b/include/nbl/ext/MitsubaLoader/CElementEmitter.h @@ -97,16 +97,19 @@ class CElementEmitter : public IElement core::vectorSIMDf radiance = core::vectorSIMDf(1.f); // Watts Meter^-2 Steradian^-1 }; + // + static AddPropertyMap compAddPropertyMap(); - CElementEmitter(const char* id) : IElement(id), type(Type::INVALID), /*toWorldType(IElement::Type::TRANSFORM),*/ transform() + // + inline CElementEmitter(const char* id) : IElement(id), type(Type::INVALID), /*toWorldType(IElement::Type::TRANSFORM),*/ transform() { } - CElementEmitter() : CElementEmitter("") {} - CElementEmitter(const 
CElementEmitter& other) : IElement(""), transform() + inline CElementEmitter() : CElementEmitter("") {} + inline CElementEmitter(const CElementEmitter& other) : IElement(""), transform() { operator=(other); } - CElementEmitter(CElementEmitter&& other) : IElement(""), transform() + inline CElementEmitter(CElementEmitter&& other) : IElement(""), transform() { operator=(std::move(other)); } @@ -200,9 +203,10 @@ class CElementEmitter : public IElement return *this; } - bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) override; bool onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) override; - IElement::Type getType() const override { return IElement::Type::EMITTER; } + + constexpr static inline auto ElementType = IElement::Type::EMITTER; + inline IElement::Type getType() const override { return ElementType; } std::string getLogName() const override { return "emitter"; } bool processChildData(IElement* _child, const std::string& name) override diff --git a/include/nbl/ext/MitsubaLoader/CElementFilm.h b/include/nbl/ext/MitsubaLoader/CElementFilm.h index dcd7402132..986a5a5cbe 100644 --- a/include/nbl/ext/MitsubaLoader/CElementFilm.h +++ b/include/nbl/ext/MitsubaLoader/CElementFilm.h @@ -99,7 +99,7 @@ class CElementFilm final : public IElement { hdrfilm = HDR(); } - virtual ~CElementFilm() + virtual inline ~CElementFilm() { } @@ -124,9 +124,10 @@ class CElementFilm final : public IElement } } - bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) override; bool onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) override; - inline IElement::Type getType() const override { return IElement::Type::FILM; } + + constexpr static inline auto ElementType = IElement::Type::FILM; + inline IElement::Type getType() const override { return ElementType; } inline std::string getLogName() const override { return "film"; } inline bool processChildData(IElement* _child, const 
std::string& name) override diff --git a/include/nbl/ext/MitsubaLoader/CElementIntegrator.h b/include/nbl/ext/MitsubaLoader/CElementIntegrator.h index d683204439..94ea85b7b6 100644 --- a/include/nbl/ext/MitsubaLoader/CElementIntegrator.h +++ b/include/nbl/ext/MitsubaLoader/CElementIntegrator.h @@ -215,6 +215,10 @@ class CElementIntegrator final : public IElement { }; + // + static AddPropertyMap compAddPropertyMap(); + + // inline CElementIntegrator(const char* id) : IElement(id), type(Type::INVALID) { } @@ -303,9 +307,10 @@ class CElementIntegrator final : public IElement return *this; } - bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) override; bool onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) override; - inline IElement::Type getType() const override { return IElement::Type::INTEGRATOR; } + + constexpr static inline auto ElementType = IElement::Type::INTEGRATOR; + inline IElement::Type getType() const override { return ElementType; } inline std::string getLogName() const override { return "integrator"; } inline bool processChildData(IElement* _child, const std::string& name) override diff --git a/include/nbl/ext/MitsubaLoader/CElementRFilter.h b/include/nbl/ext/MitsubaLoader/CElementRFilter.h index 0ca2426d73..b3673c9e35 100644 --- a/include/nbl/ext/MitsubaLoader/CElementRFilter.h +++ b/include/nbl/ext/MitsubaLoader/CElementRFilter.h @@ -26,17 +26,6 @@ class CElementRFilter final : public IElement CATMULLROM, LANCZOS }; - static inline core::unordered_map compStringToTypeMap() - { - return { - std::make_pair("box", Type::BOX), - std::make_pair("tent", Type::TENT), - std::make_pair("gaussian", Type::GAUSSIAN), - std::make_pair("mitchell", Type::MITCHELL), - std::make_pair("catmullrom", Type::CATMULLROM), - std::make_pair("lanczos", Type::LANCZOS) - }; - } struct Gaussian { @@ -52,6 +41,24 @@ class CElementRFilter final : public IElement int32_t lobes = 3; }; + using variant_list_t = core::type_list< 
+ Gaussian, + MitchellNetravali, + LanczosSinc + >; + static inline core::unordered_map compStringToTypeMap() + { + return { + std::make_pair("box", Type::BOX), + std::make_pair("tent", Type::TENT), + std::make_pair("gaussian", Type::GAUSSIAN), + std::make_pair("mitchell", Type::MITCHELL), + std::make_pair("catmullrom", Type::CATMULLROM), + std::make_pair("lanczos", Type::LANCZOS) + }; + } + static AddPropertyMap compAddPropertyMap(); + inline CElementRFilter(const char* id) : IElement(id), type(GAUSSIAN) { gaussian = Gaussian(); @@ -93,9 +100,10 @@ class CElementRFilter final : public IElement ); } - bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) override; bool onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) override; - inline IElement::Type getType() const override { return IElement::Type::RFILTER; } + + constexpr static inline auto ElementType = IElement::Type::RFILTER; + inline IElement::Type getType() const override { return ElementType; } inline std::string getLogName() const override { return "rfilter"; } // make these public diff --git a/include/nbl/ext/MitsubaLoader/CElementSampler.h b/include/nbl/ext/MitsubaLoader/CElementSampler.h index 02e3ae6f6b..9b9bc2b820 100644 --- a/include/nbl/ext/MitsubaLoader/CElementSampler.h +++ b/include/nbl/ext/MitsubaLoader/CElementSampler.h @@ -36,6 +36,7 @@ class CElementSampler : public IElement {"sobol", Type::SOBOL} }; } + static AddPropertyMap compAddPropertyMap(); inline CElementSampler(const char* id) : IElement(id), type(INVALID), sampleCount(4) {} inline ~CElementSampler() {} @@ -63,9 +64,10 @@ class CElementSampler : public IElement } } - bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) override; bool onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) override; - inline IElement::Type getType() const override { return IElement::Type::SAMPLER; } + + constexpr static inline auto ElementType = 
IElement::Type::SAMPLER; + inline IElement::Type getType() const override { return ElementType; } inline std::string getLogName() const override { return "sampler"; } // make these public diff --git a/include/nbl/ext/MitsubaLoader/CElementSensor.h b/include/nbl/ext/MitsubaLoader/CElementSensor.h index d40b19bc5e..4ef41e42d5 100644 --- a/include/nbl/ext/MitsubaLoader/CElementSensor.h +++ b/include/nbl/ext/MitsubaLoader/CElementSensor.h @@ -201,7 +201,6 @@ class CElementSensor final : public IElement return *this; } - bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) override; bool onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) override; constexpr static inline auto ElementType = IElement::Type::SENSOR; diff --git a/include/nbl/ext/MitsubaLoader/CElementShape.h b/include/nbl/ext/MitsubaLoader/CElementShape.h index 08dfa75783..23018079a1 100644 --- a/include/nbl/ext/MitsubaLoader/CElementShape.h +++ b/include/nbl/ext/MitsubaLoader/CElementShape.h @@ -99,6 +99,8 @@ class CElementShape final : public IElement CElementTexture* texture; };*/ + static AddPropertyMap compAddPropertyMap(); + inline CElementShape(const char* id) : IElement(id), type(Type::INVALID), /*toWorldType(IElement::Type::TRANSFORM),*/ transform(), bsdf(nullptr), emitter(nullptr) { } @@ -215,9 +217,10 @@ class CElementShape final : public IElement return *this; } - bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) override; bool onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) override; - inline IElement::Type getType() const override { return IElement::Type::SHAPE; } + + constexpr static inline auto ElementType = IElement::Type::SHAPE; + inline IElement::Type getType() const override { return ElementType; } inline std::string getLogName() const override { return "shape"; } diff --git a/include/nbl/ext/MitsubaLoader/CElementTexture.h b/include/nbl/ext/MitsubaLoader/CElementTexture.h 
index ec2ff2d1bf..ece070785e 100644 --- a/include/nbl/ext/MitsubaLoader/CElementTexture.h +++ b/include/nbl/ext/MitsubaLoader/CElementTexture.h @@ -155,18 +155,22 @@ class CElementTexture : public IElement float scale; }; - CElementTexture(const char* id) : IElement(id), type(Type::INVALID) + // + static AddPropertyMap compAddPropertyMap(); + + // + inline CElementTexture(const char* id) : IElement(id), type(Type::INVALID) { } - CElementTexture(const CElementTexture& other) : CElementTexture("") + inline CElementTexture(const CElementTexture& other) : CElementTexture("") { operator=(other); } - CElementTexture(CElementTexture&& other) : CElementTexture("") + inline CElementTexture(CElementTexture&& other) : CElementTexture("") { operator=(std::move(other)); } - virtual ~CElementTexture() + inline virtual ~CElementTexture() { } @@ -235,10 +239,10 @@ class CElementTexture : public IElement return *this; } - bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) override; bool onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) override; - inline IElement::Type getType() const override { return IElement::Type::TEXTURE; } + constexpr static inline auto ElementType = IElement::Type::TEXTURE; + inline IElement::Type getType() const override { return ElementType; } inline std::string getLogName() const override { return "texture"; } bool processChildData(IElement* _child, const std::string& name) override; diff --git a/include/nbl/ext/MitsubaLoader/CElementTransform.h b/include/nbl/ext/MitsubaLoader/CElementTransform.h index 292a081bba..45612174ad 100644 --- a/include/nbl/ext/MitsubaLoader/CElementTransform.h +++ b/include/nbl/ext/MitsubaLoader/CElementTransform.h @@ -14,12 +14,15 @@ namespace nbl::ext::MitsubaLoader class CElementTransform final : public IElement { public: + static AddPropertyMap compAddPropertyMap(); + inline CElementTransform() : IElement(""), matrix() {} inline ~CElementTransform() {} - bool 
addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) override; inline bool onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) override {return true;} - inline IElement::Type getType() const override { return IElement::Type::TRANSFORM; } + + constexpr static inline auto ElementType = IElement::Type::TRANSFORM; + inline IElement::Type getType() const override { return ElementType; } inline std::string getLogName() const override { return "transform"; } /* inline CElementTransform& operator=(const CElementTransform& other) diff --git a/include/nbl/ext/MitsubaLoader/IElement.h b/include/nbl/ext/MitsubaLoader/IElement.h index 68c2958fb2..4da3dbb848 100644 --- a/include/nbl/ext/MitsubaLoader/IElement.h +++ b/include/nbl/ext/MitsubaLoader/IElement.h @@ -64,7 +64,6 @@ class IElement virtual IElement::Type getType() const = 0; virtual std::string getLogName() const = 0; - virtual bool addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) = 0; virtual bool onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) = 0; //! 
default implementation for elements that doesnt have any children virtual bool processChildData(IElement* _child, const std::string& name) diff --git a/include/nbl/ext/MitsubaLoader/ParserUtil.h b/include/nbl/ext/MitsubaLoader/ParserUtil.h index 5787cc6c62..5c2b3efbef 100644 --- a/include/nbl/ext/MitsubaLoader/ParserUtil.h +++ b/include/nbl/ext/MitsubaLoader/ParserUtil.h @@ -8,9 +8,16 @@ #include "nbl/asset/interchange/IAssetLoader.h" #include "nbl/ext/MitsubaLoader/CMitsubaMetadata.h" -#include "nbl/ext/MitsubaLoader/PropertyElement.h" +#include "nbl/ext/MitsubaLoader/CElementIntegrator.h" #include "nbl/ext/MitsubaLoader/CElementSensor.h" +#include "nbl/ext/MitsubaLoader/CElementFilm.h" +#include "nbl/ext/MitsubaLoader/CElementRFilter.h" +#include "nbl/ext/MitsubaLoader/CElementSampler.h" #include "nbl/ext/MitsubaLoader/CElementShape.h" +#include "nbl/ext/MitsubaLoader/CElementBSDF.h" +#include "nbl/ext/MitsubaLoader/CElementTexture.h" +#include "nbl/ext/MitsubaLoader/CElementEmitter.h" +#include "nbl/ext/MitsubaLoader/CElementEmissionProfile.h" #include @@ -74,16 +81,16 @@ class ParserManager final const CPropertyElementManager propertyElementManager; using supported_elements_t = core::type_list< -// CElementIntegrator, - CElementSensor -// CElementFilm, -// CElementRFilter, -// CElementSampler, + CElementIntegrator, + CElementSensor, + CElementFilm, + CElementRFilter, + CElementSampler, /// CElementShape, /// CElementBSDF, /// CElementTexture, /// CElementEmitter, -// CElementEmissionProfile + CElementEmissionProfile >; private: diff --git a/src/nbl/ext/MitsubaLoader/CElementSampler.cpp b/src/nbl/ext/MitsubaLoader/CElementSampler.cpp index 3dbb2db9f7..b862a754fb 100644 --- a/src/nbl/ext/MitsubaLoader/CElementSampler.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementSampler.cpp @@ -1,56 +1,69 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. 
// This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h #include "nbl/ext/MitsubaLoader/CElementSampler.h" +#include "nbl/ext/MitsubaLoader/ParserUtil.h" #include "nbl/ext/MitsubaLoader/ElementMacros.h" namespace nbl::ext::MitsubaLoader { - - -bool CElementSampler::addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) + +auto CElementSampler::compAddPropertyMap() -> AddPropertyMap { - if (_property.type==SNamedPropertyElement::Type::INTEGER && _property.name=="sampleCount") - { - sampleCount = _property.ivalue; - switch (type) + using this_t = CElementSampler; + AddPropertyMap retval; + + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY("sampleCount",INTEGER) { - case Type::STRATIFIED: - sampleCount = ceilf(sqrtf(sampleCount)); - break; - case Type::LDSAMPLER: - //sampleCount = core::roundUpToPoT(sampleCount); - break; - default: - break; + auto& sampleCount = _this->sampleCount; + sampleCount = _property.ivalue; + switch (_this->type) + { + case Type::STRATIFIED: + sampleCount = ceilf(sqrtf(sampleCount)); + break; + case Type::LDSAMPLER: + //sampleCount = core::roundUpToPoT(sampleCount); + break; + default: + break; + } + return true; } - } - else if (_property.type == SNamedPropertyElement::Type::INTEGER && _property.name == "dimension") - { - dimension = _property.ivalue; - if (type == Type::INDEPENDENT || type == Type::HALTON || type == Type::HAMMERSLEY) + }); + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY("dimension",INTEGER) { - invalidXMLFileStructure(logger,"this sampler type ("+std::to_string(type)+") does not take these parameters"); - return false; + _this->dimension = _property.ivalue; + switch (_this->type) + { + case Type::INDEPENDENT: [[fallthrough]]; + case Type::HALTON: [[fallthrough]]; + case Type::HAMMERSLEY: + invalidXMLFileStructure(logger,"this sampler type ("+std::to_string(_this->type)+") does not take these parameters"); + return false; + default: + return 
true; + } } - } - else if (_property.type == SNamedPropertyElement::Type::INTEGER && _property.name == "scramble") - { - scramble = _property.ivalue; - if (type==Type::INDEPENDENT || type==Type::STRATIFIED || type == Type::LDSAMPLER) + }); + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY("scramble",INTEGER) { - invalidXMLFileStructure(logger,"this sampler type ("+std::to_string(type)+") does not take these parameters"); - return false; + _this->scramble = _property.ivalue; + switch (_this->type) + { + case Type::INDEPENDENT: [[fallthrough]]; + case Type::HALTON: [[fallthrough]]; + case Type::HAMMERSLEY: + invalidXMLFileStructure(logger,"this sampler type ("+std::to_string(_this->type)+") does not take these parameters"); + return false; + default: + return true; + } } - } - else - { - invalidXMLFileStructure(logger,"unknown property named `"+_property.name+"` of type "+std::to_string(_property.type)); - return false; - } + }); - return true; + return retval; } bool CElementSampler::onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) diff --git a/src/nbl/ext/MitsubaLoader/CElementSensor.cpp b/src/nbl/ext/MitsubaLoader/CElementSensor.cpp index 6982870fc6..d8ab85bb26 100644 --- a/src/nbl/ext/MitsubaLoader/CElementSensor.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementSensor.cpp @@ -1,8 +1,6 @@ // Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h - - #include "nbl/ext/MitsubaLoader/CElementSensor.h" #include "nbl/ext/MitsubaLoader/ParserUtil.h" #include "nbl/ext/MitsubaLoader/ElementMacros.h" @@ -43,7 +41,6 @@ auto CElementSensor::compAddPropertyMap() -> AddPropertyMap } ); - NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(up,VECTOR,derived_from,ShutterSensor); NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(shutterOpen,FLOAT,derived_from,ShutterSensor); NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(shutterClose,FLOAT,derived_from,ShutterSensor); NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(moveSpeed,FLOAT,derived_from,ShutterSensor); @@ -86,12 +83,6 @@ auto CElementSensor::compAddPropertyMap() -> AddPropertyMap return retval; } -bool CElementSensor::addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) -{ - assert(false); - return false; -} - bool CElementSensor::onEndTag(CMitsubaMetadata* meta, system::logger_opt_ptr logger) { NBL_EXT_MITSUBA_LOADER_ELEMENT_INVALID_TYPE_CHECK(true); diff --git a/src/nbl/ext/MitsubaLoader/CElementTransform.cpp b/src/nbl/ext/MitsubaLoader/CElementTransform.cpp index 60496ad714..cb583c211c 100644 --- a/src/nbl/ext/MitsubaLoader/CElementTransform.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementTransform.cpp @@ -8,30 +8,26 @@ namespace nbl::ext::MitsubaLoader { -bool CElementTransform::addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) +auto CElementTransform::compAddPropertyMap() -> AddPropertyMap { - switch (_property.type) + using this_t = CElementTransform; + AddPropertyMap retval; + + auto setMatrix = [](this_t* _this, SNamedPropertyElement&& _property, const system::logger_opt_ptr logger)->bool { - case SNamedPropertyElement::Type::MATRIX: - [[fallthrough]]; - case SNamedPropertyElement::Type::TRANSLATE: - [[fallthrough]]; - case 
SNamedPropertyElement::Type::ROTATE: - [[fallthrough]]; - case SNamedPropertyElement::Type::SCALE: - [[fallthrough]]; - case SNamedPropertyElement::Type::LOOKAT: - matrix = hlsl::mul(matrix,_property.mvalue); - break; - default: - { - invalidXMLFileStructure(logger,"The transform element does not take child property: "+_property.type); - return false; - } - break; - } + _this->matrix = _property.mvalue; + return true; + }; + for (const auto& type : { + SNamedPropertyElement::Type::MATRIX, + SNamedPropertyElement::Type::TRANSLATE, + SNamedPropertyElement::Type::ROTATE, + SNamedPropertyElement::Type::SCALE, + SNamedPropertyElement::Type::LOOKAT + }) + retval.registerCallback(type,"",{.func=setMatrix}); - return true; + return retval; } } \ No newline at end of file diff --git a/src/nbl/ext/MitsubaLoader/ParserUtil.cpp b/src/nbl/ext/MitsubaLoader/ParserUtil.cpp index 0ba2bad8d0..a79fdbb906 100644 --- a/src/nbl/ext/MitsubaLoader/ParserUtil.cpp +++ b/src/nbl/ext/MitsubaLoader/ParserUtil.cpp @@ -220,6 +220,9 @@ void ParserManager::XMLContext::parseElement(const char* _el, const char** _atts return; } auto nameIt = typeMap.find(property.name); + // special, find callback that matches all names (registered with empty name) + if (nameIt==typeMap.end()) + nameIt = typeMap.find(""); if (nameIt==typeMap.end()) { session->invalidXMLFileStructure("There's no Property named (TODO) of Type (TODO) supported by ElementType (TODO)"); From bd6cc4dff57af3516bcff4ce1bb5cc72017300b0 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 22 Oct 2025 12:16:03 +0200 Subject: [PATCH 079/472] update examples_tests submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 349a850d4e..bdbae93a63 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 349a850d4eb51619c33228c676eb1c84ac0f4a54 +Subproject commit bdbae93a6389414dac1e47f59fb5bb74d99e68ec From 
93814913e84eb86e4c625d9e32d83221c62965c9 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 22 Oct 2025 14:58:21 +0200 Subject: [PATCH 080/472] a few minor updates to NBL_CREATE_NSC_COMPILE_RULES to skip optional arguments in json payload, CI should pass now --- cmake/common.cmake | 23 ++++++++++++++--------- examples_tests | 2 +- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index 92250a276b..ab215a59e3 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1277,17 +1277,22 @@ namespace @IMPL_NAMESPACE@ { foreach(INDEX RANGE ${LAST_INDEX}) string(JSON INPUT GET "${IMPL_INPUTS}" ${INDEX} INPUT) string(JSON BASE_KEY GET "${IMPL_INPUTS}" ${INDEX} KEY) - string(JSON COMPILE_OPTIONS_LENGTH LENGTH "${IMPL_INPUTS}" ${INDEX} COMPILE_OPTIONS) - + set(COMPILE_OPTIONS "") - math(EXPR LAST_CO "${COMPILE_OPTIONS_LENGTH} - 1") - foreach(COMP_IDX RANGE 0 ${LAST_CO}) - string(JSON COMP_ITEM GET "${IMPL_INPUTS}" ${INDEX} COMPILE_OPTIONS ${COMP_IDX}) - list(APPEND COMPILE_OPTIONS "${COMP_ITEM}") - endforeach() + string(JSON HAS_COMPILE_OPTIONS ERROR_VARIABLE ERROR_VAR TYPE "${IMPL_INPUTS}" ${INDEX} COMPILE_OPTIONS) + if(HAS_COMPILE_OPTIONS STREQUAL "ARRAY") + string(JSON COMPILE_OPTIONS_LENGTH LENGTH "${IMPL_INPUTS}" ${INDEX} COMPILE_OPTIONS) + if(NOT COMPILE_OPTIONS_LENGTH EQUAL 0) + math(EXPR LAST_CO "${COMPILE_OPTIONS_LENGTH} - 1") + foreach(COMP_IDX RANGE 0 ${LAST_CO}) + string(JSON COMP_ITEM GET "${IMPL_INPUTS}" ${INDEX} COMPILE_OPTIONS ${COMP_IDX}) + list(APPEND COMPILE_OPTIONS "${COMP_ITEM}") + endforeach() + endif() + endif() set(DEPENDS_ON "") - string(JSON HAS_DEPENDS TYPE "${IMPL_INPUTS}" ${INDEX} DEPENDS) + string(JSON HAS_DEPENDS ERROR_VARIABLE ERROR_VAR TYPE "${IMPL_INPUTS}" ${INDEX} DEPENDS) if(HAS_DEPENDS STREQUAL "ARRAY") string(JSON DEPENDS_LENGTH LENGTH "${IMPL_INPUTS}" ${INDEX} DEPENDS) if(NOT DEPENDS_LENGTH EQUAL 0) @@ -1305,7 +1310,7 @@ namespace @IMPL_NAMESPACE@ { set(HAS_CAPS FALSE) 
set(CAPS_LENGTH 0) - string(JSON CAPS_TYPE TYPE "${IMPL_INPUTS}" ${INDEX} CAPS) + string(JSON CAPS_TYPE ERROR_VARIABLE ERROR_VAR TYPE "${IMPL_INPUTS}" ${INDEX} CAPS) if(CAPS_TYPE STREQUAL "ARRAY") string(JSON CAPS_LENGTH LENGTH "${IMPL_INPUTS}" ${INDEX} CAPS) if(NOT CAPS_LENGTH EQUAL 0) diff --git a/examples_tests b/examples_tests index bdbae93a63..ac85bdb781 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit bdbae93a6389414dac1e47f59fb5bb74d99e68ec +Subproject commit ac85bdb781c77fdde694a3da81a8f5831d17e96d From e5da351b3bff6aebce6e872594def0472901cafe Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 22 Oct 2025 14:58:21 +0200 Subject: [PATCH 081/472] Resolved conflicts, cherry picked 9381491 --- cmake/common.cmake | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index 645837aaaa..6c7722a41b 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1277,17 +1277,22 @@ namespace @IMPL_NAMESPACE@ { foreach(INDEX RANGE ${LAST_INDEX}) string(JSON INPUT GET "${IMPL_INPUTS}" ${INDEX} INPUT) string(JSON BASE_KEY GET "${IMPL_INPUTS}" ${INDEX} KEY) - string(JSON COMPILE_OPTIONS_LENGTH LENGTH "${IMPL_INPUTS}" ${INDEX} COMPILE_OPTIONS) - + set(COMPILE_OPTIONS "") - math(EXPR LAST_CO "${COMPILE_OPTIONS_LENGTH} - 1") - foreach(COMP_IDX RANGE 0 ${LAST_CO}) - string(JSON COMP_ITEM GET "${IMPL_INPUTS}" ${INDEX} COMPILE_OPTIONS ${COMP_IDX}) - list(APPEND COMPILE_OPTIONS "${COMP_ITEM}") - endforeach() + string(JSON HAS_COMPILE_OPTIONS ERROR_VARIABLE ERROR_VAR TYPE "${IMPL_INPUTS}" ${INDEX} COMPILE_OPTIONS) + if(HAS_COMPILE_OPTIONS STREQUAL "ARRAY") + string(JSON COMPILE_OPTIONS_LENGTH LENGTH "${IMPL_INPUTS}" ${INDEX} COMPILE_OPTIONS) + if(NOT COMPILE_OPTIONS_LENGTH EQUAL 0) + math(EXPR LAST_CO "${COMPILE_OPTIONS_LENGTH} - 1") + foreach(COMP_IDX RANGE 0 ${LAST_CO}) + string(JSON COMP_ITEM GET "${IMPL_INPUTS}" ${INDEX} COMPILE_OPTIONS ${COMP_IDX}) + 
list(APPEND COMPILE_OPTIONS "${COMP_ITEM}") + endforeach() + endif() + endif() set(DEPENDS_ON "") - string(JSON HAS_DEPENDS TYPE "${IMPL_INPUTS}" ${INDEX} DEPENDS) + string(JSON HAS_DEPENDS ERROR_VARIABLE ERROR_VAR TYPE "${IMPL_INPUTS}" ${INDEX} DEPENDS) if(HAS_DEPENDS STREQUAL "ARRAY") string(JSON DEPENDS_LENGTH LENGTH "${IMPL_INPUTS}" ${INDEX} DEPENDS) if(NOT DEPENDS_LENGTH EQUAL 0) @@ -1305,7 +1310,7 @@ namespace @IMPL_NAMESPACE@ { set(HAS_CAPS FALSE) set(CAPS_LENGTH 0) - string(JSON CAPS_TYPE TYPE "${IMPL_INPUTS}" ${INDEX} CAPS) + string(JSON CAPS_TYPE ERROR_VARIABLE ERROR_VAR TYPE "${IMPL_INPUTS}" ${INDEX} CAPS) if(CAPS_TYPE STREQUAL "ARRAY") string(JSON CAPS_LENGTH LENGTH "${IMPL_INPUTS}" ${INDEX} CAPS) if(NOT CAPS_LENGTH EQUAL 0) From d7da234c506658675beac2e9049415f909201c26 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Fri, 24 Oct 2025 13:34:58 +0200 Subject: [PATCH 082/472] Updated examples --- examples_tests | 2 +- include/nbl/builtin/hlsl/ieee754.hlsl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples_tests b/examples_tests index 2b4db21239..f85ae8045c 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 2b4db2123918f380cc0a35f6889315a02f84ea73 +Subproject commit f85ae8045c13380ace4c124d8a07349b4fd5fb62 diff --git a/include/nbl/builtin/hlsl/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754.hlsl index 6bdfcf2514..65db917883 100644 --- a/include/nbl/builtin/hlsl/ieee754.hlsl +++ b/include/nbl/builtin/hlsl/ieee754.hlsl @@ -159,7 +159,7 @@ struct flipSign_helper(asUint ^ spirv::select(AsUint(0ull), ieee754::traits::signMask, flip)); + return bit_cast(asUint ^ spirv::select(flip, ieee754::traits::signMask, AsUint(0ull))); #else return bit_cast(asUint ^ (flip ? 
ieee754::traits::signMask : AsUint(0ull))); #endif From ae0b1604869ecb3f41d118078de5cd22c7662a2b Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 24 Oct 2025 14:58:31 +0200 Subject: [PATCH 083/472] CP_UTF8 for dxc source buffer's encoding just like code page (experimental) --- .../nbl/builtin/hlsl/ext/FullScreenTriangle/default.vert.hlsl | 3 ++- include/nbl/builtin/hlsl/surface_transform.h | 4 +++- src/nbl/asset/utils/CHLSLCompiler.cpp | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/ext/FullScreenTriangle/default.vert.hlsl b/include/nbl/builtin/hlsl/ext/FullScreenTriangle/default.vert.hlsl index a48d9b4623..7a2aef1cbf 100644 --- a/include/nbl/builtin/hlsl/ext/FullScreenTriangle/default.vert.hlsl +++ b/include/nbl/builtin/hlsl/ext/FullScreenTriangle/default.vert.hlsl @@ -33,4 +33,5 @@ SVertexAttributes main() SVertexAttributes retval; retval.uv = tc[gl_VertexIndex()]; return retval; -} \ No newline at end of file +} + diff --git a/include/nbl/builtin/hlsl/surface_transform.h b/include/nbl/builtin/hlsl/surface_transform.h index a681ecf0bb..2d7afa7add 100644 --- a/include/nbl/builtin/hlsl/surface_transform.h +++ b/include/nbl/builtin/hlsl/surface_transform.h @@ -181,4 +181,6 @@ TwoColumns applyToDerivatives(const FLAG_BITS transform, TwoColumns dDx_dDy) } } } -#endif \ No newline at end of file + +#endif // _NBL_BUILTIN_HLSL_SURFACE_TRANSFORM_INCLUDED_ + diff --git a/src/nbl/asset/utils/CHLSLCompiler.cpp b/src/nbl/asset/utils/CHLSLCompiler.cpp index 306d2f60de..51609c4039 100644 --- a/src/nbl/asset/utils/CHLSLCompiler.cpp +++ b/src/nbl/asset/utils/CHLSLCompiler.cpp @@ -301,7 +301,7 @@ static DxcCompilationResult dxcCompile(const CHLSLCompiler* compiler, nbl::asset DxcBuffer sourceBuffer; sourceBuffer.Ptr = src->GetBufferPointer(); sourceBuffer.Size = src->GetBufferSize(); - sourceBuffer.Encoding = 0; + sourceBuffer.Encoding = CP_UTF8; ComPtr compileResult; res = dxc->m_dxcCompiler->Compile(&sourceBuffer, args, 
argCount, nullptr, IID_PPV_ARGS(compileResult.GetAddressOf())); From 9e4c75ee1d7afcc3164aa8c680efe08058508249 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 10 Oct 2025 09:33:03 +0200 Subject: [PATCH 084/472] cap kind args --- cmake/common.cmake | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index 6c7722a41b..4bcbee1c4e 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1210,7 +1210,7 @@ struct DeviceConfigCaps get_target_property(HEADER_RULE_GENERATED ${IMPL_TARGET} NBL_HEADER_GENERATED_RULE) if(NOT HEADER_RULE_GENERATED) - set(INCLUDE_DIR "$/${IMPL_TARGET}/.cmake/include") + set(INCLUDE_DIR "$/${IMPL_TARGET}/.cmake/include") set(INCLUDE_FILE "${INCLUDE_DIR}/$") set(INCLUDE_CONTENT $) @@ -1328,12 +1328,27 @@ namespace @IMPL_NAMESPACE@ { set(CAP_NAMES "") set(CAP_TYPES "") + set(CAP_KINDS "") if(HAS_CAPS) math(EXPR LAST_CAP "${CAPS_LENGTH} - 1") foreach(CAP_IDX RANGE 0 ${LAST_CAP}) + string(JSON CAP_KIND ERROR_VARIABLE CAP_TYPE_ERROR GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} kind) string(JSON CAP_NAME GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} name) string(JSON CAP_TYPE GET "${IMPL_INPUTS}" ${INDEX} CAPS ${CAP_IDX} type) + # -> TODO: improve validation, input should be string + if(CAP_TYPE_ERROR) + set(CAP_KIND limits) # I assume its limit by default (or when invalid value present, currently) + else() + if(NOT CAP_KIND MATCHES "^(limits|features)$") + ERROR_WHILE_PARSING_ITEM( + "Invalid CAP kind \"${CAP_KIND}\" for ${CAP_NAME}\n" + "Allowed kinds are: limits, features" + ) + endif() + endif() + # <- + if(NOT CAP_TYPE MATCHES "^(bool|uint16_t|uint32_t|uint64_t)$") ERROR_WHILE_PARSING_ITEM( "Invalid CAP type \"${CAP_TYPE}\" for ${CAP_NAME}\n" @@ -1371,6 +1386,7 @@ namespace @IMPL_NAMESPACE@ { set(CAP_VALUES_${CAP_IDX} "${VALUES}") list(APPEND CAP_NAMES "${CAP_NAME}") list(APPEND CAP_TYPES "${CAP_TYPE}") + list(APPEND CAP_KINDS "${CAP_KIND}") 
endforeach() endif() @@ -1410,12 +1426,16 @@ namespace @IMPL_NAMESPACE@ { ]=]) unset(RETVAL_EVAL) - foreach(CAP ${CAP_NAMES}) + list(LENGTH CAP_NAMES CAP_COUNT) + math(EXPR CAP_COUNT "${CAP_COUNT} - 1") + foreach(i RANGE ${CAP_COUNT}) + list(GET CAP_NAMES ${i} CAP) + list(GET CAP_KINDS ${i} KIND) string(CONFIGURE [=[ - retval += ".@CAP@_" + std::to_string(limits.@CAP@); + retval += ".@CAP@_" + std::to_string(@KIND@.@CAP@); ]=] RETVALUE_VIEW @ONLY) string(APPEND RETVAL_EVAL "${RETVALUE_VIEW}") - endforeach(CAP) + endforeach() string(CONFIGURE "${HEADER_ITEM_VIEW}" HEADER_ITEM_EVAL @ONLY) set_property(TARGET ${IMPL_TARGET} APPEND_STRING PROPERTY NBL_HEADER_CONTENT "${HEADER_ITEM_EVAL}") @@ -1466,6 +1486,7 @@ namespace @IMPL_NAMESPACE@ { list(GET CAP_NAMES ${CAP_INDEX} CURRENT_CAP) list(GET CAP_TYPES ${CAP_INDEX} CURRENT_TYPE) + list(GET CAP_KINDS ${CAP_INDEX} CURRENT_KIND) set(VAR_NAME "CAP_VALUES_${CAP_INDEX}") set(VALUES "${${VAR_NAME}}") From 639e2d4d73ef8691591434f1388c2e07447cda8f Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 10 Oct 2025 10:23:00 +0200 Subject: [PATCH 085/472] fix a bug after my NSC rules update --- cmake/common.cmake | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index 4bcbee1c4e..ab215a59e3 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1427,15 +1427,18 @@ namespace @IMPL_NAMESPACE@ { ]=]) unset(RETVAL_EVAL) list(LENGTH CAP_NAMES CAP_COUNT) - math(EXPR CAP_COUNT "${CAP_COUNT} - 1") - foreach(i RANGE ${CAP_COUNT}) - list(GET CAP_NAMES ${i} CAP) - list(GET CAP_KINDS ${i} KIND) - string(CONFIGURE [=[ + if(CAP_COUNT GREATER 0) + math(EXPR LAST_CAP "${CAP_COUNT} - 1") + foreach(i RANGE ${LAST_CAP}) + list(GET CAP_NAMES ${i} CAP) + list(GET CAP_KINDS ${i} KIND) + string(CONFIGURE [=[ retval += ".@CAP@_" + std::to_string(@KIND@.@CAP@); -]=] RETVALUE_VIEW @ONLY) - string(APPEND RETVAL_EVAL "${RETVALUE_VIEW}") - endforeach() +]=] RETVALUE_VIEW 
@ONLY) + string(APPEND RETVAL_EVAL "${RETVALUE_VIEW}") + endforeach() + endif() + string(CONFIGURE "${HEADER_ITEM_VIEW}" HEADER_ITEM_EVAL @ONLY) set_property(TARGET ${IMPL_TARGET} APPEND_STRING PROPERTY NBL_HEADER_CONTENT "${HEADER_ITEM_EVAL}") From baa6b4af6a3c37cf8937357625fb02fdabf10955 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 10 Oct 2025 09:33:03 +0200 Subject: [PATCH 086/472] Resolved conflicts, cherry-picked e3129939c8253ec04525bdb726578cfe61b754ac --- cmake/common.cmake | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index ab215a59e3..4bcbee1c4e 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1427,18 +1427,15 @@ namespace @IMPL_NAMESPACE@ { ]=]) unset(RETVAL_EVAL) list(LENGTH CAP_NAMES CAP_COUNT) - if(CAP_COUNT GREATER 0) - math(EXPR LAST_CAP "${CAP_COUNT} - 1") - foreach(i RANGE ${LAST_CAP}) - list(GET CAP_NAMES ${i} CAP) - list(GET CAP_KINDS ${i} KIND) - string(CONFIGURE [=[ + math(EXPR CAP_COUNT "${CAP_COUNT} - 1") + foreach(i RANGE ${CAP_COUNT}) + list(GET CAP_NAMES ${i} CAP) + list(GET CAP_KINDS ${i} KIND) + string(CONFIGURE [=[ retval += ".@CAP@_" + std::to_string(@KIND@.@CAP@); -]=] RETVALUE_VIEW @ONLY) - string(APPEND RETVAL_EVAL "${RETVALUE_VIEW}") - endforeach() - endif() - +]=] RETVALUE_VIEW @ONLY) + string(APPEND RETVAL_EVAL "${RETVALUE_VIEW}") + endforeach() string(CONFIGURE "${HEADER_ITEM_VIEW}" HEADER_ITEM_EVAL @ONLY) set_property(TARGET ${IMPL_TARGET} APPEND_STRING PROPERTY NBL_HEADER_CONTENT "${HEADER_ITEM_EVAL}") From 0bd968e543e879c737bf264e446cbdd91976af38 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 10 Oct 2025 10:23:00 +0200 Subject: [PATCH 087/472] fix a bug after my NSC rules update --- cmake/common.cmake | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index 4bcbee1c4e..ab215a59e3 100755 --- a/cmake/common.cmake +++ 
b/cmake/common.cmake @@ -1427,15 +1427,18 @@ namespace @IMPL_NAMESPACE@ { ]=]) unset(RETVAL_EVAL) list(LENGTH CAP_NAMES CAP_COUNT) - math(EXPR CAP_COUNT "${CAP_COUNT} - 1") - foreach(i RANGE ${CAP_COUNT}) - list(GET CAP_NAMES ${i} CAP) - list(GET CAP_KINDS ${i} KIND) - string(CONFIGURE [=[ + if(CAP_COUNT GREATER 0) + math(EXPR LAST_CAP "${CAP_COUNT} - 1") + foreach(i RANGE ${LAST_CAP}) + list(GET CAP_NAMES ${i} CAP) + list(GET CAP_KINDS ${i} KIND) + string(CONFIGURE [=[ retval += ".@CAP@_" + std::to_string(@KIND@.@CAP@); -]=] RETVALUE_VIEW @ONLY) - string(APPEND RETVAL_EVAL "${RETVALUE_VIEW}") - endforeach() +]=] RETVALUE_VIEW @ONLY) + string(APPEND RETVAL_EVAL "${RETVALUE_VIEW}") + endforeach() + endif() + string(CONFIGURE "${HEADER_ITEM_VIEW}" HEADER_ITEM_EVAL @ONLY) set_property(TARGET ${IMPL_TARGET} APPEND_STRING PROPERTY NBL_HEADER_CONTENT "${HEADER_ITEM_EVAL}") From 0c57c4e2e6c3cbfa5d41c0079cacc82a191b2437 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 22 Oct 2025 14:58:21 +0200 Subject: [PATCH 088/472] Resolved conflicts, cherry-picked 93814913e84eb86e4c625d9e32d83221c62965c9 From d45e3a7f7b8cead31283e5cdbcb8e6bd2bb81875 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 27 Oct 2025 13:48:50 +0100 Subject: [PATCH 089/472] update examples_tests submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index ac85bdb781..776f925b8f 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit ac85bdb781c77fdde694a3da81a8f5831d17e96d +Subproject commit 776f925b8fa72348aa41a687089af280a5a8f57c From 15a91b4a584892a1115fb583402c5155a97ddafa Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 27 Oct 2025 15:52:36 +0100 Subject: [PATCH 090/472] Updated examples --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index f85ae8045c..22f2a17401 160000 --- a/examples_tests +++ b/examples_tests @@ 
-1 +1 @@ -Subproject commit f85ae8045c13380ace4c124d8a07349b4fd5fb62 +Subproject commit 22f2a17401e8e70dddff477e11db12ebd1dea2bd From 5ecf50e68429a7dca39751efa5851a8fb2fca96a Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 28 Oct 2025 12:37:48 +0100 Subject: [PATCH 091/472] fix IES Profile average emission and domain calculations --- examples_tests | 2 +- src/nbl/asset/utils/CIESProfile.h | 7 ++++- src/nbl/asset/utils/CIESProfileParser.cpp | 38 ++++++++++++----------- 3 files changed, 27 insertions(+), 20 deletions(-) diff --git a/examples_tests b/examples_tests index 332e8d72cf..1fe9697453 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 332e8d72cf776dd78e4cf707d87c25f0aeeb6342 +Subproject commit 1fe969745304a3492b04c3d1902561d92f467827 diff --git a/src/nbl/asset/utils/CIESProfile.h b/src/nbl/asset/utils/CIESProfile.h index ea3d539613..1da2c60b79 100644 --- a/src/nbl/asset/utils/CIESProfile.h +++ b/src/nbl/asset/utils/CIESProfile.h @@ -72,7 +72,12 @@ namespace nbl inline IES_STORAGE_FORMAT getAvgEmmision(const bool fullDomain=false) const { if (fullDomain) - return totalEmissionIntegral*0.25/core::radians(vAngles.back()-vAngles.front()); + { + const float cosLo = std::cos(core::radians(vAngles.front())); + const float cosHi = std::cos(core::radians(vAngles.back())); + const float dsinTheta = cosLo - cosHi; + return totalEmissionIntegral*(0.5/core::PI())/dsinTheta; + } return avgEmmision; } diff --git a/src/nbl/asset/utils/CIESProfileParser.cpp b/src/nbl/asset/utils/CIESProfileParser.cpp index e0593cd7f2..2c6ce873be 100644 --- a/src/nbl/asset/utils/CIESProfileParser.cpp +++ b/src/nbl/asset/utils/CIESProfileParser.cpp @@ -180,15 +180,23 @@ bool CIESProfileParser::parse(CIESProfile& result) float totalEmissionIntegral = 0.0, nonZeroEmissionDomainSize = 0.0; constexpr auto FULL_SOLID_ANGLE = 4.0f * core::PI(); + // TODO: this code could have two separate inner for loops for `result.symmetry != CIESProfile::ISOTROPIC` cases const auto 
H_ANGLES_I_RANGE = result.symmetry != CIESProfile::ISOTROPIC ? result.hAngles.size() - 1 : 1; const auto V_ANGLES_I_RANGE = result.vAngles.size() - 1; - for (size_t i = 0; i < H_ANGLES_I_RANGE; i++) + for (size_t j = 0; j < V_ANGLES_I_RANGE; j++) { - const float dPhiRad = result.symmetry != CIESProfile::ISOTROPIC ? (hAngles[i + 1] - hAngles[i]) : core::PI() * 2.0f; - - for (size_t j = 0; j < V_ANGLES_I_RANGE; j++) + const float thetaRad = core::radians(result.vAngles[j]); + const float cosLo = std::cos(thetaRad); + const float cosHi = std::cos(core::radians(result.vAngles[j+1])); + const float dsinTheta = cosLo - cosHi; + + float stripIntegral = 0.f; + float nonZeroStripDomain = 0.f; + for (size_t i = 0; i < H_ANGLES_I_RANGE; i++) { + const float dPhiRad = result.symmetry != CIESProfile::ISOTROPIC ? core::radians(hAngles[i + 1] - hAngles[i]) : (core::PI() * 2.0f); + const auto candelaValue = result.getCandelaValue(i, j); // interpolate candela value spanned onto a solid angle @@ -199,23 +207,17 @@ bool CIESProfileParser::parse(CIESProfile& result) if (result.maxCandelaValue < candelaValue) result.maxCandelaValue = candelaValue; - const float thetaRad = core::radians(result.vAngles[j]); - const float cosLo = std::cos(core::radians(result.vAngles[j])); - const float cosHi = std::cos(core::radians(result.vAngles[j + 1])); - - const auto differentialSolidAngle = dPhiRad*(cosLo - cosHi); - const auto integralV = candelaAverage * differentialSolidAngle; - - if (integralV > 0.0) - { - totalEmissionIntegral += integralV; - nonZeroEmissionDomainSize += differentialSolidAngle; - } + stripIntegral += candelaAverage*dPhiRad; + if (candelaAverage>0.f) + nonZeroStripDomain += dPhiRad; } + totalEmissionIntegral += stripIntegral*dsinTheta; + nonZeroEmissionDomainSize += nonZeroStripDomain*dsinTheta; } - nonZeroEmissionDomainSize = std::clamp(nonZeroEmissionDomainSize, 0.0, FULL_SOLID_ANGLE); - if (nonZeroEmissionDomainSize <= 0) // protect us from division by 0 (just in case, we 
should never hit it) + assert(nonZeroEmissionDomainSize >= 0.f); + //assert(nonZeroEmissionDomainSize*fluxMultiplier =approx= 2.f*(cosBack-cosFront)*PI); + if (nonZeroEmissionDomainSize <= std::numeric_limits::min()) // protect us from division by small numbers (just in case, we should never hit it) return false; result.avgEmmision = totalEmissionIntegral / static_cast(nonZeroEmissionDomainSize); From 07feaff4b6f3a2696e2dae6d1c707822109f1a3d Mon Sep 17 00:00:00 2001 From: root Date: Tue, 28 Oct 2025 16:14:04 +0100 Subject: [PATCH 092/472] Update CI references --- ci | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci b/ci index 043f8a0aa0..606901755b 160000 --- a/ci +++ b/ci @@ -1 +1 @@ -Subproject commit 043f8a0aa074f134b3230d56f27ed7d0c645f6b3 +Subproject commit 606901755b9ba13715c796ac8b303c57732a7581 From e8ea1194403c1b2f3d4e0af507dae56faffbb8ba Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 28 Oct 2025 16:28:38 +0100 Subject: [PATCH 093/472] `-Wno-local-type-template-args` for NBL_CREATE_NSC_COMPILE_RULES, update examples_tests --- cmake/common.cmake | 1 + examples_tests | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index ab215a59e3..c1ab2aa3e1 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1179,6 +1179,7 @@ struct DeviceConfigCaps -Zpr -spirv -fspv-target-env=vulkan1.3 + -Wno-local-type-template-args ) if(NOT NBL_EMBED_BUILTIN_RESOURCES) diff --git a/examples_tests b/examples_tests index 776f925b8f..06b2d09bbc 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 776f925b8fa72348aa41a687089af280a5a8f57c +Subproject commit 06b2d09bbc338ee3f57d27587d099bbf1ad22949 From 1e9138e441ac2a1d4afdca71fa5eb390d0d218e5 Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 28 Oct 2025 16:41:26 +0100 Subject: [PATCH 094/472] make the IES octahedral maps corner sampled --- examples_tests | 2 +- include/nbl/builtin/glsl/ies/functions.glsl | 36 
+++++++++++-------- .../glsl/material_compiler/common.glsl | 3 +- src/nbl/asset/utils/CIESProfile.cpp | 16 ++++++--- 4 files changed, 36 insertions(+), 21 deletions(-) diff --git a/examples_tests b/examples_tests index 1fe9697453..b742c70c5a 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 1fe969745304a3492b04c3d1902561d92f467827 +Subproject commit b742c70c5abc6feaf22ebb56cbae8a2bbcfa12f9 diff --git a/include/nbl/builtin/glsl/ies/functions.glsl b/include/nbl/builtin/glsl/ies/functions.glsl index 70f5165f98..54c266d114 100644 --- a/include/nbl/builtin/glsl/ies/functions.glsl +++ b/include/nbl/builtin/glsl/ies/functions.glsl @@ -6,25 +6,33 @@ #include -// TODO: implement proper mirroing -// MIRROR_180_BITS = 0b001, Last Angle is 180, so map V slightly differently -// MIRROR_90_BITS = 0b010, Last Angle is 90, so map both U and V slightly differently -// ISOTROPIC_BITS = 0b011, texture to sample is Nx1, pretend v=middle always -// FULL_THETA_BIT = 0b100, handle extended domain and rotate by 45 degrees for anisotropic - -vec2 nbl_glsl_IES_convert_dir_to_uv(vec3 dir) { - float sum = dot(vec3(1.0f), abs(dir)); +// TODO: when rewriting to HLSL this is not IES namespace or folder, this should be octahedral mapping sitting somewhere where the spherical/polar sits +// NOTE: I changed it to return NDC [-1,1]^2 instead of UV coords [0,1]^2 +vec2 nbl_glsl_TODOnamespace_octahedral_mapping(vec3 dir) +{ + float sum = dot(vec3(1.0f), abs(dir)); vec3 s = dir / sum; - if(s.z < 0.0f) { - s.xy = sign(s.xy) * (1.0f - abs(s.yx)); + if(s.z < 0.0f) + { + const uvec2 flipSignMask = floatBitsToUint(s.xy)&0x80000000u; + s.xy = uintBitsToFloat(floatBitsToUint(1.0f - abs(s.yx))^flipSignMask); } - return s.xy * 0.5f + 0.5f; + return s.xy; } -// vec2 nbl_glsl_IES_convert_dir_to_uv(vec3 dir) { -// return vec2((atan(dir.x, dir.y) + nbl_glsl_PI) / (2.0*nbl_glsl_PI), acos(dir.z) / nbl_glsl_PI); -// } +// TODO: implement proper mirroing +// MIRROR_180_BITS = 0b001, Last 
Angle is 180, so map V with MIRROR and corner sampling off +// MIRROR_90_BITS = 0b010, Last Angle is 90, so map both U and V with MIRROR and corner sampling off +// ISOTROPIC_BITS = 0b011, texture to sample is Nx1, pretend v=middle always , and make u REPEAT or CLAMP_TO_BORDER +// FULL_THETA_BIT = 0b100, handle truncated domain and rotate by 45 degrees for anisotropic +// (certain combos wont work like 90 degree 2 symmetry domain & half theta), it really needs to be an 8 case label thing explicitly enumerated +vec2 nbl_glsl_IES_convert_dir_to_uv(vec3 dir, vec2 halfMinusHalfPixel) +{ + // halfMinusHalfPixel = 0.5-0.5/texSize + // believe it or not, cornerSampled(NDC*0.5+0.5) = NDC*0.5*(1-1/texSize)+0.5 + return nbl_glsl_TODOnamespace_octahedral_mapping(dir)*halfMinusHalfPixel+0.5; +} #endif \ No newline at end of file diff --git a/include/nbl/builtin/glsl/material_compiler/common.glsl b/include/nbl/builtin/glsl/material_compiler/common.glsl index da67b12cbf..ba3c95d5e2 100644 --- a/include/nbl/builtin/glsl/material_compiler/common.glsl +++ b/include/nbl/builtin/glsl/material_compiler/common.glsl @@ -601,7 +601,8 @@ vec3 nbl_glsl_MC_oriented_material_t_getEmissive(in nbl_glsl_MC_oriented_materia if ((floatBitsToInt(emitter.orientation[0])&1u) != 1u) { right *= -1; } - return emissive * nbl_glsl_vTextureGrad(emitter.emissionProfile, nbl_glsl_IES_convert_dir_to_uv(mat3(right, up, view)*dir), mat2(0.0)).r; + vec2 halfMinusHalfPixel = 0.5-0.5/vec2(nbl_glsl_unpackSize(emitter.emissionProfile)); + return emissive * nbl_glsl_vTextureGrad(emitter.emissionProfile, nbl_glsl_IES_convert_dir_to_uv(mat3(right, up, view)*dir,halfMinusHalfPixel), mat2(0.0)).r; } #endif return emissive; diff --git a/src/nbl/asset/utils/CIESProfile.cpp b/src/nbl/asset/utils/CIESProfile.cpp index b507ab0d45..59829b9c71 100644 --- a/src/nbl/asset/utils/CIESProfile.cpp +++ b/src/nbl/asset/utils/CIESProfile.cpp @@ -88,8 +88,8 @@ inline core::vectorSIMDf CIESProfile::octahdronUVToDir(const float& u, const 
flo float abs_x = core::abs(pos.x), abs_y = core::abs(pos.y); pos.z = 1.0 - abs_x - abs_y; if (pos.z < 0.0) { - pos.x = core::sign(pos.x) * (1.0 - abs_y); - pos.y = core::sign(pos.y) * (1.0 - abs_x); + pos.x = (pos.x<0.f ? (-1.f):1.f) * (1.0 - abs_y); + pos.y = (pos.y<0.f ? (-1.f):1.f) * (1.0 - abs_x); } return core::normalize(pos); @@ -116,6 +116,8 @@ core::smart_refctd_ptr CIESProfile::createIESTexture(Execu if (height > CDC_MAX_TEXTURE_HEIGHT) height = CDC_MAX_TEXTURE_HEIGHT; + // TODO: If no symmetry (no folding in half and abuse of mirror sampler) make dimensions odd-sized so middle texel taps the south pole + asset::ICPUImage::SCreationParams imgInfo; imgInfo.type = asset::ICPUImage::ET_2D; imgInfo.extent.width = width; @@ -161,15 +163,19 @@ core::smart_refctd_ptr CIESProfile::createIESTexture(Execu const double maxValue = getMaxCandelaValue(); const double maxValueRecip = 1.0 / maxValue; - const double vertInv = 1.0 / height; - const double horiInv = 1.0 / width; + // There is one huge issue, the IES files love to give us values for degrees 0, 90, 180 an 360 + // So standard octahedral mapping won't work, because for above data points you need corner sampled images. + const float vertInv = 1.0 / (height-1); + const float horiInv = 1.0 / (width-1); const double flattenTarget = getAvgEmmision(fullDomainFlatten); const double domainLo = core::radians(vAngles.front()); const double domainHi = core::radians(vAngles.back()); auto fill = [&](uint32_t blockArrayOffset, core::vectorSIMDu32 position) -> void { - const auto dir = octahdronUVToDir(((float)position.x + 0.5) * vertInv, ((float)position.y + 0.5) * horiInv); + // We don't currently support generating IES images that exploit symmetries or reduced domains, all are full octahederal mappings of a sphere. + // If we did, we'd rely on MIRROR and CLAMP samplers to do some of the work for us while handling the discontinuity due to corner sampling. 
+ const auto dir = octahdronUVToDir(position.x * vertInv, position.y * horiInv); const auto [theta, phi] = sphericalDirToRadians(dir); const auto intensity = sample(theta, phi); From be62e3ac5716e93794423efbbdd7e0d34a00683b Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 28 Oct 2025 17:36:32 +0100 Subject: [PATCH 095/472] improve how the resolution of an IES is calculated but also leave a TODO on how to improve it even more --- examples_tests | 2 +- .../glsl/material_compiler/common.glsl | 2 +- src/nbl/asset/utils/CIESProfile.cpp | 5 +++++ src/nbl/asset/utils/CIESProfileParser.cpp | 19 +++++++++++++------ 4 files changed, 20 insertions(+), 8 deletions(-) diff --git a/examples_tests b/examples_tests index b742c70c5a..cd3efce0ac 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit b742c70c5abc6feaf22ebb56cbae8a2bbcfa12f9 +Subproject commit cd3efce0ac9b498bd8569a68fb9ddef0f6f19332 diff --git a/include/nbl/builtin/glsl/material_compiler/common.glsl b/include/nbl/builtin/glsl/material_compiler/common.glsl index ba3c95d5e2..f6b7d97c46 100644 --- a/include/nbl/builtin/glsl/material_compiler/common.glsl +++ b/include/nbl/builtin/glsl/material_compiler/common.glsl @@ -601,7 +601,7 @@ vec3 nbl_glsl_MC_oriented_material_t_getEmissive(in nbl_glsl_MC_oriented_materia if ((floatBitsToInt(emitter.orientation[0])&1u) != 1u) { right *= -1; } - vec2 halfMinusHalfPixel = 0.5-0.5/vec2(nbl_glsl_unpackSize(emitter.emissionProfile)); + vec2 halfMinusHalfPixel = vec2(0.5)-vec2(0.5)/vec2(nbl_glsl_unpackSize(emitter.emissionProfile)); return emissive * nbl_glsl_vTextureGrad(emitter.emissionProfile, nbl_glsl_IES_convert_dir_to_uv(mat3(right, up, view)*dir,halfMinusHalfPixel), mat2(0.0)).r; } #endif diff --git a/src/nbl/asset/utils/CIESProfile.cpp b/src/nbl/asset/utils/CIESProfile.cpp index 59829b9c71..431cc14eba 100644 --- a/src/nbl/asset/utils/CIESProfile.cpp +++ b/src/nbl/asset/utils/CIESProfile.cpp @@ -118,6 +118,11 @@ core::smart_refctd_ptr 
CIESProfile::createIESTexture(Execu // TODO: If no symmetry (no folding in half and abuse of mirror sampler) make dimensions odd-sized so middle texel taps the south pole + // TODO: This is hack because the mitsuba loader and its material compiler use Virtual Texturing, and there's some bug with IES not sampling sub 128x128 mip levels + // don't want to spend time to fix this since we'll be using descriptor indexing for the next iteration + width = core::max(width,128); + height = core::max(height,128); + asset::ICPUImage::SCreationParams imgInfo; imgInfo.type = asset::ICPUImage::ET_2D; imgInfo.extent.width = width; diff --git a/src/nbl/asset/utils/CIESProfileParser.cpp b/src/nbl/asset/utils/CIESProfileParser.cpp index 2c6ce873be..dd6b321414 100644 --- a/src/nbl/asset/utils/CIESProfileParser.cpp +++ b/src/nbl/asset/utils/CIESProfileParser.cpp @@ -106,12 +106,6 @@ bool CIESProfileParser::parse(CIESProfile& result) if (vSize < 2) return false; - { - const uint32_t maxDimMeasureSize = core::max(hSize, vSize); - result.optimalIESResolution = decltype(result.optimalIESResolution){ maxDimMeasureSize, maxDimMeasureSize }; - result.optimalIESResolution *= 2u; // safe bias for our bilinear interpolation to work nicely and increase resolution of a profile - } - auto& vAngles = result.vAngles; for (int i = 0; i < vSize; i++) { vAngles[i] = getDouble("vertical angle truncated"); @@ -184,6 +178,7 @@ bool CIESProfileParser::parse(CIESProfile& result) const auto H_ANGLES_I_RANGE = result.symmetry != CIESProfile::ISOTROPIC ? result.hAngles.size() - 1 : 1; const auto V_ANGLES_I_RANGE = result.vAngles.size() - 1; + float smallestRangeSolidAngle = FULL_SOLID_ANGLE; for (size_t j = 0; j < V_ANGLES_I_RANGE; j++) { const float thetaRad = core::radians(result.vAngles[j]); @@ -196,6 +191,11 @@ bool CIESProfileParser::parse(CIESProfile& result) for (size_t i = 0; i < H_ANGLES_I_RANGE; i++) { const float dPhiRad = result.symmetry != CIESProfile::ISOTROPIC ? 
core::radians(hAngles[i + 1] - hAngles[i]) : (core::PI() * 2.0f); + // TODO: in reality one should transform the 4 vertices (or 3) into octahedral map, work out the dUV/dPhi and dUV/dTheta vectors as-if for Anisotropic Filtering + // then choose the minor axis length, and use that as a pixel size (since looking for smallest thing, dont have to worry about handling discont) + const float solidAngle = dsinTheta * dPhiRad; + if (solidAngle= 0.f); //assert(nonZeroEmissionDomainSize*fluxMultiplier =approx= 2.f*(cosBack-cosFront)*PI); if (nonZeroEmissionDomainSize <= std::numeric_limits::min()) // protect us from division by small numbers (just in case, we should never hit it) From 0c03e2f7517c6cd8cfff2dfb5c874a3da41c27e7 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 28 Oct 2025 19:37:17 +0100 Subject: [PATCH 096/472] Update CI references --- ci | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci b/ci index 606901755b..8c3e98c651 160000 --- a/ci +++ b/ci @@ -1 +1 @@ -Subproject commit 606901755b9ba13715c796ac8b303c57732a7581 +Subproject commit 8c3e98c651e55a9e0e9c6121d055d483b2f90618 From 80de6aaa741b908cab2c9c2ade3e04ce049f3204 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 29 Oct 2025 11:58:52 +0100 Subject: [PATCH 097/472] update examples_tests submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 06b2d09bbc..7d0b0db615 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 06b2d09bbc338ee3f57d27587d099bbf1ad22949 +Subproject commit 7d0b0db6159ee194c8d9a2f094a03f85fc0c7f8c From e05478d676faf15c4b61d8ddf541d8a80fcc785b Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 29 Oct 2025 15:12:00 +0100 Subject: [PATCH 098/472] update examples_tests submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 7d0b0db615..6d2c3d4f32 160000 --- a/examples_tests 
+++ b/examples_tests @@ -1 +1 @@ -Subproject commit 7d0b0db6159ee194c8d9a2f094a03f85fc0c7f8c +Subproject commit 6d2c3d4f32383eaeb706bef30b47e68292e7f24f From 33e742842f6b9fe9d23f101af63b647d00f4b6b2 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Wed, 29 Oct 2025 17:12:05 +0100 Subject: [PATCH 099/472] Update examples --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 22f2a17401..eb1e29f4d0 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 22f2a17401e8e70dddff477e11db12ebd1dea2bd +Subproject commit eb1e29f4d071956d8397108680cb0256ec012b5b From 8d0c2375aa79e617200ee738dc65ad3890f14f76 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 30 Oct 2025 15:21:36 +0100 Subject: [PATCH 100/472] add createGrid to geometry creator --- include/nbl/asset/utils/CGeometryCreator.h | 8 +- src/nbl/asset/utils/CGeometryCreator.cpp | 97 ++++++++++++++++++++++ 2 files changed, 104 insertions(+), 1 deletion(-) diff --git a/include/nbl/asset/utils/CGeometryCreator.h b/include/nbl/asset/utils/CGeometryCreator.h index 5c669b9510..290c81b239 100644 --- a/include/nbl/asset/utils/CGeometryCreator.h +++ b/include/nbl/asset/utils/CGeometryCreator.h @@ -115,9 +115,15 @@ class NBL_API2 CGeometryCreator final : public core::IReferenceCounted \param subdivision Specifies subdivision level of the icosphere. \param smooth Specifies whether vertecies should be built for smooth or flat shading. */ - core::smart_refctd_ptr createIcoSphere(float radius=1.f, uint32_t subdivision=1, bool smooth=false) const; + //! 
Create a grid geometry + /** + No vertex buffer, only index in triangle strip topology without reset, "snake" with degenerates + \param "resolution" Specifies resolution of grid + */ + core::smart_refctd_ptr createGrid(const hlsl::uint16_t2 resolution = { 128u, 128u }) const; + private: SCreationParams m_params; }; diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp index 9dc8cdd42a..8bf0a7521b 100644 --- a/src/nbl/asset/utils/CGeometryCreator.cpp +++ b/src/nbl/asset/utils/CGeometryCreator.cpp @@ -498,6 +498,15 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float memcpy(normals + vertex_i, &quantizedBottomNormal, sizeof(quantizedBottomNormal)); } + for (auto i = 0u; i < vertexCount; ++i) + { + auto position = positions[i]; + auto len = glm::length(position); + + auto ok = len >= 1.f - 0.01f; + assert(ok); + } + CPolygonGeometryManipulator::recomputeContentHashes(retval.get()); return retval; } @@ -1889,5 +1898,93 @@ core::smart_refctd_ptr CGeometryCreator::createIcoSphere(fl return retval; } +core::smart_refctd_ptr CGeometryCreator::createGrid(const hlsl::uint16_t2 resolution) const +{ + if (resolution.x < 2 || resolution.y < 2) + return nullptr; + + auto retval = core::make_smart_refctd_ptr(); + retval->setIndexing(IPolygonGeometryBase::TriangleStrip()); + + //! Create indices + /* + i \in [0, resolution.x - 1], j \in [0, resolution.y - 1] + logical vertex id : V(i, j) + + Eg. 
resolution = {5u, 4u}: + + j=3 15--16--17--18--19 + | \ | \ | \ | \ | + j=2 10--11--12--13--14 + | \ | \ | \ | \ | + j=1 5-- 6-- 7-- 8-- 9 + | \ | \ | \ | \ | + j=0 0-- 1-- 2-- 3-- 4 + i=0 1 2 3 4 + + Strip order (one draw), rows linked by 2 degenerate indices: + row 0 -> 1 (L->R): 0,5, 1,6, 2,7, 3,8, 4,9, 9,9 + row 1 -> 2 (R->L): 9,14, 8,13, 7,12, 6,11, 5,10, 5,5 + row 2 -> 3 (L->R): 5,10, 6,11, 7,12, 8,13, 9,14, 14,14 + */ + + const size_t indexCount = 2ull * resolution.x * (resolution.y - 1) + 2ull * (resolution.y - 2); + const size_t maxIndex = resolution.x * resolution.y - 1u; + + auto createIndices = [&]() -> void + { + auto indexView = createIndexView(indexCount, maxIndex); + + auto V = [&](IndexT i, IndexT j) { return IndexT(j * resolution.x + i); }; + auto* index = static_cast(indexView.src.buffer->getPointer()); + #define PUSH_INDEX(value) *index = value; ++index; + + for (IndexT j = 0u; j < resolution.y - 1; ++j) + { + if ((j & 1u) == 0) + { + for (IndexT i = 0u; i < resolution.x; ++i) + { + PUSH_INDEX(V(i, j)) + PUSH_INDEX(V(i, j + 1)) + } + + if (j + 1 < resolution.y - 1) + { + IndexT last = V(resolution.x - 1, j + 1); + PUSH_INDEX(last) + PUSH_INDEX(last) + } + } + else + { + for (int i = int(resolution.x) - 1; i >= 0; --i) + { + PUSH_INDEX(V(uint32_t(i), j)) + PUSH_INDEX(V(uint32_t(i), j + 1)) + } + + if (j + 1 < resolution.y - 1) + { + IndexT first = V(0, j + 1); + PUSH_INDEX(first) + PUSH_INDEX(first) + } + } + } + retval->setIndexView(std::move(indexView)); + }; + + if (maxIndex <= std::numeric_limits::max()) + createIndices.template operator() < uint16_t > (); + else if (maxIndex <= std::numeric_limits::max()) + createIndices.template operator() < uint32_t > (); + else + return nullptr; + + CPolygonGeometryManipulator::recomputeContentHashes(retval.get()); + return retval; +} + } // end namespace nbl::asset From dbd28055bba525730ba7fc3cfbc45cbc4c76b181 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 30 Oct 2025 15:24:36 +0100 
Subject: [PATCH 101/472] remove debug code I forgot about --- src/nbl/asset/utils/CGeometryCreator.cpp | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp index 8bf0a7521b..ae1995aa0e 100644 --- a/src/nbl/asset/utils/CGeometryCreator.cpp +++ b/src/nbl/asset/utils/CGeometryCreator.cpp @@ -498,15 +498,6 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float memcpy(normals + vertex_i, &quantizedBottomNormal, sizeof(quantizedBottomNormal)); } - for (auto i = 0u; i < vertexCount; ++i) - { - auto position = positions[i]; - auto len = glm::length(position); - - auto ok = len >= 1.f - 0.01f; - assert(ok); - } - CPolygonGeometryManipulator::recomputeContentHashes(retval.get()); return retval; } From b4ce8f6999c92d38a5fd0f4446c850fe239eebc2 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Thu, 30 Oct 2025 18:37:37 +0100 Subject: [PATCH 102/472] Updated examples --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index eb1e29f4d0..e1e8dd6fb0 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit eb1e29f4d071956d8397108680cb0256ec012b5b +Subproject commit e1e8dd6fb0c46612defeea46c960a6b85f4b4155 From 054038ffd81a660c36b6ceb596cd6bc8e6116f89 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 31 Oct 2025 12:40:04 +0100 Subject: [PATCH 103/472] add positions to geometry creator's grid for the polygon to be usable with GPU converter, encode to EF_A2R10G10B10_UNORM_PACK32, update examples_tests submodule --- examples_tests | 2 +- src/nbl/asset/utils/CGeometryCreator.cpp | 39 ++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 6d2c3d4f32..86a50c5a2b 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 6d2c3d4f32383eaeb706bef30b47e68292e7f24f +Subproject commit 
86a50c5a2b9b07aee676cc92d70b1910b4da46d4 diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp index ae1995aa0e..be37ec640a 100644 --- a/src/nbl/asset/utils/CGeometryCreator.cpp +++ b/src/nbl/asset/utils/CGeometryCreator.cpp @@ -1891,6 +1891,8 @@ core::smart_refctd_ptr CGeometryCreator::createIcoSphere(fl core::smart_refctd_ptr CGeometryCreator::createGrid(const hlsl::uint16_t2 resolution) const { + using namespace hlsl; + if (resolution.x < 2 || resolution.y < 2) return nullptr; @@ -1973,6 +1975,43 @@ core::smart_refctd_ptr CGeometryCreator::createGrid(const h else return nullptr; + //! Create positions + const size_t vertexCount = resolution.x * resolution.y; + { + shapes::AABB<4, float32_t> aabb; + aabb.maxVx = float32_t4((resolution.x - 0.5f) / float(resolution.x), 0.5f, (resolution.y - 0.5f) / float(resolution.y), 1.f); + aabb.minVx = float32_t4(0.5f / float(resolution.x), 0.5f, 0.5f / float(resolution.y), 1.f); + + const auto stride = getTexelOrBlockBytesize(); + const auto bytes = stride * vertexCount; + auto buffer = ICPUBuffer::create({ bytes, IBuffer::EUF_NONE }); + ICPUPolygonGeometry::SDataView positionView = { + .composed = { + .encodedDataRange = {.f32 = aabb}, + .stride = stride, + .format = EF_A2R10G10B10_UNORM_PACK32, + .rangeFormat = IGeometryBase::EAABBFormat::F32 + }, + .src = {.offset = 0,.size = buffer->getSize(),.buffer = core::smart_refctd_ptr(buffer)} + }; + + auto* packed = reinterpret_cast(buffer->getPointer()); + for (uint32_t j = 0; j < resolution.y; ++j) + for (uint32_t i = 0; i < resolution.x; ++i) + { + const double u = (i + 0.5) / double(resolution.x); + const double v = (j + 0.5) / double(resolution.y); + + float64_t4 rgbaunorm = { u, 0.5, v, 1.0 }; + + *packed = {}; + encodePixels(packed, (double*)&rgbaunorm); + ++packed; + } + + retval->setPositionView(std::move(positionView)); + } + CPolygonGeometryManipulator::recomputeContentHashes(retval.get()); return retval; } From 
f64a8f34edc5abf338d72a3042fa0b370d7554ce Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 3 Nov 2025 10:29:36 +0100 Subject: [PATCH 104/472] update examples_tests submodule --- examples_tests | 2 +- src/nbl/asset/utils/CGeometryCreator.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples_tests b/examples_tests index 86a50c5a2b..d0156858be 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 86a50c5a2b9b07aee676cc92d70b1910b4da46d4 +Subproject commit d0156858bedf2b985316184a73ac0a889350fdc2 diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp index be37ec640a..30a9eea7af 100644 --- a/src/nbl/asset/utils/CGeometryCreator.cpp +++ b/src/nbl/asset/utils/CGeometryCreator.cpp @@ -1982,7 +1982,7 @@ core::smart_refctd_ptr CGeometryCreator::createGrid(const h aabb.maxVx = float32_t4((resolution.x - 0.5f) / float(resolution.x), 0.5f, (resolution.y - 0.5f) / float(resolution.y), 1.f); aabb.minVx = float32_t4(0.5f / float(resolution.x), 0.5f, 0.5f / float(resolution.y), 1.f); - const auto stride = getTexelOrBlockBytesize(); + static constexpr auto stride = getTexelOrBlockBytesize(); const auto bytes = stride * vertexCount; auto buffer = ICPUBuffer::create({ bytes, IBuffer::EUF_NONE }); ICPUPolygonGeometry::SDataView positionView = { From c94199199856e3b5d89d6998d641080e14c511c7 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 3 Nov 2025 16:30:16 +0100 Subject: [PATCH 105/472] update examples_tests submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index d0156858be..462bb549f6 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit d0156858bedf2b985316184a73ac0a889350fdc2 +Subproject commit 462bb549f642fe0c02da384416766e8cc049adab From 95246c48adfd4f20bad093c93961e2fa8447860d Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 5 Nov 2025 15:07:09 
+0100 Subject: [PATCH 106/472] update examples_tests submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 462bb549f6..4cd5f027ea 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 462bb549f642fe0c02da384416766e8cc049adab +Subproject commit 4cd5f027eabdf88f84e16d47f8fdc6acdd1d36b4 From 4bb28642c39ecf3f21cfb87a4653183900ecee28 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 8 Nov 2025 10:21:23 +0100 Subject: [PATCH 107/472] add include/nbl/builtin/hlsl/math/octahedral.hlsl --- include/nbl/builtin/hlsl/math/octahedral.hlsl | 69 +++++++++++++++++++ src/nbl/builtin/CMakeLists.txt | 1 + 2 files changed, 70 insertions(+) create mode 100644 include/nbl/builtin/hlsl/math/octahedral.hlsl diff --git a/include/nbl/builtin/hlsl/math/octahedral.hlsl b/include/nbl/builtin/hlsl/math/octahedral.hlsl new file mode 100644 index 0000000000..55f845a0c8 --- /dev/null +++ b/include/nbl/builtin/hlsl/math/octahedral.hlsl @@ -0,0 +1,69 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_HLSL_MATH_OCTAHEDRAL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MATH_OCTAHEDRAL_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/numbers.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace math +{ + +// Octahedral Transform, maps 3D direction vectors to 2D square and vice versa +template +struct OctahedralTransform +{ + using scalar_type = T; + using vector2_type = vector; + using vector3_type = vector; + + // F : [-1, 1]^2 -> S^2 + static vector3_type eval(const vector2_type ndc) + { + vector3_type p = vector3_type(ndc.xy, scalar_type(0)); + const vector2_type a = abs(p.xy); + + p.z = scalar_type(1) - a.x - a.y; + + if (p.z < scalar_type(0)) + p.xy = hlsl::sign(p.xy) * (scalar_type(1) - abs(p.yx)); + + return hlsl::normalize(p); + } + + // F^-1 : S^2 -> [-1, 1]^2 + static vector2_type inverse(vector3_type dir) + { + dir = hlsl::normalize(dir); + const scalar_type sum = hlsl::dot(vector3_type(scalar_type(1), scalar_type(1), scalar_type(1)), abs(dir)); + vector3_type s = dir / sum; + + if (s.z < scalar_type(0)) + s.xy = hlsl::sign(s.xy) * (scalar_type(1) - abs(s.yx)); + + return s.xy; + } + + // transforms direction vector into UV (for corner sampling) + // dir in S^2, halfMinusHalfPixel in [0, 0.5)^2, + // where halfMinusHalfPixel = 0.5-0.5/texSize + // and texSize.x >= 1, texSize.y >= 1 + // NOTE/TODO: not best place to keep it here imo + static vector2_type toCornerSampledUV(vector3_type dir, vector2_type halfMinusHalfPixel) + { + // note: cornerSampled(NDC*0.5+0.5) = NDC*0.5*(1-1/texSize)+0.5 + return inverse(dir) * halfMinusHalfPixel + scalar_type(0.5); + } +}; + +} +} +} + +#endif // _NBL_BUILTIN_HLSL_MATH_OCTAHEDRAL_INCLUDED_ \ No newline at end of file diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index cc81b093a2..e15c1458da 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ 
b/src/nbl/builtin/CMakeLists.txt @@ -224,6 +224,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/functions.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/geometry.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/intutil.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/angle_adding.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/octahedral.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quadratic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/cubic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quartic.hlsl") From 12d608fbb805a24e597cea091c3da32ba090af9f Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 8 Nov 2025 10:23:55 +0100 Subject: [PATCH 108/472] CONTRIBUTING.md post update --- CONTRIBUTING.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 52130bd445..7078612541 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -4,7 +4,6 @@ Thank you for your interest in contributing to the Nabla Engine! Nabla is a high ## Table of Contents -<<<<<<< HEAD - [How Can I Contribute?](#how-can-i-contribute) - [Reporting Bugs](#reporting-bugs) - [Suggesting Enhancements](#suggesting-enhancements) @@ -13,9 +12,6 @@ Thank you for your interest in contributing to the Nabla Engine! Nabla is a high - [Pull Request Process](#pull-request-process) - [Connect with Other Project Contributors](#connect-with-other-project-contributors) - [License](#license) -======= -https://github.com/Devsh-Graphics-Programming/Nabla/issues ->>>>>>> 0c03e2f7517c6cd8cfff2dfb5c874a3da41c27e7 ## How Can I Contribute? 
From 2eadc3bf9b3fbf8708404589d7abe17fbb7b4719 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 8 Nov 2025 11:54:43 +0100 Subject: [PATCH 109/472] wipe CIESProfile::octahdronUVToDir and use HLSL version --- include/nbl/builtin/hlsl/math/octahedral.hlsl | 32 +++++++++++-------- src/nbl/asset/utils/CIESProfile.cpp | 23 ++++--------- 2 files changed, 26 insertions(+), 29 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/octahedral.hlsl b/include/nbl/builtin/hlsl/math/octahedral.hlsl index 55f845a0c8..1f3c34d6f5 100644 --- a/include/nbl/builtin/hlsl/math/octahedral.hlsl +++ b/include/nbl/builtin/hlsl/math/octahedral.hlsl @@ -16,49 +16,55 @@ namespace math { // Octahedral Transform, maps 3D direction vectors to 2D square and vice versa -template +template struct OctahedralTransform { using scalar_type = T; using vector2_type = vector; using vector3_type = vector; - // F : [-1, 1]^2 -> S^2 - static vector3_type eval(const vector2_type ndc) + // F : [0, 1]^2 -> S^2 + static vector3_type uvToDir(const vector2_type uv) { - vector3_type p = vector3_type(ndc.xy, scalar_type(0)); - const vector2_type a = abs(p.xy); + vector3_type p = vector3_type((uv * scalar_type(2) - scalar_type(1)), scalar_type(0)); + const scalar_type a_x = abs(p.x); const scalar_type a_y = abs(p.y); - p.z = scalar_type(1) - a.x - a.y; + p.z = scalar_type(1) - a_x - a_y; - if (p.z < scalar_type(0)) - p.xy = hlsl::sign(p.xy) * (scalar_type(1) - abs(p.yx)); + if (p.z < scalar_type(0)) + { + p.x = (p.x < scalar_type(0) ? scalar_type(-1) : scalar_type(1)) * (scalar_type(1) - a_y); + p.y = (p.y < scalar_type(0) ? 
scalar_type(-1) : scalar_type(1)) * (scalar_type(1) - a_x); + } return hlsl::normalize(p); } // F^-1 : S^2 -> [-1, 1]^2 - static vector2_type inverse(vector3_type dir) + static vector2_type dirToNDC(vector3_type dir) { dir = hlsl::normalize(dir); const scalar_type sum = hlsl::dot(vector3_type(scalar_type(1), scalar_type(1), scalar_type(1)), abs(dir)); vector3_type s = dir / sum; if (s.z < scalar_type(0)) - s.xy = hlsl::sign(s.xy) * (scalar_type(1) - abs(s.yx)); + { + s.x = (s.x < scalar_type(0) ? scalar_type(-1) : scalar_type(1)) * (scalar_type(1) - abs(s.y)); + s.y = (s.y < scalar_type(0) ? scalar_type(-1) : scalar_type(1)) * (scalar_type(1) - abs(s.x)); + } return s.xy; } - // transforms direction vector into UV (for corner sampling) + // transforms direction vector into UV for corner sampling // dir in S^2, halfMinusHalfPixel in [0, 0.5)^2, // where halfMinusHalfPixel = 0.5-0.5/texSize // and texSize.x >= 1, texSize.y >= 1 - // NOTE/TODO: not best place to keep it here imo + // NOTE/TODO: not best place to keep it here static vector2_type toCornerSampledUV(vector3_type dir, vector2_type halfMinusHalfPixel) { // note: cornerSampled(NDC*0.5+0.5) = NDC*0.5*(1-1/texSize)+0.5 - return inverse(dir) * halfMinusHalfPixel + scalar_type(0.5); + return dirToNDC(dir) * halfMinusHalfPixel + scalar_type(0.5); } }; diff --git a/src/nbl/asset/utils/CIESProfile.cpp b/src/nbl/asset/utils/CIESProfile.cpp index 2cb79aa9f1..c981cd3208 100644 --- a/src/nbl/asset/utils/CIESProfile.cpp +++ b/src/nbl/asset/utils/CIESProfile.cpp @@ -2,6 +2,7 @@ #include #include "nbl/asset/filters/CBasicImageFilterCommon.h" +#include "nbl/builtin/hlsl/math/octahedral.hlsl" using namespace nbl; using namespace asset; @@ -82,20 +83,6 @@ const CIESProfile::IES_STORAGE_FORMAT CIESProfile::sample(IES_STORAGE_FORMAT the return s0 * (1.0 - u) + s1 * u; } -inline core::vectorSIMDf CIESProfile::octahdronUVToDir(const float& u, const float& v) -{ - core::vectorSIMDf pos = core::vectorSIMDf(2 * (u - 0.5), 2 * (v - 
0.5), 0.0); - float abs_x = core::abs(pos.x), abs_y = core::abs(pos.y); - pos.z = 1.0 - abs_x - abs_y; - if (pos.z < 0.0) { - pos.x = (pos.x<0.f ? (-1.f):1.f) * (1.0 - abs_y); - pos.y = (pos.y<0.f ? (-1.f):1.f) * (1.0 - abs_x); - } - - return core::normalize(pos); -} - - inline std::pair CIESProfile::sphericalDirToRadians(const core::vectorSIMDf& dir) { const float theta = std::acos(std::clamp(dir.z, -1.f, 1.f)); @@ -183,8 +170,12 @@ core::smart_refctd_ptr CIESProfile::createIESTexture(Execu { // We don't currently support generating IES images that exploit symmetries or reduced domains, all are full octahederal mappings of a sphere. // If we did, we'd rely on MIRROR and CLAMP samplers to do some of the work for us while handling the discontinuity due to corner sampling. - const auto dir = octahdronUVToDir(position.x * vertInv, position.y * horiInv); - const auto [theta, phi] = sphericalDirToRadians(dir); + + using Octahedral = hlsl::math::OctahedralTransform; + const auto uv = Octahedral::vector2_type(position.x * vertInv, position.y * horiInv); + const auto dir = Octahedral::uvToDir(uv); + const auto tmp = core::vectorSIMDf(dir.x, dir.y, dir.z); + const auto [theta, phi] = sphericalDirToRadians(tmp); const auto intensity = sample(theta, phi); //! 
blend the IES texture with "flatten" From 1d8aa312f7cb044e31fb48f3bf419cb872ffc37a Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 8 Nov 2025 12:40:12 +0100 Subject: [PATCH 110/472] wipe CIESProfile::sphericalDirToRadians and use polar.hlsl --- include/nbl/builtin/hlsl/math/octahedral.hlsl | 8 +++--- include/nbl/builtin/hlsl/math/polar.hlsl | 24 ++++++++-------- src/nbl/asset/utils/CIESProfile.cpp | 19 ++++--------- src/nbl/asset/utils/CIESProfile.h | 28 ------------------- 4 files changed, 23 insertions(+), 56 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/octahedral.hlsl b/include/nbl/builtin/hlsl/math/octahedral.hlsl index 1f3c34d6f5..8e32d60cb0 100644 --- a/include/nbl/builtin/hlsl/math/octahedral.hlsl +++ b/include/nbl/builtin/hlsl/math/octahedral.hlsl @@ -24,7 +24,7 @@ struct OctahedralTransform using vector3_type = vector; // F : [0, 1]^2 -> S^2 - static vector3_type uvToDir(const vector2_type uv) + static vector3_type uvToDir(NBL_CONST_REF_ARG(vector2_type) uv) { vector3_type p = vector3_type((uv * scalar_type(2) - scalar_type(1)), scalar_type(0)); const scalar_type a_x = abs(p.x); const scalar_type a_y = abs(p.y); @@ -41,9 +41,9 @@ struct OctahedralTransform } // F^-1 : S^2 -> [-1, 1]^2 - static vector2_type dirToNDC(vector3_type dir) + static vector2_type dirToNDC(NBL_CONST_REF_ARG(vector3_type) d) { - dir = hlsl::normalize(dir); + scalar_type dir = hlsl::normalize(d); const scalar_type sum = hlsl::dot(vector3_type(scalar_type(1), scalar_type(1), scalar_type(1)), abs(dir)); vector3_type s = dir / sum; @@ -61,7 +61,7 @@ struct OctahedralTransform // where halfMinusHalfPixel = 0.5-0.5/texSize // and texSize.x >= 1, texSize.y >= 1 // NOTE/TODO: not best place to keep it here - static vector2_type toCornerSampledUV(vector3_type dir, vector2_type halfMinusHalfPixel) + static vector2_type toCornerSampledUV(NBL_CONST_REF_ARG(vector3_type) dir, NBL_CONST_REF_ARG(vector2_type) halfMinusHalfPixel) { // note: cornerSampled(NDC*0.5+0.5) = 
NDC*0.5*(1-1/texSize)+0.5 return dirToNDC(dir) * halfMinusHalfPixel + scalar_type(0.5); diff --git a/include/nbl/builtin/hlsl/math/polar.hlsl b/include/nbl/builtin/hlsl/math/polar.hlsl index 7b30e3bb8f..59454e27e6 100644 --- a/include/nbl/builtin/hlsl/math/polar.hlsl +++ b/include/nbl/builtin/hlsl/math/polar.hlsl @@ -14,27 +14,29 @@ namespace hlsl namespace math { -template +template struct Polar { using scalar_type = T; using vector2_type = vector; using vector3_type = vector; - // should be normalized - static Polar createFromCartesian(const vector3_type coords) + // input must be normalized + static Polar createFromCartesian(NBL_CONST_REF_ARG(vector3_type) dir) { Polar retval; - retval.theta = hlsl::acos(coords.z); - retval.phi = hlsl::atan2(coords.y, coords.x); + retval.theta = acos(dir.z); + retval.phi = atan2(dir.y, dir.x); return retval; } - static vector3_type ToCartesian(const scalar_type theta, const scalar_type phi) + static vector3_type ToCartesian(NBL_CONST_REF_ARG(scalar_type) theta, NBL_CONST_REF_ARG(scalar_type) phi) { - return vector(hlsl::cos(phi) * hlsl::cos(theta), - hlsl::sin(phi) * hlsl::cos(theta), - hlsl::sin(theta)); + return vector( + cos(phi) * cos(theta), + sin(phi) * cos(theta), + sin(theta) + ); } vector3_type getCartesian() @@ -42,8 +44,8 @@ struct Polar return ToCartesian(theta, phi); } - scalar_type theta; - scalar_type phi; + scalar_type theta; //! polar angle in range [0, PI] + scalar_type phi; //! 
azimuthal angle in range [-PI, PI] }; } diff --git a/src/nbl/asset/utils/CIESProfile.cpp b/src/nbl/asset/utils/CIESProfile.cpp index c981cd3208..55b8b18fc8 100644 --- a/src/nbl/asset/utils/CIESProfile.cpp +++ b/src/nbl/asset/utils/CIESProfile.cpp @@ -3,6 +3,7 @@ #include #include "nbl/asset/filters/CBasicImageFilterCommon.h" #include "nbl/builtin/hlsl/math/octahedral.hlsl" +#include "nbl/builtin/hlsl/math/polar.hlsl" using namespace nbl; using namespace asset; @@ -83,14 +84,6 @@ const CIESProfile::IES_STORAGE_FORMAT CIESProfile::sample(IES_STORAGE_FORMAT the return s0 * (1.0 - u) + s1 * u; } -inline std::pair CIESProfile::sphericalDirToRadians(const core::vectorSIMDf& dir) -{ - const float theta = std::acos(std::clamp(dir.z, -1.f, 1.f)); - const float phi = std::atan2(dir.y, dir.x); - - return { theta, phi }; -} - template core::smart_refctd_ptr CIESProfile::createIESTexture(ExecutionPolicy&& policy, const float flatten, const bool fullDomainFlatten, uint32_t width, uint32_t height) const { @@ -172,15 +165,15 @@ core::smart_refctd_ptr CIESProfile::createIESTexture(Execu // If we did, we'd rely on MIRROR and CLAMP samplers to do some of the work for us while handling the discontinuity due to corner sampling. using Octahedral = hlsl::math::OctahedralTransform; + using Polar = hlsl::math::Polar; const auto uv = Octahedral::vector2_type(position.x * vertInv, position.y * horiInv); const auto dir = Octahedral::uvToDir(uv); - const auto tmp = core::vectorSIMDf(dir.x, dir.y, dir.z); - const auto [theta, phi] = sphericalDirToRadians(tmp); - const auto intensity = sample(theta, phi); - + const auto polar = Polar::createFromCartesian(dir); + const auto intensity = sample(polar.theta, polar.phi); + //! 
blend the IES texture with "flatten" double blendV = intensity * (1.0 - flatten); - if (fullDomainFlatten && domainLo<=theta && theta<=domainHi || intensity >0.0) + if (fullDomainFlatten && domainLo<= polar.theta && polar.theta<=domainHi || intensity >0.0) blendV += flattenTarget * flatten; blendV *= maxValueRecip; diff --git a/src/nbl/asset/utils/CIESProfile.h b/src/nbl/asset/utils/CIESProfile.h index 266b8435ec..1f6b2833e5 100644 --- a/src/nbl/asset/utils/CIESProfile.h +++ b/src/nbl/asset/utils/CIESProfile.h @@ -89,34 +89,6 @@ namespace nbl private: CIESProfile(PhotometricType type, size_t hSize, size_t vSize) : type(type), version(V_SIZE), hAngles(hSize), vAngles(vSize), data(hSize* vSize) {} - - // TODO for @Hazard, I would move it into separate file, we may use this abstraction somewhere too - //! Returns spherical coordinates with physics convention in radians - /* - https://en.wikipedia.org/wiki/Spherical_coordinate_system#/media/File:3D_Spherical.svg - Retval.first is "theta" polar angle in range [0, PI] & Retval.second "phi" is azimuthal angle - in range [-PI, PI] range - - Cartesian coordinates obtained from the spherical coordinates in Nabla - are assumed to have radius equal to 1 and therefore always are - - x = cos(phi)*sin(theta) - y = sin(phi)*sin(theta) - z = cos(theta) - */ - - static inline std::pair sphericalDirToRadians(const core::vectorSIMDf& dir); - - //! 
Octahedral coordinate mapping is following - /* - center is Z- - U+ from center is X+ - V+ from center is Y+ - - when viewed as a texture, the net folds, and the apex where the seams join is Z+ - */ - - static inline core::vectorSIMDf octahdronUVToDir(const float& u, const float& v); void setCandelaValue(size_t i, size_t j, IES_STORAGE_FORMAT val) { data[vAngles.size() * i + j] = val; } From 7105b0c5594c53f2cc5294cf950a956dd457b2c8 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 14 Nov 2025 14:32:48 +0100 Subject: [PATCH 111/472] add include/nbl/builtin/hlsl/ies/profile.hlsl and include/nbl/builtin/hlsl/ies/sampler.hlsl, wipe CIESProfile::sample, save work (won't compile!) --- include/nbl/builtin/hlsl/ies/profile.hlsl | 66 +++++++++ include/nbl/builtin/hlsl/ies/sampler.hlsl | 156 ++++++++++++++++++++++ src/nbl/asset/utils/CIESProfile.cpp | 78 +---------- src/nbl/asset/utils/CIESProfile.h | 111 ++++++--------- 4 files changed, 263 insertions(+), 148 deletions(-) create mode 100644 include/nbl/builtin/hlsl/ies/profile.hlsl create mode 100644 include/nbl/builtin/hlsl/ies/sampler.hlsl diff --git a/include/nbl/builtin/hlsl/ies/profile.hlsl b/include/nbl/builtin/hlsl/ies/profile.hlsl new file mode 100644 index 0000000000..440fe84a98 --- /dev/null +++ b/include/nbl/builtin/hlsl/ies/profile.hlsl @@ -0,0 +1,66 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_HLSL_IES_PROFILE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_IES_PROFILE_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace ies +{ + +struct ProfileProperties +{ + //! 
max 16K resolution + NBL_CONSTEXPR_STATIC_INLINE uint32_t CDC_MAX_TEXTURE_SIZE = 15360u; + NBL_CONSTEXPR_STATIC_INLINE uint32_t CDC_MAX_TEXTURE_HEIGHT = 8640u; + + NBL_CONSTEXPR_STATIC_INLINE uint32_t CDC_DEFAULT_TEXTURE_WIDTH = 1024u; + NBL_CONSTEXPR_STATIC_INLINE uint32_t CDC_DEFAULT_TEXTURE_HEIGHT = 1024u; + + NBL_CONSTEXPR_STATIC_INLINE float32_t MAX_VANGLE = 180.f; + NBL_CONSTEXPR_STATIC_INLINE float32_t MAX_HANGLE = 360.f; + + enum Version : uint16_t + { + V_1995, + V_2002, + V_SIZE + }; + + enum PhotometricType : uint16_t + { + TYPE_NONE, + TYPE_C, + TYPE_B, + TYPE_A + }; + + enum LuminairePlanesSymmetry : uint16_t + { + ISOTROPIC, //! Only one horizontal angle present and a luminaire is assumed to be laterally axial symmetric + QUAD_SYMETRIC, //! The luminaire is assumed to be symmetric in each quadrant + HALF_SYMETRIC, //! The luminaire is assumed to be symmetric about the 0 to 180 degree plane + OTHER_HALF_SYMMETRIC, //! HALF_SYMETRIC case for legacy V_1995 version where horizontal angles are in range [90, 270], in that case the parser patches horizontal angles to be HALF_SYMETRIC + NO_LATERAL_SYMMET //! The luminaire is assumed to exhibit no lateral symmet + }; + + PhotometricType type; + Version version; + LuminairePlanesSymmetry symmetry; + + float32_t maxCandelaValue; //! Max value from this->data vector + float32_t totalEmissionIntegral; //! Total energy emitted + float32_t avgEmmision; //! totalEmissionIntegral / +}; + +} +} +} + +#endif // _NBL_BUILTIN_HLSL_IES_PROFILE_INCLUDED_ \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/ies/sampler.hlsl b/include/nbl/builtin/hlsl/ies/sampler.hlsl new file mode 100644 index 0000000000..b8ee40b84f --- /dev/null +++ b/include/nbl/builtin/hlsl/ies/sampler.hlsl @@ -0,0 +1,156 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_HLSL_IES_SAMPLER_INCLUDED_ +#define _NBL_BUILTIN_HLSL_IES_SAMPLER_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/numbers.hlsl" +#include "nbl/builtin/hlsl/concepts.hlsl" +#include "nbl/builtin/hlsl/ies/profile.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace ies +{ +namespace concepts +{ +#define NBL_CONCEPT_NAME IESAccessor +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (accessor_t) +NBL_CONCEPT_BEGIN(0) +#define req_key_t uint32_t +#define req_key_t2 uint32_t2 +#define req_value_t float32_t +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_TYPE)(accessor_t::key_t)) + ((NBL_CONCEPT_REQ_TYPE)(accessor_t::key_t2)) + ((NBL_CONCEPT_REQ_TYPE)(accessor_t::value_t)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((req_key_t(0)), is_same_v, typename accessor_t::key_t)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((req_key_t2(0, 0)), is_same_v, typename accessor_t::key_t2)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((req_value_t(0)), is_same_v, typename accessor_t::value_t)) + + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((experimental::declval().vAnglesCount()), is_same_v, req_key_t)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((experimental::declval().hAnglesCount()), is_same_v, req_key_t)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((experimental::declval().symmetry()), is_same_v, ProfileProperties::LuminairePlanesSymmetry)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((experimental::declval().template vAngle((req_key_t)0)), is_same_v, req_value_t)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((experimental::declval().template hAngle((req_key_t)0)), is_same_v, req_value_t)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((experimental::declval().template value((req_key_t2)0)), is_same_v, req_value_t)) +); +#undef req_key_t +#undef req_key_t2 +#undef req_value_t +#include + +template +NBL_BOOL_CONCEPT IsIESAccessor = IESAccessor; +} + +template) + struct CandelaSampler +{ + using accessor_t = 
Accessor; + using value_t = typename accessor_t::value_t; + using symmetry_t = ProfileProperties::LuminairePlanesSymmetry; + + static value_t sample(NBL_CONST_REF_ARG(accessor_t) accessor, const float32_t2 polar) + { + const float32_t vAngle = degrees(polar.x); + const float32_t hAngle = degrees(wrapPhi(polar.y, symmetry)); + + const float32_t vABack = accessor.vAngle(accessor.vAnglesCount() - 1u); + if (vAngle > vABack) + return 0.f; + + const symmetry_t symmetry = accessor.symmetry(); + const uint32_t j0 = getVLB(accessor, vAngle); + const uint32_t j1 = getVUB(accessor, vAngle); + const uint32_t i0 = (symmetry == ISOTROPIC) ? 0u : getHLB(accessor, hAngle); + const uint32_t i1 = (symmetry == ISOTROPIC) ? 0u : getHUB(accessor, hAngle); + + const float32_t uReciprocal = ((i1 == i0) ? 1.f : 1.f / (accessor.hAngle(i1) - accessor.hAngle(i0))); + const float32_t vReciprocal = ((j1 == j0) ? 1.f : 1.f / (accessor.vAngle(j1) - accessor.vAngle(j0))); + + const float32_t u = ((hAngle - accessor.hAngle(i0)) * uReciprocal); + const float32_t v = ((vAngle - accessor.vAngle(j0)) * vReciprocal); + + const float32_t s0 = (accessor.value(uint32_t2(i0, j0)) * (1.f - v) + accessor.value(uint32_t2(i0, j1)) * v); + const float32_t s1 = (accessor.value(uint32_t2(i1, j0)) * (1.f - v) + accessor.value(uint32_t2(i1, j1)) * v); + + return s0 * (1.f - u) + s1 * u; + } + + static float32_t wrapPhi(const float32_t phi, const symmetry_t symmetry) + { + switch (symmetry) + { + case ISOTROPIC: //! axial symmetry + return 0.0f; + case QUAD_SYMETRIC: //! phi MIRROR_REPEAT wrap onto [0, 90] degrees range + { + NBL_CONSTEXPR float32_t M_HALF_PI = numbers::pi *0.5f; + float32_t wrapPhi = abs(phi); //! first MIRROR + if (wrapPhi > M_HALF_PI) //! then REPEAT + wrapPhi = hlsl::clamp(M_HALF_PI - (wrapPhi - M_HALF_PI), 0.f, M_HALF_PI); + return wrapPhi; //! eg. maps (in degrees) 91,269,271 -> 89 and 179,181,359 -> 1 + } + case HALF_SYMETRIC: //! 
phi MIRROR wrap onto [0, 180] degrees range + case OTHER_HALF_SYMMETRIC: //! eg. maps (in degress) 181 -> 179 or 359 -> 1 + return abs(phi); + case NO_LATERAL_SYMMET: //! plot onto whole (in degress) [0, 360] range + { + NBL_CONSTEXPR float32_t M_TWICE_PI = numbers::pi *2.f; + return (phi < 0.f) ? (phi + M_TWICE_PI) : phi; + } + } + return 69.f; + } + + struct impl_t + { + static uint32_t getVUB(NBL_CONST_REF_ARG(accessor_t) accessor, const float32_t angle) + { + for (uint32_t i = 0u; i < accessor.vAnglesCount(); ++i) + if (accessor.vAngle(i) > angle) + return i; + return accessor.vAnglesCount(); + } + + static uint32_t getHUB(NBL_CONST_REF_ARG(accessor_t) accessor, const float32_t angle) + { + for (uint32_t i = 0u; i < accessor.hAnglesCount(); ++i) + if (accessor.hAngle(i) > angle) + return i; + return accessor.hAnglesCount(); + } + }; + + static uint32_t getVLB(NBL_CONST_REF_ARG(accessor_t) accessor, const float32_t angle) + { + return (uint32_t)hlsl::max((int64_t)impl_t::getVUB(accessor, angle) - 1ll, 0ll); + } + + static uint32_t getHLB(NBL_CONST_REF_ARG(accessor_t) accessor, const float32_t angle) + { + return (uint32_t)hlsl::max((int64_t)impl_t::getHUB(accessor, angle) - 1ll, 0ll); + } + + static uint32_t getVUB(NBL_CONST_REF_ARG(accessor_t) accessor, const float32_t angle) + { + return (uint32_t)hlsl::min((int64_t)impl_t::getVUB(accessor, angle), (int64_t)(accessor.vAnglesCount() - 1u)); + } + + static uint32_t getHUB(NBL_CONST_REF_ARG(accessor_t) accessor, const float32_t angle) + { + return (uint32_t)hlsl::min((int64_t)impl_t::getHUB(accessor, angle), (int64_t)(accessor.hAnglesCount() - 1u)); + } +}; + +} +} +} + +#endif // _NBL_BUILTIN_HLSL_IES_SAMPLER_INCLUDED_ \ No newline at end of file diff --git a/src/nbl/asset/utils/CIESProfile.cpp b/src/nbl/asset/utils/CIESProfile.cpp index 55b8b18fc8..0a7d72b731 100644 --- a/src/nbl/asset/utils/CIESProfile.cpp +++ b/src/nbl/asset/utils/CIESProfile.cpp @@ -8,82 +8,6 @@ using namespace nbl; using namespace asset; 
-const CIESProfile::IES_STORAGE_FORMAT CIESProfile::sample(IES_STORAGE_FORMAT theta, IES_STORAGE_FORMAT phi) const -{ - auto wrapPhi = [&](const IES_STORAGE_FORMAT& _phi) -> IES_STORAGE_FORMAT - { - constexpr auto M_HALF_PI =core::HALF_PI(); - constexpr auto M_TWICE_PI = core::PI() * 2.0; - - switch (symmetry) - { - case ISOTROPIC: //! axial symmetry - return 0.0; - case QUAD_SYMETRIC: //! phi MIRROR_REPEAT wrap onto [0, 90] degrees range - { - float wrapPhi = abs(_phi); //! first MIRROR - - if (wrapPhi > M_HALF_PI) //! then REPEAT - wrapPhi = std::clamp(M_HALF_PI - (wrapPhi - M_HALF_PI), 0, M_HALF_PI); - - return wrapPhi; //! eg. maps (in degrees) 91,269,271 -> 89 and 179,181,359 -> 1 - } - case HALF_SYMETRIC: //! phi MIRROR wrap onto [0, 180] degrees range - case OTHER_HALF_SYMMETRIC: - return abs(_phi); //! eg. maps (in degress) 181 -> 179 or 359 -> 1 - case NO_LATERAL_SYMMET: //! plot onto whole (in degress) [0, 360] range - { - if (_phi < 0) - return _phi + M_TWICE_PI; - else - return _phi; - } - default: - assert(false); - return 69; - } - }; - - const float vAngle = core::degrees(theta), hAngle = core::degrees(wrapPhi(phi)); - - assert(vAngle >= 0.0 && vAngle <= 180.0); - assert(hAngle >= 0.0 && hAngle <= 360.0); - - if (vAngle > vAngles.back()) - return 0.0; - - // bilinear interpolation - auto lb = [](const core::vector& angles, double angle) -> size_t - { - assert(!angles.empty()); - const size_t idx = std::upper_bound(std::begin(angles), std::end(angles), angle) - std::begin(angles); - return (size_t)std::max((int64_t)idx - 1, (int64_t)0); - }; - - auto ub = [](const core::vector& angles, double angle) -> size_t - { - assert(!angles.empty()); - const size_t idx = std::upper_bound(std::begin(angles), std::end(angles), angle) - std::begin(angles); - return std::min(idx, angles.size() - 1); - }; - - const size_t j0 = lb(vAngles, vAngle); - const size_t j1 = ub(vAngles, vAngle); - const size_t i0 = symmetry == ISOTROPIC ? 
0 : lb(hAngles, hAngle); - const size_t i1 = symmetry == ISOTROPIC ? 0 : ub(hAngles, hAngle); - - double uResp = i1 == i0 ? 1.0 : 1.0 / (hAngles[i1] - hAngles[i0]); - double vResp = j1 == j0 ? 1.0 : 1.0 / (vAngles[j1] - vAngles[j0]); - - double u = (hAngle - hAngles[i0]) * uResp; - double v = (vAngle - vAngles[j0]) * vResp; - - double s0 = getCandelaValue(i0, j0) * (1.0 - v) + getCandelaValue(i0, j1) * (v); - double s1 = getCandelaValue(i1, j0) * (1.0 - v) + getCandelaValue(i1, j1) * (v); - - return s0 * (1.0 - u) + s1 * u; -} - template core::smart_refctd_ptr CIESProfile::createIESTexture(ExecutionPolicy&& policy, const float flatten, const bool fullDomainFlatten, uint32_t width, uint32_t height) const { @@ -169,7 +93,7 @@ core::smart_refctd_ptr CIESProfile::createIESTexture(Execu const auto uv = Octahedral::vector2_type(position.x * vertInv, position.y * horiInv); const auto dir = Octahedral::uvToDir(uv); const auto polar = Polar::createFromCartesian(dir); - const auto intensity = sample(polar.theta, polar.phi); + const auto intensity = sampler_t::sample(accessor, hlsl::uint32_t(polar.theta, polar.phi)); //! blend the IES texture with "flatten" double blendV = intensity * (1.0 - flatten); diff --git a/src/nbl/asset/utils/CIESProfile.h b/src/nbl/asset/utils/CIESProfile.h index 1f6b2833e5..1584f17006 100644 --- a/src/nbl/asset/utils/CIESProfile.h +++ b/src/nbl/asset/utils/CIESProfile.h @@ -6,6 +6,7 @@ #define __NBL_ASSET_C_IES_PROFILE_H_INCLUDED__ #include "nbl/asset/metadata/CIESProfileMetadata.h" +#include "nbl/builtin/hlsl/ies/sampler.hlsl" #include namespace nbl @@ -15,99 +16,67 @@ namespace nbl class CIESProfile { public: - using IES_STORAGE_FORMAT = double; + struct properties_t : public nbl::hlsl::ies::ProfileProperties + { + hlsl::uint32_t2 optimalIESResolution; //! Optimal resolution for IES CDC texture + }; - //! 
max 16K resolution - _NBL_STATIC_INLINE_CONSTEXPR size_t CDC_MAX_TEXTURE_WIDTH = 15360; - _NBL_STATIC_INLINE_CONSTEXPR size_t CDC_MAX_TEXTURE_HEIGHT = 8640; + struct accessor_t + { + using key_t = uint32_t; + using key_t2 = hlsl::uint32_t2; + using value_t = hlsl::float32_t; - _NBL_STATIC_INLINE_CONSTEXPR size_t CDC_DEFAULT_TEXTURE_WIDTH = 1024; - _NBL_STATIC_INLINE_CONSTEXPR size_t CDC_DEFAULT_TEXTURE_HEIGHT = 1024; + accessor_t(const key_t2& resolution, const properties_t& props) : hAngles(resolution.x), vAngles(resolution.y), data(resolution.x * resolution.y), properties(props) {} - _NBL_STATIC_INLINE_CONSTEXPR IES_STORAGE_FORMAT MAX_VANGLE = 180.0; - _NBL_STATIC_INLINE_CONSTEXPR IES_STORAGE_FORMAT MAX_HANGLE = 360.0; + template) + inline value_t vAngle(T j) const { return (value_t)vAngles[j]; } - _NBL_STATIC_INLINE_CONSTEXPR auto UI16_MAX_D = 65535.0; - _NBL_STATIC_INLINE_CONSTEXPR auto IES_TEXTURE_STORAGE_FORMAT = asset::EF_R16_UNORM; + template) + inline value_t hAngle(T i) const { return (value_t)hAngles[i]; } - enum Version : uint8_t - { - V_1995, - V_2002, - V_SIZE - }; + template) + inline value_t value(T ij) const { return (value_t)data[vAnglesCount() * ij.x + ij.y]; } - enum PhotometricType : uint8_t - { - TYPE_NONE, - TYPE_C, - TYPE_B, - TYPE_A - }; + template) + inline void setValue(T ij, value_t val) { data[vAnglesCount() * ij.x + ij.y] = val; } - enum LuminairePlanesSymmetry : uint8_t - { - ISOTROPIC, //! Only one horizontal angle present and a luminaire is assumed to be laterally axial symmetric - QUAD_SYMETRIC, //! The luminaire is assumed to be symmetric in each quadrant - HALF_SYMETRIC, //! The luminaire is assumed to be symmetric about the 0 to 180 degree plane - OTHER_HALF_SYMMETRIC, //! HALF_SYMETRIC case for legacy V_1995 version where horizontal angles are in range [90, 270], in that case the parser patches horizontal angles to be HALF_SYMETRIC - NO_LATERAL_SYMMET //! 
The luminaire is assumed to exhibit no lateral symmet + inline key_t vAnglesCount() { return (key_t)vAngles.size(); } + inline key_t hAnglesCount() { return (key_t)hAngles.size(); } + inline properties_t::LuminairePlanesSymmetry symmetry() { return properties.symmetry; } + + core::vector hAngles; //! The angular displacement indegreesfrom straight down, a value represents spherical coordinate "theta" with physics convention. Note that if symmetry is OTHER_HALF_SYMMETRIC then real horizontal angle provided by IES data is (hAngles[index] + 90) - the reason behind it is we patch 1995 IES OTHER_HALF_SYMETRIC case to be HALF_SYMETRIC + core::vector vAngles; //! Measurements in degrees of angular displacement measured counterclockwise in a horizontal plane for Type C photometry and clockwise for Type A and B photometry, a value represents spherical coordinate "phi" with physics convention + core::vector data; //! Candela scalar values + properties_t properties; //! Profile properties }; + using sampler_t = nbl::hlsl::ies::CandelaSampler; + CIESProfile() = default; ~CIESProfile() = default; + + inline const accessor_t& getAccessor() const { return accessor; } - auto getType() const { return type; } - auto getVersion() const { return version; } - auto getSymmetry() const { return symmetry; } - - const core::vector& getHoriAngles() const { return hAngles; } - const core::vector& getVertAngles() const { return vAngles; } - const core::vector& getData() const { return data; } - IES_STORAGE_FORMAT getCandelaValue(size_t i, size_t j) const { return data[vAngles.size() * i + j]; } - - IES_STORAGE_FORMAT getMaxCandelaValue() const { return maxCandelaValue; } - IES_STORAGE_FORMAT getTotalEmission() const { return totalEmissionIntegral; } - inline IES_STORAGE_FORMAT getAvgEmmision(const bool fullDomain=false) const + inline hlsl::float32_t getAvgEmmision(const bool fullDomain=false) const { if (fullDomain) { - const float cosLo = std::cos(core::radians(vAngles.front())); - const 
float cosHi = std::cos(core::radians(vAngles.back())); + const float cosLo = std::cos(core::radians(accessor.vAngles.front())); + const float cosHi = std::cos(core::radians(accessor.vAngles.back())); const float dsinTheta = cosLo - cosHi; - return totalEmissionIntegral*(0.5/core::PI())/dsinTheta; + return accessor.properties.totalEmissionIntegral*(0.5/core::PI())/dsinTheta; } - return avgEmmision; + return accessor.properties.avgEmmision; } - auto getOptimalIESResolution() const { return optimalIESResolution; } - template - core::smart_refctd_ptr createIESTexture(ExecutionPolicy&& policy, const float flatten = 0.0, const bool fullDomainFlatten=false, uint32_t width = CDC_DEFAULT_TEXTURE_WIDTH, uint32_t height = CDC_DEFAULT_TEXTURE_HEIGHT) const; - core::smart_refctd_ptr createIESTexture(const float flatten = 0.0, const bool fullDomainFlatten=false, uint32_t width = CDC_DEFAULT_TEXTURE_WIDTH, uint32_t height = CDC_DEFAULT_TEXTURE_HEIGHT) const; + core::smart_refctd_ptr createIESTexture(ExecutionPolicy&& policy, const float flatten = 0.0, const bool fullDomainFlatten=false, uint32_t width = properties_t::CDC_DEFAULT_TEXTURE_WIDTH, uint32_t height = properties_t::CDC_DEFAULT_TEXTURE_HEIGHT) const; + core::smart_refctd_ptr createIESTexture(const float flatten = 0.0, const bool fullDomainFlatten=false, uint32_t width = properties_t::CDC_DEFAULT_TEXTURE_WIDTH, uint32_t height = properties_t::CDC_DEFAULT_TEXTURE_HEIGHT) const; private: - CIESProfile(PhotometricType type, size_t hSize, size_t vSize) - : type(type), version(V_SIZE), hAngles(hSize), vAngles(vSize), data(hSize* vSize) {} - - void setCandelaValue(size_t i, size_t j, IES_STORAGE_FORMAT val) { data[vAngles.size() * i + j] = val; } - - const IES_STORAGE_FORMAT sample(IES_STORAGE_FORMAT vAngle, IES_STORAGE_FORMAT hAngle) const; - - PhotometricType type; - Version version; - LuminairePlanesSymmetry symmetry; - - core::vector hAngles; //! 
The angular displacement indegreesfrom straight down, a value represents spherical coordinate "theta" with physics convention. Note that if symmetry is OTHER_HALF_SYMMETRIC then real horizontal angle provided by IES data is (hAngles[index] + 90) - the reason behind it is we patch 1995 IES OTHER_HALF_SYMETRIC case to be HALF_SYMETRIC - core::vector vAngles; //! Measurements in degrees of angular displacement measured counterclockwise in a horizontal plane for Type C photometry and clockwise for Type A and B photometry, a value represents spherical coordinate "phi" with physics convention - core::vector data; //! Candela values - - IES_STORAGE_FORMAT maxCandelaValue = {}; //! Max value from this->data vector - IES_STORAGE_FORMAT totalEmissionIntegral = {}; //! Total energy emitted - IES_STORAGE_FORMAT avgEmmision = {}; //! this->totalEmissionIntegral / - - core::vector2du32_SIMD optimalIESResolution; //! optimal resolution for IES profile texture - + CIESProfile(const properties_t& props, const hlsl::uint32_t2& resolution) : accessor(resolution, props) {} + accessor_t accessor; //! 
IES profile data accessor friend class CIESProfileParser; }; } From 811ced6b75475fc4285447fd1222fd49141adfa3 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 16 Nov 2025 13:59:23 +0100 Subject: [PATCH 112/472] update IES .cpp files, downgrade IES internal scalar storage to float32_t & make Nabla compile again --- include/nbl/builtin/hlsl/ies/profile.hlsl | 4 +- include/nbl/builtin/hlsl/ies/sampler.hlsl | 45 +++++---- include/nbl/builtin/hlsl/math/polar.hlsl | 4 +- .../asset/interchange/CIESProfileLoader.cpp | 2 +- src/nbl/asset/utils/CIESProfile.cpp | 27 +++--- src/nbl/asset/utils/CIESProfile.h | 11 ++- src/nbl/asset/utils/CIESProfileParser.cpp | 96 ++++++++++--------- 7 files changed, 101 insertions(+), 88 deletions(-) diff --git a/include/nbl/builtin/hlsl/ies/profile.hlsl b/include/nbl/builtin/hlsl/ies/profile.hlsl index 440fe84a98..c4b12b3211 100644 --- a/include/nbl/builtin/hlsl/ies/profile.hlsl +++ b/include/nbl/builtin/hlsl/ies/profile.hlsl @@ -17,7 +17,7 @@ namespace ies struct ProfileProperties { //! max 16K resolution - NBL_CONSTEXPR_STATIC_INLINE uint32_t CDC_MAX_TEXTURE_SIZE = 15360u; + NBL_CONSTEXPR_STATIC_INLINE uint32_t CDC_MAX_TEXTURE_WIDTH = 15360u; NBL_CONSTEXPR_STATIC_INLINE uint32_t CDC_MAX_TEXTURE_HEIGHT = 8640u; NBL_CONSTEXPR_STATIC_INLINE uint32_t CDC_DEFAULT_TEXTURE_WIDTH = 1024u; @@ -54,7 +54,7 @@ struct ProfileProperties Version version; LuminairePlanesSymmetry symmetry; - float32_t maxCandelaValue; //! Max value from this->data vector + float32_t maxCandelaValue; //! Max scalar value from candela data vector float32_t totalEmissionIntegral; //! Total energy emitted float32_t avgEmmision; //! 
totalEmissionIntegral / }; diff --git a/include/nbl/builtin/hlsl/ies/sampler.hlsl b/include/nbl/builtin/hlsl/ies/sampler.hlsl index b8ee40b84f..e38bc53551 100644 --- a/include/nbl/builtin/hlsl/ies/sampler.hlsl +++ b/include/nbl/builtin/hlsl/ies/sampler.hlsl @@ -6,7 +6,7 @@ #define _NBL_BUILTIN_HLSL_IES_SAMPLER_INCLUDED_ #include "nbl/builtin/hlsl/cpp_compat.hlsl" -#include "nbl/builtin/hlsl/numbers.hlsl" +#include "nbl/builtin/hlsl/math/polar.hlsl" #include "nbl/builtin/hlsl/concepts.hlsl" #include "nbl/builtin/hlsl/ies/profile.hlsl" @@ -21,7 +21,9 @@ namespace concepts #define NBL_CONCEPT_NAME IESAccessor #define NBL_CONCEPT_TPLT_PRM_KINDS (typename) #define NBL_CONCEPT_TPLT_PRM_NAMES (accessor_t) -NBL_CONCEPT_BEGIN(0) +#define NBL_CONCEPT_PARAM_0 (accessor, accessor_t) +NBL_CONCEPT_BEGIN(1) +#define accessor NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 #define req_key_t uint32_t #define req_key_t2 uint32_t2 #define req_value_t float32_t @@ -33,13 +35,14 @@ NBL_CONCEPT_END( ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((req_key_t2(0, 0)), is_same_v, typename accessor_t::key_t2)) ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((req_value_t(0)), is_same_v, typename accessor_t::value_t)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((experimental::declval().vAnglesCount()), is_same_v, req_key_t)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((experimental::declval().hAnglesCount()), is_same_v, req_key_t)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((experimental::declval().symmetry()), is_same_v, ProfileProperties::LuminairePlanesSymmetry)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((experimental::declval().template vAngle((req_key_t)0)), is_same_v, req_value_t)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((experimental::declval().template hAngle((req_key_t)0)), is_same_v, req_value_t)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((experimental::declval().template value((req_key_t2)0)), is_same_v, req_value_t)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.vAnglesCount()), is_same_v, req_key_t)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.hAnglesCount()), 
is_same_v, req_key_t)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.symmetry()), is_same_v, ProfileProperties::LuminairePlanesSymmetry)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.template vAngle((req_key_t)0)), is_same_v, req_value_t)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.template hAngle((req_key_t)0)), is_same_v, req_value_t)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.template value((req_key_t2)0)), is_same_v, req_value_t)) ); +#undef accessor #undef req_key_t #undef req_key_t2 #undef req_value_t @@ -50,26 +53,26 @@ NBL_BOOL_CONCEPT IsIESAccessor = IESAccessor; } template) - struct CandelaSampler +struct CandelaSampler { using accessor_t = Accessor; using value_t = typename accessor_t::value_t; using symmetry_t = ProfileProperties::LuminairePlanesSymmetry; - static value_t sample(NBL_CONST_REF_ARG(accessor_t) accessor, const float32_t2 polar) + static value_t sample(NBL_CONST_REF_ARG(accessor_t) accessor, NBL_CONST_REF_ARG(math::Polar) polar) { - const float32_t vAngle = degrees(polar.x); - const float32_t hAngle = degrees(wrapPhi(polar.y, symmetry)); + const symmetry_t symmetry = accessor.symmetry(); + const float32_t vAngle = degrees(polar.theta); + const float32_t hAngle = degrees(wrapPhi(polar.phi, symmetry)); const float32_t vABack = accessor.vAngle(accessor.vAnglesCount() - 1u); if (vAngle > vABack) return 0.f; - const symmetry_t symmetry = accessor.symmetry(); const uint32_t j0 = getVLB(accessor, vAngle); const uint32_t j1 = getVUB(accessor, vAngle); - const uint32_t i0 = (symmetry == ISOTROPIC) ? 0u : getHLB(accessor, hAngle); - const uint32_t i1 = (symmetry == ISOTROPIC) ? 0u : getHUB(accessor, hAngle); + const uint32_t i0 = (symmetry == symmetry_t::ISOTROPIC) ? 0u : getHLB(accessor, hAngle); + const uint32_t i1 = (symmetry == symmetry_t::ISOTROPIC) ? 0u : getHUB(accessor, hAngle); const float32_t uReciprocal = ((i1 == i0) ? 1.f : 1.f / (accessor.hAngle(i1) - accessor.hAngle(i0))); const float32_t vReciprocal = ((j1 == j0) ? 
1.f : 1.f / (accessor.vAngle(j1) - accessor.vAngle(j0))); @@ -87,20 +90,20 @@ template) { switch (symmetry) { - case ISOTROPIC: //! axial symmetry + case symmetry_t::ISOTROPIC: //! axial symmetry return 0.0f; - case QUAD_SYMETRIC: //! phi MIRROR_REPEAT wrap onto [0, 90] degrees range + case symmetry_t::QUAD_SYMETRIC: //! phi MIRROR_REPEAT wrap onto [0, 90] degrees range { - NBL_CONSTEXPR float32_t M_HALF_PI = numbers::pi *0.5f; + NBL_CONSTEXPR float32_t M_HALF_PI = numbers::pi * 0.5f; float32_t wrapPhi = abs(phi); //! first MIRROR if (wrapPhi > M_HALF_PI) //! then REPEAT wrapPhi = hlsl::clamp(M_HALF_PI - (wrapPhi - M_HALF_PI), 0.f, M_HALF_PI); return wrapPhi; //! eg. maps (in degrees) 91,269,271 -> 89 and 179,181,359 -> 1 } - case HALF_SYMETRIC: //! phi MIRROR wrap onto [0, 180] degrees range - case OTHER_HALF_SYMMETRIC: //! eg. maps (in degress) 181 -> 179 or 359 -> 1 + case symmetry_t::HALF_SYMETRIC: //! phi MIRROR wrap onto [0, 180] degrees range + case symmetry_t::OTHER_HALF_SYMMETRIC: //! eg. maps (in degress) 181 -> 179 or 359 -> 1 return abs(phi); - case NO_LATERAL_SYMMET: //! plot onto whole (in degress) [0, 360] range + case symmetry_t::NO_LATERAL_SYMMET: //! plot onto whole (in degress) [0, 360] range { NBL_CONSTEXPR float32_t M_TWICE_PI = numbers::pi *2.f; return (phi < 0.f) ? (phi + M_TWICE_PI) : phi; diff --git a/include/nbl/builtin/hlsl/math/polar.hlsl b/include/nbl/builtin/hlsl/math/polar.hlsl index 59454e27e6..01a95f61ef 100644 --- a/include/nbl/builtin/hlsl/math/polar.hlsl +++ b/include/nbl/builtin/hlsl/math/polar.hlsl @@ -44,8 +44,8 @@ struct Polar return ToCartesian(theta, phi); } - scalar_type theta; //! polar angle in range [0, PI] - scalar_type phi; //! azimuthal angle in range [-PI, PI] + scalar_type theta; //! polar angle + scalar_type phi; //! 
azimuthal angle }; } diff --git a/src/nbl/asset/interchange/CIESProfileLoader.cpp b/src/nbl/asset/interchange/CIESProfileLoader.cpp index 744756b607..a78c9af0a2 100644 --- a/src/nbl/asset/interchange/CIESProfileLoader.cpp +++ b/src/nbl/asset/interchange/CIESProfileLoader.cpp @@ -60,7 +60,7 @@ asset::SAssetBundle CIESProfileLoader::loadAsset(system::IFile* _file, const ass cpuImageView = _override->findDefaultAsset("nbl/builtin/image_view/dummy2d", loadContex, _hierarchyLevel).first; // note: we could also pass empty content, but this would require adjusting IAssetLoader source to not attempt to use all loaders to find the asset else { - const auto optimalResolution = profile.getOptimalIESResolution(); + const auto optimalResolution = profile.getAccessor().properties.optimalIESResolution; cpuImageView = profile.createIESTexture(0.f, false, optimalResolution.x, optimalResolution.y); } diff --git a/src/nbl/asset/utils/CIESProfile.cpp b/src/nbl/asset/utils/CIESProfile.cpp index 0a7d72b731..dc66c9693a 100644 --- a/src/nbl/asset/utils/CIESProfile.cpp +++ b/src/nbl/asset/utils/CIESProfile.cpp @@ -14,11 +14,11 @@ core::smart_refctd_ptr CIESProfile::createIESTexture(Execu const bool inFlattenDomain = flatten >= 0.0 && flatten <= 1.0; // [0, 1] range for blend equation, 1 is normally invalid but we use it to for special implied domain flatten mode assert(inFlattenDomain); - if (width > CDC_MAX_TEXTURE_WIDTH) - width = CDC_MAX_TEXTURE_WIDTH; + if (width > properties_t::CDC_MAX_TEXTURE_WIDTH) + width = properties_t::CDC_MAX_TEXTURE_WIDTH; - if (height > CDC_MAX_TEXTURE_HEIGHT) - height = CDC_MAX_TEXTURE_HEIGHT; + if (height > properties_t::CDC_MAX_TEXTURE_HEIGHT) + height = properties_t::CDC_MAX_TEXTURE_HEIGHT; // TODO: If no symmetry (no folding in half and abuse of mirror sampler) make dimensions odd-sized so middle texel taps the south pole @@ -36,11 +36,11 @@ core::smart_refctd_ptr CIESProfile::createIESTexture(Execu imgInfo.arrayLayers = 1u; imgInfo.samples = 
asset::ICPUImage::ESCF_1_BIT; imgInfo.flags = static_cast(0u); - imgInfo.format = IES_TEXTURE_STORAGE_FORMAT; + imgInfo.format = properties_t::IES_TEXTURE_STORAGE_FORMAT; auto outImg = asset::ICPUImage::create(std::move(imgInfo)); asset::ICPUImage::SBufferCopy region; - constexpr auto texelBytesz = asset::getTexelOrBlockBytesize(); + constexpr auto texelBytesz = asset::getTexelOrBlockBytesize(); const size_t bufferRowLength = asset::IImageAssetHandlerBase::calcPitchInBlocks(width, texelBytesz); region.bufferRowLength = bufferRowLength; region.imageExtent = imgInfo.extent; @@ -72,8 +72,8 @@ core::smart_refctd_ptr CIESProfile::createIESTexture(Execu const IImageFilter::IState::ColorValue::WriteMemoryInfo wInfo(creationParams.format, outImg->getBuffer()->getPointer()); // Late Optimization TODO: Modify the Max Value for the UNORM texture to be the Max Value after flatten blending - const double maxValue = getMaxCandelaValue(); - const double maxValueRecip = 1.0 / maxValue; + const auto maxValue = accessor.properties.maxCandelaValue; + const auto maxValueRecip = 1.f / maxValue; // There is one huge issue, the IES files love to give us values for degrees 0, 90, 180 an 360 // So standard octahedral mapping won't work, because for above data points you need corner sampled images. @@ -81,8 +81,8 @@ core::smart_refctd_ptr CIESProfile::createIESTexture(Execu const float horiInv = 1.0 / (width-1); const double flattenTarget = getAvgEmmision(fullDomainFlatten); - const double domainLo = core::radians(vAngles.front()); - const double domainHi = core::radians(vAngles.back()); + const double domainLo = core::radians(accessor.vAngles.front()); + const double domainHi = core::radians(accessor.vAngles.back()); auto fill = [&](uint32_t blockArrayOffset, core::vectorSIMDu32 position) -> void { // We don't currently support generating IES images that exploit symmetries or reduced domains, all are full octahederal mappings of a sphere. 
@@ -93,10 +93,10 @@ core::smart_refctd_ptr CIESProfile::createIESTexture(Execu const auto uv = Octahedral::vector2_type(position.x * vertInv, position.y * horiInv); const auto dir = Octahedral::uvToDir(uv); const auto polar = Polar::createFromCartesian(dir); - const auto intensity = sampler_t::sample(accessor, hlsl::uint32_t(polar.theta, polar.phi)); + const auto intensity = sampler_t::sample(accessor, polar); //! blend the IES texture with "flatten" - double blendV = intensity * (1.0 - flatten); + float blendV = intensity * (1.f - flatten); if (fullDomainFlatten && domainLo<= polar.theta && polar.theta<=domainHi || intensity >0.0) blendV += flattenTarget * flatten; @@ -104,7 +104,8 @@ core::smart_refctd_ptr CIESProfile::createIESTexture(Execu asset::IImageFilter::IState::ColorValue color; //asset::encodePixels(color.asDouble, &blendV); TODO: FIX THIS ENCODE, GIVES ARTIFACTS - const uint16_t encodeV = static_cast(std::clamp(blendV * UI16_MAX_D + 0.5, 0.0, UI16_MAX_D)); + constexpr float UI16_MAX_D = static_cast(std::numeric_limits::max()); + const uint16_t encodeV = static_cast(std::clamp(blendV * UI16_MAX_D + 0.5f, 0.f, UI16_MAX_D)); *color.asUShort = encodeV; color.writeMemory(wInfo, blockArrayOffset); }; diff --git a/src/nbl/asset/utils/CIESProfile.h b/src/nbl/asset/utils/CIESProfile.h index 1584f17006..a165b0ae49 100644 --- a/src/nbl/asset/utils/CIESProfile.h +++ b/src/nbl/asset/utils/CIESProfile.h @@ -18,6 +18,7 @@ namespace nbl public: struct properties_t : public nbl::hlsl::ies::ProfileProperties { + NBL_CONSTEXPR_STATIC_INLINE auto IES_TEXTURE_STORAGE_FORMAT = asset::EF_R16_UNORM; hlsl::uint32_t2 optimalIESResolution; //! 
Optimal resolution for IES CDC texture }; @@ -27,7 +28,9 @@ namespace nbl using key_t2 = hlsl::uint32_t2; using value_t = hlsl::float32_t; + accessor_t() = default; accessor_t(const key_t2& resolution, const properties_t& props) : hAngles(resolution.x), vAngles(resolution.y), data(resolution.x * resolution.y), properties(props) {} + ~accessor_t() = default; template) inline value_t vAngle(T j) const { return (value_t)vAngles[j]; } @@ -41,9 +44,9 @@ namespace nbl template) inline void setValue(T ij, value_t val) { data[vAnglesCount() * ij.x + ij.y] = val; } - inline key_t vAnglesCount() { return (key_t)vAngles.size(); } - inline key_t hAnglesCount() { return (key_t)hAngles.size(); } - inline properties_t::LuminairePlanesSymmetry symmetry() { return properties.symmetry; } + inline key_t vAnglesCount() const { return (key_t)vAngles.size(); } + inline key_t hAnglesCount() const { return (key_t)hAngles.size(); } + inline properties_t::LuminairePlanesSymmetry symmetry() const { return properties.symmetry; } core::vector hAngles; //! The angular displacement indegreesfrom straight down, a value represents spherical coordinate "theta" with physics convention. Note that if symmetry is OTHER_HALF_SYMMETRIC then real horizontal angle provided by IES data is (hAngles[index] + 90) - the reason behind it is we patch 1995 IES OTHER_HALF_SYMETRIC case to be HALF_SYMETRIC core::vector vAngles; //! Measurements in degrees of angular displacement measured counterclockwise in a horizontal plane for Type C photometry and clockwise for Type A and B photometry, a value represents spherical coordinate "phi" with physics convention @@ -76,7 +79,7 @@ namespace nbl private: CIESProfile(const properties_t& props, const hlsl::uint32_t2& resolution) : accessor(resolution, props) {} - accessor_t accessor; //! 
IES profile data accessor + accessor_t accessor; friend class CIESProfileParser; }; } diff --git a/src/nbl/asset/utils/CIESProfileParser.cpp b/src/nbl/asset/utils/CIESProfileParser.cpp index dd6b321414..96285b6a6d 100644 --- a/src/nbl/asset/utils/CIESProfileParser.cpp +++ b/src/nbl/asset/utils/CIESProfileParser.cpp @@ -33,16 +33,16 @@ bool CIESProfileParser::parse(CIESProfile& result) std::getline(ss, line); removeTrailingWhiteChars(line); - CIESProfile::Version iesVersion; - + + CIESProfile::properties_t::Version iesVersion; if (line.find(SIG_LM63_1995.data()) != std::string::npos) - iesVersion = CIESProfile::V_1995; + iesVersion = CIESProfile::properties_t::V_1995; else if (line.find(SIG_LM63_2002.data()) != std::string::npos) - iesVersion = CIESProfile::V_2002; + iesVersion = CIESProfile::properties_t::V_2002; else if (line.find(SIG_IESNA91.data()) != std::string::npos) - iesVersion = CIESProfile::V_1995; + iesVersion = CIESProfile::properties_t::V_1995; else if (line.find(SIG_ERCO_LG.data()) != std::string::npos) - iesVersion = CIESProfile::V_1995; + iesVersion = CIESProfile::properties_t::V_1995; else { errorMsg = "Unknown IESNA:LM-63 version, the IES input being parsed is invalid!"; @@ -82,9 +82,8 @@ bool CIESProfileParser::parse(CIESProfile& result) errorMsg = "unrecognized type"; return false; } - CIESProfile::PhotometricType type = - static_cast(type_); - if (type != CIESProfile::PhotometricType::TYPE_C) { + auto type = static_cast(type_); + if (type != CIESProfile::properties_t::TYPE_C) { errorMsg = "Only type C is supported for now"; return false; } @@ -100,32 +99,39 @@ bool CIESProfileParser::parse(CIESProfile& result) if (error) return false; - result = CIESProfile(type, hSize, vSize); - result.version = iesVersion; + { + CIESProfile::properties_t init; + init.type = type; + init.version = iesVersion; + init.maxCandelaValue = 0.f; + init.totalEmissionIntegral = 0.f; + init.avgEmmision = 0.f; + result = CIESProfile(init, hlsl::uint32_t2(hSize, vSize)); 
+ } if (vSize < 2) return false; - auto& vAngles = result.vAngles; + auto& vAngles = result.accessor.vAngles; for (int i = 0; i < vSize; i++) { - vAngles[i] = getDouble("vertical angle truncated"); + vAngles[i] = static_cast(getDouble("vertical angle truncated")); } if (!std::is_sorted(vAngles.begin(), vAngles.end())) { errorMsg = "Vertical angles should be sorted"; return false; } - if (vAngles[0] != 0.0 && vAngles[0] != 90.0) { + if (vAngles[0] != 0.f && vAngles[0] != 90.f) { errorMsg = "First vertical angle must be 0 or 90 in type C"; return false; } - if (vAngles[vSize - 1] != 90.0 && vAngles[vSize - 1] != 180.0) { + if (vAngles[vSize - 1] != 90.f && vAngles[vSize - 1] != 180.f) { errorMsg = "Last vertical angle must be 90 or 180 in type C"; return false; } - auto& hAngles = result.hAngles; + auto& hAngles = result.accessor.hAngles; for (int i = 0; i < hSize; i++) { - hAngles[i] = getDouble("horizontal angle truncated"); + hAngles[i] = static_cast(getDouble("horizontal angle truncated")); if (i != 0 && hAngles[i - 1] > hAngles[i]) return false; // Angles should be sorted } @@ -135,26 +141,26 @@ bool CIESProfileParser::parse(CIESProfile& result) const auto firstHAngle = hAngles.front(); const auto lastHAngle = hAngles.back(); - if (lastHAngle == 0) - result.symmetry = CIESProfile::ISOTROPIC; - else if (lastHAngle == 90) + if (lastHAngle == 0.f) + result.accessor.properties.symmetry = CIESProfile::properties_t::ISOTROPIC; + else if (lastHAngle == 90.f) { - result.symmetry = CIESProfile::QUAD_SYMETRIC; - fluxMultiplier = 4.0; + result.accessor.properties.symmetry = CIESProfile::properties_t::QUAD_SYMETRIC; + fluxMultiplier = 4.f; } - else if (lastHAngle == 180) + else if (lastHAngle == 180.f) { - result.symmetry = CIESProfile::HALF_SYMETRIC; + result.accessor.properties.symmetry = CIESProfile::properties_t::HALF_SYMETRIC; fluxMultiplier = 2.0; } - else if (lastHAngle == 360) - result.symmetry = CIESProfile::NO_LATERAL_SYMMET; + else if (lastHAngle == 360.f) + 
result.accessor.properties.symmetry = CIESProfile::properties_t::NO_LATERAL_SYMMET; else { - if (firstHAngle == 90 && lastHAngle == 270 && result.version == CIESProfile::V_1995) + if (firstHAngle == 90.f && lastHAngle == 270.f && result.accessor.properties.version == CIESProfile::properties_t::V_1995) { - result.symmetry = CIESProfile::OTHER_HALF_SYMMETRIC; - fluxMultiplier = 2.0; + result.accessor.properties.symmetry = CIESProfile::properties_t::OTHER_HALF_SYMMETRIC; + fluxMultiplier = 2.f; for (auto& angle : hAngles) angle -= firstHAngle; // patch the profile to HALF_SYMETRIC by shifting [90,270] range to [0, 180] @@ -168,44 +174,44 @@ bool CIESProfileParser::parse(CIESProfile& result) const double factor = ballastFactor * candelaMultiplier; for (int i = 0; i < hSize; i++) for (int j = 0; j < vSize; j++) - result.setCandelaValue(i, j, factor * getDouble("intensity value truncated")); + result.accessor.setValue(hlsl::uint32_t2(i, j), static_cast(factor * getDouble("intensity value truncated"))); } float totalEmissionIntegral = 0.0, nonZeroEmissionDomainSize = 0.0; constexpr auto FULL_SOLID_ANGLE = 4.0f * core::PI(); // TODO: this code could have two separate inner for loops for `result.symmetry != CIESProfile::ISOTROPIC` cases - const auto H_ANGLES_I_RANGE = result.symmetry != CIESProfile::ISOTROPIC ? result.hAngles.size() - 1 : 1; - const auto V_ANGLES_I_RANGE = result.vAngles.size() - 1; + const auto H_ANGLES_I_RANGE = result.accessor.properties.symmetry != CIESProfile::properties_t::ISOTROPIC ? 
result.accessor.hAngles.size() - 1 : 1; + const auto V_ANGLES_I_RANGE = result.accessor.vAngles.size() - 1; float smallestRangeSolidAngle = FULL_SOLID_ANGLE; for (size_t j = 0; j < V_ANGLES_I_RANGE; j++) { - const float thetaRad = core::radians(result.vAngles[j]); + const float thetaRad = core::radians(result.accessor.vAngles[j]); const float cosLo = std::cos(thetaRad); - const float cosHi = std::cos(core::radians(result.vAngles[j+1])); + const float cosHi = std::cos(core::radians(result.accessor.vAngles[j+1])); const float dsinTheta = cosLo - cosHi; float stripIntegral = 0.f; float nonZeroStripDomain = 0.f; for (size_t i = 0; i < H_ANGLES_I_RANGE; i++) { - const float dPhiRad = result.symmetry != CIESProfile::ISOTROPIC ? core::radians(hAngles[i + 1] - hAngles[i]) : (core::PI() * 2.0f); + const float dPhiRad = result.accessor.properties.symmetry != CIESProfile::properties_t::ISOTROPIC ? core::radians(hAngles[i + 1] - hAngles[i]) : (core::PI() * 2.0f); // TODO: in reality one should transform the 4 vertices (or 3) into octahedral map, work out the dUV/dPhi and dUV/dTheta vectors as-if for Anisotropic Filtering // then choose the minor axis length, and use that as a pixel size (since looking for smallest thing, dont have to worry about handling discont) const float solidAngle = dsinTheta * dPhiRad; if (solidAngle0.f) @@ -218,8 +224,8 @@ bool CIESProfileParser::parse(CIESProfile& result) // assuming octahedral map { const uint32_t maxDimMeasureSize = core::sqrt(FULL_SOLID_ANGLE/smallestRangeSolidAngle); - result.optimalIESResolution = decltype(result.optimalIESResolution){ maxDimMeasureSize, maxDimMeasureSize }; - result.optimalIESResolution *= 2u; // safe bias for our bilinear interpolation to work nicely and increase resolution of a profile + result.accessor.properties.optimalIESResolution = decltype(result.accessor.properties.optimalIESResolution){ maxDimMeasureSize, maxDimMeasureSize }; + result.accessor.properties.optimalIESResolution *= 2u; // safe bias for our 
bilinear interpolation to work nicely and increase resolution of a profile } assert(nonZeroEmissionDomainSize >= 0.f); @@ -227,8 +233,8 @@ bool CIESProfileParser::parse(CIESProfile& result) if (nonZeroEmissionDomainSize <= std::numeric_limits::min()) // protect us from division by small numbers (just in case, we should never hit it) return false; - result.avgEmmision = totalEmissionIntegral / static_cast(nonZeroEmissionDomainSize); - result.totalEmissionIntegral = totalEmissionIntegral * fluxMultiplier; // we use fluxMultiplier to calculate final total emission for case where we have some symmetry between planes (fluxMultiplier is 1.0f if ISOTROPIC or NO_LATERAL_SYMMET because they already have correct total emission integral calculated), also note it doesn't affect average emission at all + result.accessor.properties.avgEmmision = totalEmissionIntegral / static_cast(nonZeroEmissionDomainSize); + result.accessor.properties.totalEmissionIntegral = totalEmissionIntegral * fluxMultiplier; // we use fluxMultiplier to calculate final total emission for case where we have some symmetry between planes (fluxMultiplier is 1.0f if ISOTROPIC or NO_LATERAL_SYMMET because they already have correct total emission integral calculated), also note it doesn't affect average emission at all return !error; } \ No newline at end of file From 977c7dddb9300e830432df96a77d58121063775c Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 20 Nov 2025 15:44:51 +0700 Subject: [PATCH 113/472] Add constexpr to _static_cast --- include/nbl/builtin/hlsl/cpp_compat/basic.h | 57 +++++++++++---------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index 87baa1f0d6..f871e2a23d 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -3,34 +3,6 @@ #include -namespace nbl -{ -namespace hlsl -{ -namespace impl -{ -template -struct static_cast_helper 
-{ - static inline To cast(From u) - { -#ifndef __HLSL_VERSION - return static_cast(u); -#else - return To(u); -#endif - } -}; -} - -template -inline To _static_cast(From v) -{ - return impl::static_cast_helper::cast(v); -} - -} -} #ifndef __HLSL_VERSION #include @@ -102,4 +74,33 @@ struct add_pointer #endif +namespace nbl +{ +namespace hlsl +{ +namespace impl +{ +template +struct static_cast_helper +{ + NBL_CONSTEXPR_STATIC_INLINE To cast(From u) + { +#ifndef __HLSL_VERSION + return static_cast(u); +#else + return To(u); +#endif + } +}; +} + +template +NBL_CONSTEXPR_INLINE_FUNC To _static_cast(From v) +{ + return impl::static_cast_helper::cast(v); +} + +} +} + #endif From 3294d0451c367aaa5963eebf3ce3ec7f850f852c Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 20 Nov 2025 15:55:22 +0700 Subject: [PATCH 114/472] Change NBL_CONSTEXPR_STATIC_FUNC to NBL_CONSTEXPR_STATIC --- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 6 +++--- .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 18 +++++++++--------- include/nbl/builtin/hlsl/morton.hlsl | 18 +++++++++--------- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 4566e2097b..242e30dfbe 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -673,7 +673,7 @@ template NBL_PARTIAL_REQ_TOP(concepts::BooleanScalar) struct select_helper) > { - NBL_CONSTEXPR_STATIC_FUNC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) + NBL_CONSTEXPR_STATIC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) { return condition ? 
object1 : object2; } @@ -683,7 +683,7 @@ template NBL_PARTIAL_REQ_TOP(concepts::Boolean&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) struct select_helper&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) > { - NBL_CONSTEXPR_STATIC_FUNC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) + NBL_CONSTEXPR_STATIC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) { using traits = hlsl::vector_traits; array_get conditionGetter; @@ -701,7 +701,7 @@ struct select_helper&& concepts::V template struct undef_helper { - NBL_CONSTEXPR_STATIC_FUNC T __call() + NBL_CONSTEXPR_STATIC T __call() { T t; return t; diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index 3780ce001b..47eb573359 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -192,7 +192,7 @@ NBL_CONSTEXPR_FUNC vector operator##OP (vector;\ using component_t = ComponentType;\ -NBL_CONSTEXPR_STATIC_FUNC this_t create(this_t other)\ +NBL_CONSTEXPR_STATIC this_t create(this_t other)\ {\ CRTP output;\ [[unroll]]\ @@ -209,7 +209,7 @@ NBL_CONSTEXPR_FUNC component_t calcComponentSum() NBL_CONST_MEMBER_FUNC \ } #define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM \ -NBL_CONSTEXPR_STATIC_FUNC this_t create(vector other)\ +NBL_CONSTEXPR_STATIC this_t create(vector other)\ {\ this_t output;\ [[unroll]]\ @@ -356,7 +356,7 @@ struct emulated_vector, CRTP> : using component_t = emulated_float64_t; using this_t = emulated_vector; - NBL_CONSTEXPR_STATIC_FUNC this_t create(this_t other) + NBL_CONSTEXPR_STATIC this_t create(this_t other) { this_t output; @@ -367,7 +367,7 @@ struct emulated_vector, CRTP> : } template - NBL_CONSTEXPR_STATIC_FUNC this_t create(vector other) + NBL_CONSTEXPR_STATIC this_t create(vector 
other) { this_t output; @@ -505,7 +505,7 @@ namespace impl template struct static_cast_helper, vector, void> { - NBL_CONSTEXPR_STATIC_FUNC emulated_vector_t2 cast(NBL_CONST_REF_ARG(vector) vec) + NBL_CONSTEXPR_STATIC emulated_vector_t2 cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t2 output; output.x = _static_cast(vec.x); @@ -518,7 +518,7 @@ struct static_cast_helper, vector, void> template struct static_cast_helper, vector, void> { - NBL_CONSTEXPR_STATIC_FUNC emulated_vector_t3 cast(NBL_CONST_REF_ARG(vector) vec) + NBL_CONSTEXPR_STATIC emulated_vector_t3 cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t3 output; output.x = _static_cast(vec.x); @@ -532,7 +532,7 @@ struct static_cast_helper, vector, void> template struct static_cast_helper, vector, void> { - NBL_CONSTEXPR_STATIC_FUNC emulated_vector_t4 cast(NBL_CONST_REF_ARG(vector) vec) + NBL_CONSTEXPR_STATIC emulated_vector_t4 cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t4 output; output.x = _static_cast(vec.x); @@ -550,7 +550,7 @@ struct static_cast_helper, emulated_vector_t; using InputVecType = emulated_vector_t; - NBL_CONSTEXPR_STATIC_FUNC OutputVecType cast(NBL_CONST_REF_ARG(InputVecType) vec) + NBL_CONSTEXPR_STATIC OutputVecType cast(NBL_CONST_REF_ARG(InputVecType) vec) { array_get getter; array_set setter; @@ -569,7 +569,7 @@ struct static_cast_helper, emulated_vecto {\ using OutputVecType = emulated_vector_t##N ;\ using InputVecType = emulated_vector_t##N ;\ - NBL_CONSTEXPR_STATIC_FUNC OutputVecType cast(NBL_CONST_REF_ARG(InputVecType) vec)\ + NBL_CONSTEXPR_STATIC OutputVecType cast(NBL_CONST_REF_ARG(InputVecType) vec)\ {\ array_get getter;\ array_set setter;\ diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index d570e249c8..35ce511359 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -115,7 +115,7 @@ struct Transcoder * * @param [in] decodedValue Cartesian coordinates to interleave and shift */ 
- NBL_CONSTEXPR_STATIC_FUNC portable_vector_t interleaveShift(NBL_CONST_REF_ARG(decode_t) decodedValue) + NBL_CONSTEXPR_STATIC portable_vector_t interleaveShift(NBL_CONST_REF_ARG(decode_t) decodedValue) { left_shift_operator > leftShift; portable_vector_t interleaved = _static_cast >(decodedValue) & coding_mask_v; @@ -144,7 +144,7 @@ struct Transcoder * * @param [in] decodedValue Cartesian coordinates to encode */ - NBL_CONSTEXPR_STATIC_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) + NBL_CONSTEXPR_STATIC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) { const portable_vector_t interleaveShifted = interleaveShift(decodedValue); @@ -165,7 +165,7 @@ struct Transcoder * * @param [in] encodedValue Representation of a Morton code (binary code, not the morton class defined below) */ - NBL_CONSTEXPR_STATIC_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + NBL_CONSTEXPR_STATIC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) { arithmetic_right_shift_operator encodedRightShift; portable_vector_t decoded; @@ -213,7 +213,7 @@ template struct Equals { template) - NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) + NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, ); const portable_vector_t zeros = _static_cast >(truncate >(vector(0,0,0,0))); @@ -229,7 +229,7 @@ template struct Equals { template) - NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { using right_sign_t = conditional_t, make_unsigned_t >; const portable_vector_t interleaved = _static_cast >(Transcoder::interleaveShift(rhs)); @@ -248,7 +248,7 @@ template { template) - 
NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) + NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, ); const portable_vector_t SignMasks = NBL_MORTON_SIGN_MASKS(storage_t, D, Bits); @@ -279,7 +279,7 @@ template { template) - NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { using right_sign_t = conditional_t, make_unsigned_t >; const portable_vector_t interleaved = _static_cast >(Transcoder::interleaveShift(rhs)); @@ -327,7 +327,7 @@ struct code * @param [in] cartesian Coordinates to encode. Signedness MUST match the signedness of this Morton code class */ template - NBL_CONSTEXPR_STATIC_FUNC enable_if_t && is_scalar_v && (is_signed_v == Signed) && (8 * sizeof(I) >= Bits), this_t> + NBL_CONSTEXPR_STATIC enable_if_t && is_scalar_v && (is_signed_v == Signed) && (8 * sizeof(I) >= Bits), this_t> create(NBL_CONST_REF_ARG(vector) cartesian) { this_t retVal; @@ -525,7 +525,7 @@ namespace impl template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) > { - NBL_CONSTEXPR_STATIC_FUNC vector cast(NBL_CONST_REF_ARG(morton::code, Bits, D, _uint64_t>) val) + NBL_CONSTEXPR_STATIC vector cast(NBL_CONST_REF_ARG(morton::code, Bits, D, _uint64_t>) val) { using storage_t = typename morton::code, Bits, D, _uint64_t>::storage_t; return morton::impl::Transcoder::decode(val.value); From e2401c6ff03dd39c58751a4e10d0a5d65065c23d Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 20 Nov 2025 15:55:51 +0700 Subject: [PATCH 115/472] Add template<> to signify 
specialization --- include/nbl/builtin/hlsl/emulated/int64_t.hlsl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 8a3fd42faf..7f52638c61 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -370,12 +370,14 @@ constexpr inline emulated_int64_base emulated_int64_base::operat return leftShift(*this, bits); } +template<> constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(uint32_t bits) const { arithmetic_right_shift_operator rightShift; return rightShift(*this, bits); } +template<> constexpr inline emulated_int64_t emulated_int64_t::operator>>(uint32_t bits) const { arithmetic_right_shift_operator rightShift; From 07f7a4acf59637f0641ed5ac485a55c28befd07b Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 20 Nov 2025 15:56:13 +0700 Subject: [PATCH 116/472] Remove duplicate partial specialization. --- include/nbl/builtin/hlsl/type_traits.hlsl | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/type_traits.hlsl b/include/nbl/builtin/hlsl/type_traits.hlsl index fb05e11fe2..bf2a35ede9 100644 --- a/include/nbl/builtin/hlsl/type_traits.hlsl +++ b/include/nbl/builtin/hlsl/type_traits.hlsl @@ -855,9 +855,6 @@ struct float_of_size<8> template using float_of_size_t = typename float_of_size::type; -template -struct extent, 0> : integral_constant {}; - template struct extent, 0> : integral_constant {}; From 42baa6c7eb7d78234d78ae12aba5d82ccdc32447 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 20 Nov 2025 15:56:54 +0700 Subject: [PATCH 117/472] Change NBL_CONSTEXPR_STATIC_FUNC to NBL_CONSTEXPR_STATIC --- .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 48 +++++++++++++------ 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 7f52638c61..89c9e2e733 100644 --- 
a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -43,7 +43,7 @@ struct emulated_int64_base * * @param [in] _data Vector of `uint32_t` encoding the `uint64_t/int64_t` being emulated. Stored as little endian (first component are the lower 32 bits) */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) + NBL_CONSTEXPR_STATIC this_t create(NBL_CONST_REF_ARG(storage_t) _data) { this_t retVal; retVal.data = _data; @@ -56,7 +56,7 @@ struct emulated_int64_base * @param [in] lo Lowest 32 bits of the `uint64_t/int64_t` being emulated * @param [in] hi Highest 32 bits of the `uint64_t/int64_t` being emulated */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) lo, NBL_CONST_REF_ARG(uint32_t) hi) + NBL_CONSTEXPR_STATIC this_t create(NBL_CONST_REF_ARG(uint32_t) lo, NBL_CONST_REF_ARG(uint32_t) hi) { return create(storage_t(lo, hi)); } @@ -114,6 +114,24 @@ struct emulated_int64_base constexpr inline this_t operator<<(uint32_t bits) const; constexpr inline this_t operator>>(uint32_t bits) const; + constexpr inline this_t& operator&=(const this_t& val) + { + data &= val.data; + return *this; + } + + constexpr inline this_t& operator|=(const this_t& val) + { + data |= val.data; + return *this; + } + + constexpr inline this_t& operator^=(const this_t& val) + { + data ^= val.data; + return *this; + } + #endif // ------------------------------------------------------- ARITHMETIC OPERATORS ------------------------------------------------- @@ -191,7 +209,7 @@ struct static_cast_helper, emulated_int64_base; using From = emulated_int64_base; - NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) other) + NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) other) { To retVal; retVal.data = other.data; @@ -206,7 +224,7 @@ struct static_cast_helper NBL_PARTIAL_REQ_BOT(con using From = emulated_int64_base; // Return only the lowest bits - NBL_CONSTEXPR_STATIC_FUNC To 
cast(NBL_CONST_REF_ARG(From) val) + NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) val) { return _static_cast(val.data.x); } @@ -218,7 +236,7 @@ struct static_cast_helper NBL_PARTIAL_REQ_BOT(con using To = I; using From = emulated_int64_base; - NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) val) + NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) val) { return bit_cast(val.data); } @@ -231,7 +249,7 @@ struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(con using From = I; // Set only lower bits - NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) i) + NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) i) { return To::create(_static_cast(i), uint32_t(0)); } @@ -243,7 +261,7 @@ struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(con using To = emulated_int64_base; using From = I; - NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) i) + NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) i) { // `bit_cast` blocked by GLM vectors using a union #ifndef __HLSL_VERSION @@ -417,13 +435,13 @@ struct minus > }; template<> -NBL_CONSTEXPR_INLINE emulated_uint64_t plus::identity = _static_cast(uint64_t(0)); +NBL_CONSTEXPR emulated_uint64_t plus::identity = _static_cast(uint64_t(0)); template<> -NBL_CONSTEXPR_INLINE emulated_int64_t plus::identity = _static_cast(int64_t(0)); +NBL_CONSTEXPR emulated_int64_t plus::identity = _static_cast(int64_t(0)); template<> -NBL_CONSTEXPR_INLINE emulated_uint64_t minus::identity = _static_cast(uint64_t(0)); +NBL_CONSTEXPR emulated_uint64_t minus::identity = _static_cast(uint64_t(0)); template<> -NBL_CONSTEXPR_INLINE emulated_int64_t minus::identity = _static_cast(int64_t(0)); +NBL_CONSTEXPR emulated_int64_t minus::identity = _static_cast(int64_t(0)); // --------------------------------- Compound assignment operators ------------------------------------------ // Specializations of the structs found in functional.hlsl @@ -457,13 +475,13 @@ struct minus_assign > }; template<> -NBL_CONSTEXPR_INLINE emulated_uint64_t 
plus_assign::identity = plus::identity; +NBL_CONSTEXPR emulated_uint64_t plus_assign::identity = plus::identity; template<> -NBL_CONSTEXPR_INLINE emulated_int64_t plus_assign::identity = plus::identity; +NBL_CONSTEXPR emulated_int64_t plus_assign::identity = plus::identity; template<> -NBL_CONSTEXPR_INLINE emulated_uint64_t minus_assign::identity = minus::identity; +NBL_CONSTEXPR emulated_uint64_t minus_assign::identity = minus::identity; template<> -NBL_CONSTEXPR_INLINE emulated_int64_t minus_assign::identity = minus::identity; +NBL_CONSTEXPR emulated_int64_t minus_assign::identity = minus::identity; // ------------------------------------------------ TYPE TRAITS SATISFIED ----------------------------------------------------- From 22e78eb4bbd9ae1675ec3812fe6773dc34ecc508 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 20 Nov 2025 15:57:53 +0700 Subject: [PATCH 118/472] Fix concatenation of 'operator' and OP with '##' since operatorOP is not a single token --- include/nbl/builtin/hlsl/emulated/vector_t.hlsl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index 47eb573359..cdeddeb105 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -133,17 +133,17 @@ struct emulated_vector; // Generic ComponentType vectors still have to be partial specialized based on whether they're fundamental and/or integral #define NBL_EMULATED_VECTOR_UNARY_OPERATOR(OP)\ -NBL_CONSTEXPR_FUNC this_t operator##OP() NBL_CONST_MEMBER_FUNC \ +NBL_CONSTEXPR_FUNC this_t operator OP() NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ [[unroll]]\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, this_t::getComponent(i).operator##OP());\ + output.setComponent(i, this_t::getComponent(i).operator OP());\ return output;\ } #define NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(OP)\ -NBL_CONSTEXPR_FUNC 
this_t operator##OP (component_t val) NBL_CONST_MEMBER_FUNC \ +NBL_CONSTEXPR_FUNC this_t operator OP (component_t val) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ [[unroll]]\ @@ -151,7 +151,7 @@ NBL_CONSTEXPR_FUNC this_t operator##OP (component_t val) NBL_CONST_MEMBER_FUNC \ output.setComponent(i, this_t::getComponent(i) OP val);\ return output;\ }\ -NBL_CONSTEXPR_FUNC this_t operator##OP (this_t other) NBL_CONST_MEMBER_FUNC \ +NBL_CONSTEXPR_FUNC this_t operator OP (this_t other) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ [[unroll]]\ @@ -161,7 +161,7 @@ NBL_CONSTEXPR_FUNC this_t operator##OP (this_t other) NBL_CONST_MEMBER_FUNC \ } #define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(OP) NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(OP)\ -NBL_CONSTEXPR_FUNC this_t operator##OP(vector other) NBL_CONST_MEMBER_FUNC \ +NBL_CONSTEXPR_FUNC this_t operator OP(vector other) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ [[unroll]]\ @@ -170,7 +170,7 @@ NBL_CONSTEXPR_FUNC this_t operator##OP(vector othe return output;\ } -#define NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(OP) NBL_CONSTEXPR_FUNC vector operator##OP (this_t other) NBL_CONST_MEMBER_FUNC \ +#define NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(OP) NBL_CONSTEXPR_FUNC vector operator OP (this_t other) NBL_CONST_MEMBER_FUNC \ {\ vector output;\ [[unroll]]\ @@ -180,7 +180,7 @@ NBL_CONSTEXPR_FUNC this_t operator##OP(vector othe } #define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(OP) NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(OP)\ -NBL_CONSTEXPR_FUNC vector operator##OP (vector other) NBL_CONST_MEMBER_FUNC \ +NBL_CONSTEXPR_FUNC vector operator OP (vector other) NBL_CONST_MEMBER_FUNC \ {\ vector output;\ [[unroll]]\ From 8daf855e0a88f6d5b81b6b94b1b3426a4da211ea Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 20 Nov 2025 16:31:27 +0700 Subject: [PATCH 119/472] 'equals' to 'equal' --- include/nbl/builtin/hlsl/morton.hlsl | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git 
a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 35ce511359..696124ae0c 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -207,10 +207,10 @@ template && is_signed_v == Signed && ((BitsAlreadySpread && sizeof(I) == sizeof(storage_t)) || (!BitsAlreadySpread && 8 * sizeof(I) == mpl::max_v, uint64_t(16)>)); template -struct Equals; +struct Equal; template -struct Equals +struct Equal { template) NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) @@ -226,14 +226,14 @@ struct Equals }; template -struct Equals +struct Equal { template) NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { using right_sign_t = conditional_t, make_unsigned_t >; const portable_vector_t interleaved = _static_cast >(Transcoder::interleaveShift(rhs)); - return Equals::template __call(value, interleaved); + return Equal::template __call(value, interleaved); } }; @@ -291,13 +291,13 @@ template > > {}; template -struct LessEquals : BaseComparison > > {}; +struct LessEqual : BaseComparison > > {}; template struct GreaterThan : BaseComparison > > {}; template -struct GreaterEquals : BaseComparison > > {}; +struct GreaterEqual : BaseComparison > > {}; } //namespace impl @@ -470,7 +470,7 @@ struct code NBL_FUNC_REQUIRES(impl::Comparable) NBL_CONSTEXPR_FUNC vector equal(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return impl::Equals::template __call(value, rhs); + return impl::Equal::template __call(value, rhs); } NBL_CONSTEXPR_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC @@ -494,9 +494,9 @@ struct code template) - NBL_CONSTEXPR_FUNC vector lessThanEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC vector lessThanEqual(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return impl::LessEquals::template __call(value, rhs); + return 
impl::LessEqual::template __call(value, rhs); } template) - NBL_CONSTEXPR_FUNC vector greaterThanEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC vector greaterThanEqual(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return impl::GreaterEquals::template __call(value, rhs); + return impl::GreaterEqual::template __call(value, rhs); } }; From 831244f2f407763511f47d9332560e5d2bf9d308 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 20 Nov 2025 16:31:41 +0700 Subject: [PATCH 120/472] Pass vec by value not ref --- include/nbl/builtin/hlsl/cpp_compat/promote.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl index 0afe214de7..27461d5949 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl @@ -15,7 +15,7 @@ namespace impl template struct Promote { - NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(U) v) + NBL_CONSTEXPR_FUNC T operator()(const U v) { return T(v); } From a560180f8ef3962921e3060423250fbb37ecb0ea Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 20 Nov 2025 16:41:08 +0700 Subject: [PATCH 121/472] Use truncate to truncate --- include/nbl/builtin/hlsl/morton.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 696124ae0c..4512774b14 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -118,7 +118,7 @@ struct Transcoder NBL_CONSTEXPR_STATIC portable_vector_t interleaveShift(NBL_CONST_REF_ARG(decode_t) decodedValue) { left_shift_operator > leftShift; - portable_vector_t interleaved = _static_cast >(decodedValue) & coding_mask_v; + portable_vector_t interleaved = truncate >(decodedValue) & coding_mask_v; #define ENCODE_LOOP_ITERATION(I) NBL_IF_CONSTEXPR(Bits > (uint16_t(1) << I))\ {\ From 
1183da33755a6036930910ab7678ddc2afcd718a Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Fri, 21 Nov 2025 12:28:48 +0100 Subject: [PATCH 122/472] Removed core::matrixSIMD --- include/ICameraSceneNode.h | 13 +- include/matrix3x4SIMD.h | 263 ---------- include/matrix3x4SIMD_impl.h | 470 ------------------ include/matrix4SIMD.h | 385 -------------- include/matrix4SIMD_impl.h | 299 ----------- include/nabla.h | 1 - include/nbl/asset/IAccelerationStructure.h | 8 +- include/nbl/asset/IAnimationLibrary.h | 6 +- include/nbl/asset/ICPUSkeleton.h | 8 +- include/nbl/asset/ISkeleton.h | 2 +- .../nbl/asset/utils/CQuantQuaternionCache.h | 2 +- .../nbl/builtin/hlsl/cpp_compat/unroll.hlsl | 12 + .../hlsl/math/quaternion/quaternion.hlsl | 101 ++++ .../hlsl/math/quaternion/quaternion_impl.hlsl | 25 + .../transformation_matrix_utils.hlsl | 203 ++++++++ include/nbl/core/declarations.h | 1 - include/nbl/core/definitions.h | 4 - include/nbl/core/math/floatutil.tcc | 13 +- include/nbl/core/math/glslFunctions.tcc | 32 -- include/nbl/core/math/matrixutil.h | 29 -- include/nbl/core/math/plane3dSIMD.h | 18 +- include/nbl/ext/Bullet/BulletUtility.h | 6 +- include/nbl/ext/Bullet/CPhysicsWorld.h | 2 +- include/nbl/ext/DebugDraw/CDraw3DLine.h | 12 +- .../EnvmapImportanceSampling.h | 4 +- include/nbl/ext/MitsubaLoader/CElementShape.h | 2 +- .../nbl/ext/MitsubaLoader/CElementTransform.h | 2 +- .../nbl/ext/MitsubaLoader/CMitsubaLoader.h | 8 +- .../CMitsubaMaterialCompilerFrontend.h | 2 +- .../nbl/ext/MitsubaLoader/PropertyElement.h | 15 +- include/nbl/ext/MitsubaLoader/SContext.h | 4 +- include/nbl/scene/ISkinInstanceCache.h | 4 +- include/nbl/scene/ISkinInstanceCacheManager.h | 2 +- include/nbl/video/IGPUAccelerationStructure.h | 6 +- src/nbl/builtin/CMakeLists.txt | 5 + 35 files changed, 414 insertions(+), 1555 deletions(-) delete mode 100644 include/matrix3x4SIMD.h delete mode 100644 include/matrix3x4SIMD_impl.h delete mode 100644 include/matrix4SIMD.h delete mode 100644 include/matrix4SIMD_impl.h 
create mode 100644 include/nbl/builtin/hlsl/cpp_compat/unroll.hlsl create mode 100644 include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl create mode 100644 include/nbl/builtin/hlsl/math/quaternion/quaternion_impl.hlsl create mode 100644 include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl delete mode 100644 include/nbl/core/math/matrixutil.h diff --git a/include/ICameraSceneNode.h b/include/ICameraSceneNode.h index e3975e3802..577b6d0fb6 100644 --- a/include/ICameraSceneNode.h +++ b/include/ICameraSceneNode.h @@ -6,6 +6,9 @@ #ifndef __NBL_I_CAMERA_SCENE_NODE_H_INCLUDED__ #define __NBL_I_CAMERA_SCENE_NODE_H_INCLUDED__ +#include +#include + #include "ISceneNode.h" #include "matrixutil.h" @@ -46,17 +49,17 @@ class ICameraSceneNode : public ISceneNode The function will figure it out if you've set an orthogonal matrix. \param projection The new projection matrix of the camera. */ - virtual void setProjectionMatrix(const core::matrix4SIMD& projection) =0; + virtual void setProjectionMatrix(const hlsl::float32_t4x4& projection) =0; //! Gets the current projection matrix of the camera. /** \return The current projection matrix of the camera. */ - inline const core::matrix4SIMD& getProjectionMatrix() const { return projMatrix; } + inline const hlsl::float32_t4x4& getProjectionMatrix() const { return projMatrix; } //! Gets the current view matrix of the camera. /** \return The current view matrix of the camera. */ - virtual const core::matrix3x4SIMD& getViewMatrix() const =0; + virtual const hlsl::float32_t3x4& getViewMatrix() const =0; - virtual const core::matrix4SIMD& getConcatenatedMatrix() const =0; + virtual const hlsl::float32_t4x4& getConcatenatedMatrix() const =0; #if 0 //! It is possible to send mouse and key events to the camera. /** Most cameras may ignore this input, but camera scene nodes @@ -198,7 +201,7 @@ class ICameraSceneNode : public ISceneNode float ZFar; // Z-value of the far view-plane. 
// actual projection matrix used - core::matrix4SIMD projMatrix; + hlsl::float32_t4x4 projMatrix; bool leftHanded; }; diff --git a/include/matrix3x4SIMD.h b/include/matrix3x4SIMD.h deleted file mode 100644 index d52f305cec..0000000000 --- a/include/matrix3x4SIMD.h +++ /dev/null @@ -1,263 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#ifndef __NBL_MATRIX3X4SIMD_H_INCLUDED__ -#define __NBL_MATRIX3X4SIMD_H_INCLUDED__ - -#include "vectorSIMD.h" -#include "quaternion.h" - -namespace nbl::core -{ - -class matrix4x3; - -#define _NBL_MATRIX_ALIGNMENT _NBL_SIMD_ALIGNMENT -static_assert(_NBL_MATRIX_ALIGNMENT>=_NBL_VECTOR_ALIGNMENT,"Matrix must be equally or more aligned than vector!"); - -//! Equivalent of GLSL's mat4x3 -class matrix3x4SIMD// : private AllocationOverrideBase<_NBL_MATRIX_ALIGNMENT> EBO inheritance problem w.r.t `rows[3]` -{ - public: - _NBL_STATIC_INLINE_CONSTEXPR uint32_t VectorCount = 3u; - vectorSIMDf rows[VectorCount]; - - explicit matrix3x4SIMD( const vectorSIMDf& _r0 = vectorSIMDf(1.f, 0.f, 0.f, 0.f), - const vectorSIMDf& _r1 = vectorSIMDf(0.f, 1.f, 0.f, 0.f), - const vectorSIMDf& _r2 = vectorSIMDf(0.f, 0.f, 1.f, 0.f)) : rows{_r0, _r1, _r2} - { - } - - matrix3x4SIMD( float _a00, float _a01, float _a02, float _a03, - float _a10, float _a11, float _a12, float _a13, - float _a20, float _a21, float _a22, float _a23) - : matrix3x4SIMD(vectorSIMDf(_a00, _a01, _a02, _a03), - vectorSIMDf(_a10, _a11, _a12, _a13), - vectorSIMDf(_a20, _a21, _a22, _a23)) - { - } - - explicit matrix3x4SIMD(const float* const _data) - { - if (!_data) - return; - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] = vectorSIMDf(_data + 4*i); - } - matrix3x4SIMD(const float* const _data, bool ALIGNED) - { - if (!_data) - return; - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] = vectorSIMDf(_data + 4*i, ALIGNED); - } - - float* 
pointer() { return rows[0].pointer; } - const float* pointer() const { return rows[0].pointer; } - - inline matrix3x4SIMD& set(const matrix4x3& _retarded); - inline matrix4x3 getAsRetardedIrrlichtMatrix() const; - - static inline matrix3x4SIMD concatenateBFollowedByA(const matrix3x4SIMD& _a, const matrix3x4SIMD& _b); - - static inline matrix3x4SIMD concatenateBFollowedByAPrecisely(const matrix3x4SIMD& _a, const matrix3x4SIMD& _b); - - inline matrix3x4SIMD& concatenateAfter(const matrix3x4SIMD& _other) - { - return *this = concatenateBFollowedByA(*this, _other); - } - - inline matrix3x4SIMD& concatenateBefore(const matrix3x4SIMD& _other) - { - return *this = concatenateBFollowedByA(_other, *this); - } - - inline matrix3x4SIMD& concatenateAfterPrecisely(const matrix3x4SIMD& _other) - { - return *this = concatenateBFollowedByAPrecisely(*this, _other); - } - - inline matrix3x4SIMD& concatenateBeforePrecisely(const matrix3x4SIMD& _other) - { - return *this = concatenateBFollowedByAPrecisely(_other, *this); - } - - inline bool operator==(const matrix3x4SIMD& _other) - { - return !(*this != _other); - } - - inline bool operator!=(const matrix3x4SIMD& _other); - - - inline matrix3x4SIMD operator-() const - { - matrix3x4SIMD retval; - retval.rows[0] = -rows[0]; - retval.rows[1] = -rows[1]; - retval.rows[2] = -rows[2]; - return retval; - } - - - inline matrix3x4SIMD& operator+=(const matrix3x4SIMD& _other); - inline matrix3x4SIMD operator+(const matrix3x4SIMD& _other) const - { - matrix3x4SIMD retval(*this); - return retval += _other; - } - - inline matrix3x4SIMD& operator-=(const matrix3x4SIMD& _other); - inline matrix3x4SIMD operator-(const matrix3x4SIMD& _other) const - { - matrix3x4SIMD retval(*this); - return retval -= _other; - } - - inline matrix3x4SIMD& operator*=(float _scalar); - inline matrix3x4SIMD operator*(float _scalar) const - { - matrix3x4SIMD retval(*this); - return retval *= _scalar; - } - - inline matrix3x4SIMD& setTranslation(const vectorSIMDf& 
_translation) - { - // no faster way of doing it? - rows[0].w = _translation.x; - rows[1].w = _translation.y; - rows[2].w = _translation.z; - return *this; - } - inline vectorSIMDf getTranslation() const; - inline vectorSIMDf getTranslation3D() const; - - inline matrix3x4SIMD& setScale(const vectorSIMDf& _scale); - - inline vectorSIMDf getScale() const; - - inline void transformVect(vectorSIMDf& _out, const vectorSIMDf& _in) const; - inline void transformVect(vectorSIMDf& _in_out) const - { - transformVect(_in_out, _in_out); - } - - inline void pseudoMulWith4x1(vectorSIMDf& _out, const vectorSIMDf& _in) const; - inline void pseudoMulWith4x1(vectorSIMDf& _in_out) const - { - pseudoMulWith4x1(_in_out,_in_out); - } - - inline void mulSub3x3WithNx1(vectorSIMDf& _out, const vectorSIMDf& _in) const; - inline void mulSub3x3WithNx1(vectorSIMDf& _in_out) const - { - mulSub3x3WithNx1(_in_out, _in_out); - } - - inline static matrix3x4SIMD buildCameraLookAtMatrixLH( - const vectorSIMDf& position, - const vectorSIMDf& target, - const vectorSIMDf& upVector); - inline static matrix3x4SIMD buildCameraLookAtMatrixRH( - const vectorSIMDf& position, - const vectorSIMDf& target, - const vectorSIMDf& upVector); - - inline matrix3x4SIMD& setRotation(const quaternion& _quat); - - inline matrix3x4SIMD& setScaleRotationAndTranslation( const vectorSIMDf& _scale, - const quaternion& _quat, - const vectorSIMDf& _translation); - - inline vectorSIMDf getPseudoDeterminant() const - { - vectorSIMDf tmp; - return determinant_helper(tmp); - } - - inline bool getInverse(matrix3x4SIMD& _out) const; - bool makeInverse() - { - matrix3x4SIMD tmp; - - if (getInverse(tmp)) - { - *this = tmp; - return true; - } - return false; - } - - // - inline bool getSub3x3InverseTranspose(matrix3x4SIMD& _out) const; - - // - inline bool getSub3x3InverseTransposePacked(float outRows[9]) const - { - matrix3x4SIMD tmp; - if (!getSub3x3InverseTranspose(tmp)) - return false; - - float* _out = outRows; - for (auto i=0; i<3; 
i++) - { - const auto& row = tmp.rows[i]; - for (auto j=0; j<3; j++) - *(_out++) = row[j]; - } - - return true; - } - - // - inline core::matrix3x4SIMD getSub3x3TransposeCofactors() const; - - // - inline void setTransformationCenter(const vectorSIMDf& _center, const vectorSIMDf& _translation); - - // - static inline matrix3x4SIMD buildAxisAlignedBillboard( - const vectorSIMDf& camPos, - const vectorSIMDf& center, - const vectorSIMDf& translation, - const vectorSIMDf& axis, - const vectorSIMDf& from); - - - // - float& operator()(size_t _i, size_t _j) { return rows[_i].pointer[_j]; } - const float& operator()(size_t _i, size_t _j) const { return rows[_i].pointer[_j]; } - - // - inline const vectorSIMDf& operator[](size_t _rown) const { return rows[_rown]; } - inline vectorSIMDf& operator[](size_t _rown) { return rows[_rown]; } - - private: - static inline vectorSIMDf doJob(const __m128& a, const matrix3x4SIMD& _mtx); - - // really need that dvec<2> or wider - inline __m128d halfRowAsDouble(size_t _n, bool _0) const; - static inline __m128d doJob_d(const __m128d& _a0, const __m128d& _a1, const matrix3x4SIMD& _mtx, bool _xyHalf); - - vectorSIMDf determinant_helper(vectorSIMDf& r1crossr2) const - { - r1crossr2 = core::cross(rows[1], rows[2]); - return core::dot(rows[0], r1crossr2); - } -}; - -inline matrix3x4SIMD concatenateBFollowedByA(const matrix3x4SIMD& _a, const matrix3x4SIMD& _b) -{ - return matrix3x4SIMD::concatenateBFollowedByA(_a, _b); -} -/* -inline matrix3x4SIMD concatenateBFollowedByAPrecisely(const matrix3x4SIMD& _a, const matrix3x4SIMD& _b) -{ - return matrix3x4SIMD::concatenateBFollowedByAPrecisely(_a, _b); -} -*/ - -} - -#endif diff --git a/include/matrix3x4SIMD_impl.h b/include/matrix3x4SIMD_impl.h deleted file mode 100644 index 0e9022efd0..0000000000 --- a/include/matrix3x4SIMD_impl.h +++ /dev/null @@ -1,470 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". 
-// For conditions of distribution and use, see copyright notice in nabla.h - -#ifndef _NBL_MATRIX3X4SIMD_IMPL_H_INCLUDED_ -#define _NBL_MATRIX3X4SIMD_IMPL_H_INCLUDED_ - -#include "matrix3x4SIMD.h" -#include "nbl/core/math/glslFunctions.tcc" - -namespace nbl::core -{ - -// TODO: move to another implementation header -inline quaternion::quaternion(const matrix3x4SIMD& m) -{ - const vectorSIMDf one(1.f); - auto Qx = m.rows[0].xxxx()^vectorSIMDu32(0,0,0x80000000u,0x80000000u); - auto Qy = m.rows[1].yyyy()^vectorSIMDu32(0,0x80000000u,0,0x80000000u); - auto Qz = m.rows[2].zzzz()^vectorSIMDu32(0,0x80000000u,0x80000000u,0); - - auto tmp = one+Qx+Qy+Qz; - auto invscales = inversesqrt(tmp)*0.5f; - auto scales = tmp*invscales*0.5f; - - // TODO: speed this up - if (tmp.x > 0.0f) - { - X = (m(2, 1) - m(1, 2)) * invscales.x; - Y = (m(0, 2) - m(2, 0)) * invscales.x; - Z = (m(1, 0) - m(0, 1)) * invscales.x; - W = scales.x; - } - else - { - if (tmp.y>0.f) - { - X = scales.y; - Y = (m(0, 1) + m(1, 0)) * invscales.y; - Z = (m(2, 0) + m(0, 2)) * invscales.y; - W = (m(2, 1) - m(1, 2)) * invscales.y; - } - else if (tmp.z>0.f) - { - X = (m(0, 1) + m(1, 0)) * invscales.z; - Y = scales.z; - Z = (m(1, 2) + m(2, 1)) * invscales.z; - W = (m(0, 2) - m(2, 0)) * invscales.z; - } - else - { - X = (m(0, 2) + m(2, 0)) * invscales.w; - Y = (m(1, 2) + m(2, 1)) * invscales.w; - Z = scales.w; - W = (m(1, 0) - m(0, 1)) * invscales.w; - } - } - - *this = normalize(*this); -} - -inline bool matrix3x4SIMD::operator!=(const matrix3x4SIMD& _other) -{ - for (size_t i = 0u; i < VectorCount; ++i) - if ((rows[i] != _other.rows[i]).any()) - return true; - return false; -} - -inline matrix3x4SIMD& matrix3x4SIMD::operator+=(const matrix3x4SIMD& _other) -{ - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] += _other.rows[i]; - return *this; -} -inline matrix3x4SIMD& matrix3x4SIMD::operator-=(const matrix3x4SIMD& _other) -{ - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] -= _other.rows[i]; - return *this; 
-} -inline matrix3x4SIMD& matrix3x4SIMD::operator*=(float _scalar) -{ - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] *= _scalar; - return *this; -} - -#ifdef __NBL_COMPILE_WITH_SSE3 -#define BROADCAST32(fpx) _MM_SHUFFLE(fpx, fpx, fpx, fpx) -#define BUILD_XORMASKF(_x_, _y_, _z_, _w_) _mm_setr_epi32(_x_ ? 0x80000000u:0x0u, _y_ ? 0x80000000u:0x0u, _z_ ? 0x80000000u:0x0u, _w_ ? 0x80000000u:0x0u) -#define BUILD_MASKF(_x_, _y_, _z_, _w_) _mm_setr_epi32(_x_*0xffffffff, _y_*0xffffffff, _z_*0xffffffff, _w_*0xffffffff) - -inline matrix3x4SIMD matrix3x4SIMD::concatenateBFollowedByA(const matrix3x4SIMD& _a, const matrix3x4SIMD& _b) -{ -#ifdef _NBL_DEBUG - assert(is_aligned_to(&_a, _NBL_SIMD_ALIGNMENT)); - assert(is_aligned_to(&_b, _NBL_SIMD_ALIGNMENT)); -#endif // _NBL_DEBUG - __m128 r0 = _a.rows[0].getAsRegister(); - __m128 r1 = _a.rows[1].getAsRegister(); - __m128 r2 = _a.rows[2].getAsRegister(); - - matrix3x4SIMD out; - out.rows[0] = matrix3x4SIMD::doJob(r0, _b); - out.rows[1] = matrix3x4SIMD::doJob(r1, _b); - out.rows[2] = matrix3x4SIMD::doJob(r2, _b); - - return out; -} - -inline matrix3x4SIMD matrix3x4SIMD::concatenateBFollowedByAPrecisely(const matrix3x4SIMD& _a, const matrix3x4SIMD& _b) -{ - __m128d r00 = _a.halfRowAsDouble(0u, true); - __m128d r01 = _a.halfRowAsDouble(0u, false); - __m128d r10 = _a.halfRowAsDouble(1u, true); - __m128d r11 = _a.halfRowAsDouble(1u, false); - __m128d r20 = _a.halfRowAsDouble(2u, true); - __m128d r21 = _a.halfRowAsDouble(2u, false); - - matrix3x4SIMD out; - - const __m128i mask0011 = BUILD_MASKF(0, 0, 1, 1); - - __m128 second = _mm_cvtpd_ps(matrix3x4SIMD::doJob_d(r00, r01, _b, false)); - out.rows[0] = vectorSIMDf(_mm_cvtpd_ps(matrix3x4SIMD::doJob_d(r00, r01, _b, true))) | _mm_castps_si128((vectorSIMDf(_mm_movelh_ps(second, second)) & mask0011).getAsRegister()); - - second = _mm_cvtpd_ps(matrix3x4SIMD::doJob_d(r10, r11, _b, false)); - out.rows[1] = vectorSIMDf(_mm_cvtpd_ps(matrix3x4SIMD::doJob_d(r10, r11, _b, true))) | 
_mm_castps_si128((vectorSIMDf(_mm_movelh_ps(second, second)) & mask0011).getAsRegister()); - - second = _mm_cvtpd_ps(matrix3x4SIMD::doJob_d(r20, r21, _b, false)); - out.rows[2] = vectorSIMDf(_mm_cvtpd_ps(matrix3x4SIMD::doJob_d(r20, r21, _b, true))) | _mm_castps_si128((vectorSIMDf(_mm_movelh_ps(second, second)) & mask0011).getAsRegister()); - - return out; -} - -inline vectorSIMDf matrix3x4SIMD::getTranslation() const -{ - __m128 xmm0 = _mm_unpackhi_ps(rows[0].getAsRegister(), rows[1].getAsRegister()); // (0z,1z,0w,1w) - __m128 xmm1 = _mm_unpackhi_ps(rows[2].getAsRegister(), _mm_setr_ps(0.f, 0.f, 0.f, 1.f)); // (2z,3z,2w,3w) - __m128 xmm2 = _mm_movehl_ps(xmm1, xmm0);// (0w,1w,2w,3w) - - return xmm2; -} -inline vectorSIMDf matrix3x4SIMD::getTranslation3D() const -{ - __m128 xmm0 = _mm_unpackhi_ps(rows[0].getAsRegister(), rows[1].getAsRegister()); // (0z,1z,0w,1w) - __m128 xmm1 = _mm_unpackhi_ps(rows[2].getAsRegister(), _mm_setzero_ps()); // (2z,0,2w,0) - __m128 xmm2 = _mm_movehl_ps(xmm1, xmm0);// (0w,1w,2w,0) - - return xmm2; -} - -inline matrix3x4SIMD& matrix3x4SIMD::setScale(const core::vectorSIMDf& _scale) -{ - const vectorSIMDu32 mask0001 = vectorSIMDu32(BUILD_MASKF(0, 0, 0, 1)); - const vectorSIMDu32 mask0010 = vectorSIMDu32(BUILD_MASKF(0, 0, 1, 0)); - const vectorSIMDu32 mask0100 = vectorSIMDu32(BUILD_MASKF(0, 1, 0, 0)); - const vectorSIMDu32 mask1000 = vectorSIMDu32(BUILD_MASKF(1, 0, 0, 0)); - - const vectorSIMDu32& scaleAlias = reinterpret_cast(_scale); - - vectorSIMDu32& rowAlias0 = reinterpret_cast(rows[0]); - vectorSIMDu32& rowAlias1 = reinterpret_cast(rows[1]); - vectorSIMDu32& rowAlias2 = reinterpret_cast(rows[2]); - rowAlias0 = (scaleAlias & reinterpret_cast(mask1000)) | (rowAlias0 & reinterpret_cast(mask0001)); - rowAlias1 = (scaleAlias & reinterpret_cast(mask0100)) | (rowAlias1 & reinterpret_cast(mask0001)); - rowAlias2 = (scaleAlias & reinterpret_cast(mask0010)) | (rowAlias2 & reinterpret_cast(mask0001)); - - return *this; -} - -inline 
core::vectorSIMDf matrix3x4SIMD::getScale() const -{ - // xmm4-7 will now become columuns of B - __m128 xmm4 = rows[0].getAsRegister(); - __m128 xmm5 = rows[1].getAsRegister(); - __m128 xmm6 = rows[2].getAsRegister(); - __m128 xmm7 = _mm_setzero_ps(); - // g==0 - __m128 xmm0 = _mm_unpacklo_ps(xmm4, xmm5); - __m128 xmm1 = _mm_unpacklo_ps(xmm6, xmm7); // (2x,g,2y,g) - __m128 xmm2 = _mm_unpackhi_ps(xmm4, xmm5); - __m128 xmm3 = _mm_unpackhi_ps(xmm6, xmm7); // (2z,g,2w,g) - xmm4 = _mm_movelh_ps(xmm1, xmm0); //(0x,1x,2x,g) - xmm5 = _mm_movehl_ps(xmm1, xmm0); - xmm6 = _mm_movelh_ps(xmm3, xmm2); //(0z,1z,2z,g) - - // See http://www.robertblum.com/articles/2005/02/14/decomposing-matrices - // We have to do the full calculation. - xmm0 = _mm_mul_ps(xmm4, xmm4);// column 0 squared - xmm1 = _mm_mul_ps(xmm5, xmm5);// column 1 squared - xmm2 = _mm_mul_ps(xmm6, xmm6);// column 2 squared - xmm4 = _mm_hadd_ps(xmm0, xmm1); - xmm5 = _mm_hadd_ps(xmm2, xmm7); - xmm6 = _mm_hadd_ps(xmm4, xmm5); - - return _mm_sqrt_ps(xmm6); -} - -inline void matrix3x4SIMD::transformVect(vectorSIMDf& _out, const vectorSIMDf& _in) const -{ - vectorSIMDf r0 = rows[0] * _in, - r1 = rows[1] * _in, - r2 = rows[2] * _in; - - _out = - _mm_hadd_ps( - _mm_hadd_ps(r0.getAsRegister(), r1.getAsRegister()), - _mm_hadd_ps(r2.getAsRegister(), _mm_set1_ps(0.25f)) - ); -} - -inline void matrix3x4SIMD::pseudoMulWith4x1(vectorSIMDf& _out, const vectorSIMDf& _in) const -{ - __m128i mask1110 = BUILD_MASKF(1, 1, 1, 0); - _out = (_in & mask1110) | _mm_castps_si128(vectorSIMDf(0.f, 0.f, 0.f, 1.f).getAsRegister()); - transformVect(_out); -} - -inline void matrix3x4SIMD::mulSub3x3WithNx1(vectorSIMDf& _out, const vectorSIMDf& _in) const -{ - auto maskedIn = _in & BUILD_MASKF(1, 1, 1, 0); - vectorSIMDf r0 = rows[0] * maskedIn, - r1 = rows[1] * maskedIn, - r2 = rows[2] * maskedIn; - - _out = - _mm_hadd_ps( - _mm_hadd_ps(r0.getAsRegister(), r1.getAsRegister()), - _mm_hadd_ps(r2.getAsRegister(), _mm_setzero_ps()) - ); -} - - -inline 
matrix3x4SIMD matrix3x4SIMD::buildCameraLookAtMatrixLH( - const core::vectorSIMDf& position, - const core::vectorSIMDf& target, - const core::vectorSIMDf& upVector) -{ - const core::vectorSIMDf zaxis = core::normalize(target - position); - const core::vectorSIMDf xaxis = core::normalize(core::cross(upVector, zaxis)); - const core::vectorSIMDf yaxis = core::cross(zaxis, xaxis); - - matrix3x4SIMD r; - r.rows[0] = xaxis; - r.rows[1] = yaxis; - r.rows[2] = zaxis; - r.rows[0].w = -dot(xaxis, position)[0]; - r.rows[1].w = -dot(yaxis, position)[0]; - r.rows[2].w = -dot(zaxis, position)[0]; - - return r; -} -inline matrix3x4SIMD matrix3x4SIMD::buildCameraLookAtMatrixRH( - const core::vectorSIMDf& position, - const core::vectorSIMDf& target, - const core::vectorSIMDf& upVector) -{ - const core::vectorSIMDf zaxis = core::normalize(position - target); - const core::vectorSIMDf xaxis = core::normalize(core::cross(upVector, zaxis)); - const core::vectorSIMDf yaxis = core::cross(zaxis, xaxis); - - matrix3x4SIMD r; - r.rows[0] = xaxis; - r.rows[1] = yaxis; - r.rows[2] = zaxis; - r.rows[0].w = -dot(xaxis, position)[0]; - r.rows[1].w = -dot(yaxis, position)[0]; - r.rows[2].w = -dot(zaxis, position)[0]; - - return r; -} - -inline matrix3x4SIMD& matrix3x4SIMD::setRotation(const core::quaternion& _quat) -{ - const vectorSIMDu32 mask0001 = vectorSIMDu32(BUILD_MASKF(0, 0, 0, 1)); - const __m128i mask1110 = BUILD_MASKF(1, 1, 1, 0); - - const core::vectorSIMDf& quat = reinterpret_cast(_quat); - rows[0] = ((quat.yyyy() * ((quat.yxwx() & mask1110) * vectorSIMDf(2.f))) + (quat.zzzz() * (quat.zwxx() & mask1110) * vectorSIMDf(2.f, -2.f, 2.f, 0.f))) | (reinterpret_cast(rows[0]) & (mask0001)); - rows[0].x = 1.f - rows[0].x; - - rows[1] = ((quat.zzzz() * ((quat.wzyx() & mask1110) * vectorSIMDf(2.f))) + (quat.xxxx() * (quat.yxwx() & mask1110) * vectorSIMDf(2.f, 2.f, -2.f, 0.f))) | (reinterpret_cast(rows[1]) & (mask0001)); - rows[1].y = 1.f - rows[1].y; - - rows[2] = ((quat.xxxx() * ((quat.zwxx() & 
mask1110) * vectorSIMDf(2.f))) + (quat.yyyy() * (quat.wzyx() & mask1110) * vectorSIMDf(-2.f, 2.f, 2.f, 0.f))) | (reinterpret_cast(rows[2]) & (mask0001)); - rows[2].z = 1.f - rows[2].z; - - return *this; -} - -inline matrix3x4SIMD& matrix3x4SIMD::setScaleRotationAndTranslation(const vectorSIMDf& _scale, const core::quaternion& _quat, const vectorSIMDf& _translation) -{ - const __m128i mask1110 = BUILD_MASKF(1, 1, 1, 0); - - const vectorSIMDf& quat = reinterpret_cast(_quat); - const vectorSIMDf dblScale = (_scale * 2.f) & mask1110; - - vectorSIMDf mlt = dblScale ^ BUILD_XORMASKF(0, 1, 0, 0); - rows[0] = ((quat.yyyy() * ((quat.yxwx() & mask1110) * dblScale)) + (quat.zzzz() * (quat.zwxx() & mask1110) * mlt)); - rows[0].x = _scale.x - rows[0].x; - - mlt = dblScale ^ BUILD_XORMASKF(0, 0, 1, 0); - rows[1] = ((quat.zzzz() * ((quat.wzyx() & mask1110) * dblScale)) + (quat.xxxx() * (quat.yxwx() & mask1110) * mlt)); - rows[1].y = _scale.y - rows[1].y; - - mlt = dblScale ^ BUILD_XORMASKF(1, 0, 0, 0); - rows[2] = ((quat.xxxx() * ((quat.zwxx() & mask1110) * dblScale)) + (quat.yyyy() * (quat.wzyx() & mask1110) * mlt)); - rows[2].z = _scale.z - rows[2].z; - - setTranslation(_translation); - - return *this; -} - - -inline bool matrix3x4SIMD::getInverse(matrix3x4SIMD& _out) const //! SUBOPTIMAL - OPTIMIZE! 
-{ - auto translation = getTranslation(); - // `tmp` will have columns in its `rows` - core::matrix4SIMD tmp; - auto* cols = tmp.rows; - if (!getSub3x3InverseTranspose(reinterpret_cast(tmp))) - return false; - - // find inverse post-translation - cols[3] = -cols[0]*translation.xxxx()-cols[1]*translation.yyyy()-cols[2]*translation.zzzz(); - - // columns into rows - _out = transpose(tmp).extractSub3x4(); - - return true; -} - -inline bool matrix3x4SIMD::getSub3x3InverseTranspose(core::matrix3x4SIMD& _out) const -{ - vectorSIMDf r1crossr2; - const vectorSIMDf d = determinant_helper(r1crossr2); - if (core::iszero(d.x, FLT_MIN)) - return false; - auto rcp = core::reciprocal(d); - - // matrix of cofactors * 1/det - _out = getSub3x3TransposeCofactors(); - _out.rows[0] *= rcp; - _out.rows[1] *= rcp; - _out.rows[2] *= rcp; - - return true; -} - -inline core::matrix3x4SIMD matrix3x4SIMD::getSub3x3TransposeCofactors() const -{ - core::matrix3x4SIMD _out; - _out.rows[0] = core::cross(rows[1], rows[2]); - _out.rows[1] = core::cross(rows[2], rows[0]); - _out.rows[2] = core::cross(rows[0], rows[1]); - return _out; -} - -// TODO: Double check this!- -inline void matrix3x4SIMD::setTransformationCenter(const core::vectorSIMDf& _center, const core::vectorSIMDf& _translation) -{ - core::vectorSIMDf r0 = rows[0] * _center; - core::vectorSIMDf r1 = rows[1] * _center; - core::vectorSIMDf r2 = rows[2] * _center; - core::vectorSIMDf r3(0.f, 0.f, 0.f, 1.f); - - __m128 col3 = _mm_hadd_ps(_mm_hadd_ps(r0.getAsRegister(), r1.getAsRegister()), _mm_hadd_ps(r2.getAsRegister(), r3.getAsRegister())); - const vectorSIMDf vcol3 = _center - _translation - col3; - - for (size_t i = 0u; i < VectorCount; ++i) - rows[i].w = vcol3.pointer[i]; -} - - -// TODO: Double check this! 
-inline matrix3x4SIMD matrix3x4SIMD::buildAxisAlignedBillboard( - const core::vectorSIMDf& camPos, - const core::vectorSIMDf& center, - const core::vectorSIMDf& translation, - const core::vectorSIMDf& axis, - const core::vectorSIMDf& from) -{ - // axis of rotation - const core::vectorSIMDf up = core::normalize(axis); - const core::vectorSIMDf forward = core::normalize(camPos - center); - const core::vectorSIMDf right = core::normalize(core::cross(up, forward)); - - // correct look vector - const core::vectorSIMDf look = core::cross(right, up); - - // rotate from to - // axis multiplication by sin - const core::vectorSIMDf vs = core::cross(look, from); - - // cosinus angle - const core::vectorSIMDf ca = core::cross(from, look); - - const core::vectorSIMDf vt(up * (core::vectorSIMDf(1.f) - ca)); - const core::vectorSIMDf wt = vt * up.yzxx(); - const core::vectorSIMDf vtuppca = vt * up + ca; - - matrix3x4SIMD mat; - core::vectorSIMDf& row0 = mat.rows[0]; - core::vectorSIMDf& row1 = mat.rows[1]; - core::vectorSIMDf& row2 = mat.rows[2]; - - row0 = vtuppca & BUILD_MASKF(1, 0, 0, 0); - row1 = vtuppca & BUILD_MASKF(0, 1, 0, 0); - row2 = vtuppca & BUILD_MASKF(0, 0, 1, 0); - - row0 += (wt.xxzx() + vs.xzyx() * core::vectorSIMDf(1.f, 1.f, -1.f, 1.f)) & BUILD_MASKF(0, 1, 1, 0); - row1 += (wt.xxyx() + vs.zxxx() * core::vectorSIMDf(-1.f, 1.f, 1.f, 1.f)) & BUILD_MASKF(1, 0, 1, 0); - row2 += (wt.zyxx() + vs.yxxx() * core::vectorSIMDf(1.f, -1.f, 1.f, 1.f)) & BUILD_MASKF(1, 1, 0, 0); - - mat.setTransformationCenter(center, translation); - return mat; -} - - - -inline vectorSIMDf matrix3x4SIMD::doJob(const __m128& a, const matrix3x4SIMD& _mtx) -{ - __m128 r0 = _mtx.rows[0].getAsRegister(); - __m128 r1 = _mtx.rows[1].getAsRegister(); - __m128 r2 = _mtx.rows[2].getAsRegister(); - - const __m128i mask = _mm_setr_epi32(0, 0, 0, 0xffffffff); - - vectorSIMDf res; - res = _mm_mul_ps(_mm_shuffle_ps(a, a, BROADCAST32(0)), r0); - res += _mm_mul_ps(_mm_shuffle_ps(a, a, BROADCAST32(1)), r1); - 
res += _mm_mul_ps(_mm_shuffle_ps(a, a, BROADCAST32(2)), r2); - res += vectorSIMDf(a) & mask; // always 0 0 0 a3 -- no shuffle needed - return res; - } - -inline __m128d matrix3x4SIMD::halfRowAsDouble(size_t _n, bool _0) const -{ - return _mm_cvtps_pd(_0 ? rows[_n].xyxx().getAsRegister() : rows[_n].zwxx().getAsRegister()); -} -inline __m128d matrix3x4SIMD::doJob_d(const __m128d& _a0, const __m128d& _a1, const matrix3x4SIMD& _mtx, bool _xyHalf) -{ - __m128d r0 = _mtx.halfRowAsDouble(0u, _xyHalf); - __m128d r1 = _mtx.halfRowAsDouble(1u, _xyHalf); - __m128d r2 = _mtx.halfRowAsDouble(2u, _xyHalf); - - const __m128d mask01 = _mm_castsi128_pd(_mm_setr_epi32(0, 0, 0xffffffff, 0xffffffff)); - - __m128d res; - res = _mm_mul_pd(_mm_shuffle_pd(_a0, _a0, 0), r0); - res = _mm_add_pd(res, _mm_mul_pd(_mm_shuffle_pd(_a0, _a0, 3), r1)); - res = _mm_add_pd(res, _mm_mul_pd(_mm_shuffle_pd(_a1, _a1, 0), r2)); - if (!_xyHalf) - res = _mm_add_pd(res, _mm_and_pd(_a1, mask01)); - return res; -} - -#undef BUILD_MASKF -#undef BUILD_XORMASKF -#undef BROADCAST32 -#else -#error "no implementation" -#endif - -} // nbl::core - -#endif diff --git a/include/matrix4SIMD.h b/include/matrix4SIMD.h deleted file mode 100644 index 03126c61f7..0000000000 --- a/include/matrix4SIMD.h +++ /dev/null @@ -1,385 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". 
-// For conditions of distribution and use, see copyright notice in nabla.h - -#ifndef __NBL_MATRIX4SIMD_H_INCLUDED__ -#define __NBL_MATRIX4SIMD_H_INCLUDED__ - -#include "matrix3x4SIMD.h" - -namespace nbl -{ -namespace core -{ - -template -class aabbox3d; - - -class matrix4SIMD// : public AlignedBase<_NBL_SIMD_ALIGNMENT> don't inherit from AlignedBase (which is empty) because member `rows[4]` inherits from it as well -{ - public: - _NBL_STATIC_INLINE_CONSTEXPR uint32_t VectorCount = 4u; - vectorSIMDf rows[VectorCount]; - - inline explicit matrix4SIMD(const vectorSIMDf& _r0 = vectorSIMDf(1.f, 0.f, 0.f, 0.f), - const vectorSIMDf& _r1 = vectorSIMDf(0.f, 1.f, 0.f, 0.f), - const vectorSIMDf& _r2 = vectorSIMDf(0.f, 0.f, 1.f, 0.f), - const vectorSIMDf& _r3 = vectorSIMDf(0.f, 0.f, 0.f, 1.f)) - : rows{ _r0, _r1, _r2, _r3 } - { - } - - inline matrix4SIMD( float _a00, float _a01, float _a02, float _a03, - float _a10, float _a11, float _a12, float _a13, - float _a20, float _a21, float _a22, float _a23, - float _a30, float _a31, float _a32, float _a33) - : matrix4SIMD( vectorSIMDf(_a00, _a01, _a02, _a03), - vectorSIMDf(_a10, _a11, _a12, _a13), - vectorSIMDf(_a20, _a21, _a22, _a23), - vectorSIMDf(_a30, _a31, _a32, _a33)) - { - } - - inline explicit matrix4SIMD(const float* const _data) - { - if (!_data) - return; - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] = vectorSIMDf(_data + 4 * i); - } - inline matrix4SIMD(const float* const _data, bool ALIGNED) - { - if (!_data) - return; - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] = vectorSIMDf(_data + 4 * i, ALIGNED); - } - - inline explicit matrix4SIMD(const matrix3x4SIMD& smallMat) - { - *reinterpret_cast(this) = smallMat; - rows[3].set(0.f,0.f,0.f,1.f); - } - - inline matrix3x4SIMD extractSub3x4() const - { - return matrix3x4SIMD(rows[0],rows[1],rows[2]); - } - - //! 
Access by row - inline const vectorSIMDf& getRow(size_t _rown) const{ return rows[_rown]; } - inline vectorSIMDf& getRow(size_t _rown) { return rows[_rown]; } - - //! Access by element - inline float operator()(size_t _i, size_t _j) const { return rows[_i].pointer[_j]; } - inline float& operator()(size_t _i, size_t _j) { return rows[_i].pointer[_j]; } - - //! Access for memory - inline const float* pointer() const {return rows[0].pointer;} - inline float* pointer() {return rows[0].pointer;} - - - inline bool operator==(const matrix4SIMD& _other) const - { - return !(*this != _other); - } - inline bool operator!=(const matrix4SIMD& _other) const; - - inline matrix4SIMD& operator+=(const matrix4SIMD& _other); - inline matrix4SIMD operator+(const matrix4SIMD& _other) const - { - matrix4SIMD r{*this}; - return r += _other; - } - - inline matrix4SIMD& operator-=(const matrix4SIMD& _other); - inline matrix4SIMD operator-(const matrix4SIMD& _other) const - { - matrix4SIMD r{*this}; - return r -= _other; - } - - inline matrix4SIMD& operator*=(float _scalar); - inline matrix4SIMD operator*(float _scalar) const - { - matrix4SIMD r{*this}; - return r *= _scalar; - } - - static inline matrix4SIMD concatenateBFollowedByA(const matrix4SIMD& _a, const matrix4SIMD& _b); - static inline matrix4SIMD concatenateBFollowedByAPrecisely(const matrix4SIMD& _a, const matrix4SIMD& _b); - - inline bool isIdentity() const - { - return *this == matrix4SIMD(); - } - inline bool isIdentity(float _tolerance) const; - - inline bool isOrthogonal() const - { - return concatenateBFollowedByA(transpose(*this), *this).isIdentity(); - } - inline bool isOrthogonal(float _tolerance) const - { - return concatenateBFollowedByA(transpose(*this), *this).isIdentity(_tolerance); - } - - inline matrix4SIMD& setScale(const core::vectorSIMDf& _scale); - inline matrix4SIMD& setScale(float _scale) - { - return setScale(vectorSIMDf(_scale)); - } - - inline void setTranslation(const float* _t) - { - for (size_t i = 
0u; i < 3u; ++i) - rows[i].w = _t[i]; - } - //! Takes into account only x,y,z components of _t - inline void setTranslation(const vectorSIMDf& _t) - { - setTranslation(_t.pointer); - } - inline void setTranslation(const vector3d& _t) - { - setTranslation(&_t.X); - } - - //! Returns last column of the matrix. - inline vectorSIMDf getTranslation() const; - - //! Returns translation part of the matrix (w component is always 0). - inline vectorSIMDf getTranslation3D() const; - - enum class E_MATRIX_INVERSE_PRECISION - { - EMIP_FAST_RECIPROCAL, - EMIP_32BIT, - EMIP_64BBIT - }; - - template - inline bool getInverseTransform(matrix4SIMD& _out) const - { - if constexpr (precision == E_MATRIX_INVERSE_PRECISION::EMIP_64BBIT) - { - double a = rows[0][0], b = rows[0][1], c = rows[0][2], d = rows[0][3]; - double e = rows[1][0], f = rows[1][1], g = rows[1][2], h = rows[1][3]; - double i = rows[2][0], j = rows[2][1], k = rows[2][2], l = rows[2][3]; - double m = rows[3][0], n = rows[3][1], o = rows[3][2], p = rows[3][3]; - - double kp_lo = k * p - l * o; - double jp_ln = j * p - l * n; - double jo_kn = j * o - k * n; - double ip_lm = i * p - l * m; - double io_km = i * o - k * m; - double in_jm = i * n - j * m; - - double a11 = +(f * kp_lo - g * jp_ln + h * jo_kn); - double a12 = -(e * kp_lo - g * ip_lm + h * io_km); - double a13 = +(e * jp_ln - f * ip_lm + h * in_jm); - double a14 = -(e * jo_kn - f * io_km + g * in_jm); - - double det = a * a11 + b * a12 + c * a13 + d * a14; - - if (core::iszero(det, DBL_MIN)) - return false; - - double invDet = 1.0 / det; - - _out.rows[0][0] = a11 * invDet; - _out.rows[1][0] = a12 * invDet; - _out.rows[2][0] = a13 * invDet; - _out.rows[3][0] = a14 * invDet; - - _out.rows[0][1] = -(b * kp_lo - c * jp_ln + d * jo_kn) * invDet; - _out.rows[1][1] = +(a * kp_lo - c * ip_lm + d * io_km) * invDet; - _out.rows[2][1] = -(a * jp_ln - b * ip_lm + d * in_jm) * invDet; - _out.rows[3][1] = +(a * jo_kn - b * io_km + c * in_jm) * invDet; - - double gp_ho = g * 
p - h * o; - double fp_hn = f * p - h * n; - double fo_gn = f * o - g * n; - double ep_hm = e * p - h * m; - double eo_gm = e * o - g * m; - double en_fm = e * n - f * m; - - _out.rows[0][2] = +(b * gp_ho - c * fp_hn + d * fo_gn) * invDet; - _out.rows[1][2] = -(a * gp_ho - c * ep_hm + d * eo_gm) * invDet; - _out.rows[2][2] = +(a * fp_hn - b * ep_hm + d * en_fm) * invDet; - _out.rows[3][2] = -(a * fo_gn - b * eo_gm + c * en_fm) * invDet; - - double gl_hk = g * l - h * k; - double fl_hj = f * l - h * j; - double fk_gj = f * k - g * j; - double el_hi = e * l - h * i; - double ek_gi = e * k - g * i; - double ej_fi = e * j - f * i; - - _out.rows[0][3] = -(b * gl_hk - c * fl_hj + d * fk_gj) * invDet; - _out.rows[1][3] = +(a * gl_hk - c * el_hi + d * ek_gi) * invDet; - _out.rows[2][3] = -(a * fl_hj - b * el_hi + d * ej_fi) * invDet; - _out.rows[3][3] = +(a * fk_gj - b * ek_gi + c * ej_fi) * invDet; - - return true; - } - else - { - auto mat2mul = [](vectorSIMDf _A, vectorSIMDf _B) - { - return _A*_B.xwxw()+_A.yxwz()*_B.zyzy(); - }; - auto mat2adjmul = [](vectorSIMDf _A, vectorSIMDf _B) - { - return _A.wwxx()*_B-_A.yyzz()*_B.zwxy(); - }; - auto mat2muladj = [](vectorSIMDf _A, vectorSIMDf _B) - { - return _A*_B.wxwx()-_A.yxwz()*_B.zyzy(); - }; - - vectorSIMDf A = _mm_movelh_ps(rows[0].getAsRegister(), rows[1].getAsRegister()); - vectorSIMDf B = _mm_movehl_ps(rows[1].getAsRegister(), rows[0].getAsRegister()); - vectorSIMDf C = _mm_movelh_ps(rows[2].getAsRegister(), rows[3].getAsRegister()); - vectorSIMDf D = _mm_movehl_ps(rows[3].getAsRegister(), rows[2].getAsRegister()); - - vectorSIMDf allDets = vectorSIMDf(_mm_shuffle_ps(rows[0].getAsRegister(),rows[2].getAsRegister(),_MM_SHUFFLE(2,0,2,0)))* - vectorSIMDf(_mm_shuffle_ps(rows[1].getAsRegister(),rows[3].getAsRegister(),_MM_SHUFFLE(3,1,3,1))) - - - vectorSIMDf(_mm_shuffle_ps(rows[0].getAsRegister(),rows[2].getAsRegister(),_MM_SHUFFLE(3,1,3,1)))* - 
vectorSIMDf(_mm_shuffle_ps(rows[1].getAsRegister(),rows[3].getAsRegister(),_MM_SHUFFLE(2,0,2,0))); - - auto detA = allDets.xxxx(); - auto detB = allDets.yyyy(); - auto detC = allDets.zzzz(); - auto detD = allDets.wwww(); - - // https://lxjk.github.io/2017/09/03/Fast-4x4-Matrix-Inverse-with-SSE-SIMD-Explained.html - auto D_C = mat2adjmul(D, C); - // A#B - auto A_B = mat2adjmul(A, B); - // X# = |D|A - B(D#C) - auto X_ = detD*A - mat2mul(B, D_C); - // W# = |A|D - C(A#B) - auto W_ = detA*D - mat2mul(C, A_B); - - // |M| = |A|*|D| + ... (continue later) - auto detM = detA*detD; - - // Y# = |B|C - D(A#B)# - auto Y_ = detB*C - mat2muladj(D, A_B); - // Z# = |C|B - A(D#C)# - auto Z_ = detC*B - mat2muladj(A, D_C); - - // |M| = |A|*|D| + |B|*|C| ... (continue later) - detM += detB*detC; - - // tr((A#B)(D#C)) - __m128 tr = (A_B*D_C.xzyw()).getAsRegister(); - tr = _mm_hadd_ps(tr, tr); - tr = _mm_hadd_ps(tr, tr); - // |M| = |A|*|D| + |B|*|C| - tr((A#B)(D#C) - detM -= tr; - - if (core::iszero(detM.x, FLT_MIN)) - return false; - - vectorSIMDf rDetM; - - // (1/|M|, -1/|M|, -1/|M|, 1/|M|) - if constexpr (precision == E_MATRIX_INVERSE_PRECISION::EMIP_FAST_RECIPROCAL) - rDetM = vectorSIMDf(1.f, -1.f, -1.f, 1.f)*core::reciprocal(detM); - else if constexpr (precision == E_MATRIX_INVERSE_PRECISION::EMIP_32BIT) - rDetM = vectorSIMDf(1.f, -1.f, -1.f, 1.f).preciseDivision(detM); - - X_ *= rDetM; - Y_ *= rDetM; - Z_ *= rDetM; - W_ *= rDetM; - - // apply adjugate and store, here we combine adjugate shuffle and store shuffle - _out.rows[0] = _mm_shuffle_ps(X_.getAsRegister(), Y_.getAsRegister(), _MM_SHUFFLE(1, 3, 1, 3)); - _out.rows[1] = _mm_shuffle_ps(X_.getAsRegister(), Y_.getAsRegister(), _MM_SHUFFLE(0, 2, 0, 2)); - _out.rows[2] = _mm_shuffle_ps(Z_.getAsRegister(), W_.getAsRegister(), _MM_SHUFFLE(1, 3, 1, 3)); - _out.rows[3] = _mm_shuffle_ps(Z_.getAsRegister(), W_.getAsRegister(), _MM_SHUFFLE(0, 2, 0, 2)); - - return true; - } - } - - inline vectorSIMDf sub3x3TransformVect(const vectorSIMDf& 
_in) const; - - inline void transformVect(vectorSIMDf& _out, const vectorSIMDf& _in) const; - inline void transformVect(vectorSIMDf& _vector) const - { - transformVect(_vector, _vector); - } - - inline void translateVect(vectorSIMDf& _vect) const - { - _vect += getTranslation(); - } - - bool isBoxInFrustum(const aabbox3d& bbox); - - bool perspectiveTransformVect(core::vectorSIMDf& inOutVec) - { - transformVect(inOutVec); - const bool inFront = inOutVec[3] > 0.f; - inOutVec /= inOutVec.wwww(); - return inFront; - } - - core::vector2di fragCoordTransformVect(const core::vectorSIMDf& _in, const core::dimension2du& viewportDimensions) - { - core::vectorSIMDf pos(_in); - pos.w = 1.f; - if (perspectiveTransformVect(pos)) - core::vector2di(-0x80000000, -0x80000000); - - pos[0] *= 0.5f; - pos[1] *= 0.5f; - pos[0] += 0.5f; - pos[1] += 0.5f; - - return core::vector2di(pos[0] * float(viewportDimensions.Width), pos[1] * float(viewportDimensions.Height)); - } - - static inline matrix4SIMD buildProjectionMatrixPerspectiveFovRH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar); - static inline matrix4SIMD buildProjectionMatrixPerspectiveFovLH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar); - - static inline matrix4SIMD buildProjectionMatrixOrthoRH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar); - static inline matrix4SIMD buildProjectionMatrixOrthoLH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar); - - //! Access by row - inline const vectorSIMDf& operator[](size_t _rown) const { return rows[_rown]; } - //! Access by row - inline vectorSIMDf& operator[](size_t _rown) { return rows[_rown]; } - - private: - //! 
TODO: implement a dvec<2> - inline __m128d halfRowAsDouble(size_t _n, bool _firstHalf) const; - static inline __m128d concat64_helper(const __m128d& _a0, const __m128d& _a1, const matrix4SIMD& _mtx, bool _firstHalf); -}; - -inline matrix4SIMD operator*(float _scalar, const matrix4SIMD& _mtx) -{ - return _mtx * _scalar; -} - -inline matrix4SIMD concatenateBFollowedByA(const matrix4SIMD& _a, const matrix4SIMD& _b) -{ - return matrix4SIMD::concatenateBFollowedByA(_a, _b); -} -/* -inline matrix4SIMD concatenateBFollowedByAPrecisely(const matrix4SIMD& _a, const matrix4SIMD& _b) -{ - return matrix4SIMD::concatenateBFollowedByAPrecisely(_a, _b); -} -*/ - - -}} // nbl::core - -#endif diff --git a/include/matrix4SIMD_impl.h b/include/matrix4SIMD_impl.h deleted file mode 100644 index 02484e7a4c..0000000000 --- a/include/matrix4SIMD_impl.h +++ /dev/null @@ -1,299 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#ifndef __NBL_MATRIX4SIMD_IMPL_H_INCLUDED__ -#define __NBL_MATRIX4SIMD_IMPL_H_INCLUDED__ - -#include "matrix4SIMD.h" -#include "nbl/core/math/glslFunctions.tcc" -#include "aabbox3d.h" - -namespace nbl -{ -namespace core -{ - - -inline bool matrix4SIMD::operator!=(const matrix4SIMD& _other) const -{ - for (size_t i = 0u; i < VectorCount; ++i) - if ((rows[i] != _other.rows[i]).any()) - return true; - return false; -} - -inline matrix4SIMD& matrix4SIMD::operator+=(const matrix4SIMD& _other) -{ - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] += _other.rows[i]; - return *this; -} - -inline matrix4SIMD& matrix4SIMD::operator-=(const matrix4SIMD& _other) -{ - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] -= _other.rows[i]; - return *this; -} - -inline matrix4SIMD& matrix4SIMD::operator*=(float _scalar) -{ - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] *= _scalar; - return *this; -} - -inline bool 
matrix4SIMD::isIdentity(float _tolerance) const -{ - return core::equals(*this, matrix4SIMD(), core::ROUNDING_ERROR()); -} - -#ifdef __NBL_COMPILE_WITH_SSE3 -#define BROADCAST32(fpx) _MM_SHUFFLE(fpx, fpx, fpx, fpx) -#define BUILD_MASKF(_x_, _y_, _z_, _w_) _mm_setr_epi32(_x_*0xffffffff, _y_*0xffffffff, _z_*0xffffffff, _w_*0xffffffff) -inline matrix4SIMD matrix4SIMD::concatenateBFollowedByA(const matrix4SIMD& _a, const matrix4SIMD& _b) -{ - auto calcRow = [](const __m128& _row, const matrix4SIMD& _mtx) - { - __m128 r0 = _mtx.rows[0].getAsRegister(); - __m128 r1 = _mtx.rows[1].getAsRegister(); - __m128 r2 = _mtx.rows[2].getAsRegister(); - __m128 r3 = _mtx.rows[3].getAsRegister(); - - __m128 res; - res = _mm_mul_ps(_mm_shuffle_ps(_row, _row, BROADCAST32(0)), r0); - res = _mm_add_ps(res, _mm_mul_ps(_mm_shuffle_ps(_row, _row, BROADCAST32(1)), r1)); - res = _mm_add_ps(res, _mm_mul_ps(_mm_shuffle_ps(_row, _row, BROADCAST32(2)), r2)); - res = _mm_add_ps(res, _mm_mul_ps(_mm_shuffle_ps(_row, _row, BROADCAST32(3)), r3)); - return res; - }; - - matrix4SIMD r; - for (size_t i = 0u; i < 4u; ++i) - r.rows[i] = calcRow(_a.rows[i].getAsRegister(), _b); - - return r; -} -inline matrix4SIMD matrix4SIMD::concatenateBFollowedByAPrecisely(const matrix4SIMD& _a, const matrix4SIMD& _b) -{ - matrix4SIMD out; - - __m128i mask0011 = BUILD_MASKF(0, 0, 1, 1); - __m128 second; - - { - __m128d r00 = _a.halfRowAsDouble(0u, true); - __m128d r01 = _a.halfRowAsDouble(0u, false); - second = _mm_cvtpd_ps(concat64_helper(r00, r01, _b, false)); - out.rows[0] = vectorSIMDf(_mm_cvtpd_ps(concat64_helper(r00, r01, _b, true))) | _mm_castps_si128((vectorSIMDf(_mm_movelh_ps(second, second)) & mask0011).getAsRegister()); - } - - { - __m128d r10 = _a.halfRowAsDouble(1u, true); - __m128d r11 = _a.halfRowAsDouble(1u, false); - second = _mm_cvtpd_ps(concat64_helper(r10, r11, _b, false)); - out.rows[1] = vectorSIMDf(_mm_cvtpd_ps(concat64_helper(r10, r11, _b, true))) | 
_mm_castps_si128((vectorSIMDf(_mm_movelh_ps(second, second)) & mask0011).getAsRegister()); - } - - { - __m128d r20 = _a.halfRowAsDouble(2u, true); - __m128d r21 = _a.halfRowAsDouble(2u, false); - second = _mm_cvtpd_ps(concat64_helper(r20, r21, _b, false)); - out.rows[2] = vectorSIMDf(_mm_cvtpd_ps(concat64_helper(r20, r21, _b, true))) | _mm_castps_si128((vectorSIMDf(_mm_movelh_ps(second, second)) & mask0011).getAsRegister()); - } - - { - __m128d r30 = _a.halfRowAsDouble(3u, true); - __m128d r31 = _a.halfRowAsDouble(3u, false); - second = _mm_cvtpd_ps(concat64_helper(r30, r31, _b, false)); - out.rows[3] = vectorSIMDf(_mm_cvtpd_ps(concat64_helper(r30, r31, _b, true))) | _mm_castps_si128((vectorSIMDf(_mm_movelh_ps(second, second)) & mask0011).getAsRegister()); - } - - return out; -} - -inline matrix4SIMD& matrix4SIMD::setScale(const core::vectorSIMDf& _scale) -{ - const __m128i mask0001 = BUILD_MASKF(0, 0, 0, 1); - - rows[0] = (_scale & BUILD_MASKF(1, 0, 0, 0)) | _mm_castps_si128((rows[0] & mask0001).getAsRegister()); - rows[1] = (_scale & BUILD_MASKF(0, 1, 0, 0)) | _mm_castps_si128((rows[1] & mask0001).getAsRegister()); - rows[2] = (_scale & BUILD_MASKF(0, 0, 1, 0)) | _mm_castps_si128((rows[2] & mask0001).getAsRegister()); - rows[3] = vectorSIMDf(0.f, 0.f, 0.f, 1.f); - - return *this; -} - -//! Returns last column of the matrix. -inline vectorSIMDf matrix4SIMD::getTranslation() const -{ - __m128 tmp1 = _mm_unpackhi_ps(rows[0].getAsRegister(), rows[1].getAsRegister()); // (0z,1z,0w,1w) - __m128 tmp2 = _mm_unpackhi_ps(rows[2].getAsRegister(), rows[3].getAsRegister()); // (2z,3z,2w,3w) - __m128 col3 = _mm_movehl_ps(tmp1, tmp2);// (0w,1w,2w,3w) - - return col3; -} -//! Returns translation part of the matrix (w component is always 0). 
-inline vectorSIMDf matrix4SIMD::getTranslation3D() const -{ - __m128 tmp1 = _mm_unpackhi_ps(rows[0].getAsRegister(), rows[1].getAsRegister()); // (0z,1z,0w,1w) - __m128 tmp2 = _mm_unpackhi_ps(rows[2].getAsRegister(), _mm_setzero_ps()); // (2z,0,2w,0) - __m128 transl = _mm_movehl_ps(tmp1, tmp2);// (0w,1w,2w,0) - - return transl; -} - -inline vectorSIMDf matrix4SIMD::sub3x3TransformVect(const vectorSIMDf& _in) const -{ - matrix4SIMD cp{*this}; - vectorSIMDf out = _in & BUILD_MASKF(1, 1, 1, 0); - transformVect(out); - return out; -} - -inline void matrix4SIMD::transformVect(vectorSIMDf& _out, const vectorSIMDf& _in) const -{ - vectorSIMDf r[4]; - for (size_t i = 0u; i < VectorCount; ++i) - r[i] = rows[i] * _in; - - _out = _mm_hadd_ps( - _mm_hadd_ps(r[0].getAsRegister(), r[1].getAsRegister()), - _mm_hadd_ps(r[2].getAsRegister(), r[3].getAsRegister()) - ); -} - -inline matrix4SIMD matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar) -{ - const float h = core::reciprocal(tanf(fieldOfViewRadians*0.5f)); - _NBL_DEBUG_BREAK_IF(aspectRatio == 0.f); //division by zero - const float w = h / aspectRatio; - - _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero - - matrix4SIMD m; - m.rows[0] = vectorSIMDf(w, 0.f, 0.f, 0.f); - m.rows[1] = vectorSIMDf(0.f, -h, 0.f, 0.f); - m.rows[2] = vectorSIMDf(0.f, 0.f, -zFar/(zFar-zNear), -zNear*zFar/(zFar-zNear)); - m.rows[3] = vectorSIMDf(0.f, 0.f, -1.f, 0.f); - - return m; -} -inline matrix4SIMD matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar) -{ - const float h = core::reciprocal(tanf(fieldOfViewRadians*0.5f)); - _NBL_DEBUG_BREAK_IF(aspectRatio == 0.f); //division by zero - const float w = h / aspectRatio; - - _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero - - matrix4SIMD m; - m.rows[0] = vectorSIMDf(w, 0.f, 0.f, 0.f); - m.rows[1] = vectorSIMDf(0.f, -h, 0.f, 0.f); - m.rows[2] = 
vectorSIMDf(0.f, 0.f, zFar/(zFar-zNear), -zNear*zFar/(zFar-zNear)); - m.rows[3] = vectorSIMDf(0.f, 0.f, 1.f, 0.f); - - return m; -} - -inline matrix4SIMD matrix4SIMD::buildProjectionMatrixOrthoRH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar) -{ - _NBL_DEBUG_BREAK_IF(widthOfViewVolume == 0.f); //division by zero - _NBL_DEBUG_BREAK_IF(heightOfViewVolume == 0.f); //division by zero - _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero - - matrix4SIMD m; - m.rows[0] = vectorSIMDf(2.f/widthOfViewVolume, 0.f, 0.f, 0.f); - m.rows[1] = vectorSIMDf(0.f, -2.f/heightOfViewVolume, 0.f, 0.f); - m.rows[2] = vectorSIMDf(0.f, 0.f, -1.f/(zFar-zNear), -zNear/(zFar-zNear)); - m.rows[3] = vectorSIMDf(0.f, 0.f, 0.f, 1.f); - - return m; -} -inline matrix4SIMD matrix4SIMD::buildProjectionMatrixOrthoLH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar) -{ - _NBL_DEBUG_BREAK_IF(widthOfViewVolume == 0.f); //division by zero - _NBL_DEBUG_BREAK_IF(heightOfViewVolume == 0.f); //division by zero - _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero - - matrix4SIMD m; - m.rows[0] = vectorSIMDf(2.f/widthOfViewVolume, 0.f, 0.f, 0.f); - m.rows[1] = vectorSIMDf(0.f, -2.f/heightOfViewVolume, 0.f, 0.f); - m.rows[2] = vectorSIMDf(0.f, 0.f, 1.f/(zFar-zNear), -zNear/(zFar-zNear)); - m.rows[3] = vectorSIMDf(0.f, 0.f, 0.f, 1.f); - - return m; -} - - - -inline __m128d matrix4SIMD::halfRowAsDouble(size_t _n, bool _firstHalf) const -{ - return _mm_cvtps_pd(_firstHalf ? 
rows[_n].xyxx().getAsRegister() : rows[_n].zwxx().getAsRegister()); -} -inline __m128d matrix4SIMD::concat64_helper(const __m128d& _a0, const __m128d& _a1, const matrix4SIMD& _mtx, bool _firstHalf) -{ - __m128d r0 = _mtx.halfRowAsDouble(0u, _firstHalf); - __m128d r1 = _mtx.halfRowAsDouble(1u, _firstHalf); - __m128d r2 = _mtx.halfRowAsDouble(2u, _firstHalf); - __m128d r3 = _mtx.halfRowAsDouble(3u, _firstHalf); - - //const __m128d mask01 = _mm_castsi128_pd(_mm_setr_epi32(0, 0, 0xffffffff, 0xffffffff)); - - __m128d res; - res = _mm_mul_pd(_mm_shuffle_pd(_a0, _a0, 0), r0); - res = _mm_add_pd(res, _mm_mul_pd(_mm_shuffle_pd(_a0, _a0, 3/*0b11*/), r1)); - res = _mm_add_pd(res, _mm_mul_pd(_mm_shuffle_pd(_a1, _a1, 0), r2)); - res = _mm_add_pd(res, _mm_mul_pd(_mm_shuffle_pd(_a1, _a1, 3/*0b11*/), r3)); - return res; -} - -#undef BUILD_MASKF -#undef BROADCAST32 -#else -#error "no implementation" -#endif - -inline bool matrix4SIMD::isBoxInFrustum(const aabbox3d& bbox) -{ - vectorSIMDf MinEdge, MaxEdge; - MinEdge.set(bbox.MinEdge); - MaxEdge.set(bbox.MaxEdge); - MinEdge.w = 1.f; - MaxEdge.w = 1.f; - - - auto getClosestDP = [&MinEdge,&MaxEdge](const vectorSIMDf& toDot) -> float - { - return dot(mix(MaxEdge,MinEdge,toDot struct Instance final { @@ -221,18 +221,18 @@ class ITopLevelAccelerationStructure : public IDescriptor, public IAccelerationS template struct StaticInstance final { - core::matrix3x4SIMD transform = core::matrix3x4SIMD(); + hlsl::float32_t3x4 transform = hlsl::float32_t3x4(); Instance base = {}; }; template struct MatrixMotionInstance final { - core::matrix3x4SIMD transform[2] = {core::matrix3x4SIMD(),core::matrix3x4SIMD()}; + hlsl::float32_t3x4 transform[2] = {hlsl::float32_t3x4(),hlsl::float32_t3x4()}; Instance base = {}; }; struct SRT { - // TODO: some operators to convert back and forth from `core::matrix3x4SIMD + // TODO: some operators to convert back and forth from `hlsl::float32_t3x4 float sx; float a; diff --git a/include/nbl/asset/IAnimationLibrary.h 
b/include/nbl/asset/IAnimationLibrary.h index 9665349103..d650cb25d9 100644 --- a/include/nbl/asset/IAnimationLibrary.h +++ b/include/nbl/asset/IAnimationLibrary.h @@ -34,7 +34,7 @@ class IAnimationLibrary : public virtual core::IReferenceCounted translation[2] = translation[1] = translation[0] = 0.f; quat = core::vectorSIMDu32(128u,128u,128u,255u); // should be (0,0,0,1) encoded } - Keyframe(const core::vectorSIMDf& _scale, const core::quaternion& _quat, const CQuantQuaternionCache* quantCache, const core::vectorSIMDf& _translation) + Keyframe(const core::vectorSIMDf& _scale, const hlsl::quaternion& _quat, const CQuantQuaternionCache* quantCache, const core::vectorSIMDf& _translation) { std::copy(_translation.pointer,_translation.pointer+3,translation); quat = quantCache->template quantize(_quat); @@ -42,13 +42,13 @@ class IAnimationLibrary : public virtual core::IReferenceCounted //scale = ; } - inline core::quaternion getRotation() const + inline hlsl::quaternion getRotation() const { const void* _pix[4] = {&quat,nullptr,nullptr,nullptr}; double out[4]; decodePixels(_pix,out,0u,0u); auto q = core::normalize(core::vectorSIMDf(out[0],out[1],out[2],out[3])); - return reinterpret_cast(&q)[0]; + return reinterpret_cast*>(&q)[0]; } inline core::vectorSIMDf getScale() const diff --git a/include/nbl/asset/ICPUSkeleton.h b/include/nbl/asset/ICPUSkeleton.h index 1049798268..7418e46ce3 100644 --- a/include/nbl/asset/ICPUSkeleton.h +++ b/include/nbl/asset/ICPUSkeleton.h @@ -42,15 +42,15 @@ class ICPUSkeleton final : public ISkeleton, public IAsset } //! 
- inline const core::matrix3x4SIMD& getDefaultTransformMatrix(base_t::joint_id_t jointID) const + inline const hlsl::float32_t3x4& getDefaultTransformMatrix(base_t::joint_id_t jointID) const { const uint8_t* ptr = reinterpret_cast(m_defaultTransforms.buffer->getPointer()); - return reinterpret_cast(ptr+m_defaultTransforms.offset)[jointID]; + return reinterpret_cast(ptr+m_defaultTransforms.offset)[jointID]; } - inline core::matrix3x4SIMD& getDefaultTransformMatrix(base_t::joint_id_t jointID) + inline hlsl::float32_t3x4& getDefaultTransformMatrix(base_t::joint_id_t jointID) { assert(isMutable()); - return const_cast(const_cast(this)->getDefaultTransformMatrix(jointID)); + return const_cast(const_cast(this)->getDefaultTransformMatrix(jointID)); } //! diff --git a/include/nbl/asset/ISkeleton.h b/include/nbl/asset/ISkeleton.h index 7960ca4eef..03ba3af4ea 100644 --- a/include/nbl/asset/ISkeleton.h +++ b/include/nbl/asset/ISkeleton.h @@ -62,7 +62,7 @@ class ISkeleton : public virtual core::IReferenceCounted return; assert(m_parentJointIDs.buffer->getSize()>=m_parentJointIDs.offset+sizeof(joint_id_t)*m_jointCount); - assert(m_defaultTransforms.buffer->getSize()>=m_defaultTransforms.offset+sizeof(core::matrix3x4SIMD)*m_jointCount); + assert(m_defaultTransforms.buffer->getSize()>=m_defaultTransforms.offset+sizeof(hlsl::float32_t3x4)*m_jointCount); } virtual ~ISkeleton() { diff --git a/include/nbl/asset/utils/CQuantQuaternionCache.h b/include/nbl/asset/utils/CQuantQuaternionCache.h index 8e46dffb0a..a51549d24d 100644 --- a/include/nbl/asset/utils/CQuantQuaternionCache.h +++ b/include/nbl/asset/utils/CQuantQuaternionCache.h @@ -60,7 +60,7 @@ class CQuantQuaternionCache : public CDirQuantCacheBase - value_type_t quantize(const core::quaternion& quat) + value_type_t quantize(const hlsl::quaternion& quat) { return Base::quantize<4u,CacheFormat>(reinterpret_cast(quat)); } diff --git a/include/nbl/builtin/hlsl/cpp_compat/unroll.hlsl b/include/nbl/builtin/hlsl/cpp_compat/unroll.hlsl 
new file mode 100644 index 0000000000..36bcd944c6 --- /dev/null +++ b/include/nbl/builtin/hlsl/cpp_compat/unroll.hlsl @@ -0,0 +1,12 @@ +#ifndef _NBL_BUILTIN_HLSL_CPP_COMPAT_UNROLL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_CPP_COMPAT_UNROLL_INCLUDED_ + +#ifdef __HLSL_VERSION +#define NBL_UNROLL [unroll] +#define NBL_UNROLL_LIMITED(LIMIT) [unroll(LIMIT)] +#else +#define NBL_UNROLL // can't be bothered / TODO +#define NBL_UNROLL_LIMITED(LIMIT) +#endif + +#endif diff --git a/include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl b/include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl new file mode 100644 index 0000000000..bc0286e778 --- /dev/null +++ b/include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl @@ -0,0 +1,101 @@ +// Copyright (C) 2019 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" +// For conditions of distribution and use, see copyright notice in nabla.h +// See the original file in irrlicht source for authors + +#ifndef _NBL_BUILTIN_HLSL_MATH_QUATERNION_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MATH_QUATERNION_INCLUDED_ + +#include + +namespace nbl +{ +namespace hlsl +{ + +//! Quaternion class for representing rotations. +/** It provides cheap combinations and avoids gimbal locks. +Also useful for interpolations. */ + +template +struct quaternion +{ + // i*data[0] + j*data[1] + k*data[2] + data[3] + using vec_t = vector; + vector data; + + //! 
creates identity quaternion + static inline quaternion create() + { + quaternion q; + q.data = vector(0.0f, 0.0f, 0.0f, 1.0f); + + return q; + } + + static inline quaternion create(float_t x, float_t y, float_t z, float_t w) + { + quaternion q; + q.data = vector(x, y, z, w); + + return q; + } + + static inline quaternion create(NBL_CONST_REF_ARG(quaternion) other) + { + return other; + } + + static inline quaternion create(float_t pitch, float_t yaw, float_t roll) + { + float angle; + + angle = roll * 0.5f; + const float sr = sinf(angle); + const float cr = cosf(angle); + + angle = pitch * 0.5f; + const float sp = sinf(angle); + const float cp = cos(angle); + + angle = yaw * 0.5f; + const float sy = sinf(angle); + const float cy = cosf(angle); + + const float cpcy = cp * cy; + const float spcy = sp * cy; + const float cpsy = cp * sy; + const float spsy = sp * sy; + + quaternion output; + output.data = float32_t4(sr, cr, cr, cr) * float32_t4(cpcy, spcy, cpsy, cpcy) + float32_t4(-cr, sr, -sr, sr) * float32_t4(spsy, cpsy, spcy, spsy); + + return output; + } + + // TODO: + //explicit quaternion(NBL_CONST_REF_ARG(float32_t3x4) m) {} + + inline quaternion operator*(float_t scalar) + { + quaternion output; + output.data = data * scalar; + return output; + } + + inline quaternion operator*(NBL_CONST_REF_ARG(quaternion) other) + { + return quaternion::create( + data.w * other.data.w - data.x * other.x - data.y * other.data.y - data.z * other.data.z, + data.w * other.data.x + data.x * other.w + data.y * other.data.z - data.z * other.data.y, + data.w * other.data.y - data.x * other.z + data.y * other.data.w + data.z * other.data.x, + data.w * other.data.z + data.x * other.y - data.y * other.data.x + data.z * other.data.w + ); + } +}; + +} // end namespace core +} // nbl + +#endif + diff --git a/include/nbl/builtin/hlsl/math/quaternion/quaternion_impl.hlsl b/include/nbl/builtin/hlsl/math/quaternion/quaternion_impl.hlsl new file mode 100644 index 0000000000..d00d9ce2c4 --- 
/dev/null +++ b/include/nbl/builtin/hlsl/math/quaternion/quaternion_impl.hlsl @@ -0,0 +1,25 @@ +// Copyright (C) 2019 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" +// For conditions of distribution and use, see copyright notice in nabla.h +// See the original file in irrlicht source for authors + +#ifndef _NBL_BUILTIN_HLSL_MATH_QUATERNION_IMPL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MATH_QUATERNION_IMPL_INCLUDED_ + +#include + +namespace nbl +{ +namespace hlsl +{ + +namespace quaternion_impl +{ + +} + +} // end namespace core +} // nbl + +#endif + diff --git a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl new file mode 100644 index 0000000000..d1a628ccc0 --- /dev/null +++ b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl @@ -0,0 +1,203 @@ +#ifndef _NBL_BUILTIN_HLSL_MATRIX_UTILS_TRANSFORMATION_MATRIX_UTILS_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MATRIX_UTILS_TRANSFORMATION_MATRIX_UTILS_INCLUDED_ +#include +// TODO: remove this header when deleting vectorSIMDf.hlsl +#ifndef __HLSL_VERSION +#include +#include "vectorSIMD.h" +#endif +#include +#include "nbl/builtin/hlsl/cpp_compat/unroll.hlsl" + +namespace nbl +{ +namespace hlsl +{ + +template +MatT diagonal(float diagonal = 1) +{ + MatT output; + + NBL_UNROLL_LIMITED(4) + for (uint32_t i = 0; i < matrix_traits::RowCount; ++i) + NBL_UNROLL_LIMITED(4) + for (uint32_t j = 0; j < matrix_traits::ColumnCount; ++j) + output[i][j] = 0; + + NBL_UNROLL_LIMITED(4) + for (uint32_t diag = 0; diag < matrix_traits::RowCount; ++diag) + output[diag][diag] = diagonal; + + return output; +} + +template +MatT identity() +{ + // TODO + // static_assert(MatT::Square); + return diagonal(1); +} + +// TODO: this is temporary function, delete when removing vectorSIMD +#ifndef __HLSL_VERSION +template +inline core::vectorSIMDf 
transformVector(NBL_CONST_REF_ARG(matrix) mat, NBL_CONST_REF_ARG(core::vectorSIMDf) vec) +{ + core::vectorSIMDf output; + float32_t4 tmp; + for (int i = 0; i < 4; ++i) // rather do that that reinterpret_cast for safety + tmp[i] = output[i]; + + for (int i = 0; i < 4; ++i) + output[i] = hlsl::dot(mat[i], tmp); + + return output; +} +#endif +template +inline matrix getMatrix3x4As4x4(NBL_CONST_REF_ARG(matrix) mat) +{ + matrix output; + for (int i = 0; i < 3; ++i) + output[i] = mat[i]; + output[3] = float32_t4(0.0f, 0.0f, 0.0f, 1.0f); + + return output; +} + +template +inline matrix getSub3x3(NBL_CONST_REF_ARG(matrix) mat) +{ + return matrix(mat); +} + +template +inline matrix getAs64BitPrecisionMatrix(NBL_CONST_REF_ARG(matrix) mat) +{ + matrix output; + for (int i = 0; i < N; ++i) + output[i] = mat[i]; + + return output; +} + +namespace transformation_matrix_utils_impl +{ + // This function calculates determinant using the scalar triple product. + template + inline T determinant_helper(NBL_CONST_REF_ARG(matrix) mat, NBL_REF_ARG(vector) r1crossr2) + { + r1crossr2 = hlsl::cross(mat[1], mat[2]); + return hlsl::dot(mat[0], r1crossr2); + } +} + +//! 
returns the matrix of cofactors of the (sub 3x3) matrix, i.e. the transpose of its adjugate +template +inline matrix getSub3x3TransposeCofactors(NBL_CONST_REF_ARG(matrix) mat) +{ + static_assert(N >= 3 && M >= 3); + + matrix output; + vector row0 = vector(mat[0]); + vector row1 = vector(mat[1]); + vector row2 = vector(mat[2]); + output[0] = hlsl::cross(row1, row2); + output[1] = hlsl::cross(row2, row0); + output[2] = hlsl::cross(row0, row1); + + // output[i] now holds the cofactors of row i (cross product of the other two rows) + + return output; +} + +template +inline bool getSub3x3InverseTranspose(NBL_CONST_REF_ARG(matrix) matIn, NBL_REF_ARG(matrix) matOut) // matOut is written below, so it is taken by mutable reference +{ + matrix matIn3x3 = getSub3x3(matIn); + vector r1crossr2; + T d = transformation_matrix_utils_impl::determinant_helper(matIn3x3, r1crossr2); + if (abs(d) <= FLT_MIN) // determinant too close to zero: report failure and leave matOut untouched + return false; + auto rcp = T(1.0f)/d; + + // matrix of cofactors * 1/det + matOut = getSub3x3TransposeCofactors(matIn3x3); + matOut[0] *= rcp; + matOut[1] *= rcp; + matOut[2] *= rcp; + + return true; +} + +// TODO: use portable_float when merged +//! multiplies matrices a and b, 3x4 matrices are treated as 4x4 matrices with 4th row set to (0, 0, 0 ,1) +template +inline matrix concatenateBFollowedByA(NBL_CONST_REF_ARG(matrix) a, NBL_CONST_REF_ARG(matrix) b) +{ + // TODO + // static_assert(N == 3 || N == 4); + + const matrix a4x4 = getMatrix3x4As4x4(a); + const matrix b4x4 = getMatrix3x4As4x4(b); + return matrix(mul(a4x4, b4x4)); +} + +template +inline void setScale(NBL_REF_ARG(matrix) outMat, NBL_CONST_REF_ARG(vector) scale) // overwrites only the first three diagonal elements +{ + // TODO + // static_assert(N == 3 || N == 4); + + outMat[0][0] = scale[0]; + outMat[1][1] = scale[1]; + outMat[2][2] = scale[2]; +} + +//! 
Replaces current rotation and scale by rotation represented by quaternion `quat`, leaves 4th row and 4th column unchanged +template +inline void setRotation(NBL_REF_ARG(matrix) outMat, NBL_CONST_REF_ARG(nbl::hlsl::quaternion) quat) +{ + // TODO + //static_assert(N == 3 || N == 4); + + outMat[0] = vector( + 1 - 2 * (quat.data.y * quat.data.y + quat.data.z * quat.data.z), + 2 * (quat.data.x * quat.data.y - quat.data.z * quat.data.w), + 2 * (quat.data.x * quat.data.z + quat.data.y * quat.data.w), + outMat[0][3] + ); + + outMat[1] = vector( + 2 * (quat.data.x * quat.data.y + quat.data.z * quat.data.w), + 1 - 2 * (quat.data.x * quat.data.x + quat.data.z * quat.data.z), + 2 * (quat.data.y * quat.data.z - quat.data.x * quat.data.w), + outMat[1][3] + ); + + outMat[2] = vector( + 2 * (quat.data.x * quat.data.z - quat.data.y * quat.data.w), + 2 * (quat.data.y * quat.data.z + quat.data.x * quat.data.w), + 1 - 2 * (quat.data.x * quat.data.x + quat.data.y * quat.data.y), + outMat[2][3] + ); +} + +template +inline void setTranslation(NBL_REF_ARG(matrix) outMat, NBL_CONST_REF_ARG(vector) translation) +{ + // TODO + // static_assert(N == 3 || N == 4); + + outMat[0].w = translation.x; + outMat[1].w = translation.y; + outMat[2].w = translation.z; +} + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/core/declarations.h b/include/nbl/core/declarations.h index 9aa708a793..466ea988aa 100644 --- a/include/nbl/core/declarations.h +++ b/include/nbl/core/declarations.h @@ -50,7 +50,6 @@ #include "nbl/core/math/colorutil.h" #include "nbl/core/math/rational.h" #include "nbl/core/math/plane3dSIMD.h" -#include "nbl/core/math/matrixutil.h" // memory #include "nbl/core/memory/memory.h" #include "nbl/core/memory/new_delete.h" diff --git a/include/nbl/core/definitions.h b/include/nbl/core/definitions.h index c08af6ad74..5913c2c8f2 100644 --- a/include/nbl/core/definitions.h +++ b/include/nbl/core/definitions.h @@ -15,8 +15,4 @@ #include "nbl/core/math/floatutil.tcc" #include 
"nbl/core/math/glslFunctions.tcc" -// implementations [deprecated] -#include "matrix3x4SIMD_impl.h" -#include "matrix4SIMD_impl.h" - #endif \ No newline at end of file diff --git a/include/nbl/core/math/floatutil.tcc b/include/nbl/core/math/floatutil.tcc index 71c8bd2da7..f20db5dec2 100644 --- a/include/nbl/core/math/floatutil.tcc +++ b/include/nbl/core/math/floatutil.tcc @@ -5,9 +5,8 @@ #ifndef __NBL_CORE_FLOAT_UTIL_TCC_INCLUDED__ #define __NBL_CORE_FLOAT_UTIL_TCC_INCLUDED__ - +#include "vectorSIMD.h" #include "nbl/core/math/floatutil.h" -#include "matrix4SIMD.h" namespace nbl { @@ -29,16 +28,6 @@ NBL_FORCE_INLINE vectorSIMDf ROUNDING_ERROR() { return vectorSIMDf(ROUNDING_ERROR()); } -template<> -NBL_FORCE_INLINE matrix3x4SIMD ROUNDING_ERROR() -{ - return matrix3x4SIMD(ROUNDING_ERROR(),ROUNDING_ERROR(),ROUNDING_ERROR()); -} -template<> -NBL_FORCE_INLINE matrix4SIMD ROUNDING_ERROR() -{ - return matrix4SIMD(ROUNDING_ERROR(),ROUNDING_ERROR(),ROUNDING_ERROR(),ROUNDING_ERROR()); -} template NBL_FORCE_INLINE T ROUNDING_ERROR() { diff --git a/include/nbl/core/math/glslFunctions.tcc b/include/nbl/core/math/glslFunctions.tcc index 205585965b..b8326b41d1 100644 --- a/include/nbl/core/math/glslFunctions.tcc +++ b/include/nbl/core/math/glslFunctions.tcc @@ -8,7 +8,6 @@ #include "nbl/core/declarations.h" #include "nbl/core/math/floatutil.tcc" -#include "matrix4SIMD.h" #include #include @@ -280,21 +279,6 @@ NBL_FORCE_INLINE vectorSIMDf cross(const vectorSIMDf& a, const vect #endif } -template<> -NBL_FORCE_INLINE matrix4SIMD transpose(const matrix4SIMD& m) -{ - core::matrix4SIMD retval; - __m128 a0 = m.rows[0].getAsRegister(), a1 = m.rows[1].getAsRegister(), a2 = m.rows[2].getAsRegister(), a3 = m.rows[3].getAsRegister(); - _MM_TRANSPOSE4_PS(a0, a1, a2, a3); - retval.rows[0] = a0; - retval.rows[1] = a1; - retval.rows[2] = a2; - retval.rows[3] = a3; - return retval; -} - - - template<> NBL_FORCE_INLINE bool equals(const vectorSIMDf& a, const vectorSIMDf& b, const vectorSIMDf& 
tolerance) { @@ -307,22 +291,6 @@ NBL_FORCE_INLINE bool equals(const core::vector3df& a, const core::vector3df& b, auto la = a-tolerance; return ha.X>=b.X&&ha.Y>=b.Y&&ha.Z>=b.Z && la.X<=b.X&&la.Y<=b.Y&&la.Z<=b.Z; } -template<> -NBL_FORCE_INLINE bool equals(const matrix4SIMD& a, const matrix4SIMD& b, const matrix4SIMD& tolerance) -{ - for (size_t i = 0u; i(a.rows[i], b.rows[i], tolerance.rows[i])) - return false; - return true; -} -template<> -NBL_FORCE_INLINE bool equals(const matrix3x4SIMD& a, const matrix3x4SIMD& b, const matrix3x4SIMD& tolerance) -{ - for (size_t i = 0u; i(a.rows[i], b.rows[i], tolerance[i])) - return false; - return true; -} template NBL_FORCE_INLINE bool equals(const T& a, const T& b, const T& tolerance) { diff --git a/include/nbl/core/math/matrixutil.h b/include/nbl/core/math/matrixutil.h deleted file mode 100644 index afe7955c9b..0000000000 --- a/include/nbl/core/math/matrixutil.h +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#ifndef _NBL_MATRIX_UTIL_H_INCLUDED_ -#define _NBL_MATRIX_UTIL_H_INCLUDED_ - -#include "matrix4SIMD.h" -#include "matrix3x4SIMD.h" - -namespace nbl::core -{ - - -//! 
TODO: OPTIMIZE THIS, DON'T PROMOTE THE MATRIX IF DON'T HAVE TO -inline matrix4SIMD concatenateBFollowedByA(const matrix4SIMD& _a, const matrix3x4SIMD& _b) -{ - return concatenateBFollowedByA(_a, matrix4SIMD(_b)); -} -/* -inline matrix4SIMD concatenateBFollowedByAPrecisely(const matrix4SIMD& _a, const matrix3x4SIMD& _b) -{ - return concatenateBFollowedByAPrecisely(_a, matrix4SIMD(_b)); -} -*/ - -} - -#endif diff --git a/include/nbl/core/math/plane3dSIMD.h b/include/nbl/core/math/plane3dSIMD.h index 891ed1300c..23099f0d61 100644 --- a/include/nbl/core/math/plane3dSIMD.h +++ b/include/nbl/core/math/plane3dSIMD.h @@ -3,11 +3,12 @@ // For conditions of distribution and use, see copyright notice in nabla.h // See the original file in irrlicht source for authors +#include "vectorSIMD.h" +#include + #ifndef __NBL_CORE_PLANE_3D_H_INCLUDED__ #define __NBL_CORE_PLANE_3D_H_INCLUDED__ -#include "matrix3x4SIMD.h" - namespace nbl { namespace core @@ -99,14 +100,19 @@ class plane3dSIMDf : private vectorSIMDf } //! 
- static inline plane3dSIMDf transform(const plane3dSIMDf& _in, const matrix3x4SIMD& _mat) + static inline plane3dSIMDf transform(const plane3dSIMDf& _in, const hlsl::float32_t3x4& _mat) { - matrix3x4SIMD inv; - _mat.getInverse(inv); + hlsl::float32_t4x4 inv = hlsl::getMatrix3x4As4x4(_mat); + inv = hlsl::inverse(inv); // keep the result; NOTE(review): assumes hlsl::inverse returns the inverse rather than inverting in place - confirm vectorSIMDf normal(_in.getNormal()); // transform by inverse transpose - return plane3dSIMDf(inv.rows[0]*normal.xxxx()+inv.rows[1]*normal.yyyy()+inv.rows[2]*normal.zzzz()+(normal.wwww()&BUILD_MASKF(0,0,0,1))); + hlsl::float32_t4 planeEq = inv[0] * hlsl::float32_t4(normal.x) + inv[1] * hlsl::float32_t4(normal.y) + inv[2] * hlsl::float32_t4(normal.z) + (hlsl::float32_t4(0, 0, 0, normal.w)); + vectorSIMDf planeEqSIMD; + for (int i = 0; i < 4; ++i) + planeEqSIMD[i] = planeEq[i]; + + return plane3dSIMDf(planeEqSIMD); #undef BUILD_MASKF } diff --git a/include/nbl/ext/Bullet/BulletUtility.h b/include/nbl/ext/Bullet/BulletUtility.h index 507adbceda..450c20c50d 100644 --- a/include/nbl/ext/Bullet/BulletUtility.h +++ b/include/nbl/ext/Bullet/BulletUtility.h @@ -64,8 +64,8 @@ namespace Bullet3 return convert(vec); } - inline core::matrix3x4SIMD convertbtTransform(const btTransform &trans) { - core::matrix3x4SIMD mat; + inline hlsl::float32_t3x4 convertbtTransform(const btTransform &trans) { + hlsl::float32_t3x4 mat; for (uint32_t i = 0; i < 3u; ++i) { mat.rows[i] = frombtVec3(trans.getBasis().getRow(i)); @@ -75,7 +75,7 @@ namespace Bullet3 return mat; } - inline btTransform convertMatrixSIMD(const core::matrix3x4SIMD &mat) { + inline btTransform convertMatrixSIMD(const hlsl::float32_t3x4 &mat) { btTransform transform; //Calling makeSafe3D on rows erases translation so save it diff --git a/include/nbl/ext/Bullet/CPhysicsWorld.h b/include/nbl/ext/Bullet/CPhysicsWorld.h index d6529a2565..cfaf70d6d6 100644 --- a/include/nbl/ext/Bullet/CPhysicsWorld.h +++ b/include/nbl/ext/Bullet/CPhysicsWorld.h @@ -24,7 +24,7 @@ class CPhysicsWorld : public core::IReferenceCounted struct 
RigidBodyData { btCollisionShape *shape; - core::matrix3x4SIMD trans; + hlsl::float32_t3x4 trans; core::vectorSIMDf inertia; float mass; }; diff --git a/include/nbl/ext/DebugDraw/CDraw3DLine.h b/include/nbl/ext/DebugDraw/CDraw3DLine.h index 68cd64e9c1..86b874f9d1 100644 --- a/include/nbl/ext/DebugDraw/CDraw3DLine.h +++ b/include/nbl/ext/DebugDraw/CDraw3DLine.h @@ -33,7 +33,7 @@ class CDraw3DLine : public core::IReferenceCounted } - void setData(const core::matrix4SIMD& viewProjMat, const core::vector>& linesData) + void setData(const hlsl::float32_t4x4& viewProjMat, const core::vector>& linesData) { m_viewProj = viewProjMat; m_lines = linesData; @@ -45,7 +45,7 @@ class CDraw3DLine : public core::IReferenceCounted m_lines.clear(); } - void setLine(const core::matrix4SIMD& viewProjMat, + void setLine(const hlsl::float32_t4x4& viewProjMat, float fromX, float fromY, float fromZ, float toX, float toY, float toZ, float r, float g, float b, float a @@ -54,7 +54,7 @@ class CDraw3DLine : public core::IReferenceCounted m_lines = core::vector>{ std::pair(S3DLineVertex{{ fromX, fromY, fromZ }, { r, g, b, a }}, S3DLineVertex{{ toX, toY, toZ }, { r, g, b, a }}) }; } - void addLine(const core::matrix4SIMD& viewProjMat, + void addLine(const hlsl::float32_t4x4& viewProjMat, float fromX, float fromY, float fromZ, float toX, float toY, float toZ, float r, float g, float b, float a @@ -73,7 +73,7 @@ class CDraw3DLine : public core::IReferenceCounted m_lines.insert(m_lines.end(), linesData.begin(), linesData.end()); } - void setViewProjMatrix(const core::matrix4SIMD& viewProjMat) + void setViewProjMatrix(const hlsl::float32_t4x4& viewProjMat) { m_viewProj = viewProjMat; } @@ -91,7 +91,7 @@ class CDraw3DLine : public core::IReferenceCounted */ void recordToCommandBuffer(video::IGPUCommandBuffer* cmdBuffer, video::IGPUGraphicsPipeline* graphics_pipeline); - inline void addBox(const core::aabbox3df& box, float r, float g, float b, float a, const core::matrix3x4SIMD& 
tform=core::matrix3x4SIMD()) + inline void addBox(const core::aabbox3df& box, float r, float g, float b, float a, const hlsl::float32_t3x4& tform=hlsl::float32_t3x4()) { auto addLine = [&](auto s, auto e) -> void { @@ -128,7 +128,7 @@ class CDraw3DLine : public core::IReferenceCounted core::smart_refctd_ptr m_device; core::smart_refctd_ptr m_linesBuffer = nullptr; core::smart_refctd_ptr m_rpindependent_pipeline; - core::matrix4SIMD m_viewProj; + hlsl::float32_t4x4 m_viewProj; core::vector> m_lines; const uint32_t alignments[1] = { sizeof(S3DLineVertex) }; }; diff --git a/include/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h b/include/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h index 678adf59a9..440a1ca463 100644 --- a/include/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h +++ b/include/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h @@ -56,8 +56,8 @@ class EnvmapImportanceSampling float x,y,z; }; #define vec4 core::vectorSIMDf - #define mat4 core::matrix4SIMD - #define mat4x3 core::matrix3x4SIMD + #define mat4 hlsl::float32_t4x4 + #define mat4x3 hlsl::float32_t3x4 #include "nbl/builtin/glsl/ext/EnvmapImportanceSampling/structs.glsl" #undef uint #undef vec4 diff --git a/include/nbl/ext/MitsubaLoader/CElementShape.h b/include/nbl/ext/MitsubaLoader/CElementShape.h index 205023afea..c1725963b2 100644 --- a/include/nbl/ext/MitsubaLoader/CElementShape.h +++ b/include/nbl/ext/MitsubaLoader/CElementShape.h @@ -225,7 +225,7 @@ class CElementShape : public IElement std::string getLogName() const override { return "shape"; } - inline core::matrix3x4SIMD getAbsoluteTransform() const + inline hlsl::float32_t3x4 getAbsoluteTransform() const { auto local = transform.matrix.extractSub3x4(); // TODO restore at some point (and make it actually work??) 
diff --git a/include/nbl/ext/MitsubaLoader/CElementTransform.h b/include/nbl/ext/MitsubaLoader/CElementTransform.h index d518f69e6c..88864f7365 100644 --- a/include/nbl/ext/MitsubaLoader/CElementTransform.h +++ b/include/nbl/ext/MitsubaLoader/CElementTransform.h @@ -35,7 +35,7 @@ class CElementTransform : public IElement } */ - core::matrix4SIMD matrix; + hlsl::float32_t4x4 matrix; }; } diff --git a/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h b/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h index e61ab3fa87..fd28d881db 100644 --- a/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h +++ b/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h @@ -28,7 +28,7 @@ class CMitsubaMaterialCompilerFrontend; //#include "nbl/builtin/glsl/ext/MitsubaLoader/instance_data_struct.glsl" #define uint uint32_t #define uvec2 uint64_t -#define mat4x3 nbl::core::matrix3x4SIMD +#define mat4x3 hlsl::float32_t3x4 #define nbl_glsl_MC_material_data_t asset::material_compiler::material_data_t struct nbl_glsl_ext_Mitsuba_Loader_instance_data_t { @@ -71,13 +71,13 @@ class CMitsubaLoader : public asset::IRenderpassIndependentPipelineLoader // core::vector getMesh(SContext& ctx, uint32_t hierarchyLevel, CElementShape* shape); - core::vector loadShapeGroup(SContext& ctx, uint32_t hierarchyLevel, const CElementShape::ShapeGroup* shapegroup, const core::matrix3x4SIMD& relTform); - SContext::shape_ass_type loadBasicShape(SContext& ctx, uint32_t hierarchyLevel, CElementShape* shape, const core::matrix3x4SIMD& relTform); + core::vector loadShapeGroup(SContext& ctx, uint32_t hierarchyLevel, const CElementShape::ShapeGroup* shapegroup, const hlsl::float32_t3x4& relTform); + SContext::shape_ass_type loadBasicShape(SContext& ctx, uint32_t hierarchyLevel, CElementShape* shape, const hlsl::float32_t3x4& relTform); void cacheTexture(SContext& ctx, uint32_t hierarchyLevel, const CElementTexture* texture, const CMitsubaMaterialCompilerFrontend::E_IMAGE_VIEW_SEMANTIC semantic); void cacheEmissionProfile(SContext& ctx, 
const CElementEmissionProfile* profile); - SContext::bsdf_type getBSDFtreeTraversal(SContext& ctx, const CElementBSDF* bsdf, const CElementEmitter* emitter, core::matrix4SIMD tform); + SContext::bsdf_type getBSDFtreeTraversal(SContext& ctx, const CElementBSDF* bsdf, const CElementEmitter* emitter, hlsl::float32_t4x4 tform); SContext::bsdf_type genBSDFtreeTraversal(SContext& ctx, const CElementBSDF* bsdf); template diff --git a/include/nbl/ext/MitsubaLoader/CMitsubaMaterialCompilerFrontend.h b/include/nbl/ext/MitsubaLoader/CMitsubaMaterialCompilerFrontend.h index 42bad88655..8aaf9083fd 100644 --- a/include/nbl/ext/MitsubaLoader/CMitsubaMaterialCompilerFrontend.h +++ b/include/nbl/ext/MitsubaLoader/CMitsubaMaterialCompilerFrontend.h @@ -43,7 +43,7 @@ class CMitsubaMaterialCompilerFrontend explicit CMitsubaMaterialCompilerFrontend(const SContext* _ctx) : m_loaderContext(_ctx) {} front_and_back_t compileToIRTree(asset::material_compiler::IR* ir, const CElementBSDF* _bsdf); - EmitterNode* createEmitterNode(asset::material_compiler::IR* ir, const CElementEmitter* _emitter, core::matrix4SIMD transform); + EmitterNode* createEmitterNode(asset::material_compiler::IR* ir, const CElementEmitter* _emitter, hlsl::float32_t4x4 transform); private: using tex_ass_type = std::tuple,core::smart_refctd_ptr,float>; diff --git a/include/nbl/ext/MitsubaLoader/PropertyElement.h b/include/nbl/ext/MitsubaLoader/PropertyElement.h index ac257bd4b3..ce2acd967a 100644 --- a/include/nbl/ext/MitsubaLoader/PropertyElement.h +++ b/include/nbl/ext/MitsubaLoader/PropertyElement.h @@ -6,7 +6,6 @@ #define __PROPERTY_ELEMENT_H_INCLUDED__ #include "nbl/core/declarations.h" -#include "matrix4SIMD.h" #include namespace nbl @@ -202,7 +201,7 @@ struct SPropertyElementData bool bvalue; const char* svalue; core::vectorSIMDf vvalue; // rgb, srgb, vector, point - core::matrix4SIMD mvalue; // matrix, translate, rotate, scale, lookat + hlsl::float32_t4x4 mvalue; // matrix, translate, rotate, scale, lookat }; }; 
@@ -302,15 +301,15 @@ template<> struct SPropertyElementData::get_typename struct SPropertyElementData::get_typename { using type = void; }; template<> struct SPropertyElementData::get_typename -{ using type = core::matrix4SIMD; }; +{ using type = hlsl::float32_t4x4; }; template<> struct SPropertyElementData::get_typename -{ using type = core::matrix4SIMD; }; +{ using type = hlsl::float32_t4x4; }; template<> struct SPropertyElementData::get_typename -{ using type = core::matrix4SIMD; }; +{ using type = hlsl::float32_t4x4; }; template<> struct SPropertyElementData::get_typename -{ using type = core::matrix4SIMD; }; +{ using type = hlsl::float32_t4x4; }; template<> struct SPropertyElementData::get_typename -{ using type = core::matrix4SIMD; }; +{ using type = hlsl::float32_t4x4; }; template<> struct SPropertyElementData::get_typename { using type = void; }; @@ -321,7 +320,7 @@ class CPropertyElementManager static std::pair createPropertyData(const char* _el, const char** _atts); static bool retrieveBooleanValue(const std::string& _data, bool& success); - static core::matrix4SIMD retrieveMatrix(const std::string& _data, bool& success); + static hlsl::float32_t4x4 retrieveMatrix(const std::string& _data, bool& success); static core::vectorSIMDf retrieveVector(const std::string& _data, bool& success); static core::vectorSIMDf retrieveHex(const std::string& _data, bool& success); diff --git a/include/nbl/ext/MitsubaLoader/SContext.h b/include/nbl/ext/MitsubaLoader/SContext.h index 687f97054d..9777edf6f0 100644 --- a/include/nbl/ext/MitsubaLoader/SContext.h +++ b/include/nbl/ext/MitsubaLoader/SContext.h @@ -193,7 +193,7 @@ struct SContext struct SInstanceData { - SInstanceData(core::matrix3x4SIMD _tform, SContext::bsdf_type _bsdf, const std::string& _id, const CElementEmitter& _emitterFront, const CElementEmitter& _emitterBack) : + SInstanceData(hlsl::float32_t3x4 _tform, SContext::bsdf_type _bsdf, const std::string& _id, const CElementEmitter& _emitterFront, const 
CElementEmitter& _emitterBack) : tform(_tform), bsdf(_bsdf), #if defined(_NBL_DEBUG) || defined(_NBL_RELWITHDEBINFO) bsdf_id(_id), @@ -201,7 +201,7 @@ struct SContext emitter{_emitterFront, _emitterBack} {} - core::matrix3x4SIMD tform; + hlsl::float32_t3x4 tform; SContext::bsdf_type bsdf; #if defined(_NBL_DEBUG) || defined(_NBL_RELWITHDEBINFO) std::string bsdf_id; diff --git a/include/nbl/scene/ISkinInstanceCache.h b/include/nbl/scene/ISkinInstanceCache.h index 6cb18160d4..2eb83b4aac 100644 --- a/include/nbl/scene/ISkinInstanceCache.h +++ b/include/nbl/scene/ISkinInstanceCache.h @@ -19,7 +19,7 @@ class ISkinInstanceCache : public virtual core::IReferenceCounted // main pseudo-pool properties using joint_t = ITransformTree::node_t; - using skinning_matrix_t = core::matrix3x4SIMD; + using skinning_matrix_t = hlsl::float32_t3x4; using recomputed_stamp_t = ITransformTree::recomputed_stamp_t; using inverse_bind_pose_offset_t = uint32_t; @@ -35,7 +35,7 @@ class ISkinInstanceCache : public virtual core::IReferenceCounted static inline constexpr uint32_t inverse_bind_pose_offset_prop_ix = 3u; // for the inverse bind pose pool - using inverse_bind_pose_t = core::matrix3x4SIMD; + using inverse_bind_pose_t = hlsl::float32_t3x4; static inline constexpr uint32_t inverse_bind_pose_prop_ix = 0u; diff --git a/include/nbl/scene/ISkinInstanceCacheManager.h b/include/nbl/scene/ISkinInstanceCacheManager.h index 5a5e3f5881..474a8a3eaa 100644 --- a/include/nbl/scene/ISkinInstanceCacheManager.h +++ b/include/nbl/scene/ISkinInstanceCacheManager.h @@ -466,7 +466,7 @@ class ISkinInstanceCacheManager : public virtual core::IReferenceCounted } struct DebugPushConstants { - core::matrix4SIMD viewProjectionMatrix; + hlsl::float32_t4x4 viewProjectionMatrix; core::vector4df_SIMD lineColor; core::vector3df aabbColor; uint32_t skinCount; diff --git a/include/nbl/video/IGPUAccelerationStructure.h b/include/nbl/video/IGPUAccelerationStructure.h index 1bb4fb0c66..3c10a255a2 100644 --- 
a/include/nbl/video/IGPUAccelerationStructure.h +++ b/include/nbl/video/IGPUAccelerationStructure.h @@ -272,7 +272,7 @@ class IGPUBottomLevelAccelerationStructure : public asset::IBottomLevelAccelerat // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-vkCmdBuildAccelerationStructuresIndirectKHR-pInfos-03809 // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-vkCmdBuildAccelerationStructuresIndirectKHR-pInfos-03810 // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-vkBuildAccelerationStructuresKHR-pInfos-03773 - if (Base::invalidInputBuffer(geometry.transform,buildRangeInfo.transformByteOffset,1u,sizeof(core::matrix3x4SIMD),sizeof(core::vectorSIMDf))) + if (Base::invalidInputBuffer(geometry.transform,buildRangeInfo.transformByteOffset,1u,sizeof(hlsl::float32_t3x4),sizeof(core::vectorSIMDf))) return false; } else @@ -622,7 +622,7 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr inline PolymorphicInstance(const PolymorphicInstance&) = default; inline PolymorphicInstance(PolymorphicInstance&&) = default; - // I made all these assignment operators because of the `core::matrix3x4SIMD` alignment and keeping `type` correct at all times + // I made all these assignment operators because of the `hlsl::float32_t3x4` alignment and keeping `type` correct at all times inline PolymorphicInstance& operator=(const StaticInstance& _static) { type = INSTANCE_TYPE::STATIC; @@ -657,7 +657,7 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr static_assert(std::is_same_v,uint32_t>); // these must be 0 as per vulkan spec uint32_t reservedMotionFlags = 0u; - // I don't do an actual union because the preceeding members don't play nicely with alignment of `core::matrix3x4SIMD` and Vulkan requires this struct to be packed + // I don't do an actual union because the preceeding members don't play nicely with alignment of 
`hlsl::float32_t3x4` and Vulkan requires this struct to be packed SRTMotionInstance largestUnionMember = {}; static_assert(alignof(SRTMotionInstance)==8ull); diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index e8798499f9..9ad14818ab 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -158,6 +158,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ieee754/impl.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/array_accessors.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/vector_utils/vector_traits.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/matrix_utils/matrix_traits.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/matrix_utils/transformation_matrix_utils.hlsl") #spirv intrinsics LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/spirv_intrinsics/core.hlsl") @@ -179,6 +180,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/matrix.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/promote.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/vector.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/impl/intrinsics_impl.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/unroll.hlsl") #glsl compat LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/glsl_compat/core.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/glsl_compat/subgroup_arithmetic.hlsl") @@ -228,6 +230,9 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/angle_adding.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quadratic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/cubic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quartic.hlsl") +#quaternions +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/quaternion/quaternion.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED 
"hlsl/math/quaternion/quaternion_impl.hlsl") #extra math LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/quadrature/gauss_legendre/impl.hlsl") From bec99ef719188dccf8ba8cc82672b32ff23434c3 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Fri, 21 Nov 2025 13:04:03 +0100 Subject: [PATCH 123/472] Added projection and view matrix construction tools --- .../nbl/builtin/hlsl/camera/view_matrix.hlsl | 51 ++++++++++++ .../builtin/hlsl/projection/projection.hlsl | 81 +++++++++++++++++++ src/nbl/builtin/CMakeLists.txt | 1 + 3 files changed, 133 insertions(+) create mode 100644 include/nbl/builtin/hlsl/camera/view_matrix.hlsl create mode 100644 include/nbl/builtin/hlsl/projection/projection.hlsl diff --git a/include/nbl/builtin/hlsl/camera/view_matrix.hlsl b/include/nbl/builtin/hlsl/camera/view_matrix.hlsl new file mode 100644 index 0000000000..27b2c63239 --- /dev/null +++ b/include/nbl/builtin/hlsl/camera/view_matrix.hlsl @@ -0,0 +1,51 @@ +#ifndef _NBL_BUILTIN_HLSL_CAMERA_VIEW_MATRIX_INCLUDED_ +#define _NBL_BUILTIN_HLSL_CAMERA_VIEW_MATRIX_INCLUDED_ + +#include + +namespace nbl +{ +namespace hlsl +{ + +// /Arek: glm:: for normalize till dot product is fixed (ambiguity with glm namespace + linker issues) +template +inline matrix buildCameraLookAtMatrixLH( + const vector& position, + const vector& target, + const vector& upVector) +{ + const vector zaxis = hlsl::normalize(target - position); // unit vector from `position` towards `target` + const vector xaxis = hlsl::normalize(hlsl::cross(upVector, zaxis)); + const vector yaxis = hlsl::cross(zaxis, xaxis); + + matrix r; + r[0] = vector(xaxis, -hlsl::dot(xaxis, position)); // each row: basis axis, then minus the projection of `position` onto it + r[1] = vector(yaxis, -hlsl::dot(yaxis, position)); + r[2] = vector(zaxis, -hlsl::dot(zaxis, position)); + + return r; +} + +template +inline matrix buildCameraLookAtMatrixRH( + const vector& position, + const vector& target, + const vector& upVector) +{ + const vector zaxis = hlsl::normalize(position - target); + 
const vector xaxis = hlsl::normalize(hlsl::cross(upVector, zaxis)); + const vector yaxis = hlsl::cross(zaxis, xaxis); + + matrix r; + r[0] = vector(xaxis, -hlsl::dot(xaxis, position)); + r[1] = vector(yaxis, -hlsl::dot(yaxis, position)); + r[2] = vector(zaxis, -hlsl::dot(zaxis, position)); + + return r; +} + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/projection/projection.hlsl b/include/nbl/builtin/hlsl/projection/projection.hlsl new file mode 100644 index 0000000000..22d2872fde --- /dev/null +++ b/include/nbl/builtin/hlsl/projection/projection.hlsl @@ -0,0 +1,81 @@ +#ifndef _NBL_BUILTIN_HLSL_PROJECTION_INCLUDED_ +#define _NBL_BUILTIN_HLSL_PROJECTION_INCLUDED_ + +#include + +namespace nbl +{ +namespace hlsl +{ +// TODO: use glm instead for c++ +template +inline matrix buildProjectionMatrixPerspectiveFovRH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar) +{ + const float h = core::reciprocal(tanf(fieldOfViewRadians * 0.5f)); + _NBL_DEBUG_BREAK_IF(aspectRatio == 0.f); //division by zero + const float w = h / aspectRatio; + + _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero + + matrix m; + m[0] = vector(w, 0.f, 0.f, 0.f); + m[1] = vector(0.f, -h, 0.f, 0.f); + m[2] = vector(0.f, 0.f, -zFar / (zFar - zNear), -zNear * zFar / (zFar - zNear)); + m[3] = vector(0.f, 0.f, -1.f, 0.f); + + return m; +} +template +inline matrix buildProjectionMatrixPerspectiveFovLH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar) +{ + const float h = core::reciprocal(tanf(fieldOfViewRadians * 0.5f)); + _NBL_DEBUG_BREAK_IF(aspectRatio == 0.f); //division by zero + const float w = h / aspectRatio; + + _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero + + matrix m; + m[0] = vector(w, 0.f, 0.f, 0.f); + m[1] = vector(0.f, -h, 0.f, 0.f); + m[2] = vector(0.f, 0.f, zFar / (zFar - zNear), -zNear * zFar / (zFar - zNear)); + m[3] = vector(0.f, 0.f, 1.f, 0.f); + + return m; +} + +template +inline matrix 
buildProjectionMatrixOrthoRH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar) +{ + _NBL_DEBUG_BREAK_IF(widthOfViewVolume == 0.f); //division by zero + _NBL_DEBUG_BREAK_IF(heightOfViewVolume == 0.f); //division by zero + _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero + + matrix m; + m[0] = vector(2.f / widthOfViewVolume, 0.f, 0.f, 0.f); + m[1] = vector(0.f, -2.f / heightOfViewVolume, 0.f, 0.f); + m[2] = vector(0.f, 0.f, -1.f / (zFar - zNear), -zNear / (zFar - zNear)); + m[3] = vector(0.f, 0.f, 0.f, 1.f); + + return m; +} + +template +inline matrix buildProjectionMatrixOrthoLH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar) +{ + _NBL_DEBUG_BREAK_IF(widthOfViewVolume == 0.f); //division by zero + _NBL_DEBUG_BREAK_IF(heightOfViewVolume == 0.f); //division by zero + _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero + + matrix m; + m[0] = vector(2.f / widthOfViewVolume, 0.f, 0.f, 0.f); + m[1] = vector(0.f, -2.f / heightOfViewVolume, 0.f, 0.f); + m[2] = vector(0.f, 0.f, 1.f / (zFar - zNear), -zNear / (zFar - zNear)); + m[3] = vector(0.f, 0.f, 0.f, 1.f); + + return m; +} + +} +} + +#endif \ No newline at end of file diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 9ad14818ab..467ed35ccd 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -159,6 +159,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/array_accessors.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/vector_utils/vector_traits.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/matrix_utils/matrix_traits.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/matrix_utils/transformation_matrix_utils.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/projection/projection.hlsl") #spirv intrinsics LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/spirv_intrinsics/core.hlsl") From def7d2e8480170468bcf46f75b3723ed72ae49c0 Mon Sep 17 00:00:00 2001 From: 
Przemog1 Date: Fri, 21 Nov 2025 15:29:52 +0100 Subject: [PATCH 124/472] Added more hlsl vector utils --- examples_tests | 2 +- .../nbl/builtin/hlsl/camera/view_matrix.hlsl | 4 +-- .../builtin/hlsl/projection/projection.hlsl | 4 +-- .../hlsl/vector_utils/vector_utils.hlsl | 21 +++++++++++ include/vectorSIMD.h | 36 +++++++++++++++++++ src/nbl/builtin/CMakeLists.txt | 1 + 6 files changed, 63 insertions(+), 5 deletions(-) create mode 100644 include/nbl/builtin/hlsl/vector_utils/vector_utils.hlsl diff --git a/examples_tests b/examples_tests index 829ea34183..16f06ed439 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 829ea34183a0a62a3bd68ded4dd9e451b97126d4 +Subproject commit 16f06ed43909092d43dbe2ea30f10aa1b8e7e5a1 diff --git a/include/nbl/builtin/hlsl/camera/view_matrix.hlsl b/include/nbl/builtin/hlsl/camera/view_matrix.hlsl index 27b2c63239..7752d9b6eb 100644 --- a/include/nbl/builtin/hlsl/camera/view_matrix.hlsl +++ b/include/nbl/builtin/hlsl/camera/view_matrix.hlsl @@ -1,5 +1,5 @@ -#ifndef _NBL_BUILTIN_HLSL_PROJECTION_INCLUDED_ -#define _NBL_BUILTIN_HLSL_PROJECTION_INCLUDED_ +#ifndef _NBL_BUILTIN_HLSL_CAMERA_VIEW_MATRIX_INCLUDED_ +#define _NBL_BUILTIN_HLSL_CAMERA_VIEW_MATRIX_INCLUDED_ #include diff --git a/include/nbl/builtin/hlsl/projection/projection.hlsl b/include/nbl/builtin/hlsl/projection/projection.hlsl index 22d2872fde..caff793083 100644 --- a/include/nbl/builtin/hlsl/projection/projection.hlsl +++ b/include/nbl/builtin/hlsl/projection/projection.hlsl @@ -1,5 +1,5 @@ -#ifndef _NBL_BUILTIN_HLSL_PROJECTION_INCLUDED_ -#define _NBL_BUILTIN_HLSL_PROJECTION_INCLUDED_ +#ifndef _NBL_BUILTIN_HLSL_PROJECTION_PROJECTION_INCLUDED_ +#define _NBL_BUILTIN_HLSL_PROJECTION_PROJECTION_INCLUDED_ #include diff --git a/include/nbl/builtin/hlsl/vector_utils/vector_utils.hlsl b/include/nbl/builtin/hlsl/vector_utils/vector_utils.hlsl new file mode 100644 index 0000000000..e1fa9dd3a0 --- /dev/null +++ 
b/include/nbl/builtin/hlsl/vector_utils/vector_utils.hlsl @@ -0,0 +1,21 @@ +#ifndef _NBL_BUILTIN_HLSL_VECTOR_UTILS_VECTOR_UTILS_INCLUDED_ +#define _NBL_BUILTIN_HLSL_VECTOR_UTILS_VECTOR_UTILS_INCLUDED_ + +#include + +namespace nbl +{ +namespace hlsl +{ + +// TODO: why cant I NBL_CONST_REF_ARG(vector) +template +inline T lengthsquared(vector vec) +{ + return dot(vec, vec); +} + +} +} + +#endif \ No newline at end of file diff --git a/include/vectorSIMD.h b/include/vectorSIMD.h index 9b09f95c97..6144dc446f 100644 --- a/include/vectorSIMD.h +++ b/include/vectorSIMD.h @@ -887,6 +887,42 @@ namespace core } }; + // temporary solution until vectorSIMD gets deleted + inline hlsl::float32_t4 convertToHLSLVector(const vectorSIMDf& vec) + { + hlsl::float32_t4 retval; + retval.x = vec.x; + retval.y = vec.y; + retval.z = vec.z; + retval.w = vec.w; + + return retval; + } + + // temporary solution until vectorSIMD gets deleted + inline vectorSIMDf constructVecorSIMDFromHLSLVector(const hlsl::float32_t4& vec) + { + vectorSIMDf retval; + retval.x = vec.x; + retval.y = vec.y; + retval.z = vec.z; + retval.w = vec.w; + + return retval; + } + + // temporary solution until vectorSIMD gets deleted + inline vectorSIMDf constructVecorSIMDFromHLSLVector(const hlsl::float32_t3& vec) + { + vectorSIMDf retval; + retval.x = vec.x; + retval.y = vec.y; + retval.z = vec.z; + retval.w = 0.0f; + + return retval; + } + } // end namespace core } // end namespace nbl diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 467ed35ccd..b0c8a14d2f 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -157,6 +157,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ieee754/impl.hlsl") # utility LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/array_accessors.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/vector_utils/vector_traits.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/vector_utils/vector_utils.hlsl") 
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/matrix_utils/matrix_traits.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/matrix_utils/transformation_matrix_utils.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/projection/projection.hlsl") From e320ed8f8f620d1c133b14edf8ec7b96bfb39956 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 22 Nov 2025 12:30:57 +0700 Subject: [PATCH 125/472] Make morton compile --- include/nbl/builtin/hlsl/morton.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 4512774b14..696124ae0c 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -118,7 +118,7 @@ struct Transcoder NBL_CONSTEXPR_STATIC portable_vector_t interleaveShift(NBL_CONST_REF_ARG(decode_t) decodedValue) { left_shift_operator > leftShift; - portable_vector_t interleaved = truncate >(decodedValue) & coding_mask_v; + portable_vector_t interleaved = _static_cast >(decodedValue) & coding_mask_v; #define ENCODE_LOOP_ITERATION(I) NBL_IF_CONSTEXPR(Bits > (uint16_t(1) << I))\ {\ From 80d4479b1b9c01d8fd8a213dd1575f7203278d41 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 24 Nov 2025 16:50:45 +0100 Subject: [PATCH 126/472] a few updates due to WAVE, ambiguity and DXC bugs, new ImGui/builtin/hlsl/pc.hlsl to allow me precompile unified uber shader, a little formatting, update examples_tests submodule --- examples_tests | 2 +- include/nbl/builtin/hlsl/ies/profile.hlsl | 2 +- include/nbl/builtin/hlsl/ies/sampler.hlsl | 9 +- include/nbl/builtin/hlsl/math/octahedral.hlsl | 6 +- .../nbl/ext/ImGui/builtin/hlsl/fragment.hlsl | 4 +- include/nbl/ext/ImGui/builtin/hlsl/pc.hlsl | 8 ++ .../nbl/ext/ImGui/builtin/hlsl/vertex.hlsl | 4 +- src/nbl/asset/utils/CIESProfile.h | 121 +++++++++--------- src/nbl/ext/ImGui/CMakeLists.txt | 7 +- 9 files changed, 84 insertions(+), 79 deletions(-) create mode 100644 
include/nbl/ext/ImGui/builtin/hlsl/pc.hlsl diff --git a/examples_tests b/examples_tests index 4cd5f027ea..65e0126cac 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 4cd5f027eabdf88f84e16d47f8fdc6acdd1d36b4 +Subproject commit 65e0126cac320c10efe34ebbc144da45d587a54e diff --git a/include/nbl/builtin/hlsl/ies/profile.hlsl b/include/nbl/builtin/hlsl/ies/profile.hlsl index c4b12b3211..88e212f069 100644 --- a/include/nbl/builtin/hlsl/ies/profile.hlsl +++ b/include/nbl/builtin/hlsl/ies/profile.hlsl @@ -63,4 +63,4 @@ struct ProfileProperties } } -#endif // _NBL_BUILTIN_HLSL_IES_PROFILE_INCLUDED_ \ No newline at end of file +#endif // _NBL_BUILTIN_HLSL_IES_PROFILE_INCLUDED_ diff --git a/include/nbl/builtin/hlsl/ies/sampler.hlsl b/include/nbl/builtin/hlsl/ies/sampler.hlsl index e38bc53551..3f518ff21a 100644 --- a/include/nbl/builtin/hlsl/ies/sampler.hlsl +++ b/include/nbl/builtin/hlsl/ies/sampler.hlsl @@ -61,7 +61,8 @@ struct CandelaSampler static value_t sample(NBL_CONST_REF_ARG(accessor_t) accessor, NBL_CONST_REF_ARG(math::Polar) polar) { - const symmetry_t symmetry = accessor.symmetry(); + // TODO: DXC seems to have a bug and cannot use symmetry_t directly with == operator https://godbolt.devsh.eu/z/P9Kc5x + const ProfileProperties::LuminairePlanesSymmetry symmetry = accessor.symmetry(); const float32_t vAngle = degrees(polar.theta); const float32_t hAngle = degrees(wrapPhi(polar.phi, symmetry)); @@ -71,8 +72,8 @@ struct CandelaSampler const uint32_t j0 = getVLB(accessor, vAngle); const uint32_t j1 = getVUB(accessor, vAngle); - const uint32_t i0 = (symmetry == symmetry_t::ISOTROPIC) ? 0u : getHLB(accessor, hAngle); - const uint32_t i1 = (symmetry == symmetry_t::ISOTROPIC) ? 0u : getHUB(accessor, hAngle); + const uint32_t i0 = (symmetry == ProfileProperties::LuminairePlanesSymmetry::ISOTROPIC) ? 0u : getHLB(accessor, hAngle); + const uint32_t i1 = (symmetry == ProfileProperties::LuminairePlanesSymmetry::ISOTROPIC) ? 
0u : getHUB(accessor, hAngle); const float32_t uReciprocal = ((i1 == i0) ? 1.f : 1.f / (accessor.hAngle(i1) - accessor.hAngle(i0))); const float32_t vReciprocal = ((j1 == j0) ? 1.f : 1.f / (accessor.vAngle(j1) - accessor.vAngle(j0))); @@ -156,4 +157,4 @@ struct CandelaSampler } } -#endif // _NBL_BUILTIN_HLSL_IES_SAMPLER_INCLUDED_ \ No newline at end of file +#endif // _NBL_BUILTIN_HLSL_IES_SAMPLER_INCLUDED_ diff --git a/include/nbl/builtin/hlsl/math/octahedral.hlsl b/include/nbl/builtin/hlsl/math/octahedral.hlsl index 8e32d60cb0..45fe35b2d8 100644 --- a/include/nbl/builtin/hlsl/math/octahedral.hlsl +++ b/include/nbl/builtin/hlsl/math/octahedral.hlsl @@ -43,8 +43,8 @@ struct OctahedralTransform // F^-1 : S^2 -> [-1, 1]^2 static vector2_type dirToNDC(NBL_CONST_REF_ARG(vector3_type) d) { - scalar_type dir = hlsl::normalize(d); - const scalar_type sum = hlsl::dot(vector3_type(scalar_type(1), scalar_type(1), scalar_type(1)), abs(dir)); + vector3_type dir = hlsl::normalize(d); + const scalar_type sum = dot(vector3_type(scalar_type(1), scalar_type(1), scalar_type(1)), abs(dir)); vector3_type s = dir / sum; if (s.z < scalar_type(0)) @@ -72,4 +72,4 @@ struct OctahedralTransform } } -#endif // _NBL_BUILTIN_HLSL_MATH_OCTAHEDRAL_INCLUDED_ \ No newline at end of file +#endif // _NBL_BUILTIN_HLSL_MATH_OCTAHEDRAL_INCLUDED_ diff --git a/include/nbl/ext/ImGui/builtin/hlsl/fragment.hlsl b/include/nbl/ext/ImGui/builtin/hlsl/fragment.hlsl index 44ef6a0cb1..a0e70dfebd 100644 --- a/include/nbl/ext/ImGui/builtin/hlsl/fragment.hlsl +++ b/include/nbl/ext/ImGui/builtin/hlsl/fragment.hlsl @@ -22,13 +22,11 @@ #error "NBL_SAMPLERS_COUNT must be defined!" 
#endif -#include "common.hlsl" +#include "pc.hlsl" #include "psinput.hlsl" using namespace nbl::ext::imgui; -[[vk::push_constant]] struct PushConstants pc; - // separable image samplers to handle textures we do descriptor-index [[vk::binding(NBL_TEXTURES_BINDING_IX, NBL_TEXTURES_SET_IX)]] Texture2D textures[NBL_TEXTURES_COUNT]; [[vk::binding(NBL_SAMPLER_STATES_BINDING_IX, NBL_SAMPLER_STATES_SET_IX)]] SamplerState samplerStates[NBL_SAMPLERS_COUNT]; diff --git a/include/nbl/ext/ImGui/builtin/hlsl/pc.hlsl b/include/nbl/ext/ImGui/builtin/hlsl/pc.hlsl new file mode 100644 index 0000000000..103ce5b6c1 --- /dev/null +++ b/include/nbl/ext/ImGui/builtin/hlsl/pc.hlsl @@ -0,0 +1,8 @@ +#ifndef _NBL_IMGUI_EXT_PC_HLSL_ +#define _NBL_IMGUI_EXT_PC_HLSL_ + +// TODO: have only unified.hlsl uber shader and common.hlsl then update imgui cpp files, doing a quick workaround for my prebuilds +#include "common.hlsl" +[[vk::push_constant]] struct nbl::ext::imgui::PushConstants pc; + +#endif // _NBL_IMGUI_EXT_PC_HLSL_ diff --git a/include/nbl/ext/ImGui/builtin/hlsl/vertex.hlsl b/include/nbl/ext/ImGui/builtin/hlsl/vertex.hlsl index 78dbe10ac7..ef4582146e 100644 --- a/include/nbl/ext/ImGui/builtin/hlsl/vertex.hlsl +++ b/include/nbl/ext/ImGui/builtin/hlsl/vertex.hlsl @@ -1,10 +1,8 @@ -#include "common.hlsl" +#include "pc.hlsl" #include "psinput.hlsl" using namespace nbl::ext::imgui; -[[vk::push_constant]] struct PushConstants pc; - struct VSInput { [[vk::location(0)]] float2 position : POSITION; diff --git a/src/nbl/asset/utils/CIESProfile.h b/src/nbl/asset/utils/CIESProfile.h index a165b0ae49..c09f2fd760 100644 --- a/src/nbl/asset/utils/CIESProfile.h +++ b/src/nbl/asset/utils/CIESProfile.h @@ -7,82 +7,81 @@ #include "nbl/asset/metadata/CIESProfileMetadata.h" #include "nbl/builtin/hlsl/ies/sampler.hlsl" -#include namespace nbl { - namespace asset - { - class CIESProfile +namespace asset +{ +class CIESProfile +{ + public: + struct properties_t : public nbl::hlsl::ies::ProfileProperties { - 
public: - struct properties_t : public nbl::hlsl::ies::ProfileProperties - { - NBL_CONSTEXPR_STATIC_INLINE auto IES_TEXTURE_STORAGE_FORMAT = asset::EF_R16_UNORM; - hlsl::uint32_t2 optimalIESResolution; //! Optimal resolution for IES CDC texture - }; + NBL_CONSTEXPR_STATIC_INLINE auto IES_TEXTURE_STORAGE_FORMAT = asset::EF_R16_UNORM; + hlsl::uint32_t2 optimalIESResolution; //! Optimal resolution for IES CDC texture + }; - struct accessor_t - { - using key_t = uint32_t; - using key_t2 = hlsl::uint32_t2; - using value_t = hlsl::float32_t; + struct accessor_t + { + using key_t = uint32_t; + using key_t2 = hlsl::uint32_t2; + using value_t = hlsl::float32_t; - accessor_t() = default; - accessor_t(const key_t2& resolution, const properties_t& props) : hAngles(resolution.x), vAngles(resolution.y), data(resolution.x * resolution.y), properties(props) {} - ~accessor_t() = default; + accessor_t() = default; + accessor_t(const key_t2& resolution, const properties_t& props) : hAngles(resolution.x), vAngles(resolution.y), data(resolution.x * resolution.y), properties(props) {} + ~accessor_t() = default; - template) - inline value_t vAngle(T j) const { return (value_t)vAngles[j]; } + template) + inline value_t vAngle(T j) const { return (value_t)vAngles[j]; } - template) - inline value_t hAngle(T i) const { return (value_t)hAngles[i]; } + template) + inline value_t hAngle(T i) const { return (value_t)hAngles[i]; } - template) - inline value_t value(T ij) const { return (value_t)data[vAnglesCount() * ij.x + ij.y]; } + template) + inline value_t value(T ij) const { return (value_t)data[vAnglesCount() * ij.x + ij.y]; } - template) - inline void setValue(T ij, value_t val) { data[vAnglesCount() * ij.x + ij.y] = val; } + template) + inline void setValue(T ij, value_t val) { data[vAnglesCount() * ij.x + ij.y] = val; } - inline key_t vAnglesCount() const { return (key_t)vAngles.size(); } - inline key_t hAnglesCount() const { return (key_t)hAngles.size(); } - inline 
properties_t::LuminairePlanesSymmetry symmetry() const { return properties.symmetry; } + inline key_t vAnglesCount() const { return (key_t)vAngles.size(); } + inline key_t hAnglesCount() const { return (key_t)hAngles.size(); } + inline properties_t::LuminairePlanesSymmetry symmetry() const { return properties.symmetry; } - core::vector hAngles; //! The angular displacement indegreesfrom straight down, a value represents spherical coordinate "theta" with physics convention. Note that if symmetry is OTHER_HALF_SYMMETRIC then real horizontal angle provided by IES data is (hAngles[index] + 90) - the reason behind it is we patch 1995 IES OTHER_HALF_SYMETRIC case to be HALF_SYMETRIC - core::vector vAngles; //! Measurements in degrees of angular displacement measured counterclockwise in a horizontal plane for Type C photometry and clockwise for Type A and B photometry, a value represents spherical coordinate "phi" with physics convention - core::vector data; //! Candela scalar values - properties_t properties; //! Profile properties - }; + core::vector hAngles; //! The angular displacement indegreesfrom straight down, a value represents spherical coordinate "theta" with physics convention. Note that if symmetry is OTHER_HALF_SYMMETRIC then real horizontal angle provided by IES data is (hAngles[index] + 90) - the reason behind it is we patch 1995 IES OTHER_HALF_SYMETRIC case to be HALF_SYMETRIC + core::vector vAngles; //! Measurements in degrees of angular displacement measured counterclockwise in a horizontal plane for Type C photometry and clockwise for Type A and B photometry, a value represents spherical coordinate "phi" with physics convention + core::vector data; //! Candela scalar values + properties_t properties; //! 
Profile properties + }; - using sampler_t = nbl::hlsl::ies::CandelaSampler; + using sampler_t = nbl::hlsl::ies::CandelaSampler; - CIESProfile() = default; - ~CIESProfile() = default; + CIESProfile() = default; + ~CIESProfile() = default; - inline const accessor_t& getAccessor() const { return accessor; } - - inline hlsl::float32_t getAvgEmmision(const bool fullDomain=false) const - { - if (fullDomain) - { - const float cosLo = std::cos(core::radians(accessor.vAngles.front())); - const float cosHi = std::cos(core::radians(accessor.vAngles.back())); - const float dsinTheta = cosLo - cosHi; - return accessor.properties.totalEmissionIntegral*(0.5/core::PI())/dsinTheta; - } - return accessor.properties.avgEmmision; - } - - template - core::smart_refctd_ptr createIESTexture(ExecutionPolicy&& policy, const float flatten = 0.0, const bool fullDomainFlatten=false, uint32_t width = properties_t::CDC_DEFAULT_TEXTURE_WIDTH, uint32_t height = properties_t::CDC_DEFAULT_TEXTURE_HEIGHT) const; - core::smart_refctd_ptr createIESTexture(const float flatten = 0.0, const bool fullDomainFlatten=false, uint32_t width = properties_t::CDC_DEFAULT_TEXTURE_WIDTH, uint32_t height = properties_t::CDC_DEFAULT_TEXTURE_HEIGHT) const; - - private: - CIESProfile(const properties_t& props, const hlsl::uint32_t2& resolution) : accessor(resolution, props) {} - accessor_t accessor; - friend class CIESProfileParser; - }; - } + inline const accessor_t& getAccessor() const { return accessor; } + + inline hlsl::float32_t getAvgEmmision(const bool fullDomain=false) const + { + if (fullDomain) + { + const float cosLo = std::cos(core::radians(accessor.vAngles.front())); + const float cosHi = std::cos(core::radians(accessor.vAngles.back())); + const float dsinTheta = cosLo - cosHi; + return accessor.properties.totalEmissionIntegral*(0.5/core::PI())/dsinTheta; + } + return accessor.properties.avgEmmision; + } + + template + core::smart_refctd_ptr createIESTexture(ExecutionPolicy&& policy, const float flatten = 
0.0, const bool fullDomainFlatten=false, uint32_t width = properties_t::CDC_DEFAULT_TEXTURE_WIDTH, uint32_t height = properties_t::CDC_DEFAULT_TEXTURE_HEIGHT) const; + core::smart_refctd_ptr createIESTexture(const float flatten = 0.0, const bool fullDomainFlatten=false, uint32_t width = properties_t::CDC_DEFAULT_TEXTURE_WIDTH, uint32_t height = properties_t::CDC_DEFAULT_TEXTURE_HEIGHT) const; + + private: + CIESProfile(const properties_t& props, const hlsl::uint32_t2& resolution) : accessor(resolution, props) {} + accessor_t accessor; + friend class CIESProfileParser; +}; +} } #endif // __NBL_ASSET_C_IES_PROFILE_H_INCLUDED__ \ No newline at end of file diff --git a/src/nbl/ext/ImGui/CMakeLists.txt b/src/nbl/ext/ImGui/CMakeLists.txt index 2c339b2b00..e46d93b952 100644 --- a/src/nbl/ext/ImGui/CMakeLists.txt +++ b/src/nbl/ext/ImGui/CMakeLists.txt @@ -33,13 +33,14 @@ target_compile_definitions(${LIB_NAME} PRIVATE _ARCHIVE_ABSOLUTE_ENTRY_PATH_="${ target_compile_definitions(${LIB_NAME} PRIVATE _ARCHIVE_ENTRY_KEY_="${_ARCHIVE_ENTRY_KEY_}") if(NBL_EMBED_BUILTIN_RESOURCES) - # (*) -> I wish we could just take NSC, offline-precompile to SPIRV, embed into builtin resource library (as we did!) 
but then be smart & adjust at runtime OpDecorate of our resources according to wishes - unfortunately no linker yet we have and we are not going to make one ourselves so we compile imgui shaders at runtime set(_BR_TARGET_ extImguibuiltinResourceData) + # TODO: one uber shader and common.hlsl LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "common.hlsl") + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "pc.hlsl") LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "psinput.hlsl") - LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "vertex.hlsl") # (*) -> this we could precompile [no resources for which set/binding Ixs could be adjusted] but I'm not going to mix stuff - LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "fragment.hlsl") # (*) -> but this we could not since we let users to provide external descriptor set layout + ImGUI textures & sampler state set/binding Ixs (for pipeline layout) at runtime + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "vertex.hlsl") + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "fragment.hlsl") ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_ARCHIVE_ABSOLUTE_ENTRY_PATH_}" "${_ARCHIVE_ENTRY_KEY_}" "nbl::ext::imgui::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") LINK_BUILTIN_RESOURCES_TO_TARGET(${LIB_NAME} ${_BR_TARGET_}) From 0e81eff6b0ce2eeab41fb05fd452f822ee9cd00d Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 24 Nov 2025 17:20:35 +0100 Subject: [PATCH 127/472] Fixed quaternion bug --- examples_tests | 2 +- include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/examples_tests b/examples_tests index 16f06ed439..1a9b50718a 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 16f06ed43909092d43dbe2ea30f10aa1b8e7e5a1 +Subproject commit 1a9b50718aafe8a53229e1f5aa231b64441ac8f4 diff --git a/include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl b/include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl index bc0286e778..9e42747b28 100644 
--- a/include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl @@ -68,7 +68,10 @@ struct quaternion const float spsy = sp * sy; quaternion output; - output.data = float32_t4(sr, cr, cr, cr) * float32_t4(cpcy, spcy, cpsy, cpcy) + float32_t4(-cr, sr, -sr, sr) * float32_t4(spsy, cpsy, spcy, spsy); + output.data[3] = cr * cp * cy + sr * sp * sy; // w + output.data[0] = cr * sp * cy + sr * cp * sy; // x + output.data[1] = cr * cp * sy - sr * sp * cy; // y + output.data[2] = sr * cp * cy - cr * sp * sy; // z return output; } From 1a25fc0800786ad2a55201f97e1ee384db6fcf66 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 25 Nov 2025 09:54:15 +0100 Subject: [PATCH 128/472] fix another ambiguity, finally my unified IES shader compiled --- include/nbl/builtin/hlsl/surface_transform.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/surface_transform.h b/include/nbl/builtin/hlsl/surface_transform.h index 2d7afa7add..bfe2f2cbba 100644 --- a/include/nbl/builtin/hlsl/surface_transform.h +++ b/include/nbl/builtin/hlsl/surface_transform.h @@ -174,8 +174,7 @@ inline float32_t2 applyToNDC(const FLAG_BITS transform, const float32_t2 ndc) template TwoColumns applyToDerivatives(const FLAG_BITS transform, TwoColumns dDx_dDy) { - using namespace glsl; // IN HLSL mode, C++ doens't need this to access `inverse` - return mul(inverse(transformMatrix(transform)),dDx_dDy); + return mul(glsl::inverse(transformMatrix(transform)),dDx_dDy); } } From 83da9c59c8c4ae0584872f99f1ce95c3abea5dab Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 25 Nov 2025 10:40:47 +0100 Subject: [PATCH 129/472] ah I broke nabla build, this one fixes the ambiguity and compiles for both cpp and hlsl --- include/nbl/builtin/hlsl/surface_transform.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/surface_transform.h 
b/include/nbl/builtin/hlsl/surface_transform.h index bfe2f2cbba..0b93434fe0 100644 --- a/include/nbl/builtin/hlsl/surface_transform.h +++ b/include/nbl/builtin/hlsl/surface_transform.h @@ -174,7 +174,7 @@ inline float32_t2 applyToNDC(const FLAG_BITS transform, const float32_t2 ndc) template TwoColumns applyToDerivatives(const FLAG_BITS transform, TwoColumns dDx_dDy) { - return mul(glsl::inverse(transformMatrix(transform)),dDx_dDy); + return mul(inverse(transformMatrix(transform)),dDx_dDy); } } From 7c5b658d310cd0a6a0357a5450bb4e157cd2cf2a Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 25 Nov 2025 11:17:25 +0100 Subject: [PATCH 130/472] add source groups, include HLSL files in solution --- src/nbl/CMakeLists.txt | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 5ab4061eb8..3ebecfbb28 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -103,7 +103,7 @@ if (NBL_COMPILE_WITH_CUDA) endif() endif() -# => TODO: clean +# => TODO: clean! 
# a little bit of globbing for headers never hurt anyone file(GLOB_RECURSE TEMP_GLOB_RES "${NBL_ROOT_PATH}/include/*.h") set(NABLA_HEADERS_PUBLIC ${NABLA_HEADERS_PUBLIC} ${TEMP_GLOB_RES}) @@ -112,6 +112,9 @@ set(NABLA_HEADERS_PUBLIC ${NABLA_HEADERS_PUBLIC} ${TEMP_GLOB_RES}) file(GLOB_RECURSE TEMP_GLOB_RES "${NBL_ROOT_PATH}/src/*.h") set(NABLA_HEADERS_PUBLIC2 ${NABLA_HEADERS_PUBLIC2} ${TEMP_GLOB_RES}) # TODO: we should have moved all headers from src directory to /include, but we need it for install target +file(GLOB_RECURSE NABLA_HLSL_PUBLIC "${NBL_ROOT_PATH}/include/*.hlsl") +list(APPEND NABLA_HEADERS_PUBLIC ${NABLA_HLSL_PUBLIC}) + foreach(NBL_CURRENT_HEADER IN LISTS NABLA_HEADERS_PUBLIC2) cmake_path(GET NBL_CURRENT_HEADER PARENT_PATH NBL_CURRENT_HEADER_DIR) file(RELATIVE_PATH NBL_TMP_REL_DESTINATION "${NBL_ROOT_PATH}/src" ${NBL_CURRENT_HEADER_DIR}) @@ -355,6 +358,7 @@ else() ) endif() add_library(Nabla::Nabla ALIAS Nabla) +set_source_files_properties(${NABLA_HLSL_PUBLIC} PROPERTIES HEADER_FILE_ONLY ON) # from old build config template: # @@ -851,4 +855,29 @@ target_compile_definitions(Nabla INTERFACE NBL_CPACK_PACKAGE_DXC_DLL_DIR_ABS_KEY="${_NBL_DXC_PACKAGE_RUNTIME_DLL_DIR_PATH_}" ) -NBL_ADJUST_FOLDERS(src) \ No newline at end of file +NBL_ADJUST_FOLDERS(src) +function(EXTRACT_BY_REGEX OUT REGEX) + set(tmp "${ARGN}") + list(FILTER tmp INCLUDE REGEX "${REGEX}") + set(${OUT} "${tmp}" PARENT_SCOPE) +endfunction() + +get_target_property(SRC Nabla SOURCES) +EXTRACT_BY_REGEX(NABLA_HLSL_FILES ".*\\.hlsl$" ${SRC}) +EXTRACT_BY_REGEX(NABLA_HEADER_FILES ".*\\.(h|hpp|tcc)$" ${SRC}) +EXTRACT_BY_REGEX(NABLA_SOURCE_FILES ".*\\.(c|cpp)$" ${SRC}) + +source_group(TREE "${NBL_ROOT_PATH}" + PREFIX "HLSL Files" + FILES ${NABLA_HLSL_FILES} +) + +source_group(TREE "${NBL_ROOT_PATH}" + PREFIX "Header Files" + FILES ${NABLA_HEADER_FILES} +) + +source_group(TREE "${NBL_ROOT_PATH}" + PREFIX "Source Files" + FILES ${NABLA_SOURCE_FILES} +) \ No newline at end of file From 
67518feab7f409c3aa2bcb6ccd0058f727a531d8 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 25 Nov 2025 12:33:32 +0100 Subject: [PATCH 131/472] update examples_tests submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 65e0126cac..e00a669c1f 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 65e0126cac320c10efe34ebbc144da45d587a54e +Subproject commit e00a669c1f9914a850f13f8a0659edccb6350831 From 1ed5d82a43fbf77b060f36de2672f079ba66254d Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 25 Nov 2025 15:42:46 +0100 Subject: [PATCH 132/472] add shadertoolsconfig.json, include in source group, leave some comments after my R&D --- include/shadertoolsconfig.json | 8 ++++++++ src/nbl/CMakeLists.txt | 14 ++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 include/shadertoolsconfig.json diff --git a/include/shadertoolsconfig.json b/include/shadertoolsconfig.json new file mode 100644 index 0000000000..4c16b1f743 --- /dev/null +++ b/include/shadertoolsconfig.json @@ -0,0 +1,8 @@ +{ + "root": true, + "hlsl.preprocessorDefinitions": { + }, + "hlsl.additionalIncludeDirectories": [ + "." + ] +} \ No newline at end of file diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 3ebecfbb28..3b9fd0d3fd 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -880,4 +880,18 @@ source_group(TREE "${NBL_ROOT_PATH}" source_group(TREE "${NBL_ROOT_PATH}" PREFIX "Source Files" FILES ${NABLA_SOURCE_FILES} +) + +# we want HLSL intelisense with all fancy features +# https://marketplace.visualstudio.com/items?itemName=TimGJones.HLSLToolsforVisualStudio +# but intellisense doesn't work in VS2026 with the ext even tho I seem to have correct config, +# there is syntax highlighting however it cannot resolve any #include file and +# in VS2022 this ext literally breaks the IDE making it unresponsive. 
+# One could not use it at all and rely on C++ intellisense for .hlsl files, +# but then I found I must right click in a .hlsl file -> Rescan -> Rescan File +# to resolve include files, "Rescan solution" doesn't work +target_sources(Nabla PRIVATE "${NBL_ROOT_PATH}/include/shadertoolsconfig.json") +source_group(TREE "${NBL_ROOT_PATH}" + PREFIX "HLSL Tools" + FILES "${NBL_ROOT_PATH}/include/shadertoolsconfig.json" ) \ No newline at end of file From edee6547e2c5e5e1b1d74be25b0c5a18762e290a Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Tue, 25 Nov 2025 16:34:23 +0100 Subject: [PATCH 133/472] Fixed projection and quaternion --- examples_tests | 2 +- .../hlsl/math/quaternion/quaternion.hlsl | 27 ++++----- .../transformation_matrix_utils.hlsl | 12 +++- .../builtin/hlsl/projection/projection.hlsl | 60 +++++++++---------- 4 files changed, 52 insertions(+), 49 deletions(-) diff --git a/examples_tests b/examples_tests index 1a9b50718a..c256c8dd59 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 1a9b50718aafe8a53229e1f5aa231b64441ac8f4 +Subproject commit c256c8dd5984036d35af7a615eb27d9454eda431 diff --git a/include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl b/include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl index 9e42747b28..aba9ebbd57 100644 --- a/include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl @@ -48,30 +48,23 @@ struct quaternion static inline quaternion create(float_t pitch, float_t yaw, float_t roll) { - float angle; + const float rollDiv2 = roll * 0.5f; + const float sr = sinf(rollDiv2); + const float cr = cosf(rollDiv2); - angle = roll * 0.5f; - const float sr = sinf(angle); - const float cr = cosf(angle); + const float pitchDiv2 = pitch * 0.5f; + const float sp = sinf(pitchDiv2); + const float cp = cosf(pitchDiv2); - angle = pitch * 0.5f; - const float sp = sinf(angle); - const float cp = cos(angle); - - angle = yaw * 0.5f; - const float sy = 
sinf(angle); - const float cy = cosf(angle); - - const float cpcy = cp * cy; - const float spcy = sp * cy; - const float cpsy = cp * sy; - const float spsy = sp * sy; + const float yawDiv2 = yaw * 0.5f; + const float sy = sinf(yawDiv2); + const float cy = cosf(yawDiv2); quaternion output; - output.data[3] = cr * cp * cy + sr * sp * sy; // w output.data[0] = cr * sp * cy + sr * cp * sy; // x output.data[1] = cr * cp * sy - sr * sp * cy; // y output.data[2] = sr * cp * cy - cr * sp * sy; // z + output.data[3] = cr * cp * cy + sr * sp * sy; // w return output; } diff --git a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl index d1a628ccc0..7d5fc74ee2 100644 --- a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl +++ b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl @@ -67,6 +67,16 @@ inline matrix getMatrix3x4As4x4(NBL_CONST_REF_ARG(matrix) mat) return output; } +template +inline matrix extractSub3x4From4x4Matrix(NBL_CONST_REF_ARG(matrix) mat) +{ + matrix output; + for (int i = 0; i < 3; ++i) + output[i] = mat[i]; + + return output; +} + template inline matrix getSub3x3(NBL_CONST_REF_ARG(matrix) mat) { @@ -135,7 +145,7 @@ inline bool getSub3x3InverseTranspose(NBL_CONST_REF_ARG(matrix) matIn, // TODO: use portable_float when merged //! 
multiplies matrices a and b, 3x4 matrices are treated as 4x4 matrices with 4th row set to (0, 0, 0 ,1) template -inline matrix concatenateBFollowedByA(NBL_CONST_REF_ARG(matrix) a, NBL_CONST_REF_ARG(const matrix) b) +inline matrix concatenateBFollowedByA(NBL_CONST_REF_ARG(matrix) a, NBL_CONST_REF_ARG(matrix) b) { // TODO // static_assert(N == 3 || N == 4); diff --git a/include/nbl/builtin/hlsl/projection/projection.hlsl b/include/nbl/builtin/hlsl/projection/projection.hlsl index caff793083..b667d6d9b3 100644 --- a/include/nbl/builtin/hlsl/projection/projection.hlsl +++ b/include/nbl/builtin/hlsl/projection/projection.hlsl @@ -8,69 +8,69 @@ namespace nbl namespace hlsl { // TODO: use glm instead for c++ -template -inline matrix buildProjectionMatrixPerspectiveFovRH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar) +template +inline matrix buildProjectionMatrixPerspectiveFovRH(FloatingPoint fieldOfViewRadians, FloatingPoint aspectRatio, FloatingPoint zNear, FloatingPoint zFar) { - const float h = core::reciprocal(tanf(fieldOfViewRadians * 0.5f)); + const FloatingPoint h = core::reciprocal(tan(fieldOfViewRadians * 0.5f)); _NBL_DEBUG_BREAK_IF(aspectRatio == 0.f); //division by zero const float w = h / aspectRatio; _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero - matrix m; - m[0] = vector(w, 0.f, 0.f, 0.f); - m[1] = vector(0.f, -h, 0.f, 0.f); - m[2] = vector(0.f, 0.f, -zFar / (zFar - zNear), -zNear * zFar / (zFar - zNear)); - m[3] = vector(0.f, 0.f, -1.f, 0.f); + matrix m; + m[0] = vector(w, 0.f, 0.f, 0.f); + m[1] = vector(0.f, -h, 0.f, 0.f); + m[2] = vector(0.f, 0.f, -zFar / (zFar - zNear), -zNear * zFar / (zFar - zNear)); + m[3] = vector(0.f, 0.f, -1.f, 0.f); return m; } -template -inline matrix buildProjectionMatrixPerspectiveFovLH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar) +template +inline matrix buildProjectionMatrixPerspectiveFovLH(FloatingPoint fieldOfViewRadians, FloatingPoint aspectRatio, FloatingPoint 
zNear, FloatingPoint zFar) { - const float h = core::reciprocal(tanf(fieldOfViewRadians * 0.5f)); + const FloatingPoint h = core::reciprocal(tan(fieldOfViewRadians * 0.5f)); _NBL_DEBUG_BREAK_IF(aspectRatio == 0.f); //division by zero const float w = h / aspectRatio; _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero - matrix m; - m[0] = vector(w, 0.f, 0.f, 0.f); - m[1] = vector(0.f, -h, 0.f, 0.f); - m[2] = vector(0.f, 0.f, zFar / (zFar - zNear), -zNear * zFar / (zFar - zNear)); - m[3] = vector(0.f, 0.f, 1.f, 0.f); + matrix m; + m[0] = vector(w, 0.f, 0.f, 0.f); + m[1] = vector(0.f, -h, 0.f, 0.f); + m[2] = vector(0.f, 0.f, zFar / (zFar - zNear), -zNear * zFar / (zFar - zNear)); + m[3] = vector(0.f, 0.f, 1.f, 0.f); return m; } -template -inline matrix buildProjectionMatrixOrthoRH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar) +template +inline matrix buildProjectionMatrixOrthoRH(FloatingPoint widthOfViewVolume, FloatingPoint heightOfViewVolume, FloatingPoint zNear, FloatingPoint zFar) { _NBL_DEBUG_BREAK_IF(widthOfViewVolume == 0.f); //division by zero _NBL_DEBUG_BREAK_IF(heightOfViewVolume == 0.f); //division by zero _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero - matrix m; - m[0] = vector(2.f / widthOfViewVolume, 0.f, 0.f, 0.f); - m[1] = vector(0.f, -2.f / heightOfViewVolume, 0.f, 0.f); - m[2] = vector(0.f, 0.f, -1.f / (zFar - zNear), -zNear / (zFar - zNear)); - m[3] = vector(0.f, 0.f, 0.f, 1.f); + matrix m; + m[0] = vector(2.f / widthOfViewVolume, 0.f, 0.f, 0.f); + m[1] = vector(0.f, -2.f / heightOfViewVolume, 0.f, 0.f); + m[2] = vector(0.f, 0.f, -1.f / (zFar - zNear), -zNear / (zFar - zNear)); + m[3] = vector(0.f, 0.f, 0.f, 1.f); return m; } -template -inline matrix buildProjectionMatrixOrthoLH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar) +template +inline matrix buildProjectionMatrixOrthoLH(FloatingPoint widthOfViewVolume, FloatingPoint heightOfViewVolume, FloatingPoint zNear, FloatingPoint 
zFar) { _NBL_DEBUG_BREAK_IF(widthOfViewVolume == 0.f); //division by zero _NBL_DEBUG_BREAK_IF(heightOfViewVolume == 0.f); //division by zero _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero - matrix m; - m[0] = vector(2.f / widthOfViewVolume, 0.f, 0.f, 0.f); - m[1] = vector(0.f, -2.f / heightOfViewVolume, 0.f, 0.f); - m[2] = vector(0.f, 0.f, 1.f / (zFar - zNear), -zNear / (zFar - zNear)); - m[3] = vector(0.f, 0.f, 0.f, 1.f); + matrix m; + m[0] = vector(2.f / widthOfViewVolume, 0.f, 0.f, 0.f); + m[1] = vector(0.f, -2.f / heightOfViewVolume, 0.f, 0.f); + m[2] = vector(0.f, 0.f, 1.f / (zFar - zNear), -zNear / (zFar - zNear)); + m[3] = vector(0.f, 0.f, 0.f, 1.f); return m; } From 563be557e2dbebcdf00e6b2244f6a339d0bb5b07 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Tue, 25 Nov 2025 16:48:09 +0100 Subject: [PATCH 134/472] Added requirements to projection matrix creation functions --- .../transformation_matrix_utils.hlsl | 16 ---------------- .../nbl/builtin/hlsl/projection/projection.hlsl | 9 +++++---- 2 files changed, 5 insertions(+), 20 deletions(-) diff --git a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl index 7d5fc74ee2..c96a52edea 100644 --- a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl +++ b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl @@ -40,22 +40,6 @@ MatT identity() return diagonal(1); } -// TODO: this is temporary function, delete when removing vectorSIMD -#ifndef __HLSL_VERSION -template -inline core::vectorSIMDf transformVector(NBL_CONST_REF_ARG(matrix) mat, NBL_CONST_REF_ARG(core::vectorSIMDf) vec) -{ - core::vectorSIMDf output; - float32_t4 tmp; - for (int i = 0; i < 4; ++i) // rather do that that reinterpret_cast for safety - tmp[i] = output[i]; - - for (int i = 0; i < 4; ++i) - output[i] = hlsl::dot(mat[i], tmp); - - return output; -} -#endif template inline matrix 
getMatrix3x4As4x4(NBL_CONST_REF_ARG(matrix) mat) { diff --git a/include/nbl/builtin/hlsl/projection/projection.hlsl b/include/nbl/builtin/hlsl/projection/projection.hlsl index b667d6d9b3..94023e2d05 100644 --- a/include/nbl/builtin/hlsl/projection/projection.hlsl +++ b/include/nbl/builtin/hlsl/projection/projection.hlsl @@ -2,13 +2,14 @@ #define _NBL_BUILTIN_HLSL_PROJECTION_PROJECTION_INCLUDED_ #include +#include namespace nbl { namespace hlsl { // TODO: use glm instead for c++ -template +template) inline matrix buildProjectionMatrixPerspectiveFovRH(FloatingPoint fieldOfViewRadians, FloatingPoint aspectRatio, FloatingPoint zNear, FloatingPoint zFar) { const FloatingPoint h = core::reciprocal(tan(fieldOfViewRadians * 0.5f)); @@ -25,7 +26,7 @@ inline matrix buildProjectionMatrixPerspectiveFovRH(Floatin return m; } -template +template) inline matrix buildProjectionMatrixPerspectiveFovLH(FloatingPoint fieldOfViewRadians, FloatingPoint aspectRatio, FloatingPoint zNear, FloatingPoint zFar) { const FloatingPoint h = core::reciprocal(tan(fieldOfViewRadians * 0.5f)); @@ -43,7 +44,7 @@ inline matrix buildProjectionMatrixPerspectiveFovLH(Floatin return m; } -template +template) inline matrix buildProjectionMatrixOrthoRH(FloatingPoint widthOfViewVolume, FloatingPoint heightOfViewVolume, FloatingPoint zNear, FloatingPoint zFar) { _NBL_DEBUG_BREAK_IF(widthOfViewVolume == 0.f); //division by zero @@ -59,7 +60,7 @@ inline matrix buildProjectionMatrixOrthoRH(FloatingPoint wi return m; } -template +template) inline matrix buildProjectionMatrixOrthoLH(FloatingPoint widthOfViewVolume, FloatingPoint heightOfViewVolume, FloatingPoint zNear, FloatingPoint zFar) { _NBL_DEBUG_BREAK_IF(widthOfViewVolume == 0.f); //division by zero From 9814b88a082d2b62dda559d5662237a2e6d7b7f3 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 26 Nov 2025 12:38:54 +0100 Subject: [PATCH 135/472] update examples_tests submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/examples_tests b/examples_tests index e00a669c1f..06bad177bd 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit e00a669c1f9914a850f13f8a0659edccb6350831 +Subproject commit 06bad177bdb18772c8b4c6c4289a22159e7c97c0 From e60596975286c8ba927d3e14105987bf8dce7f2c Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 26 Nov 2025 18:36:29 +0100 Subject: [PATCH 136/472] update dxc submodule, pull microsoft:main and s-perron:i7945 --- 3rdparty/dxc/dxc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/dxc/dxc b/3rdparty/dxc/dxc index dafad1d9a3..155f57ad15 160000 --- a/3rdparty/dxc/dxc +++ b/3rdparty/dxc/dxc @@ -1 +1 @@ -Subproject commit dafad1d9a370d17ac9ce69928ef518f842cb5191 +Subproject commit 155f57ad15ca7eabeaa5e5a892cfdeb7bffe8fc3 From 53e4ab1855b1474d35888e84a9b23d1636e12177 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 26 Nov 2025 19:30:28 +0100 Subject: [PATCH 137/472] update dxc submodule, it crashes! 
--- 3rdparty/dxc/dxc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/dxc/dxc b/3rdparty/dxc/dxc index 155f57ad15..dafad1d9a3 160000 --- a/3rdparty/dxc/dxc +++ b/3rdparty/dxc/dxc @@ -1 +1 @@ -Subproject commit 155f57ad15ca7eabeaa5e5a892cfdeb7bffe8fc3 +Subproject commit dafad1d9a370d17ac9ce69928ef518f842cb5191 From 0879ce7b6fc300efe18e88269100e382de0c2749 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 27 Nov 2025 15:47:52 +0700 Subject: [PATCH 138/472] fix + optimize aabb vertex calc, includes --- .../builtin/hlsl/aabb_instances.vertex.hlsl | 16 +--------------- .../nbl/ext/DebugDraw/builtin/hlsl/common.hlsl | 12 +++++++++++- .../DebugDraw/builtin/hlsl/single.vertex.hlsl | 16 +--------------- 3 files changed, 13 insertions(+), 31 deletions(-) diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl index 5f67aa2f1e..451243bbcc 100644 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl @@ -1,8 +1,5 @@ #pragma shader_stage(vertex) -#include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" -#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" -#include "nbl/builtin/hlsl/bda/__ptr.hlsl" #include "nbl/ext/DebugDraw/builtin/hlsl/common.hlsl" using namespace nbl::hlsl; @@ -13,19 +10,8 @@ using namespace nbl::ext::debug_draw; [shader("vertex")] PSInput main() { - const float32_t3 unitAABBVertices[8] = { - float32_t3(0.0, 0.0, 0.0), - float32_t3(1.0, 0.0, 0.0), - float32_t3(0.0, 0.0, 1.0), - float32_t3(1.0, 0.0, 1.0), - float32_t3(0.0, 1.0, 0.0), - float32_t3(1.0, 1.0, 0.0), - float32_t3(0.0, 1.0, 1.0), - float32_t3(1.0, 1.0, 1.0) - }; - PSInput output; - float32_t3 vertex = unitAABBVertices[glsl::gl_VertexIndex()]; + const float32_t3 vertex = getUnitAABBVertex(); InstanceData instance = vk::BufferPointer(pc.pInstanceBuffer + sizeof(InstanceData) * 
glsl::gl_InstanceIndex()).Get(); output.position = math::linalg::promoted_mul(instance.transform, vertex); diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl index a178d45465..ac5deaef5f 100644 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl @@ -2,6 +2,11 @@ #define _NBL_DEBUG_DRAW_EXT_COMMON_HLSL #include "nbl/builtin/hlsl/cpp_compat.hlsl" +#ifdef __HLSL_VERSION +#include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" +#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" +#include "nbl/builtin/hlsl/bda/__ptr.hlsl" +#endif namespace nbl { @@ -30,8 +35,13 @@ struct SPushConstants struct PSInput { float32_t4 position : SV_Position; - float32_t4 color : TEXCOORD0; + nointerpolation float32_t4 color : TEXCOORD0; }; + +float32_t3 getUnitAABBVertex() +{ + return (hlsl::promote(hlsl::glsl::gl_VertexIndex()) >> uint32_t3(0,2,1)) & 0x1u; +} #endif } diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl index 64ca75d5ab..5b4f2a39a7 100644 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl @@ -1,8 +1,5 @@ #pragma shader_stage(vertex) -#include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" -#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" -#include "nbl/builtin/hlsl/bda/__ptr.hlsl" #include "nbl/ext/DebugDraw/builtin/hlsl/common.hlsl" using namespace nbl::hlsl; @@ -13,19 +10,8 @@ using namespace nbl::ext::debug_draw; [shader("vertex")] PSInput main() { - const float32_t3 unitAABBVertices[8] = { - float32_t3(0.0, 0.0, 0.0), - float32_t3(1.0, 0.0, 0.0), - float32_t3(0.0, 0.0, 1.0), - float32_t3(1.0, 0.0, 1.0), - float32_t3(0.0, 1.0, 0.0), - float32_t3(1.0, 1.0, 0.0), - float32_t3(0.0, 1.0, 1.0), - float32_t3(1.0, 1.0, 1.0) - }; - PSInput output; - float32_t3 vertex = 
unitAABBVertices[glsl::gl_VertexIndex()]; + float32_t3 vertex = getUnitAABBVertex(); output.position = math::linalg::promoted_mul(pc.instance.transform, vertex); output.color = pc.instance.color; From 1f73ca9549e1cba5febead9112125d68136c4c01 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 27 Nov 2025 16:09:08 +0700 Subject: [PATCH 139/472] changed debug_draw library target usage --- examples_tests | 2 +- src/nbl/ext/DebugDraw/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples_tests b/examples_tests index c297eaa98c..292cbbfb7f 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit c297eaa98c7ef73bbc9c58a57e6878dafc13ff28 +Subproject commit 292cbbfb7f9007f00b92b8a6ae0c23e5a3a07687 diff --git a/src/nbl/ext/DebugDraw/CMakeLists.txt b/src/nbl/ext/DebugDraw/CMakeLists.txt index 4cb2ee54cf..7a89caca0d 100644 --- a/src/nbl/ext/DebugDraw/CMakeLists.txt +++ b/src/nbl/ext/DebugDraw/CMakeLists.txt @@ -40,4 +40,4 @@ if(NBL_EMBED_BUILTIN_RESOURCES) LINK_BUILTIN_RESOURCES_TO_TARGET(${LIB_NAME} ${_BR_TARGET_}) endif() -set(NBL_EXT_DEBUG_DRAW_TARGET ${LIB_NAME} CACHE INTERNAL "Nabla's Debug Draw logical target name") +add_library(Nabla::ext::DebugDraw ALIAS ${LIB_NAME}) From a40f5402a70c38c1e509d61ca0186e039b1ce5cf Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 27 Nov 2025 16:58:28 +0700 Subject: [PATCH 140/472] some fixes to draw aabb --- examples_tests | 2 +- include/nbl/ext/DebugDraw/CDrawAABB.h | 106 +++++++++++++++++++++-- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 116 ++++++-------------------- 3 files changed, 126 insertions(+), 98 deletions(-) diff --git a/examples_tests b/examples_tests index 292cbbfb7f..8af66823a5 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 292cbbfb7f9007f00b92b8a6ae0c23e5a3a07687 +Subproject commit 8af66823a545c281582a053b095f8a305769f784 diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 
a8e3205f22..68b7ae0e2a 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -2,21 +2,21 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h -#ifndef _NBL_EXT_DRAW_AABB_H_ -#define _NBL_EXT_DRAW_AABB_H_ +#ifndef _NBL_EXT_DEBUG_DRAW_DRAW_AABB_H_ +#define _NBL_EXT_DEBUG_DRAW_DRAW_AABB_H_ #include "nbl/video/declarations.h" #include "nbl/builtin/hlsl/cpp_compat.hlsl" #include "nbl/builtin/hlsl/shapes/aabb.hlsl" +#include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" #include "nbl/ext/DebugDraw/builtin/hlsl/common.hlsl" namespace nbl::ext::debug_draw { -class DrawAABB final : public core::IReferenceCounted -{ + class DrawAABB final : public core::IReferenceCounted + { public: static constexpr inline uint32_t IndicesCount = 24u; - static constexpr inline uint32_t VerticesCount = 8u; enum DrawMode : uint16_t { @@ -39,7 +39,8 @@ class DrawAABB final : public core::IReferenceCounted //! 
optional, default MDI buffer allocated if not provided core::smart_refctd_ptr streamingBuffer = nullptr; }; - + + // only used to make the 24 element index buffer and instanced pipeline on create struct SCreationParameters : SCachedCreationParameters { video::IQueue* transfer = nullptr; @@ -48,6 +49,29 @@ class DrawAABB final : public core::IReferenceCounted core::smart_refctd_ptr singlePipelineLayout; core::smart_refctd_ptr batchPipelineLayout; core::smart_refctd_ptr renderpass = nullptr; + + inline bool validate() const + { + const auto validation = std::to_array + ({ + std::make_pair(bool(assetManager), "Invalid `creationParams.assetManager` is nullptr!"), + std::make_pair(bool(assetManager->getSystem()), "Invalid `creationParams.assetManager->getSystem()` is nullptr!"), + std::make_pair(bool(utilities), "Invalid `creationParams.utilities` is nullptr!"), + std::make_pair(bool(transfer), "Invalid `creationParams.transfer` is nullptr!"), + std::make_pair(bool(renderpass), "Invalid `creationParams.renderpass` is nullptr!"), + (assetManager && utilities && transfer && renderpass) ? 
std::make_pair(bool(utilities->getLogicalDevice()->getPhysicalDevice()->getQueueFamilyProperties()[transfer->getFamilyIndex()].queueFlags.hasFlags(video::IQueue::FAMILY_FLAGS::TRANSFER_BIT)), "Invalid `creationParams.transfer` is not capable of transfer operations!") : std::make_pair(false, "Pass valid required DrawAABB::S_CREATION_PARAMETERS!") + }); + + system::logger_opt_ptr logger = utilities->getLogger(); + for (const auto& [ok, error] : validation) + if (!ok) + { + logger.log(error, system::ILogger::ELL_ERROR); + return false; + } + + return true; + } }; // creates an instance that can draw one AABB via push constant or multiple using streaming buffer @@ -67,7 +91,73 @@ class DrawAABB final : public core::IReferenceCounted // records draw command for single AABB, user has to set pipeline outside bool renderSingle(video::IGPUCommandBuffer* commandBuffer, const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t4& color, const hlsl::float32_t4x4& cameraMat); - bool render(video::IGPUCommandBuffer* commandBuffer, video::ISemaphore::SWaitInfo waitInfo, std::span aabbInstances, const hlsl::float32_t4x4& cameraMat); + // records draw command for rendering batch of AABB instances as InstanceData + // user has to set span of filled-in InstanceData; camera matrix used in push constant + inline bool render(video::IGPUCommandBuffer* commandBuffer, video::ISemaphore::SWaitInfo waitInfo, std::span aabbInstances, const hlsl::float32_t4x4& cameraMat) + { + if (!(m_cachedCreationParams.drawMode & ADM_DRAW_BATCH)) + { + m_cachedCreationParams.utilities->getLogger()->log("DrawAABB has not been enabled for draw batches!", system::ILogger::ELL_ERROR); + return false; + } + + using offset_t = SCachedCreationParameters::streaming_buffer_t::size_type; + constexpr auto MdiSizes = std::to_array({ sizeof(hlsl::float32_t3), sizeof(InstanceData) }); + // shared nPoT alignment needs to be divisible by all smaller ones to satisfy an allocation from all + constexpr offset_t 
MaxAlignment = std::reduce(MdiSizes.begin(), MdiSizes.end(), 1, [](const offset_t a, const offset_t b)->offset_t {return std::lcm(a, b); }); + // allocator initialization needs us to round up to PoT + const auto MaxPOTAlignment = hlsl::roundUpToPoT(MaxAlignment); + + auto* streaming = m_cachedCreationParams.streamingBuffer.get(); + + auto* const streamingPtr = reinterpret_cast(streaming->getBufferPointer()); + assert(streamingPtr); + + commandBuffer->bindGraphicsPipeline(m_batchPipeline.get()); + commandBuffer->setLineWidth(1.f); + asset::SBufferBinding indexBinding = { .offset = 0, .buffer = m_indicesBuffer }; + commandBuffer->bindIndexBuffer(indexBinding, asset::EIT_32BIT); + + std::vector instances(aabbInstances.size()); + for (uint32_t i = 0; i < aabbInstances.size(); i++) + { + auto& inst = instances[i]; + inst = aabbInstances[i]; + inst.transform = hlsl::mul(cameraMat, inst.transform); + } + + auto instancesIt = instances.begin(); + const uint32_t instancesPerIter = streaming->getBuffer()->getSize() / sizeof(InstanceData); + using suballocator_t = core::LinearAddressAllocatorST; + while (instancesIt != instances.end()) + { + const uint32_t instanceCount = hlsl::min(instancesPerIter, instances.size()); + offset_t inputOffset = 0u; + offset_t ImaginarySizeUpperBound = 0x1 << 30; + suballocator_t imaginaryChunk(nullptr, inputOffset, 0, hlsl::roundUpToPoT(MaxAlignment), ImaginarySizeUpperBound); + uint32_t instancesByteOffset = imaginaryChunk.alloc_addr(sizeof(InstanceData) * instanceCount, sizeof(InstanceData)); + const uint32_t totalSize = imaginaryChunk.get_allocated_size(); + + inputOffset = SCachedCreationParameters::streaming_buffer_t::invalid_value; + std::chrono::steady_clock::time_point waitTill = std::chrono::steady_clock::now() + std::chrono::milliseconds(1u); + streaming->multi_allocate(waitTill, 1, &inputOffset, &totalSize, &MaxAlignment); + + memcpy(streamingPtr + instancesByteOffset, std::addressof(*instancesIt), sizeof(InstanceData) * 
instanceCount); + instancesIt += instanceCount; + + assert(!streaming->needsManualFlushOrInvalidate()); + + SPushConstants pc; + pc.pInstanceBuffer = m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + instancesByteOffset; + + commandBuffer->pushConstants(m_batchPipeline->getLayout(), asset::IShader::E_SHADER_STAGE::ESS_VERTEX, 0, sizeof(SPushConstants), &pc); + commandBuffer->drawIndexed(IndicesCount, instanceCount, 0, 0, 0); + + streaming->multi_deallocate(1, &inputOffset, &totalSize, waitInfo); + } + + return true; + } static hlsl::float32_t4x4 getTransformFromAABB(const hlsl::shapes::AABB<3, float>& aabb); @@ -77,7 +167,7 @@ class DrawAABB final : public core::IReferenceCounted ~DrawAABB() override; private: - static bool validateCreationParameters(SCreationParameters& params); + //static bool validateCreationParameters(SCreationParameters& params); static core::smart_refctd_ptr createPipeline(SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout, const std::string& vsPath, const std::string& fsPath); static bool createStreamingBuffer(SCreationParameters& params); static core::smart_refctd_ptr createIndicesBuffer(SCreationParameters& params); diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index d231f21e3e..60079f71fc 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -3,7 +3,6 @@ // For conditions of distribution and use, see copyright notice in nabla.h #include "nbl/ext/DebugDraw/CDrawAABB.h" -#include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" #ifdef NBL_EMBED_BUILTIN_RESOURCES #include "nbl/ext/debug_draw/builtin/CArchive.h" @@ -23,7 +22,7 @@ core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) { auto* const logger = params.utilities->getLogger(); - if (!validateCreationParameters(params)) + if (!params.validate()) { logger->log("Failed creation parameters validation!", ILogger::ELL_ERROR); return nullptr; @@ 
-102,28 +101,28 @@ const smart_refctd_ptr DrawAABB::mount(smart_refctd_ptr l return smart_refctd_ptr(archive); } -bool DrawAABB::validateCreationParameters(SCreationParameters& creationParams) -{ - const auto validation = std::to_array - ({ - std::make_pair(bool(creationParams.assetManager), "Invalid `creationParams.assetManager` is nullptr!"), - std::make_pair(bool(creationParams.assetManager->getSystem()), "Invalid `creationParams.assetManager->getSystem()` is nullptr!"), - std::make_pair(bool(creationParams.utilities), "Invalid `creationParams.utilities` is nullptr!"), - std::make_pair(bool(creationParams.transfer), "Invalid `creationParams.transfer` is nullptr!"), - std::make_pair(bool(creationParams.renderpass), "Invalid `creationParams.renderpass` is nullptr!"), - (creationParams.assetManager && creationParams.utilities && creationParams.transfer && creationParams.renderpass) ? std::make_pair(bool(creationParams.utilities->getLogicalDevice()->getPhysicalDevice()->getQueueFamilyProperties()[creationParams.transfer->getFamilyIndex()].queueFlags.hasFlags(IQueue::FAMILY_FLAGS::TRANSFER_BIT)), "Invalid `creationParams.transfer` is not capable of transfer operations!") : std::make_pair(false, "Pass valid required DrawAABB::S_CREATION_PARAMETERS!") - }); - - system::logger_opt_ptr logger = creationParams.utilities->getLogger(); - for (const auto& [ok, error] : validation) - if (!ok) - { - logger.log(error, ILogger::ELL_ERROR); - return false; - } - - return true; -} +//bool DrawAABB::validateCreationParameters(SCreationParameters& creationParams) +//{ +// const auto validation = std::to_array +// ({ +// std::make_pair(bool(creationParams.assetManager), "Invalid `creationParams.assetManager` is nullptr!"), +// std::make_pair(bool(creationParams.assetManager->getSystem()), "Invalid `creationParams.assetManager->getSystem()` is nullptr!"), +// std::make_pair(bool(creationParams.utilities), "Invalid `creationParams.utilities` is nullptr!"), +// 
std::make_pair(bool(creationParams.transfer), "Invalid `creationParams.transfer` is nullptr!"), +// std::make_pair(bool(creationParams.renderpass), "Invalid `creationParams.renderpass` is nullptr!"), +// (creationParams.assetManager && creationParams.utilities && creationParams.transfer && creationParams.renderpass) ? std::make_pair(bool(creationParams.utilities->getLogicalDevice()->getPhysicalDevice()->getQueueFamilyProperties()[creationParams.transfer->getFamilyIndex()].queueFlags.hasFlags(IQueue::FAMILY_FLAGS::TRANSFER_BIT)), "Invalid `creationParams.transfer` is not capable of transfer operations!") : std::make_pair(false, "Pass valid required DrawAABB::S_CREATION_PARAMETERS!") +// }); +// +// system::logger_opt_ptr logger = creationParams.utilities->getLogger(); +// for (const auto& [ok, error] : validation) +// if (!ok) +// { +// logger.log(error, ILogger::ELL_ERROR); +// return false; +// } +// +// return true; +//} smart_refctd_ptr DrawAABB::createPipeline(SCreationParameters& params, const IGPUPipelineLayout* pipelineLayout, const std::string& vsPath, const std::string& fsPath) { @@ -346,71 +345,10 @@ bool DrawAABB::renderSingle(IGPUCommandBuffer* commandBuffer, const hlsl::shapes return true; } -bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo waitInfo, std::span aabbInstances, const hlsl::float32_t4x4& cameraMat) -{ - if (!(m_cachedCreationParams.drawMode & ADM_DRAW_BATCH)) - { - m_cachedCreationParams.utilities->getLogger()->log("DrawAABB has not been enabled for draw batches!", ILogger::ELL_ERROR); - return false; - } - - using offset_t = SCachedCreationParameters::streaming_buffer_t::size_type; - constexpr auto MdiSizes = std::to_array({ sizeof(float32_t3), sizeof(InstanceData) }); - // shared nPoT alignment needs to be divisible by all smaller ones to satisfy an allocation from all - constexpr offset_t MaxAlignment = std::reduce(MdiSizes.begin(), MdiSizes.end(), 1, [](const offset_t a, const offset_t b)->offset_t {return 
std::lcm(a, b); }); - // allocator initialization needs us to round up to PoT - const auto MaxPOTAlignment = roundUpToPoT(MaxAlignment); - - auto* streaming = m_cachedCreationParams.streamingBuffer.get(); - - auto* const streamingPtr = reinterpret_cast(streaming->getBufferPointer()); - assert(streamingPtr); - - commandBuffer->bindGraphicsPipeline(m_batchPipeline.get()); - commandBuffer->setLineWidth(1.f); - asset::SBufferBinding indexBinding = { .offset = 0, .buffer = m_indicesBuffer }; - commandBuffer->bindIndexBuffer(indexBinding, asset::EIT_32BIT); - - std::vector instances(aabbInstances.size()); - for (uint32_t i = 0; i < aabbInstances.size(); i++) - { - auto& inst = instances[i]; - inst = aabbInstances[i]; - inst.transform = hlsl::mul(cameraMat, inst.transform); - } - - auto instancesIt = instances.begin(); - const uint32_t instancesPerIter = streaming->getBuffer()->getSize() / sizeof(InstanceData); - using suballocator_t = core::LinearAddressAllocatorST; - while (instancesIt != instances.end()) - { - const uint32_t instanceCount = min(instancesPerIter, instances.size()); - offset_t inputOffset = 0u; - offset_t ImaginarySizeUpperBound = 0x1 << 30; - suballocator_t imaginaryChunk(nullptr, inputOffset, 0, roundUpToPoT(MaxAlignment), ImaginarySizeUpperBound); - uint32_t instancesByteOffset = imaginaryChunk.alloc_addr(sizeof(InstanceData) * instanceCount, sizeof(InstanceData)); - const uint32_t totalSize = imaginaryChunk.get_allocated_size(); - - inputOffset = SCachedCreationParameters::streaming_buffer_t::invalid_value; - std::chrono::steady_clock::time_point waitTill = std::chrono::steady_clock::now() + std::chrono::milliseconds(1u); - streaming->multi_allocate(waitTill, 1, &inputOffset, &totalSize, &MaxAlignment); - - memcpy(streamingPtr + instancesByteOffset, std::addressof(*instancesIt), sizeof(InstanceData) * instanceCount); - instancesIt += instanceCount; - - assert(!streaming->needsManualFlushOrInvalidate()); - - SPushConstants pc; - pc.pInstanceBuffer = 
m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + instancesByteOffset; - - commandBuffer->pushConstants(m_batchPipeline->getLayout(), ESS_VERTEX, 0, sizeof(SPushConstants), &pc); - commandBuffer->drawIndexed(IndicesCount, instanceCount, 0, 0, 0); - - streaming->multi_deallocate(1, &inputOffset, &totalSize, waitInfo); - } - - return true; -} +//bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo waitInfo, std::span aabbInstances, const hlsl::float32_t4x4& cameraMat) +//{ +// +//} hlsl::float32_t4x4 DrawAABB::getTransformFromAABB(const hlsl::shapes::AABB<3, float>& aabb) { From 83d27c9b1050be68bddd5c5b4abed0f2c3fca94c Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 27 Nov 2025 20:55:09 +0700 Subject: [PATCH 141/472] NBL_CONSTEXPR_INLINE_VAR for template constexpr variable --- include/nbl/builtin/hlsl/cpp_compat/basic.h | 2 ++ include/nbl/builtin/hlsl/emulated/int64_t.hlsl | 16 ++++++++-------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index f871e2a23d..84d7b9d8b0 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -13,6 +13,7 @@ #define NBL_CONSTEXPR_STATIC constexpr static #define NBL_CONSTEXPR_STATIC_INLINE constexpr static inline #define NBL_CONSTEXPR_INLINE_FUNC constexpr inline +#define NBL_CONSTEXPR_INLINE_VAR constexpr inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE constexpr #define NBL_CONST_MEMBER_FUNC const #define NBL_IF_CONSTEXPR(...) if constexpr (__VA_ARGS__) @@ -44,6 +45,7 @@ namespace nbl::hlsl #define NBL_CONSTEXPR_STATIC const static #define NBL_CONSTEXPR_STATIC_INLINE const static #define NBL_CONSTEXPR_INLINE_FUNC inline +#define NBL_CONSTEXPR_INLINE_VAR inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC inline #define NBL_CONST_MEMBER_FUNC #define NBL_IF_CONSTEXPR(...) 
if (__VA_ARGS__) diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 89c9e2e733..2214835df9 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -435,13 +435,13 @@ struct minus > }; template<> -NBL_CONSTEXPR emulated_uint64_t plus::identity = _static_cast(uint64_t(0)); +NBL_CONSTEXPR_INLINE_VAR emulated_uint64_t plus::identity = _static_cast(uint64_t(0)); template<> -NBL_CONSTEXPR emulated_int64_t plus::identity = _static_cast(int64_t(0)); +NBL_CONSTEXPR_INLINE_VAR emulated_int64_t plus::identity = _static_cast(int64_t(0)); template<> -NBL_CONSTEXPR emulated_uint64_t minus::identity = _static_cast(uint64_t(0)); +NBL_CONSTEXPR_INLINE_VAR emulated_uint64_t minus::identity = _static_cast(uint64_t(0)); template<> -NBL_CONSTEXPR emulated_int64_t minus::identity = _static_cast(int64_t(0)); +NBL_CONSTEXPR_INLINE_VAR emulated_int64_t minus::identity = _static_cast(int64_t(0)); // --------------------------------- Compound assignment operators ------------------------------------------ // Specializations of the structs found in functional.hlsl @@ -475,13 +475,13 @@ struct minus_assign > }; template<> -NBL_CONSTEXPR emulated_uint64_t plus_assign::identity = plus::identity; +NBL_CONSTEXPR_INLINE_VAR emulated_uint64_t plus_assign::identity = plus::identity; template<> -NBL_CONSTEXPR emulated_int64_t plus_assign::identity = plus::identity; +NBL_CONSTEXPR_INLINE_VAR emulated_int64_t plus_assign::identity = plus::identity; template<> -NBL_CONSTEXPR emulated_uint64_t minus_assign::identity = minus::identity; +NBL_CONSTEXPR_INLINE_VAR emulated_uint64_t minus_assign::identity = minus::identity; template<> -NBL_CONSTEXPR emulated_int64_t minus_assign::identity = minus::identity; +NBL_CONSTEXPR_INLINE_VAR emulated_int64_t minus_assign::identity = minus::identity; // ------------------------------------------------ TYPE TRAITS SATISFIED 
----------------------------------------------------- From f0f9957d6030e0fbbf8d9f5fdf93321b93ec9e1c Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 28 Nov 2025 10:22:35 +0700 Subject: [PATCH 142/472] removed commented out bit --- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index 60079f71fc..18684ea479 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -101,29 +101,6 @@ const smart_refctd_ptr DrawAABB::mount(smart_refctd_ptr l return smart_refctd_ptr(archive); } -//bool DrawAABB::validateCreationParameters(SCreationParameters& creationParams) -//{ -// const auto validation = std::to_array -// ({ -// std::make_pair(bool(creationParams.assetManager), "Invalid `creationParams.assetManager` is nullptr!"), -// std::make_pair(bool(creationParams.assetManager->getSystem()), "Invalid `creationParams.assetManager->getSystem()` is nullptr!"), -// std::make_pair(bool(creationParams.utilities), "Invalid `creationParams.utilities` is nullptr!"), -// std::make_pair(bool(creationParams.transfer), "Invalid `creationParams.transfer` is nullptr!"), -// std::make_pair(bool(creationParams.renderpass), "Invalid `creationParams.renderpass` is nullptr!"), -// (creationParams.assetManager && creationParams.utilities && creationParams.transfer && creationParams.renderpass) ? 
std::make_pair(bool(creationParams.utilities->getLogicalDevice()->getPhysicalDevice()->getQueueFamilyProperties()[creationParams.transfer->getFamilyIndex()].queueFlags.hasFlags(IQueue::FAMILY_FLAGS::TRANSFER_BIT)), "Invalid `creationParams.transfer` is not capable of transfer operations!") : std::make_pair(false, "Pass valid required DrawAABB::S_CREATION_PARAMETERS!") -// }); -// -// system::logger_opt_ptr logger = creationParams.utilities->getLogger(); -// for (const auto& [ok, error] : validation) -// if (!ok) -// { -// logger.log(error, ILogger::ELL_ERROR); -// return false; -// } -// -// return true; -//} - smart_refctd_ptr DrawAABB::createPipeline(SCreationParameters& params, const IGPUPipelineLayout* pipelineLayout, const std::string& vsPath, const std::string& fsPath) { auto system = smart_refctd_ptr(params.assetManager->getSystem()); From fdd675b2dfd28ecc49135ef91189efa62782a154 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 28 Nov 2025 15:24:16 +0700 Subject: [PATCH 143/472] create pipelineLayout util can takes mode, also create layout if missing in params struct --- include/nbl/ext/DebugDraw/CDrawAABB.h | 29 +++++++++++---------------- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 22 +++++++++++--------- 2 files changed, 24 insertions(+), 27 deletions(-) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 68b7ae0e2a..8c5806c48e 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -52,23 +52,18 @@ namespace nbl::ext::debug_draw inline bool validate() const { - const auto validation = std::to_array - ({ - std::make_pair(bool(assetManager), "Invalid `creationParams.assetManager` is nullptr!"), - std::make_pair(bool(assetManager->getSystem()), "Invalid `creationParams.assetManager->getSystem()` is nullptr!"), - std::make_pair(bool(utilities), "Invalid `creationParams.utilities` is nullptr!"), - std::make_pair(bool(transfer), "Invalid `creationParams.transfer` is 
nullptr!"), - std::make_pair(bool(renderpass), "Invalid `creationParams.renderpass` is nullptr!"), - (assetManager && utilities && transfer && renderpass) ? std::make_pair(bool(utilities->getLogicalDevice()->getPhysicalDevice()->getQueueFamilyProperties()[transfer->getFamilyIndex()].queueFlags.hasFlags(video::IQueue::FAMILY_FLAGS::TRANSFER_BIT)), "Invalid `creationParams.transfer` is not capable of transfer operations!") : std::make_pair(false, "Pass valid required DrawAABB::S_CREATION_PARAMETERS!") - }); + assert(bool(assetManager)); + assert(bool(assetManager->getSystem())); + assert(bool(utilities)); + assert(bool(transfer)); + assert(bool(renderpass)); system::logger_opt_ptr logger = utilities->getLogger(); - for (const auto& [ok, error] : validation) - if (!ok) - { - logger.log(error, system::ILogger::ELL_ERROR); - return false; - } + if (!bool(utilities->getLogicalDevice()->getPhysicalDevice()->getQueueFamilyProperties()[transfer->getFamilyIndex()].queueFlags.hasFlags(video::IQueue::FAMILY_FLAGS::TRANSFER_BIT))) + { + logger.log("Invalid `creationParams.transfer` is not capable of transfer operations!", system::ILogger::ELL_ERROR); + return false; + } return true; } @@ -80,8 +75,8 @@ namespace nbl::ext::debug_draw // creates pipeline layout from push constant range static core::smart_refctd_ptr createPipelineLayoutFromPCRange(video::ILogicalDevice* device, const asset::SPushConstantRange& pcRange); - // creates default pipeline layout for streaming version - static core::smart_refctd_ptr createDefaultPipelineLayout(video::ILogicalDevice* device); + // creates default pipeline layout for pipeline specified by draw mode (note: if mode==BOTH, returns layout for BATCH mode) + static core::smart_refctd_ptr createDefaultPipelineLayout(video::ILogicalDevice* device, DrawMode mode = ADM_DRAW_BATCH); //! 
mounts the extension's archive to given system - useful if you want to create your own shaders with common header included static const core::smart_refctd_ptr mount(core::smart_refctd_ptr logger, system::ISystem* system, const std::string_view archiveAlias = ""); diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index 18684ea479..0f0e951bf6 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -31,7 +31,10 @@ core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) smart_refctd_ptr singlePipeline = nullptr; if (params.drawMode & ADM_DRAW_SINGLE) { - singlePipeline = createPipeline(params, params.singlePipelineLayout.get(), "single.vertex.hlsl", "aabb_instances.fragment.hlsl"); + auto pipelineLayout = params.singlePipelineLayout; + if (!pipelineLayout) + pipelineLayout = createDefaultPipelineLayout(params.utilities->getLogicalDevice(), ADM_DRAW_SINGLE); + singlePipeline = createPipeline(params, pipelineLayout.get(), "single.vertex.hlsl", "aabb_instances.fragment.hlsl"); if (!singlePipeline) { logger->log("Failed to create pipeline!", ILogger::ELL_ERROR); @@ -42,7 +45,10 @@ core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) smart_refctd_ptr batchPipeline = nullptr; if (params.drawMode & ADM_DRAW_BATCH) { - batchPipeline = createPipeline(params, params.batchPipelineLayout.get(), "aabb_instances.vertex.hlsl", "aabb_instances.fragment.hlsl"); + auto pipelineLayout = params.batchPipelineLayout; + if (!pipelineLayout) + pipelineLayout = createDefaultPipelineLayout(params.utilities->getLogicalDevice(), ADM_DRAW_BATCH); + batchPipeline = createPipeline(params, pipelineLayout.get(), "aabb_instances.vertex.hlsl", "aabb_instances.fragment.hlsl"); if (!batchPipeline) { logger->log("Failed to create pipeline!", ILogger::ELL_ERROR); @@ -287,14 +293,15 @@ core::smart_refctd_ptr DrawAABB::createPipelineLayout return device->createPipelineLayout({ &pcRange , 1 }, nullptr, 
nullptr, nullptr, nullptr); } -core::smart_refctd_ptr DrawAABB::createDefaultPipelineLayout(video::ILogicalDevice* device) +core::smart_refctd_ptr DrawAABB::createDefaultPipelineLayout(video::ILogicalDevice* device, DrawMode mode) { + const uint32_t pcSize = (mode & ADM_DRAW_BATCH) ? sizeof(SPushConstants) : sizeof(SSinglePushConstants); SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX, .offset = 0, - .size = sizeof(SPushConstants) + .size = pcSize }; - return device->createPipelineLayout({ &pcRange , 1 }, nullptr, nullptr, nullptr, nullptr); + return createPipelineLayoutFromPCRange(device, pcRange); } bool DrawAABB::renderSingle(IGPUCommandBuffer* commandBuffer, const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t4& color, const hlsl::float32_t4x4& cameraMat) @@ -322,11 +329,6 @@ bool DrawAABB::renderSingle(IGPUCommandBuffer* commandBuffer, const hlsl::shapes return true; } -//bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo waitInfo, std::span aabbInstances, const hlsl::float32_t4x4& cameraMat) -//{ -// -//} - hlsl::float32_t4x4 DrawAABB::getTransformFromAABB(const hlsl::shapes::AABB<3, float>& aabb) { const auto diagonal = aabb.getExtent(); From 37cc5518c73322e37dda088a13df596bdc4137f3 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 28 Nov 2025 16:19:10 +0700 Subject: [PATCH 144/472] aabb local transform is 3x4, common draw param struct between single and batch --- include/nbl/ext/DebugDraw/CDrawAABB.h | 18 +++++++++++++----- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 15 +++++++-------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 8c5806c48e..3a4096adfa 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -69,6 +69,13 @@ namespace nbl::ext::debug_draw } }; + struct DrawParameters + { + video::IGPUCommandBuffer* commandBuffer = nullptr; + 
hlsl::float32_t4x4 cameraMat = hlsl::float32_t4x4(1); + float lineWidth = 1.f; + }; + // creates an instance that can draw one AABB via push constant or multiple using streaming buffer static core::smart_refctd_ptr create(SCreationParameters&& params); @@ -84,11 +91,11 @@ namespace nbl::ext::debug_draw inline const SCachedCreationParameters& getCreationParameters() const { return m_cachedCreationParams; } // records draw command for single AABB, user has to set pipeline outside - bool renderSingle(video::IGPUCommandBuffer* commandBuffer, const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t4& color, const hlsl::float32_t4x4& cameraMat); + bool renderSingle(const DrawParameters& params, const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t4& color); // records draw command for rendering batch of AABB instances as InstanceData // user has to set span of filled-in InstanceData; camera matrix used in push constant - inline bool render(video::IGPUCommandBuffer* commandBuffer, video::ISemaphore::SWaitInfo waitInfo, std::span aabbInstances, const hlsl::float32_t4x4& cameraMat) + inline bool render(const DrawParameters& params, video::ISemaphore::SWaitInfo waitInfo, std::span aabbInstances) { if (!(m_cachedCreationParams.drawMode & ADM_DRAW_BATCH)) { @@ -108,8 +115,9 @@ namespace nbl::ext::debug_draw auto* const streamingPtr = reinterpret_cast(streaming->getBufferPointer()); assert(streamingPtr); + auto& commandBuffer = params.commandBuffer; commandBuffer->bindGraphicsPipeline(m_batchPipeline.get()); - commandBuffer->setLineWidth(1.f); + commandBuffer->setLineWidth(params.lineWidth); asset::SBufferBinding indexBinding = { .offset = 0, .buffer = m_indicesBuffer }; commandBuffer->bindIndexBuffer(indexBinding, asset::EIT_32BIT); @@ -118,7 +126,7 @@ namespace nbl::ext::debug_draw { auto& inst = instances[i]; inst = aabbInstances[i]; - inst.transform = hlsl::mul(cameraMat, inst.transform); + inst.transform = hlsl::mul(params.cameraMat, inst.transform); } 
auto instancesIt = instances.begin(); @@ -154,7 +162,7 @@ namespace nbl::ext::debug_draw return true; } - static hlsl::float32_t4x4 getTransformFromAABB(const hlsl::shapes::AABB<3, float>& aabb); + static hlsl::float32_t3x4 getTransformFromAABB(const hlsl::shapes::AABB<3, float>& aabb); protected: DrawAABB(SCreationParameters&& _params, core::smart_refctd_ptr singlePipeline, core::smart_refctd_ptr batchPipeline, diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index 0f0e951bf6..6302cd358e 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -304,7 +304,7 @@ core::smart_refctd_ptr DrawAABB::createDefaultPipelin return createPipelineLayoutFromPCRange(device, pcRange); } -bool DrawAABB::renderSingle(IGPUCommandBuffer* commandBuffer, const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t4& color, const hlsl::float32_t4x4& cameraMat) +bool DrawAABB::renderSingle(const DrawParameters& params, const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t4& color) { if (!(m_cachedCreationParams.drawMode & ADM_DRAW_SINGLE)) { @@ -312,15 +312,15 @@ bool DrawAABB::renderSingle(IGPUCommandBuffer* commandBuffer, const hlsl::shapes return false; } + auto& commandBuffer = params.commandBuffer; commandBuffer->bindGraphicsPipeline(m_singlePipeline.get()); - commandBuffer->setLineWidth(1.f); + commandBuffer->setLineWidth(params.lineWidth); asset::SBufferBinding indexBinding = { .offset = 0, .buffer = m_indicesBuffer }; commandBuffer->bindIndexBuffer(indexBinding, asset::EIT_32BIT); SSinglePushConstants pc; - - hlsl::float32_t4x4 instanceTransform = getTransformFromAABB(aabb); - pc.instance.transform = hlsl::mul(cameraMat, instanceTransform); + hlsl::float32_t3x4 instanceTransform = getTransformFromAABB(aabb); + pc.instance.transform = math::linalg::promoted_mul(params.cameraMat, instanceTransform); pc.instance.color = color; commandBuffer->pushConstants(m_singlePipeline->getLayout(), 
ESS_VERTEX, 0, sizeof(SSinglePushConstants), &pc); @@ -329,14 +329,13 @@ bool DrawAABB::renderSingle(IGPUCommandBuffer* commandBuffer, const hlsl::shapes return true; } -hlsl::float32_t4x4 DrawAABB::getTransformFromAABB(const hlsl::shapes::AABB<3, float>& aabb) +hlsl::float32_t3x4 DrawAABB::getTransformFromAABB(const hlsl::shapes::AABB<3, float>& aabb) { const auto diagonal = aabb.getExtent(); - hlsl::float32_t4x4 transform; + hlsl::float32_t3x4 transform; transform[0][3] = aabb.minVx.x; transform[1][3] = aabb.minVx.y; transform[2][3] = aabb.minVx.z; - transform[3][3] = 1.f; transform[0][0] = diagonal.x; transform[1][1] = diagonal.y; transform[2][2] = diagonal.z; From ba2860f7178625c2b2d94be3d324017551957c90 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 28 Nov 2025 16:43:20 +0700 Subject: [PATCH 145/472] write instances data directly to streaming buffer mem --- include/nbl/ext/DebugDraw/CDrawAABB.h | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 3a4096adfa..e7af675324 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -121,20 +121,22 @@ namespace nbl::ext::debug_draw asset::SBufferBinding indexBinding = { .offset = 0, .buffer = m_indicesBuffer }; commandBuffer->bindIndexBuffer(indexBinding, asset::EIT_32BIT); - std::vector instances(aabbInstances.size()); - for (uint32_t i = 0; i < aabbInstances.size(); i++) - { - auto& inst = instances[i]; - inst = aabbInstances[i]; - inst.transform = hlsl::mul(params.cameraMat, inst.transform); - } + auto setInstancesRange = [&](InstanceData* data, uint32_t count) -> void { + for (uint32_t i = 0; i < count; i++) + { + auto inst = data + i; + *inst = aabbInstances[i]; + inst->transform = hlsl::mul(params.cameraMat, inst->transform); + } + }; - auto instancesIt = instances.begin(); + const uint32_t numInstances = aabbInstances.size(); const uint32_t 
instancesPerIter = streaming->getBuffer()->getSize() / sizeof(InstanceData); using suballocator_t = core::LinearAddressAllocatorST; - while (instancesIt != instances.end()) + uint32_t beginOffset = 0; + while (beginOffset < numInstances) { - const uint32_t instanceCount = hlsl::min(instancesPerIter, instances.size()); + const uint32_t instanceCount = hlsl::min(instancesPerIter, numInstances); offset_t inputOffset = 0u; offset_t ImaginarySizeUpperBound = 0x1 << 30; suballocator_t imaginaryChunk(nullptr, inputOffset, 0, hlsl::roundUpToPoT(MaxAlignment), ImaginarySizeUpperBound); @@ -145,8 +147,9 @@ namespace nbl::ext::debug_draw std::chrono::steady_clock::time_point waitTill = std::chrono::steady_clock::now() + std::chrono::milliseconds(1u); streaming->multi_allocate(waitTill, 1, &inputOffset, &totalSize, &MaxAlignment); - memcpy(streamingPtr + instancesByteOffset, std::addressof(*instancesIt), sizeof(InstanceData) * instanceCount); - instancesIt += instanceCount; + auto* const streamingInstancesPtr = reinterpret_cast(streamingPtr + instancesByteOffset); + setInstancesRange(streamingInstancesPtr, instanceCount); + beginOffset += instanceCount; assert(!streaming->needsManualFlushOrInvalidate()); From c49691656ac8f54e5fbcaa298c2f75050235f682 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 28 Nov 2025 17:07:48 +0700 Subject: [PATCH 146/472] Promote and Truncate take vector and scalar by value and the rest by reference --- include/nbl/builtin/hlsl/cpp_compat/promote.hlsl | 14 ++++++++------ include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl | 9 +++++---- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl index 27461d5949..e267895ed5 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl @@ -15,7 +15,7 @@ namespace impl template struct Promote { - NBL_CONSTEXPR_FUNC T operator()(const U v) + 
NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(U) v) { return T(v); } @@ -26,7 +26,7 @@ struct Promote template struct Promote, U> { - NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(NBL_CONST_REF_ARG(U) v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(const U v) { vector promoted = {Scalar(v)}; return promoted; @@ -36,7 +36,7 @@ struct Promote, U> template struct Promote, U> { - NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(NBL_CONST_REF_ARG(U) v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(const U v) { vector promoted = {Scalar(v), Scalar(v)}; return promoted; @@ -46,7 +46,7 @@ struct Promote, U> template struct Promote, U> { - NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(NBL_CONST_REF_ARG(U) v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(const U v) { vector promoted = {Scalar(v), Scalar(v), Scalar(v)}; return promoted; @@ -56,7 +56,7 @@ struct Promote, U> template struct Promote, U> { - NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(NBL_CONST_REF_ARG(U) v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(const U v) { vector promoted = {Scalar(v), Scalar(v), Scalar(v), Scalar(v)}; return promoted; @@ -67,13 +67,15 @@ struct Promote, U> } +// TODO(kevinyu): Should we specialize this for vector and scalar to take argument by value instead of reference? 
template -NBL_CONSTEXPR_FUNC T promote(const U v) // TODO: use NBL_CONST_REF_ARG(U) instead of U v (circular ref) +NBL_CONSTEXPR_FUNC T promote(NBL_CONST_REF_ARG(U) v) { impl::Promote _promote; return _promote(v); } + } } diff --git a/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl index a95df183be..1e6b5b0f94 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl @@ -24,7 +24,7 @@ struct Truncate template NBL_PARTIAL_REQ_TOP(concepts::Scalar) struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar) > { - NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + NBL_CONSTEXPR_FUNC vector operator()(const vector v) { vector truncated = { v[0] }; return truncated; @@ -34,7 +34,7 @@ struct Truncate, vector NBL_PARTIAL_REQ_BOT(concept template NBL_PARTIAL_REQ_TOP(concepts::Scalar && N >= 2) struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar && N >= 2) > { - NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + NBL_CONSTEXPR_FUNC vector operator()(const vector v) { vector truncated = { v[0], v[1]}; return truncated; @@ -44,7 +44,7 @@ struct Truncate, vector NBL_PARTIAL_REQ_BOT(concept template NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 3) struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar&& N >= 3) > { - NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + NBL_CONSTEXPR_FUNC vector operator()(const vector v) { vector truncated = { v[0], v[1], v[2] }; return truncated; @@ -54,7 +54,7 @@ struct Truncate, vector NBL_PARTIAL_REQ_BOT(concept template NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 4) struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar&& N >= 4) > { - NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + NBL_CONSTEXPR_FUNC vector operator()(const vector v) { vector truncated = { v[0], v[1], v[2], v[3] }; return truncated; @@ -63,6 +63,7 @@ struct Truncate, 
vector NBL_PARTIAL_REQ_BOT(concept } //namespace impl +// TODO(kevinyu): Should we specialize this for vector and scalar to take argument by value instead of reference? template NBL_CONSTEXPR_FUNC T truncate(NBL_CONST_REF_ARG(U) v) { From d7bd053e5c129bdad2ced9a399d182a51ba239e6 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 28 Nov 2025 17:10:57 +0700 Subject: [PATCH 147/472] Remove promote and truncate comment about specialization --- include/nbl/builtin/hlsl/cpp_compat/promote.hlsl | 1 - include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl | 1 - 2 files changed, 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl index e267895ed5..6e75a55b1b 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl @@ -67,7 +67,6 @@ struct Promote, U> } -// TODO(kevinyu): Should we specialize this for vector and scalar to take argument by value instead of reference? template NBL_CONSTEXPR_FUNC T promote(NBL_CONST_REF_ARG(U) v) { diff --git a/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl index 1e6b5b0f94..63e0ab7b93 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl @@ -63,7 +63,6 @@ struct Truncate, vector NBL_PARTIAL_REQ_BOT(concept } //namespace impl -// TODO(kevinyu): Should we specialize this for vector and scalar to take argument by value instead of reference? 
template NBL_CONSTEXPR_FUNC T truncate(NBL_CONST_REF_ARG(U) v) { From 3f3a23e13548f0d140c1dfa86a4e877b0d29214f Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 28 Nov 2025 17:11:12 +0700 Subject: [PATCH 148/472] Add comment to rename log2 --- include/nbl/builtin/hlsl/mpl.hlsl | 1 + 1 file changed, 1 insertion(+) diff --git a/include/nbl/builtin/hlsl/mpl.hlsl b/include/nbl/builtin/hlsl/mpl.hlsl index 4594662969..7de4983c8e 100644 --- a/include/nbl/builtin/hlsl/mpl.hlsl +++ b/include/nbl/builtin/hlsl/mpl.hlsl @@ -110,6 +110,7 @@ struct round_up_to_pot : integral_constant NBL_CONSTEXPR uint64_t round_up_to_pot_v = round_up_to_pot::value; +// TODO: should rename log2 to log2_floor template struct round_down_to_pot : integral_constant > {}; template From 8dcdfdd930a99487134e65de707bb2d675cf5446 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 28 Nov 2025 17:17:54 +0700 Subject: [PATCH 149/472] Change dimension type from uint16_t to int32_t --- include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl index 63e0ab7b93..38467942f9 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl @@ -21,7 +21,7 @@ struct Truncate } }; -template NBL_PARTIAL_REQ_TOP(concepts::Scalar) +template NBL_PARTIAL_REQ_TOP(concepts::Scalar) struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar) > { NBL_CONSTEXPR_FUNC vector operator()(const vector v) @@ -31,7 +31,7 @@ struct Truncate, vector NBL_PARTIAL_REQ_BOT(concept } }; -template NBL_PARTIAL_REQ_TOP(concepts::Scalar && N >= 2) +template NBL_PARTIAL_REQ_TOP(concepts::Scalar && N >= 2) struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar && N >= 2) > { NBL_CONSTEXPR_FUNC vector operator()(const vector v) @@ -41,7 +41,7 @@ struct Truncate, vector NBL_PARTIAL_REQ_BOT(concept } }; -template 
NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 3) +template NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 3) struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar&& N >= 3) > { NBL_CONSTEXPR_FUNC vector operator()(const vector v) @@ -51,7 +51,7 @@ struct Truncate, vector NBL_PARTIAL_REQ_BOT(concept } }; -template NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 4) +template NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 4) struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar&& N >= 4) > { NBL_CONSTEXPR_FUNC vector operator()(const vector v) From 92cd9e775f49d87c46a8c24228dcfbfa909e0866 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 28 Nov 2025 18:21:10 +0700 Subject: [PATCH 150/472] Redefine some macro --- include/nbl/builtin/hlsl/cpp_compat/basic.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index 84d7b9d8b0..bbb2a73ee7 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -9,6 +9,7 @@ #define ARROW -> #define NBL_CONSTEXPR constexpr // TODO: rename to NBL_CONSTEXPR_VAR +#define NBL_CONSTEXPR_INLINE constexpr inline #define NBL_CONSTEXPR_FUNC constexpr #define NBL_CONSTEXPR_STATIC constexpr static #define NBL_CONSTEXPR_STATIC_INLINE constexpr static inline @@ -16,6 +17,9 @@ #define NBL_CONSTEXPR_INLINE_VAR constexpr inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE constexpr #define NBL_CONST_MEMBER_FUNC const +#define NBL_CONSTEXPR_FUNC_SCOPE_VAR constexpr +#define NBL_CONSTEXPR_OOL_MEMBER constexpr +#define NBL_CONSTEXPR_INLINE_OOL_MEMBER constexpr inline #define NBL_IF_CONSTEXPR(...) if constexpr (__VA_ARGS__) namespace nbl::hlsl @@ -41,13 +45,17 @@ namespace nbl::hlsl #define ARROW .arrow(). 
#define NBL_CONSTEXPR const static // TODO: rename to NBL_CONSTEXPR_VAR +#define NBL_CONSTEXPR_INLINE const static #define NBL_CONSTEXPR_FUNC #define NBL_CONSTEXPR_STATIC const static #define NBL_CONSTEXPR_STATIC_INLINE const static #define NBL_CONSTEXPR_INLINE_FUNC inline -#define NBL_CONSTEXPR_INLINE_VAR inline +#define NBL_CONSTEXPR_INLINE_VAR static const #define NBL_CONSTEXPR_FORCED_INLINE_FUNC inline #define NBL_CONST_MEMBER_FUNC +#define NBL_CONSTEXPR_FUNC_SCOPE_VAR const +#define NBL_CONSTEXPR_OOL_MEMBER const +#define NBL_CONSTEXPR_INLINE_OOL_MEMBER const #define NBL_IF_CONSTEXPR(...) if (__VA_ARGS__) namespace nbl From 7f6d8b82bdd3825e2ad99b9c60695ef7d1ed0c6f Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 28 Nov 2025 18:42:41 +0700 Subject: [PATCH 151/472] use const instead of static const for local variable in hlsl --- include/nbl/builtin/hlsl/functional.hlsl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 76b527f6bd..fd23ad388c 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -321,7 +321,7 @@ struct left_shift_operator && conc { array_get getter; array_set setter; - NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; [[unroll]] @@ -336,7 +336,7 @@ struct left_shift_operator && conc { array_get getter; array_set setter; - NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; [[unroll]] @@ -351,7 +351,7 @@ struct left_shift_operator && conc { array_get getter; array_set setter; - NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; 
[[unroll]] @@ -366,7 +366,7 @@ struct left_shift_operator && conc { array_get getter; array_set setter; - NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; [[unroll]] @@ -416,7 +416,7 @@ struct arithmetic_right_shift_operator getter; array_set setter; - NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] @@ -431,7 +431,7 @@ struct arithmetic_right_shift_operator getter; array_set setter; - NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] @@ -446,7 +446,7 @@ struct arithmetic_right_shift_operator getter; array_set setter; - NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] @@ -461,7 +461,7 @@ struct arithmetic_right_shift_operator getter; array_set setter; - NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] From 1d9ce208cf873228ff7de438608b7b59af2506ca Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 28 Nov 2025 20:26:21 +0700 Subject: [PATCH 152/472] Rename NBL_CONSTEXPR_INLINE to NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR --- include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl | 2 +- include/nbl/builtin/hlsl/cpp_compat/basic.h | 4 +- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 2 +- include/nbl/builtin/hlsl/math/functions.hlsl | 4 +- .../math/quadrature/gauss_legendre/impl.hlsl | 56 +++++++++---------- include/nbl/builtin/hlsl/mpl.hlsl | 24 ++++---- include/nbl/builtin/hlsl/numbers.hlsl | 28 
+++++----- include/nbl/builtin/hlsl/type_traits.hlsl | 26 ++++----- .../hlsl/workgroup2/arithmetic_config.hlsl | 2 +- 9 files changed, 74 insertions(+), 74 deletions(-) diff --git a/include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl b/include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl index b27c892abe..40f64d9cf8 100644 --- a/include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl @@ -406,7 +406,7 @@ template struct is_ggx : impl::is_ggx {}; template -NBL_CONSTEXPR bool is_ggx_v = is_ggx::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_ggx_v = is_ggx::value; } } diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index bbb2a73ee7..89c10d14fd 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -9,7 +9,6 @@ #define ARROW -> #define NBL_CONSTEXPR constexpr // TODO: rename to NBL_CONSTEXPR_VAR -#define NBL_CONSTEXPR_INLINE constexpr inline #define NBL_CONSTEXPR_FUNC constexpr #define NBL_CONSTEXPR_STATIC constexpr static #define NBL_CONSTEXPR_STATIC_INLINE constexpr static inline @@ -17,6 +16,7 @@ #define NBL_CONSTEXPR_INLINE_VAR constexpr inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE constexpr #define NBL_CONST_MEMBER_FUNC const +#define NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR constexpr inline #define NBL_CONSTEXPR_FUNC_SCOPE_VAR constexpr #define NBL_CONSTEXPR_OOL_MEMBER constexpr #define NBL_CONSTEXPR_INLINE_OOL_MEMBER constexpr inline @@ -45,7 +45,6 @@ namespace nbl::hlsl #define ARROW .arrow(). 
#define NBL_CONSTEXPR const static // TODO: rename to NBL_CONSTEXPR_VAR -#define NBL_CONSTEXPR_INLINE const static #define NBL_CONSTEXPR_FUNC #define NBL_CONSTEXPR_STATIC const static #define NBL_CONSTEXPR_STATIC_INLINE const static @@ -53,6 +52,7 @@ namespace nbl::hlsl #define NBL_CONSTEXPR_INLINE_VAR static const #define NBL_CONSTEXPR_FORCED_INLINE_FUNC inline #define NBL_CONST_MEMBER_FUNC +#define NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR const static #define NBL_CONSTEXPR_FUNC_SCOPE_VAR const #define NBL_CONSTEXPR_OOL_MEMBER const #define NBL_CONSTEXPR_INLINE_OOL_MEMBER const diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 242e30dfbe..7850fd7cf3 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -27,7 +27,7 @@ template::type; - NBL_CONSTEXPR UnsignedInteger Mask = (UnsignedInteger(0) - 1) >> 1; + NBL_CONSTEXPR_FUNC_SCOPE_VAR UnsignedInteger Mask = (UnsignedInteger(0) - 1) >> 1; UnsignedInteger absVal = val & Mask; return absVal > (ieee754::traits::specialValueExp << ieee754::traits::mantissaBitCnt); } diff --git a/include/nbl/builtin/hlsl/math/functions.hlsl b/include/nbl/builtin/hlsl/math/functions.hlsl index 20442c467b..21f0e6ef2b 100644 --- a/include/nbl/builtin/hlsl/math/functions.hlsl +++ b/include/nbl/builtin/hlsl/math/functions.hlsl @@ -123,9 +123,9 @@ void frisvad(NBL_CONST_REF_ARG(T) normal, NBL_REF_ARG(T) tangent, NBL_REF_ARG(T) bool partitionRandVariable(float leftProb, NBL_REF_ARG(float) xi, NBL_REF_ARG(float) rcpChoiceProb) { #ifdef __HLSL_VERSION - NBL_CONSTEXPR float NEXT_ULP_AFTER_UNITY = asfloat(0x3f800001u); + NBL_CONSTEXPR_FUNC_SCOPE_VAR float NEXT_ULP_AFTER_UNITY = asfloat(0x3f800001u); #else - NBL_CONSTEXPR float32_t NEXT_ULP_AFTER_UNITY = bit_cast(0x3f800001u); + NBL_CONSTEXPR_FUNC_SCOPE_VAR float32_t NEXT_ULP_AFTER_UNITY = bit_cast(0x3f800001u); #endif 
const bool pickRight = xi >= leftProb * NEXT_ULP_AFTER_UNITY; diff --git a/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/impl.hlsl b/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/impl.hlsl index 3bcfbb2388..cd402d0cd4 100644 --- a/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/impl.hlsl +++ b/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/impl.hlsl @@ -14,25 +14,25 @@ namespace float_t_namespace { -NBL_CONSTEXPR float_t xi_2[2] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_2[2] = { TYPED_NUMBER(-0.5773502691896257), TYPED_NUMBER(0.5773502691896257) }; -NBL_CONSTEXPR float_t xi_3[3] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_3[3] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.7745966692414833), TYPED_NUMBER(0.7745966692414833) }; -NBL_CONSTEXPR float_t xi_4[4] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_4[4] = { TYPED_NUMBER(-0.3399810435848562), TYPED_NUMBER(0.3399810435848562), TYPED_NUMBER(-0.8611363115940525), TYPED_NUMBER(0.8611363115940525) }; -NBL_CONSTEXPR float_t xi_5[5] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_5[5] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.5384693101056830), TYPED_NUMBER(0.5384693101056830), @@ -40,7 +40,7 @@ NBL_CONSTEXPR float_t xi_5[5] = { TYPED_NUMBER(0.9061798459386639) }; -NBL_CONSTEXPR float_t xi_6[6] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_6[6] = { TYPED_NUMBER(0.6612093864662645), TYPED_NUMBER(-0.6612093864662645), TYPED_NUMBER(-0.2386191860831969), @@ -49,7 +49,7 @@ NBL_CONSTEXPR float_t xi_6[6] = { TYPED_NUMBER(0.9324695142031520) }; -NBL_CONSTEXPR float_t xi_7[7] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_7[7] = { TYPED_NUMBER(0.0), TYPED_NUMBER(0.4058451513773971), TYPED_NUMBER(-0.4058451513773971), @@ -59,7 +59,7 @@ NBL_CONSTEXPR float_t xi_7[7] = { TYPED_NUMBER(0.9491079123427585) }; -NBL_CONSTEXPR float_t xi_8[8] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_8[8] = { TYPED_NUMBER(-0.1834346424956498), TYPED_NUMBER(0.1834346424956498), 
TYPED_NUMBER(-0.5255324099163289), @@ -70,7 +70,7 @@ NBL_CONSTEXPR float_t xi_8[8] = { TYPED_NUMBER(0.9602898564975362) }; -NBL_CONSTEXPR float_t xi_9[9] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_9[9] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.8360311073266357), TYPED_NUMBER(0.8360311073266357), @@ -82,7 +82,7 @@ NBL_CONSTEXPR float_t xi_9[9] = { TYPED_NUMBER(0.6133714327005903) }; -NBL_CONSTEXPR float_t xi_10[10] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_10[10] = { TYPED_NUMBER(-0.1488743389816312), TYPED_NUMBER(0.1488743389816312), TYPED_NUMBER(-0.4333953941292471), @@ -95,7 +95,7 @@ NBL_CONSTEXPR float_t xi_10[10] = { TYPED_NUMBER(0.9739065285171717) }; -NBL_CONSTEXPR float_t xi_11[11] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_11[11] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.2695431559523449), TYPED_NUMBER(0.2695431559523449), @@ -109,7 +109,7 @@ NBL_CONSTEXPR float_t xi_11[11] = { TYPED_NUMBER(0.9782286581460569) }; -NBL_CONSTEXPR float_t xi_12[12] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_12[12] = { TYPED_NUMBER(-0.1252334085114689), TYPED_NUMBER(0.1252334085114689), TYPED_NUMBER(-0.3678314989981801), @@ -124,7 +124,7 @@ NBL_CONSTEXPR float_t xi_12[12] = { TYPED_NUMBER(0.9815606342467192) }; -NBL_CONSTEXPR float_t xi_13[13] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_13[13] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.2304583159551347), TYPED_NUMBER(0.2304583159551347), @@ -140,7 +140,7 @@ NBL_CONSTEXPR float_t xi_13[13] = { TYPED_NUMBER(0.9841830547185881) }; -NBL_CONSTEXPR float_t xi_14[14] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_14[14] = { TYPED_NUMBER(-0.1080549487073436), TYPED_NUMBER(0.1080549487073436), TYPED_NUMBER(-0.3191123689278897), @@ -157,7 +157,7 @@ NBL_CONSTEXPR float_t xi_14[14] = { TYPED_NUMBER(0.9862838086968123) }; -NBL_CONSTEXPR float_t xi_15[15] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_15[15] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.2011940939974345), 
TYPED_NUMBER(0.2011940939974345), @@ -175,25 +175,25 @@ NBL_CONSTEXPR float_t xi_15[15] = { TYPED_NUMBER(0.9879925180204854) }; -NBL_CONSTEXPR float_t wi_2[2] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_2[2] = { TYPED_NUMBER(1.0000000000000000), TYPED_NUMBER(1.0000000000000000) }; -NBL_CONSTEXPR float_t wi_3[3] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_3[3] = { TYPED_NUMBER(0.8888888888888888), TYPED_NUMBER(0.5555555555555555), TYPED_NUMBER(0.5555555555555555) }; -NBL_CONSTEXPR float_t wi_4[4] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_4[4] = { TYPED_NUMBER(0.6521451548625461), TYPED_NUMBER(0.6521451548625461), TYPED_NUMBER(0.3478548451374538), TYPED_NUMBER(0.3478548451374538) }; -NBL_CONSTEXPR float_t wi_5[5] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_5[5] = { TYPED_NUMBER(0.5688888888888888), TYPED_NUMBER(0.4786286704993664), TYPED_NUMBER(0.4786286704993664), @@ -201,7 +201,7 @@ NBL_CONSTEXPR float_t wi_5[5] = { TYPED_NUMBER(0.2369268850561890) }; -NBL_CONSTEXPR float_t wi_6[6] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_6[6] = { TYPED_NUMBER(0.3607615730481386), TYPED_NUMBER(0.3607615730481386), TYPED_NUMBER(0.4679139345726910), @@ -210,7 +210,7 @@ NBL_CONSTEXPR float_t wi_6[6] = { TYPED_NUMBER(0.1713244923791703) }; -NBL_CONSTEXPR float_t wi_7[7] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_7[7] = { TYPED_NUMBER(0.4179591836734693), TYPED_NUMBER(0.3818300505051189), TYPED_NUMBER(0.3818300505051189), @@ -220,7 +220,7 @@ NBL_CONSTEXPR float_t wi_7[7] = { TYPED_NUMBER(0.1294849661688696) }; -NBL_CONSTEXPR float_t wi_8[8] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_8[8] = { TYPED_NUMBER(0.3626837833783619), TYPED_NUMBER(0.3626837833783619), TYPED_NUMBER(0.3137066458778872), @@ -231,7 +231,7 @@ NBL_CONSTEXPR float_t wi_8[8] = { TYPED_NUMBER(0.1012285362903762) }; -NBL_CONSTEXPR float_t wi_9[9] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_9[9] = { TYPED_NUMBER(0.3302393550012597), 
TYPED_NUMBER(0.1806481606948574), TYPED_NUMBER(0.1806481606948574), @@ -243,7 +243,7 @@ NBL_CONSTEXPR float_t wi_9[9] = { TYPED_NUMBER(0.2606106964029354) }; -NBL_CONSTEXPR float_t wi_10[10] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_10[10] = { TYPED_NUMBER(0.2955242247147528), TYPED_NUMBER(0.2955242247147528), TYPED_NUMBER(0.2692667193099963), @@ -256,7 +256,7 @@ NBL_CONSTEXPR float_t wi_10[10] = { TYPED_NUMBER(0.0666713443086881) }; -NBL_CONSTEXPR float_t wi_11[11] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_11[11] = { TYPED_NUMBER(0.2729250867779006), TYPED_NUMBER(0.2628045445102466), TYPED_NUMBER(0.2628045445102466), @@ -270,7 +270,7 @@ NBL_CONSTEXPR float_t wi_11[11] = { TYPED_NUMBER(0.0556685671161736) }; -NBL_CONSTEXPR float_t wi_12[12] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_12[12] = { TYPED_NUMBER(0.2491470458134027), TYPED_NUMBER(0.2491470458134027), TYPED_NUMBER(0.2334925365383548), @@ -285,7 +285,7 @@ NBL_CONSTEXPR float_t wi_12[12] = { TYPED_NUMBER(0.0471753363865118) }; -NBL_CONSTEXPR float_t wi_13[13] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_13[13] = { TYPED_NUMBER(0.2325515532308739), TYPED_NUMBER(0.2262831802628972), TYPED_NUMBER(0.2262831802628972), @@ -301,7 +301,7 @@ NBL_CONSTEXPR float_t wi_13[13] = { TYPED_NUMBER(0.0404840047653158) }; -NBL_CONSTEXPR float_t wi_14[14] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_14[14] = { TYPED_NUMBER(0.2152638534631577), TYPED_NUMBER(0.2152638534631577), TYPED_NUMBER(0.2051984637212956), @@ -318,7 +318,7 @@ NBL_CONSTEXPR float_t wi_14[14] = { TYPED_NUMBER(0.0351194603317518) }; -NBL_CONSTEXPR float_t wi_15[15] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_15[15] = { TYPED_NUMBER(0.2025782419255612), TYPED_NUMBER(0.1984314853271115), TYPED_NUMBER(0.1984314853271115), diff --git a/include/nbl/builtin/hlsl/mpl.hlsl b/include/nbl/builtin/hlsl/mpl.hlsl index 7de4983c8e..7734dea15f 100644 --- a/include/nbl/builtin/hlsl/mpl.hlsl +++ 
b/include/nbl/builtin/hlsl/mpl.hlsl @@ -41,12 +41,12 @@ struct countl_zero : impl::countl_zero static_assert(is_integral::value, "countl_zero type parameter must be an integral type"); }; template -NBL_CONSTEXPR T countl_zero_v = countl_zero::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T countl_zero_v = countl_zero::value; template struct is_pot : bool_constant< (N > 0 && !(N & (N - 1))) > {}; template -NBL_CONSTEXPR bool is_pot_v = is_pot::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_pot_v = is_pot::value; template struct log2 @@ -54,12 +54,12 @@ struct log2 NBL_CONSTEXPR_STATIC_INLINE uint16_t value = X ? (1ull<<6)-countl_zero::value-1 : -1ull; }; template -NBL_CONSTEXPR uint16_t log2_v = log2::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint16_t log2_v = log2::value; template struct log2_ceil : integral_constant + uint16_t(!is_pot_v)> {}; template -NBL_CONSTEXPR uint16_t log2_ceil_v = log2_ceil::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint16_t log2_ceil_v = log2_ceil::value; template struct rotl @@ -69,7 +69,7 @@ struct rotl NBL_CONSTEXPR_STATIC_INLINE T value = (S >= 0) ? ((X << r) | (X >> (N - r))) : (X >> (-r)) | (X << (N - (-r))); }; template -NBL_CONSTEXPR T rotl_v = rotl::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T rotl_v = rotl::value; template struct rotr @@ -79,7 +79,7 @@ struct rotr NBL_CONSTEXPR_STATIC_INLINE T value = (S >= 0) ? ((X >> r) | (X << (N - r))) : (X << (-r)) | (X >> (N - (-r))); }; template -NBL_CONSTEXPR T rotr_v = rotr::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T rotr_v = rotr::value; template struct align_up @@ -87,7 +87,7 @@ struct align_up NBL_CONSTEXPR_STATIC_INLINE uint64_t value = X ? 
(((X-1)/M+1)*M):0; }; template -NBL_CONSTEXPR uint64_t align_up_v = align_up::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t align_up_v = align_up::value; template struct max @@ -95,7 +95,7 @@ struct max NBL_CONSTEXPR_STATIC_INLINE T value = X -NBL_CONSTEXPR T max_v = max::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T max_v = max::value; template struct min @@ -103,18 +103,18 @@ struct min NBL_CONSTEXPR_STATIC_INLINE T value = X -NBL_CONSTEXPR T min_v = min::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T min_v = min::value; template struct round_up_to_pot : integral_constant > {}; template -NBL_CONSTEXPR uint64_t round_up_to_pot_v = round_up_to_pot::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t round_up_to_pot_v = round_up_to_pot::value; // TODO: should rename log2 to log2_floor template struct round_down_to_pot : integral_constant > {}; template -NBL_CONSTEXPR uint64_t round_down_to_pot_v = round_down_to_pot::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t round_down_to_pot_v = round_down_to_pot::value; template struct find_lsb @@ -122,7 +122,7 @@ struct find_lsb NBL_CONSTEXPR_STATIC_INLINE uint16_t value = log2::value; }; template -NBL_CONSTEXPR uint64_t find_lsb_v = find_lsb::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t find_lsb_v = find_lsb::value; } } } diff --git a/include/nbl/builtin/hlsl/numbers.hlsl b/include/nbl/builtin/hlsl/numbers.hlsl index 6671a44756..4594596590 100644 --- a/include/nbl/builtin/hlsl/numbers.hlsl +++ b/include/nbl/builtin/hlsl/numbers.hlsl @@ -11,33 +11,33 @@ namespace numbers { template -NBL_CONSTEXPR float_t e = float_t(2.718281828459045); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t e = float_t(2.718281828459045); template -NBL_CONSTEXPR float_t log2e = float_t(1.4426950408889634); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t log2e = float_t(1.4426950408889634); template -NBL_CONSTEXPR float_t log10e = float_t(0.4342944819032518); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t log10e = 
float_t(0.4342944819032518); template -NBL_CONSTEXPR float_t pi = float_t(3.141592653589793); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t pi = float_t(3.141592653589793); template -NBL_CONSTEXPR float_t inv_pi = float_t(0.3183098861837907); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t inv_pi = float_t(0.3183098861837907); template -NBL_CONSTEXPR float_t inv_sqrtpi = float_t(0.5641895835477563); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t inv_sqrtpi = float_t(0.5641895835477563); template -NBL_CONSTEXPR float_t ln2 = float_t(0.6931471805599453); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t ln2 = float_t(0.6931471805599453); template -NBL_CONSTEXPR float_t inv_ln2 = float_t(1.44269504088896); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t inv_ln2 = float_t(1.44269504088896); template -NBL_CONSTEXPR float_t ln10 = float_t(2.302585092994046); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t ln10 = float_t(2.302585092994046); template -NBL_CONSTEXPR float_t sqrt2 = float_t(1.4142135623730951); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t sqrt2 = float_t(1.4142135623730951); template -NBL_CONSTEXPR float_t sqrt3 = float_t(1.7320508075688772); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t sqrt3 = float_t(1.7320508075688772); template -NBL_CONSTEXPR float_t inv_sqrt3 = float_t(0.5773502691896257); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t inv_sqrt3 = float_t(0.5773502691896257); template -NBL_CONSTEXPR float_t egamma = float_t(0.5772156649015329); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t egamma = float_t(0.5772156649015329); template -NBL_CONSTEXPR float_t phi = float_t(1.618033988749895); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t phi = float_t(1.618033988749895); } } diff --git a/include/nbl/builtin/hlsl/type_traits.hlsl b/include/nbl/builtin/hlsl/type_traits.hlsl index bf2a35ede9..b682b8da8b 100644 --- a/include/nbl/builtin/hlsl/type_traits.hlsl +++ b/include/nbl/builtin/hlsl/type_traits.hlsl @@ -638,25 +638,25 @@ using conditional_t = typename conditional::type; // 
Template Variables template -NBL_CONSTEXPR T integral_constant_v = integral_constant::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T integral_constant_v = integral_constant::value; template -NBL_CONSTEXPR bool is_same_v = is_same::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_same_v = is_same::value; template -NBL_CONSTEXPR bool is_unsigned_v = is_unsigned::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_unsigned_v = is_unsigned::value; template -NBL_CONSTEXPR bool is_integral_v = is_integral::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_integral_v = is_integral::value; template -NBL_CONSTEXPR bool is_floating_point_v = is_floating_point::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_floating_point_v = is_floating_point::value; template -NBL_CONSTEXPR bool is_signed_v = is_signed::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_signed_v = is_signed::value; template -NBL_CONSTEXPR bool is_scalar_v = is_scalar::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_scalar_v = is_scalar::value; template -NBL_CONSTEXPR uint64_t size_of_v = size_of::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t size_of_v = size_of::value; template -NBL_CONSTEXPR uint32_t alignment_of_v = alignment_of::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t alignment_of_v = alignment_of::value; template -NBL_CONSTEXPR bool is_fundamental_v = is_fundamental::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_fundamental_v = is_fundamental::value; // Overlapping definitions @@ -685,7 +685,7 @@ template struct is_vector > : bool_constant {}; template -NBL_CONSTEXPR bool is_vector_v = is_vector::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_vector_v = is_vector::value; #ifndef __HLSL_VERSION template @@ -696,7 +696,7 @@ template struct is_matrix > : bool_constant {}; template -NBL_CONSTEXPR bool is_matrix_v = is_matrix::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_matrix_v = is_matrix::value; template @@ -741,7 +741,7 @@ struct extent, I> : 
integral_constant: // Template Variables template -NBL_CONSTEXPR uint64_t extent_v = extent::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t extent_v = extent::value; template::value> diff --git a/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl b/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl index 03ccd64d4e..22c93ce193 100644 --- a/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl +++ b/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl @@ -225,7 +225,7 @@ template struct is_configuration > : bool_constant {}; template -NBL_CONSTEXPR bool is_configuration_v = is_configuration::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_configuration_v = is_configuration::value; } } From 5b79bb4c9d77f1c605d2e8371b8efb8b2944b2ed Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 30 Nov 2025 15:42:06 +0100 Subject: [PATCH 153/472] update examples_tests submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 06bad177bd..b659f1a3e6 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 06bad177bdb18772c8b4c6c4289a22159e7c97c0 +Subproject commit b659f1a3e624bd4d6b87629f2740d048c2db8b17 From 0db984bf7a371d967fcd7a60ef94e4e39d98c406 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 30 Nov 2025 16:21:43 +0100 Subject: [PATCH 154/472] update examples_tests submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index b659f1a3e6..d8f82f0d59 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit b659f1a3e624bd4d6b87629f2740d048c2db8b17 +Subproject commit d8f82f0d593d04b437ca64ddece9c32112ed5b12 From 5ce6fa26bef0b7c8d329fdc75177ad769b19db17 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 30 Nov 2025 18:05:50 +0100 Subject: [PATCH 155/472] update examples_tests submodule --- examples_tests | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index d8f82f0d59..9c83531c63 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit d8f82f0d593d04b437ca64ddece9c32112ed5b12 +Subproject commit 9c83531c63490bf743dee9bddcfbd5d729e1c916 From 2678ffc5799695770d5eb210f82b949d9ef34502 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 1 Dec 2025 12:19:14 +0700 Subject: [PATCH 156/472] use single use cmdbuf to fill indices buffer --- examples_tests | 2 +- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 70 +++++++++++++++++++++++++---- 2 files changed, 63 insertions(+), 9 deletions(-) diff --git a/examples_tests b/examples_tests index 8af66823a5..cc341e74bc 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 8af66823a545c281582a053b095f8a305769f784 +Subproject commit cc341e74bcc7fdf70fa6dbc312c4a6042a8eafff diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index 6302cd358e..fa07d9805f 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -186,8 +186,6 @@ bool DrawAABB::createStreamingBuffer(SCreationParameters& params) { bitflag flags(IDeviceMemoryAllocation::EMCAF_NO_MAPPING_ACCESS); - if (properties.hasFlags(IDeviceMemoryAllocation::EMPF_HOST_READABLE_BIT)) - flags |= IDeviceMemoryAllocation::EMCAF_READ; if (properties.hasFlags(IDeviceMemoryAllocation::EMPF_HOST_WRITABLE_BIT)) flags |= IDeviceMemoryAllocation::EMCAF_WRITE; @@ -274,16 +272,72 @@ smart_refctd_ptr DrawAABB::createIndicesBuffer(SCreationParameters& unitAABBIndices[22] = 3; unitAABBIndices[23] = 7; + auto* device = params.utilities->getLogicalDevice(); + smart_refctd_ptr cmdbuf; + { + smart_refctd_ptr cmdpool = device->createCommandPool(params.transfer->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::TRANSIENT_BIT); + if (!cmdpool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { &cmdbuf, 1 })) + { + 
params.utilities->getLogger()->log("Failed to create Command Buffer for index buffer!\n"); + return nullptr; + } + } + IGPUBuffer::SCreationParams bufparams; bufparams.size = sizeof(uint32_t) * unitAABBIndices.size(); - bufparams.usage = IGPUBuffer::EUF_INDEX_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT; + bufparams.usage = IGPUBuffer::EUF_INDEX_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INLINE_UPDATE_VIA_CMDBUF; smart_refctd_ptr indicesBuffer; - params.utilities->createFilledDeviceLocalBufferOnDedMem( - SIntendedSubmitInfo{ .queue = params.transfer }, - std::move(bufparams), - unitAABBIndices.data() - ).move_into(indicesBuffer); + { + indicesBuffer = device->createBuffer(std::move(bufparams)); + if (!indicesBuffer) + { + params.utilities->getLogger()->log("Failed to create index buffer!\n"); + return nullptr; + } + + video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = indicesBuffer->getMemoryReqs(); + reqs.memoryTypeBits &= device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); + + auto bufMem = device->allocate(reqs, indicesBuffer.get()); + if (!bufMem.isValid()) + { + params.utilities->getLogger()->log("Failed to allocate device memory compatible with index buffer!\n"); + return nullptr; + } + } + + { + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + cmdbuf->beginDebugMarker("Fill indices buffer begin"); + + SBufferRange bufRange = { .offset = 0, .size = indicesBuffer->getSize(), .buffer = indicesBuffer }; + cmdbuf->updateBuffer(bufRange, unitAABBIndices.data()); + + cmdbuf->endDebugMarker(); + cmdbuf->end(); + } + + smart_refctd_ptr idxBufProgress; + constexpr auto FinishedValue = 25; + { + constexpr auto StartedValue = 0; + idxBufProgress = device->createSemaphore(StartedValue); + + IQueue::SSubmitInfo submitInfos[1] = {}; + const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[] = { {.cmdbuf = cmdbuf.get()} }; + submitInfos[0].commandBuffers = cmdbufs; + const IQueue::SSubmitInfo::SSemaphoreInfo signals[] = 
{ {.semaphore = idxBufProgress.get(),.value = FinishedValue,.stageMask = asset::PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS} }; + submitInfos[0].signalSemaphores = signals; + + params.transfer->submit(submitInfos); + } + + const ISemaphore::SWaitInfo waitInfos[] = { { + .semaphore = idxBufProgress.get(), + .value = FinishedValue + } }; + device->blockForSemaphores(waitInfos); return indicesBuffer; } From 1eded124d9a0f26251e6e7ad22843ac57e0f288b Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 1 Dec 2025 14:59:38 +0700 Subject: [PATCH 157/472] Refactor emulated_integral_64 --- .../emulated/int64_common_member_inc.hlsl | 155 ++++++++ .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 332 ++++++------------ include/nbl/builtin/hlsl/functional.hlsl | 2 +- 3 files changed, 261 insertions(+), 228 deletions(-) create mode 100644 include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl diff --git a/include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl b/include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl new file mode 100644 index 0000000000..2dd7bafa41 --- /dev/null +++ b/include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl @@ -0,0 +1,155 @@ + +storage_t data; + +/** +* @brief Creates an `emulated_int64` from a vector of two `uint32_t`s representing its bitpattern +* +* @param [in] _data Vector of `uint32_t` encoding the `uint64_t/int64_t` being emulated. 
Stored as little endian (first component are the lower 32 bits) +*/ +NBL_CONSTEXPR_STATIC this_t create(NBL_CONST_REF_ARG(storage_t) _data) +{ + this_t retVal; + retVal.data = _data; + return retVal; +} + +/** +* @brief Creates an `emulated_int64` from two `uint32_t`s representing its bitpattern +* +* @param [in] lo Lowest 32 bits of the `uint64_t/int64_t` being emulated +* @param [in] hi Highest 32 bits of the `uint64_t/int64_t` being emulated +*/ +NBL_CONSTEXPR_STATIC this_t create(NBL_CONST_REF_ARG(uint32_t) lo, NBL_CONST_REF_ARG(uint32_t) hi) +{ + return create(storage_t(lo, hi)); +} + +// ------------------------------------------------------- CONVERSION OPERATORS--------------------------------------------------------------- +// GLM requires these for vector casts + +#ifndef __HLSL_VERSION + +template +constexpr explicit operator I() const noexcept; + +#endif + +// ------------------------------------------------------- INTERNAL GETTERS ------------------------------------------------- + +NBL_CONSTEXPR_FUNC uint32_t __getLSB() NBL_CONST_MEMBER_FUNC +{ + return data.x; +} + +NBL_CONSTEXPR_FUNC uint32_t __getMSB() NBL_CONST_MEMBER_FUNC +{ + return data.y; +} + +// ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- + +NBL_CONSTEXPR_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + this_t retVal = create(data & rhs.data); + return retVal; +} + +NBL_CONSTEXPR_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + this_t retVal = create(data | rhs.data); + return retVal; +} + +NBL_CONSTEXPR_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + this_t retVal = create(data ^ rhs.data); + return retVal; +} + +NBL_CONSTEXPR_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC +{ + this_t retVal = create(~data); + return retVal; +} + +// Only valid in CPP +#ifndef __HLSL_VERSION +constexpr inline this_t operator>>(uint32_t 
bits) const; + +constexpr inline this_t operator<<(uint32_t bits) const; + +constexpr inline this_t& operator&=(const this_t& val) +{ + data &= val.data; + return *this; +} + +constexpr inline this_t& operator|=(const this_t& val) +{ + data |= val.data; + return *this; +} + +constexpr inline this_t& operator^=(const this_t& val) +{ + data ^= val.data; + return *this; +} + +#endif + +// ------------------------------------------------------- ARITHMETIC OPERATORS ------------------------------------------------- + +NBL_CONSTEXPR_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + const spirv::AddCarryOutput lowerAddResult = addCarry(__getLSB(), rhs.__getLSB()); + return create(lowerAddResult.result, __getMSB() + rhs.__getMSB() + lowerAddResult.carry); +} + +NBL_CONSTEXPR_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + const spirv::SubBorrowOutput lowerSubResult = subBorrow(__getLSB(), rhs.__getLSB()); + return create(lowerSubResult.result, __getMSB() - rhs.__getMSB() - lowerSubResult.borrow); +} + +// ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- +NBL_CONSTEXPR_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + equal_to equals; + return all(equals(data, rhs.data)); +} + +NBL_CONSTEXPR_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + not_equal_to notEquals; + return any(notEquals(data, rhs.data)); +} + +NBL_CONSTEXPR_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + // Either the topmost bits, when interpreted with correct sign, are less than those of `rhs`, or they're equal and the lower bits are less + // (lower bits are always positive in both unsigned and 2's complement so comparison can happen as-is) + const bool MSBEqual = __getMSB() == rhs.__getMSB(); + const bool MSB = Signed ? 
(bit_cast(__getMSB()) < bit_cast(rhs.__getMSB())) : (__getMSB() < rhs.__getMSB()); + const bool LSB = __getLSB() < rhs.__getLSB(); + return MSBEqual ? LSB : MSB; +} + +NBL_CONSTEXPR_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + // Same reasoning as above + const bool MSBEqual = __getMSB() == rhs.__getMSB(); + const bool MSB = Signed ? (bit_cast(__getMSB()) > bit_cast(rhs.__getMSB())) : (__getMSB() > rhs.__getMSB()); + const bool LSB = __getLSB() > rhs.__getLSB(); + return MSBEqual ? LSB : MSB; +} + +NBL_CONSTEXPR_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + return !operator>(rhs); +} + +NBL_CONSTEXPR_FUNC bool operator>=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + return !operator<(rhs); +} diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 2214835df9..ce98d5268f 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -15,199 +15,92 @@ namespace nbl namespace hlsl { -template -struct emulated_int64_base +struct emulated_int64_t; + +struct emulated_uint64_t { using storage_t = vector; - using this_t = emulated_int64_base; - using this_signed_t = emulated_int64_base; - - storage_t data; + using this_t = emulated_uint64_t; + NBL_CONSTEXPR_STATIC_INLINE bool Signed = false; - // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- + #include "int64_common_member_inc.hlsl" #ifndef __HLSL_VERSION - - emulated_int64_base() = default; - + emulated_uint64_t() = default; // GLM requires these to cast vectors because it uses a native `static_cast` template - constexpr explicit emulated_int64_base(const I& toEmulate); - - constexpr explicit emulated_int64_base(const emulated_int64_base& other) : data(other.data) {} + constexpr explicit emulated_uint64_t(const I& toEmulate); + constexpr explicit 
emulated_uint64_t(const emulated_int64_t& other); #endif +}; - /** - * @brief Creates an `emulated_int64` from a vector of two `uint32_t`s representing its bitpattern - * - * @param [in] _data Vector of `uint32_t` encoding the `uint64_t/int64_t` being emulated. Stored as little endian (first component are the lower 32 bits) - */ - NBL_CONSTEXPR_STATIC this_t create(NBL_CONST_REF_ARG(storage_t) _data) - { - this_t retVal; - retVal.data = _data; - return retVal; - } - - /** - * @brief Creates an `emulated_int64` from two `uint32_t`s representing its bitpattern - * - * @param [in] lo Lowest 32 bits of the `uint64_t/int64_t` being emulated - * @param [in] hi Highest 32 bits of the `uint64_t/int64_t` being emulated - */ - NBL_CONSTEXPR_STATIC this_t create(NBL_CONST_REF_ARG(uint32_t) lo, NBL_CONST_REF_ARG(uint32_t) hi) - { - return create(storage_t(lo, hi)); - } - - // ------------------------------------------------------- CONVERSION OPERATORS--------------------------------------------------------------- - // GLM requires these for vector casts +struct emulated_int64_t +{ + using storage_t = vector; + using this_t = emulated_int64_t; + NBL_CONSTEXPR_STATIC_INLINE bool Signed = true; + + #include "int64_common_member_inc.hlsl" + #ifndef __HLSL_VERSION - + emulated_int64_t() = default; + // GLM requires these to cast vectors because it uses a native `static_cast` template - constexpr explicit operator I() const noexcept; + constexpr explicit emulated_int64_t(const I& toEmulate); + constexpr explicit emulated_int64_t(const emulated_uint64_t& other); #endif - // ------------------------------------------------------- INTERNAL GETTERS ------------------------------------------------- - - NBL_CONSTEXPR_FUNC uint32_t __getLSB() NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_t operator-() NBL_CONST_MEMBER_FUNC { - return data.x; - } - - NBL_CONSTEXPR_FUNC uint32_t __getMSB() NBL_CONST_MEMBER_FUNC - { - return data.y; - } - - // 
------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- - - NBL_CONSTEXPR_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - this_t retVal = create(data & rhs.data); - return retVal; - } - - NBL_CONSTEXPR_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - this_t retVal = create(data | rhs.data); - return retVal; + storage_t inverted = ~data; + return create(_static_cast(inverted)) + _static_cast(1); } +}; - NBL_CONSTEXPR_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - this_t retVal = create(data ^ rhs.data); - return retVal; - } +// ------------------------------------------------ TYPE TRAITS SATISFIED ----------------------------------------------------- - NBL_CONSTEXPR_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC - { - this_t retVal = create(~data); - return retVal; - } +template<> +struct is_signed : bool_constant {}; - // Only valid in CPP - #ifndef __HLSL_VERSION - constexpr inline this_t operator<<(uint32_t bits) const; - constexpr inline this_t operator>>(uint32_t bits) const; +template<> +struct is_unsigned : bool_constant {}; - constexpr inline this_t& operator&=(const this_t& val) - { - data &= val.data; - return *this; - } +// --------------------------------------------------- CONCEPTS SATISFIED ----------------------------------------------------- +namespace concepts +{ - constexpr inline this_t& operator|=(const this_t& val) - { - data |= val.data; - return *this; - } +template +NBL_BOOL_CONCEPT ImitationIntegral64Scalar = same_as || same_as; - constexpr inline this_t& operator^=(const this_t& val) - { - data ^= val.data; - return *this; - } - - #endif - - // ------------------------------------------------------- ARITHMETIC OPERATORS ------------------------------------------------- - - NBL_CONSTEXPR_FUNC this_signed_t operator-() NBL_CONST_MEMBER_FUNC - { - vector negated = -data; - return 
this_signed_t::create(_static_cast(negated)); - } - - NBL_CONSTEXPR_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - const spirv::AddCarryOutput lowerAddResult = addCarry(__getLSB(), rhs.__getLSB()); - return create(lowerAddResult.result, __getMSB() + rhs.__getMSB() + lowerAddResult.carry); - } - - NBL_CONSTEXPR_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - const spirv::SubBorrowOutput lowerSubResult = subBorrow(__getLSB(), rhs.__getLSB()); - return create(lowerSubResult.result, __getMSB() - rhs.__getMSB() - lowerSubResult.borrow); - } - - // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- - NBL_CONSTEXPR_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - equal_to equals; - return all(equals(data, rhs.data)); - } - - NBL_CONSTEXPR_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - not_equal_to notEquals; - return any(notEquals(data, rhs.data)); - } +namespace impl +{ - NBL_CONSTEXPR_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - // Either the topmost bits, when interpreted with correct sign, are less than those of `rhs`, or they're equal and the lower bits are less - // (lower bits are always positive in both unsigned and 2's complement so comparison can happen as-is) - const bool MSBEqual = __getMSB() == rhs.__getMSB(); - const bool MSB = Signed ? (bit_cast(__getMSB()) < bit_cast(rhs.__getMSB())) : (__getMSB() < rhs.__getMSB()); - const bool LSB = __getLSB() < rhs.__getLSB(); - return MSBEqual ? LSB : MSB; - } +template<> +struct is_emulating_integral_scalar +{ + NBL_CONSTEXPR_STATIC_INLINE bool value = true; +}; - NBL_CONSTEXPR_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - // Same reasoning as above - const bool MSBEqual = __getMSB() == rhs.__getMSB(); - const bool MSB = Signed ? 
(bit_cast(__getMSB()) > bit_cast(rhs.__getMSB())) : (__getMSB() > rhs.__getMSB()); - const bool LSB = __getLSB() > rhs.__getLSB(); - return MSBEqual ? LSB : MSB; - } +template<> +struct is_emulating_integral_scalar +{ + NBL_CONSTEXPR_STATIC_INLINE bool value = true; +}; +} - NBL_CONSTEXPR_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - return !operator>(rhs); - } - NBL_CONSTEXPR_FUNC bool operator>=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - return !operator<(rhs); - } -}; +} -using emulated_uint64_t = emulated_int64_base; -using emulated_int64_t = emulated_int64_base; namespace impl { -template -struct static_cast_helper, emulated_int64_base > +template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar && concepts::ImitationIntegral64Scalar && !concepts::same_as) +struct static_cast_helper && concepts::ImitationIntegral64Scalar && !concepts::same_as) > { - using To = emulated_int64_base; - using From = emulated_int64_base; NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) other) { @@ -217,12 +110,9 @@ struct static_cast_helper, emulated_int64_base NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t))) -struct static_cast_helper NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t))) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(To) <= sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) +struct static_cast_helper && (sizeof(To) <= sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) > { - using To = I; - using From = emulated_int64_base; - // Return only the lowest bits NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) val) { @@ -230,24 +120,18 @@ struct static_cast_helper NBL_PARTIAL_REQ_BOT(con } }; -template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) > sizeof(uint32_t))) -struct static_cast_helper NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) > sizeof(uint32_t))) > +template 
NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(To) > sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) +struct static_cast_helper && (sizeof(To) > sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) > { - using To = I; - using From = emulated_int64_base; - NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) val) { return bit_cast(val.data); } }; -template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t))) -struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t))) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(From) <= sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) +struct static_cast_helper && (sizeof(From) <= sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) > { - using To = emulated_int64_base; - using From = I; - // Set only lower bits NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) i) { @@ -255,12 +139,9 @@ struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(con } }; -template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) > sizeof(uint32_t))) -struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) > sizeof(uint32_t))) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(From) > sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) +struct static_cast_helper && (sizeof(From) > sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) > { - using To = emulated_int64_base; - using From = I; - NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) i) { // `bit_cast` blocked by GLM vectors using a union @@ -280,16 +161,30 @@ struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(con #ifndef __HLSL_VERSION -template +constexpr emulated_int64_t::emulated_int64_t(const emulated_uint64_t& other) : data(other.data) {} + +constexpr emulated_uint64_t::emulated_uint64_t(const emulated_int64_t& other) : data(other.data) {} + +template +constexpr 
emulated_int64_t::emulated_int64_t(const I& toEmulate) +{ + *this = _static_cast(toEmulate); +} + template -constexpr emulated_int64_base::emulated_int64_base(const I& toEmulate) +constexpr emulated_uint64_t::emulated_uint64_t(const I& toEmulate) { - *this = _static_cast>(toEmulate); + *this = _static_cast(toEmulate); } -template template -constexpr emulated_int64_base::operator I() const noexcept +constexpr emulated_int64_t::operator I() const noexcept +{ + return _static_cast(*this); +} + +template +constexpr emulated_uint64_t::operator I() const noexcept { return _static_cast(*this); } @@ -298,28 +193,27 @@ constexpr emulated_int64_base::operator I() const noexcept // ---------------------- Functional operators ------------------------ -template -struct left_shift_operator > +template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar) +struct left_shift_operator) > { - using type_t = emulated_int64_base; NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); // Can't do generic templated definition, see: //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 // If `_bits > 63` or `_bits < 0` the result is undefined - NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, uint32_t bits) { const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites LSB const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; - const type_t shifted = type_t::create(bigShift ? vector(0, operand.__getLSB() << shift) + const T shifted = T::create(bigShift ? 
vector(0, operand.__getLSB() << shift) : vector(operand.__getLSB() << bits, (operand.__getMSB() << bits) | (operand.__getLSB() >> shift))); - ternary_operator ternary; + ternary_operator ternary; return ternary(bool(bits), shifted, operand); } // If `_bits > 63` or `_bits < 0` the result is undefined - NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, T bits) { return operator()(operand, _static_cast(bits)); } @@ -381,21 +275,24 @@ struct arithmetic_right_shift_operator #ifndef __HLSL_VERSION -template -constexpr inline emulated_int64_base emulated_int64_base::operator<<(uint32_t bits) const +constexpr inline emulated_int64_t emulated_int64_t::operator<<(uint32_t bits) const +{ + left_shift_operator leftShift; + return leftShift(*this, bits); +} + +constexpr inline emulated_uint64_t emulated_uint64_t::operator<<(uint32_t bits) const { left_shift_operator leftShift; return leftShift(*this, bits); } -template<> constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(uint32_t bits) const { arithmetic_right_shift_operator rightShift; return rightShift(*this, bits); } -template<> constexpr inline emulated_int64_t emulated_int64_t::operator>>(uint32_t bits) const { arithmetic_right_shift_operator rightShift; @@ -404,14 +301,15 @@ constexpr inline emulated_int64_t emulated_int64_t::operator>>(uint32_t bits) co #endif + // ---------------------- STD arithmetic operators ------------------------ // Specializations of the structs found in functional.hlsl // These all have to be specialized because of the identity that can't be initialized inside the struct definition -template -struct plus > +template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar) +struct plus) > { - using type_t = emulated_int64_base; + using type_t = T; type_t operator()(NBL_CONST_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) { @@ -421,10 +319,10 @@ struct plus > const static type_t 
identity; }; -template -struct minus > +template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar) +struct minus) > { - using type_t = emulated_int64_base; + using type_t = T; type_t operator()(NBL_CONST_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) { @@ -446,10 +344,10 @@ NBL_CONSTEXPR_INLINE_VAR emulated_int64_t minus::identity = _s // --------------------------------- Compound assignment operators ------------------------------------------ // Specializations of the structs found in functional.hlsl -template -struct plus_assign > +template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar) +struct plus_assign) > { - using type_t = emulated_int64_base; + using type_t = T; using base_t = plus; base_t baseOp; void operator()(NBL_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) @@ -460,10 +358,10 @@ struct plus_assign > const static type_t identity; }; -template -struct minus_assign > +template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar) +struct minus_assign) > { - using type_t = emulated_int64_base; + using type_t = T; using base_t = minus; base_t baseOp; void operator()(NBL_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) @@ -483,26 +381,6 @@ NBL_CONSTEXPR_INLINE_VAR emulated_uint64_t minus_assign::iden template<> NBL_CONSTEXPR_INLINE_VAR emulated_int64_t minus_assign::identity = minus::identity; -// ------------------------------------------------ TYPE TRAITS SATISFIED ----------------------------------------------------- - -template<> -struct is_signed : bool_constant {}; - -template<> -struct is_unsigned : bool_constant {}; - -// --------------------------------------------------- CONCEPTS SATISFIED ----------------------------------------------------- -namespace concepts -{ -namespace impl -{ -template -struct is_emulating_integral_scalar > -{ - NBL_CONSTEXPR_STATIC_INLINE bool value = true; -}; -} -} } //namespace nbl } //namespace hlsl diff --git a/include/nbl/builtin/hlsl/functional.hlsl 
b/include/nbl/builtin/hlsl/functional.hlsl index fd23ad388c..98858bae80 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -219,7 +219,7 @@ NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less_equal, <=, lessThanEqual) // ------------------------------------------------------------- COMPOUND ASSIGNMENT OPERATORS -------------------------------------------------------------------- -#define COMPOUND_ASSIGN(NAME) template struct NAME##_assign { \ +#define COMPOUND_ASSIGN(NAME) template struct NAME##_assign { \ using type_t = T; \ using base_t = NAME ; \ base_t baseOp; \ From aa9e24daf8bb7bae9ff743f1db899234819ac17f Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 1 Dec 2025 18:42:11 +0700 Subject: [PATCH 158/472] Add unary_minus_operator class --- include/nbl/builtin/hlsl/cpp_compat/basic.h | 2 ++ .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 36 ++++++++++++++----- include/nbl/builtin/hlsl/functional.hlsl | 10 ++++++ 3 files changed, 39 insertions(+), 9 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index 89c10d14fd..b51860a399 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -8,6 +8,7 @@ #include #define ARROW -> +#define NBL_DEREF_THIS (*this) #define NBL_CONSTEXPR constexpr // TODO: rename to NBL_CONSTEXPR_VAR #define NBL_CONSTEXPR_FUNC constexpr #define NBL_CONSTEXPR_STATIC constexpr static @@ -44,6 +45,7 @@ namespace nbl::hlsl #else #define ARROW .arrow(). 
+#define NBL_DEREF_THIS this #define NBL_CONSTEXPR const static // TODO: rename to NBL_CONSTEXPR_VAR #define NBL_CONSTEXPR_FUNC #define NBL_CONSTEXPR_STATIC const static diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index ce98d5268f..ba4facad01 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -53,11 +53,8 @@ struct emulated_int64_t constexpr explicit emulated_int64_t(const emulated_uint64_t& other); #endif - NBL_CONSTEXPR_FUNC this_t operator-() NBL_CONST_MEMBER_FUNC - { - storage_t inverted = ~data; - return create(_static_cast(inverted)) + _static_cast(1); - } + NBL_CONSTEXPR_FUNC emulated_int64_t operator-() NBL_CONST_MEMBER_FUNC; + }; // ------------------------------------------------ TYPE TRAITS SATISFIED ----------------------------------------------------- @@ -196,24 +193,25 @@ constexpr emulated_uint64_t::operator I() const noexcept template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar) struct left_shift_operator) > { + using type_t = T; NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); // Can't do generic templated definition, see: //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 // If `_bits > 63` or `_bits < 0` the result is undefined - NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, uint32_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) { const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites LSB const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; - const T shifted = T::create(bigShift ? vector(0, operand.__getLSB() << shift) + const type_t shifted = type_t::create(bigShift ? 
vector(0, operand.__getLSB() << shift) : vector(operand.__getLSB() << bits, (operand.__getMSB() << bits) | (operand.__getLSB() >> shift))); - ternary_operator ternary; + ternary_operator ternary; return ternary(bool(bits), shifted, operand); } // If `_bits > 63` or `_bits < 0` the result is undefined - NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, T bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) { return operator()(operand, _static_cast(bits)); } @@ -381,6 +379,26 @@ NBL_CONSTEXPR_INLINE_VAR emulated_uint64_t minus_assign::iden template<> NBL_CONSTEXPR_INLINE_VAR emulated_int64_t minus_assign::identity = minus::identity; +// --------------------------------- Unary operators ------------------------------------------ +// Specializations of the structs found in functional.hlsl +template<> +struct unary_minus_operator +{ + using type_t = emulated_int64_t; + + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand) + { + using storage_t = type_t::storage_t; + storage_t inverted = ~operand.data; + return type_t::create(_static_cast(inverted)) + _static_cast(1); + } +}; + +NBL_CONSTEXPR_INLINE_FUNC emulated_int64_t emulated_int64_t::operator-() NBL_CONST_MEMBER_FUNC +{ + unary_minus_operator unaryMinus; + return unaryMinus(NBL_DEREF_THIS); +} } //namespace nbl } //namespace hlsl diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 98858bae80..f0730a12d2 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -487,7 +487,17 @@ struct logical_right_shift_operator } }; +// ----------------------------------------------------------------- UNARY OPERATORS -------------------------------------------------------------------- +template +struct unary_minus_operator +{ + using type_t = T; + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand) + { + return -operand; + } +}; } //namespace nbl } 
//namespace hlsl From 6683cd5a0f7965caa8484ea40b3847bab23b54a0 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 1 Dec 2025 18:42:29 +0700 Subject: [PATCH 159/472] Remove commented code on emulated/vector_t.hlsl --- include/nbl/builtin/hlsl/emulated/vector_t.hlsl | 9 --------- 1 file changed, 9 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index cdeddeb105..f153fb1062 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -40,9 +40,6 @@ struct _2_component_vec return y; // TODO: avoid code duplication, make it constexpr - //using TAsUint = typename unsigned_integer_of_size::type; - //TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull); - //return nbl::hlsl::bit_cast(invalidComponentValue); return nbl::hlsl::undef(); } @@ -77,9 +74,6 @@ struct _3_component_vec return z; // TODO: avoid code duplication, make it constexpr - //using TAsUint = typename unsigned_integer_of_size::type; - //TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); - //return nbl::hlsl::bit_cast(invalidComponentValue); return nbl::hlsl::undef(); } @@ -118,9 +112,6 @@ struct _4_component_vec return w; // TODO: avoid code duplication, make it constexpr - //using TAsUint = typename unsigned_integer_of_size::type; - //uint64_t invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); - //return nbl::hlsl::bit_cast(invalidComponentValue); return nbl::hlsl::undef(); } From 5612363097db9d8f56b612e2d0713bcae20a8fd0 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 1 Dec 2025 13:57:18 +0100 Subject: [PATCH 160/472] Created `nbl::system::to_string` utility function --- examples_tests | 2 +- include/nbl/system/to_string.h | 84 ++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 1 deletion(-) create mode 100644 
include/nbl/system/to_string.h diff --git a/examples_tests b/examples_tests index 2b4db21239..158e58891d 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 2b4db2123918f380cc0a35f6889315a02f84ea73 +Subproject commit 158e58891d6395df2566013b3590fdfe475aae8d diff --git a/include/nbl/system/to_string.h b/include/nbl/system/to_string.h new file mode 100644 index 0000000000..70ecfba211 --- /dev/null +++ b/include/nbl/system/to_string.h @@ -0,0 +1,84 @@ +#ifndef _NBL_SYSTEM_TO_STRING_INCLUDED_ +#define _NBL_SYSTEM_TO_STRING_INCLUDED_ + +#include +#include +#include + +namespace nbl +{ +namespace system +{ +namespace impl +{ + +template +struct to_string_helper +{ + static std::string __call(const T& value) + { + return std::to_string(value); + } +}; + +template<> +struct to_string_helper +{ + static std::string __call(const hlsl::emulated_uint64_t& value) + { + return std::to_string(static_cast(value)); + } +}; + +template<> +struct to_string_helper +{ + static std::string __call(const hlsl::emulated_int64_t& value) + { + return std::to_string(static_cast(value)); + } +}; + +template +struct to_string_helper> +{ + static std::string __call(const hlsl::vector& value) + { + std::stringstream output; + output << "{ "; + for (int i = 0; i < N; ++i) + { + output << to_string_helper::__call(value[i]); + + if (i < N - 1) + output << ", "; + } + output << " }"; + + return output.str(); + } +}; + +template +struct to_string_helper> +{ + using value_t = hlsl::morton::code; + static std::string __call(value_t value) + { + TestValueToTextConverter mortonCodeDataToTextConverter; + return mortonCodeDataToTextConverter(value.value); + } +}; + + +} + +template +std::string to_string(T value) +{ + return impl::to_string_helper::__call(value); +} +} +} + +#endif \ No newline at end of file From 21a576573db68a59f026c3a16fb5042ab3be3126 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 1 Dec 2025 14:50:38 +0100 Subject: [PATCH 161/472] update dxc to 
https://github.com/Devsh-Graphics-Programming/DirectXShaderCompiler/commit/1e5414bcc21b002d795f97075dff63e387fc668f --- 3rdparty/dxc/dxc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/dxc/dxc b/3rdparty/dxc/dxc index dafad1d9a3..1e5414bcc2 160000 --- a/3rdparty/dxc/dxc +++ b/3rdparty/dxc/dxc @@ -1 +1 @@ -Subproject commit dafad1d9a370d17ac9ce69928ef518f842cb5191 +Subproject commit 1e5414bcc21b002d795f97075dff63e387fc668f From cdb6ad7d3865af1e3390127af5da008e44ead6ce Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 1 Dec 2025 21:00:47 +0700 Subject: [PATCH 162/472] Unify all Truncate specializaton for vector type --- .../nbl/builtin/hlsl/cpp_compat/truncate.hlsl | 50 ++++++------------- 1 file changed, 16 insertions(+), 34 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl index 38467942f9..ffe3d12641 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl @@ -9,6 +9,12 @@ namespace nbl namespace hlsl { +namespace concepts +{ + template + NBL_BOOL_CONCEPT can_truncate_vector = concepts::Vectorial && concepts::Vectorial && concepts::same_as::scalar_type, typename vector_traits::scalar_type > && vector_traits::Dimension <= vector_traits::Dimension; +} + namespace impl { @@ -21,44 +27,20 @@ struct Truncate } }; -template NBL_PARTIAL_REQ_TOP(concepts::Scalar) -struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar) > +template NBL_PARTIAL_REQ_TOP(concepts::can_truncate_vector) +struct Truncate) > { - NBL_CONSTEXPR_FUNC vector operator()(const vector v) + NBL_CONSTEXPR_FUNC To operator()(const From v) { - vector truncated = { v[0] }; - return truncated; + array_get::scalar_type> getter; + array_set::scalar_type> setter; + To output; + [[unroll]] + for (int i = 0; i < vector_traits::Dimension; ++i) + setter(output, i, getter(v, i)); + return output; } -}; - -template 
NBL_PARTIAL_REQ_TOP(concepts::Scalar && N >= 2) -struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar && N >= 2) > -{ - NBL_CONSTEXPR_FUNC vector operator()(const vector v) - { - vector truncated = { v[0], v[1]}; - return truncated; - } -}; -template NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 3) -struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar&& N >= 3) > -{ - NBL_CONSTEXPR_FUNC vector operator()(const vector v) - { - vector truncated = { v[0], v[1], v[2] }; - return truncated; - } -}; - -template NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 4) -struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar&& N >= 4) > -{ - NBL_CONSTEXPR_FUNC vector operator()(const vector v) - { - vector truncated = { v[0], v[1], v[2], v[3] }; - return truncated; - } }; } //namespace impl From 2a43ae8981e0c984631176b0fb7caada4d39cd40 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 1 Dec 2025 15:37:21 +0100 Subject: [PATCH 163/472] Removed from the `to_string` function specialization of types not present yet in the master branch --- include/nbl/system/to_string.h | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/include/nbl/system/to_string.h b/include/nbl/system/to_string.h index 70ecfba211..92888704c0 100644 --- a/include/nbl/system/to_string.h +++ b/include/nbl/system/to_string.h @@ -2,8 +2,6 @@ #define _NBL_SYSTEM_TO_STRING_INCLUDED_ #include -#include -#include namespace nbl { @@ -21,24 +19,6 @@ struct to_string_helper } }; -template<> -struct to_string_helper -{ - static std::string __call(const hlsl::emulated_uint64_t& value) - { - return std::to_string(static_cast(value)); - } -}; - -template<> -struct to_string_helper -{ - static std::string __call(const hlsl::emulated_int64_t& value) - { - return std::to_string(static_cast(value)); - } -}; - template struct to_string_helper> { @@ -59,18 +39,6 @@ struct to_string_helper> } }; -template -struct to_string_helper> -{ - using value_t = hlsl::morton::code; - static 
std::string __call(value_t value) - { - TestValueToTextConverter mortonCodeDataToTextConverter; - return mortonCodeDataToTextConverter(value.value); - } -}; - - } template From a9e107835c0c0735cb0a83c70908c8375c7e544a Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 1 Dec 2025 13:57:18 +0100 Subject: [PATCH 164/472] Resolved conflicts cherry-picked from the `unified_testing_interface` branch --- examples_tests | 2 +- include/nbl/system/to_string.h | 84 ++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 1 deletion(-) create mode 100644 include/nbl/system/to_string.h diff --git a/examples_tests b/examples_tests index 829ea34183..e5d5ae2ca9 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 829ea34183a0a62a3bd68ded4dd9e451b97126d4 +Subproject commit e5d5ae2ca9137a6966d00aa039f3e6dae7c23fb9 diff --git a/include/nbl/system/to_string.h b/include/nbl/system/to_string.h new file mode 100644 index 0000000000..70ecfba211 --- /dev/null +++ b/include/nbl/system/to_string.h @@ -0,0 +1,84 @@ +#ifndef _NBL_SYSTEM_TO_STRING_INCLUDED_ +#define _NBL_SYSTEM_TO_STRING_INCLUDED_ + +#include +#include +#include + +namespace nbl +{ +namespace system +{ +namespace impl +{ + +template +struct to_string_helper +{ + static std::string __call(const T& value) + { + return std::to_string(value); + } +}; + +template<> +struct to_string_helper +{ + static std::string __call(const hlsl::emulated_uint64_t& value) + { + return std::to_string(static_cast(value)); + } +}; + +template<> +struct to_string_helper +{ + static std::string __call(const hlsl::emulated_int64_t& value) + { + return std::to_string(static_cast(value)); + } +}; + +template +struct to_string_helper> +{ + static std::string __call(const hlsl::vector& value) + { + std::stringstream output; + output << "{ "; + for (int i = 0; i < N; ++i) + { + output << to_string_helper::__call(value[i]); + + if (i < N - 1) + output << ", "; + } + output << " }"; + + return output.str(); + } +}; + 
+template +struct to_string_helper> +{ + using value_t = hlsl::morton::code; + static std::string __call(value_t value) + { + TestValueToTextConverter mortonCodeDataToTextConverter; + return mortonCodeDataToTextConverter(value.value); + } +}; + + +} + +template +std::string to_string(T value) +{ + return impl::to_string_helper::__call(value); +} +} +} + +#endif \ No newline at end of file From 1e7ea64e23be96f204d87dc98b5913cb5db44664 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 1 Dec 2025 15:37:21 +0100 Subject: [PATCH 165/472] Removed from the `to_string` function specialization of types not present yet in the master branch --- include/nbl/system/to_string.h | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/include/nbl/system/to_string.h b/include/nbl/system/to_string.h index 70ecfba211..92888704c0 100644 --- a/include/nbl/system/to_string.h +++ b/include/nbl/system/to_string.h @@ -2,8 +2,6 @@ #define _NBL_SYSTEM_TO_STRING_INCLUDED_ #include -#include -#include namespace nbl { @@ -21,24 +19,6 @@ struct to_string_helper } }; -template<> -struct to_string_helper -{ - static std::string __call(const hlsl::emulated_uint64_t& value) - { - return std::to_string(static_cast(value)); - } -}; - -template<> -struct to_string_helper -{ - static std::string __call(const hlsl::emulated_int64_t& value) - { - return std::to_string(static_cast(value)); - } -}; - template struct to_string_helper> { @@ -59,18 +39,6 @@ struct to_string_helper> } }; -template -struct to_string_helper> -{ - using value_t = hlsl::morton::code; - static std::string __call(value_t value) - { - TestValueToTextConverter mortonCodeDataToTextConverter; - return mortonCodeDataToTextConverter(value.value); - } -}; - - } template From b7f71690d99cd931d17e56b324e82547b8f1e3c2 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 1 Dec 2025 16:04:55 +0100 Subject: [PATCH 166/472] Restored the removed `system::to_string` specializations --- include/nbl/system/to_string.h | 32 
++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/include/nbl/system/to_string.h b/include/nbl/system/to_string.h index 92888704c0..70ecfba211 100644 --- a/include/nbl/system/to_string.h +++ b/include/nbl/system/to_string.h @@ -2,6 +2,8 @@ #define _NBL_SYSTEM_TO_STRING_INCLUDED_ #include +#include +#include namespace nbl { @@ -19,6 +21,24 @@ struct to_string_helper } }; +template<> +struct to_string_helper +{ + static std::string __call(const hlsl::emulated_uint64_t& value) + { + return std::to_string(static_cast(value)); + } +}; + +template<> +struct to_string_helper +{ + static std::string __call(const hlsl::emulated_int64_t& value) + { + return std::to_string(static_cast(value)); + } +}; + template struct to_string_helper> { @@ -39,6 +59,18 @@ struct to_string_helper> } }; +template +struct to_string_helper> +{ + using value_t = hlsl::morton::code; + static std::string __call(value_t value) + { + TestValueToTextConverter mortonCodeDataToTextConverter; + return mortonCodeDataToTextConverter(value.value); + } +}; + + } template From 486af6d3698534557809065b3fbf491c5078a47c Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 1 Dec 2025 16:14:26 +0100 Subject: [PATCH 167/472] Fixes --- examples_tests | 2 +- include/nbl/system/to_string.h | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/examples_tests b/examples_tests index 158e58891d..8842299b81 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 158e58891d6395df2566013b3590fdfe475aae8d +Subproject commit 8842299b81c2ab0a8951d042b1945372a930b863 diff --git a/include/nbl/system/to_string.h b/include/nbl/system/to_string.h index 70ecfba211..3169503a06 100644 --- a/include/nbl/system/to_string.h +++ b/include/nbl/system/to_string.h @@ -65,8 +65,7 @@ struct to_string_helper> using value_t = hlsl::morton::code; static std::string __call(value_t value) { - TestValueToTextConverter mortonCodeDataToTextConverter; - return 
mortonCodeDataToTextConverter(value.value); + return to_string_helper::__call(value.value); } }; From c365240ed060b45d535e3a9293c91da2d9f01e61 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 1 Dec 2025 23:04:34 +0700 Subject: [PATCH 168/472] Fix promote.hlsl and reduce the amount of specialization for Promote --- .../nbl/builtin/hlsl/cpp_compat/promote.hlsl | 49 ++++--------------- include/nbl/builtin/hlsl/morton.hlsl | 5 +- 2 files changed, 12 insertions(+), 42 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl index 6e75a55b1b..cd4ac3193c 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl @@ -21,50 +21,21 @@ struct Promote } }; -#ifdef __HLSL_VERSION - -template -struct Promote, U> +// TODO(kevinyu): Should we enable truncation from uint64_t to emulated_vector? +template NBL_PARTIAL_REQ_TOP(concepts::Vectorial && is_scalar_v && is_same_v::scalar_type, From>) +struct Promote && is_scalar_v && is_same_v::scalar_type, From>) > { - NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(const U v) + NBL_CONSTEXPR_FUNC To operator()(const From v) { - vector promoted = {Scalar(v)}; - return promoted; + array_set setter; + To output; + [[unroll]] + for (int i = 0; i < vector_traits::Dimension; ++i) + setter(output, i, v); + return output; } }; -template -struct Promote, U> -{ - NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(const U v) - { - vector promoted = {Scalar(v), Scalar(v)}; - return promoted; - } -}; - -template -struct Promote, U> -{ - NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(const U v) - { - vector promoted = {Scalar(v), Scalar(v), Scalar(v)}; - return promoted; - } -}; - -template -struct Promote, U> -{ - NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(const U v) - { - vector promoted = {Scalar(v), 
Scalar(v), Scalar(v), Scalar(v)}; - return promoted; - } -}; - -#endif - } template diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 696124ae0c..6968d414fc 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -137,8 +137,7 @@ struct Transcoder return leftShift(interleaved, truncate >(vector(0, 1, 2, 3))); } - template 16), vector, vector > - NBL_FUNC_REQUIRES(concepts::IntVector && 8 * sizeof(typename vector_traits::scalar_type) >= Bits) + template 16), vector, vector > > /** * @brief Encodes a vector of cartesian coordinates as a Morton code * @@ -216,7 +215,7 @@ struct Equal NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, ); - const portable_vector_t zeros = _static_cast >(truncate >(vector(0,0,0,0))); + const portable_vector_t zeros = promote>(_static_cast(0)); const portable_vector_t rhsCasted = _static_cast >(rhs); const portable_vector_t xored = rhsCasted ^ (InterleaveMasks & value); From 256420229e8a3b65bb5f09fc0933e70e894338f0 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 1 Dec 2025 17:05:34 +0100 Subject: [PATCH 169/472] Updated examples --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 8842299b81..44fdbe8d35 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 8842299b81c2ab0a8951d042b1945372a930b863 +Subproject commit 44fdbe8d35a9505ac3474b708200cc7e039aae31 From e674772baba13c08445c29835cedc8276909a0f9 Mon Sep 17 00:00:00 2001 From: Mateusz Kielan Date: Mon, 1 Dec 2025 19:16:39 +0100 Subject: [PATCH 170/472] add `nbl::system::to_string` utility function all credit goes to @Przemog1 --- include/nbl/system/to_string.h | 83 ++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 
include/nbl/system/to_string.h diff --git a/include/nbl/system/to_string.h b/include/nbl/system/to_string.h new file mode 100644 index 0000000000..3169503a06 --- /dev/null +++ b/include/nbl/system/to_string.h @@ -0,0 +1,83 @@ +#ifndef _NBL_SYSTEM_TO_STRING_INCLUDED_ +#define _NBL_SYSTEM_TO_STRING_INCLUDED_ + +#include +#include +#include + +namespace nbl +{ +namespace system +{ +namespace impl +{ + +template +struct to_string_helper +{ + static std::string __call(const T& value) + { + return std::to_string(value); + } +}; + +template<> +struct to_string_helper +{ + static std::string __call(const hlsl::emulated_uint64_t& value) + { + return std::to_string(static_cast(value)); + } +}; + +template<> +struct to_string_helper +{ + static std::string __call(const hlsl::emulated_int64_t& value) + { + return std::to_string(static_cast(value)); + } +}; + +template +struct to_string_helper> +{ + static std::string __call(const hlsl::vector& value) + { + std::stringstream output; + output << "{ "; + for (int i = 0; i < N; ++i) + { + output << to_string_helper::__call(value[i]); + + if (i < N - 1) + output << ", "; + } + output << " }"; + + return output.str(); + } +}; + +template +struct to_string_helper> +{ + using value_t = hlsl::morton::code; + static std::string __call(value_t value) + { + return to_string_helper::__call(value.value); + } +}; + + +} + +template +std::string to_string(T value) +{ + return impl::to_string_helper::__call(value); +} +} +} + +#endif \ No newline at end of file From ec1d6745fb34451f9fda2f0a68a6047866630e14 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 2 Dec 2025 06:09:26 +0700 Subject: [PATCH 171/472] Make promote constrainable --- include/nbl/builtin/hlsl/cpp_compat/promote.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl index cd4ac3193c..9f2b58047f 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl +++ 
b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl @@ -12,7 +12,7 @@ namespace impl { // partial specialize this for `T=matrix|vector` and `U=matrix|vector|scalar_t` -template +template struct Promote { NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(U) v) From 51e35cf27e59311b5abd586b424d63a6502fdeb3 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 2 Dec 2025 06:09:34 +0700 Subject: [PATCH 172/472] equal to _equal --- include/nbl/builtin/hlsl/morton.hlsl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 6968d414fc..d03a02a09c 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -215,12 +215,12 @@ struct Equal NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, ); - const portable_vector_t zeros = promote>(_static_cast(0)); + const portable_vector_t zeros = promote >(_static_cast(0)); const portable_vector_t rhsCasted = _static_cast >(rhs); const portable_vector_t xored = rhsCasted ^ (InterleaveMasks & value); - equal_to > equal; - return equal(xored, zeros); + equal_to > _equal; + return _equal(xored, zeros); } }; From 062ce7b632b7fc90c1b3fdaec0fdce2ddb52c1b5 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 2 Dec 2025 06:18:19 +0700 Subject: [PATCH 173/472] Remove some constraint in morton::code::create --- include/nbl/builtin/hlsl/morton.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index d03a02a09c..869425b856 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -326,7 +326,7 @@ struct code * @param [in] cartesian Coordinates to encode. 
Signedness MUST match the signedness of this Morton code class */ template - NBL_CONSTEXPR_STATIC enable_if_t && is_scalar_v && (is_signed_v == Signed) && (8 * sizeof(I) >= Bits), this_t> + NBL_CONSTEXPR_STATIC enable_if_t && is_scalar_v && (is_signed_v == Signed), this_t> create(NBL_CONST_REF_ARG(vector) cartesian) { this_t retVal; From 6c824283c493c050aa85bb7710fa3d22768b4341 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 2 Dec 2025 12:40:28 +0700 Subject: [PATCH 174/472] Remove NBL_CONSTEXPR_STATIC_INLINE_VAR macro --- include/nbl/builtin/hlsl/cpp_compat/basic.h | 2 -- include/nbl/builtin/hlsl/emulated/int64_t.hlsl | 16 ++++++++-------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index b51860a399..a5715efa15 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -14,7 +14,6 @@ #define NBL_CONSTEXPR_STATIC constexpr static #define NBL_CONSTEXPR_STATIC_INLINE constexpr static inline #define NBL_CONSTEXPR_INLINE_FUNC constexpr inline -#define NBL_CONSTEXPR_INLINE_VAR constexpr inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE constexpr #define NBL_CONST_MEMBER_FUNC const #define NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR constexpr inline @@ -51,7 +50,6 @@ namespace nbl::hlsl #define NBL_CONSTEXPR_STATIC const static #define NBL_CONSTEXPR_STATIC_INLINE const static #define NBL_CONSTEXPR_INLINE_FUNC inline -#define NBL_CONSTEXPR_INLINE_VAR static const #define NBL_CONSTEXPR_FORCED_INLINE_FUNC inline #define NBL_CONST_MEMBER_FUNC #define NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR const static diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index ba4facad01..0b890fb2b2 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -331,13 +331,13 @@ struct minus) > }; template<> 
-NBL_CONSTEXPR_INLINE_VAR emulated_uint64_t plus::identity = _static_cast(uint64_t(0)); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_uint64_t plus::identity = _static_cast(uint64_t(0)); template<> -NBL_CONSTEXPR_INLINE_VAR emulated_int64_t plus::identity = _static_cast(int64_t(0)); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_int64_t plus::identity = _static_cast(int64_t(0)); template<> -NBL_CONSTEXPR_INLINE_VAR emulated_uint64_t minus::identity = _static_cast(uint64_t(0)); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_uint64_t minus::identity = _static_cast(uint64_t(0)); template<> -NBL_CONSTEXPR_INLINE_VAR emulated_int64_t minus::identity = _static_cast(int64_t(0)); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_int64_t minus::identity = _static_cast(int64_t(0)); // --------------------------------- Compound assignment operators ------------------------------------------ // Specializations of the structs found in functional.hlsl @@ -371,13 +371,13 @@ struct minus_assign }; template<> -NBL_CONSTEXPR_INLINE_VAR emulated_uint64_t plus_assign::identity = plus::identity; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_uint64_t plus_assign::identity = plus::identity; template<> -NBL_CONSTEXPR_INLINE_VAR emulated_int64_t plus_assign::identity = plus::identity; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_int64_t plus_assign::identity = plus::identity; template<> -NBL_CONSTEXPR_INLINE_VAR emulated_uint64_t minus_assign::identity = minus::identity; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_uint64_t minus_assign::identity = minus::identity; template<> -NBL_CONSTEXPR_INLINE_VAR emulated_int64_t minus_assign::identity = minus::identity; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_int64_t minus_assign::identity = minus::identity; // --------------------------------- Unary operators ------------------------------------------ // Specializations of the structs found in functional.hlsl From ca2ac6f5151b35f4570f9232356e40fc85cdaf64 Mon Sep 17 00:00:00 2001 From: kevyuu Date: 
Tue, 2 Dec 2025 12:41:02 +0700 Subject: [PATCH 175/472] Remove Bit count constraint on some of Transcoder method due to redundancy. --- include/nbl/builtin/hlsl/morton.hlsl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 869425b856..e6deaf5be4 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -342,7 +342,7 @@ struct code * * @param [in] cartesian Coordinates to encode */ - template= Bits) + template inline explicit code(NBL_CONST_REF_ARG(vector) cartesian) { *this = create(cartesian); @@ -351,7 +351,7 @@ struct code /** * @brief Decodes this Morton code back to a set of cartesian coordinates */ - template= Bits && is_signed_v == Signed) + template == Signed) constexpr explicit operator vector() const noexcept; #endif @@ -521,8 +521,8 @@ namespace impl { // I must be of same signedness as the morton code, and be wide enough to hold each component -template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) -struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar) +struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar) > { NBL_CONSTEXPR_STATIC vector cast(NBL_CONST_REF_ARG(morton::code, Bits, D, _uint64_t>) val) { @@ -606,7 +606,7 @@ constexpr morton::code morton::code&& D* Bits <= 64) -template = Bits && is_signed_v == Signed) +template == Signed) constexpr morton::code::operator vector() const noexcept { return _static_cast, morton::code>(*this); From 4c9635d5bf6ead8d39b2775a6c12c75930732aa3 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 2 Dec 2025 12:46:48 +0700 Subject: [PATCH 176/472] Use cpp syntax instead of portable macro wherever possible --- include/nbl/builtin/hlsl/functional.hlsl | 10 +++++----- 1 
file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index f0730a12d2..dc718e5928 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -91,7 +91,7 @@ struct reference_wrapper : enable_if_t< #else // CPP -#define ALIAS_STD(NAME,OP) template struct NAME : std::NAME { \ +#define ALIAS_STD(NAME,OP) template struct NAME : std::NAME { \ using type_t = T; #endif @@ -136,7 +136,7 @@ ALIAS_STD(divides,/) #ifndef __HLSL_VERSION -template +template struct bit_not : std::bit_not { using type_t = T; @@ -184,11 +184,11 @@ ALIAS_STD(less_equal, <=) }; // GLM doesn't have operators on vectors #ifndef __HLSL_VERSION -#define NBL_COMPARISON_VECTORIAL_SPECIALIZATION(NAME, OP, GLM_OP) template NBL_PARTIAL_REQ_TOP(concepts::Vectorial)\ -struct NAME ) >\ +#define NBL_COMPARISON_VECTORIAL_SPECIALIZATION(NAME, OP, GLM_OP) template requires (concepts::Vectorial)\ +struct NAME \ {\ using type_t = T;\ - vector::Dimension> operator()(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs)\ + vector::Dimension> operator()(const T& lhs, const T& rhs)\ {\ return glm::GLM_OP (lhs, rhs);\ }\ From fbfde73761bc84b4a78bed5a8b2ad45aff573cad Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 2 Dec 2025 17:04:02 +0700 Subject: [PATCH 177/472] change quaternion struct name to match what it will be --- include/nbl/builtin/hlsl/math/quaternions.hlsl | 4 ++-- include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index aca8d1ff3c..8d50202f4e 100644 --- a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -15,9 +15,9 @@ namespace math { template -struct quaternion_t +struct quaternion { - using this_t = quaternion_t; + using this_t = quaternion; using 
scalar_type = T; using data_type = vector; using vector3_type = vector; diff --git a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl index 0c86b69793..c31e194788 100644 --- a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl @@ -51,7 +51,7 @@ struct SphericalTriangle { const scalar_type cosAngleAlongAC = ((v_ * q - u_ * p) * cos_vertices[0] - v_) / ((v_ * p + u_ * q) * sin_vertices[0]); if (nbl::hlsl::abs(cosAngleAlongAC) < 1.f) - C_s += math::quaternion_t::slerp_delta(tri.vertex0, tri.vertex2 * csc_b, cosAngleAlongAC); + C_s += math::quaternion::slerp_delta(tri.vertex0, tri.vertex2 * csc_b, cosAngleAlongAC); } vector3_type retval = tri.vertex1; @@ -61,7 +61,7 @@ struct SphericalTriangle { const scalar_type cosAngleAlongBC_s = nbl::hlsl::clamp(1.0 + cosBC_s * u.y - u.y, -1.f, 1.f); if (nbl::hlsl::abs(cosAngleAlongBC_s) < 1.f) - retval += math::quaternion_t::slerp_delta(tri.vertex1, C_s * csc_b_s, cosAngleAlongBC_s); + retval += math::quaternion::slerp_delta(tri.vertex1, C_s * csc_b_s, cosAngleAlongBC_s); } return retval; } From 781df861c9ccc2441c08568d79275dfed6de8c47 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 2 Dec 2025 17:13:14 +0700 Subject: [PATCH 178/472] split out new quaternion hlsl stuff over from hlsl path tracer example --- .../nbl/builtin/hlsl/math/quaternions.hlsl | 305 ++++++++++++++++++ src/nbl/builtin/CMakeLists.txt | 1 + 2 files changed, 306 insertions(+) create mode 100644 include/nbl/builtin/hlsl/math/quaternions.hlsl diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl new file mode 100644 index 0000000000..834d41cb54 --- /dev/null +++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -0,0 +1,305 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_BUILTIN_HLSL_MATH_QUATERNIONS_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MATH_QUATERNIONS_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/tgmath.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace math +{ + +template +struct truncated_quaternion +{ + using this_t = truncated_quaternion; + using scalar_type = T; + using data_type = vector; + + static this_t create() + { + this_t q; + q.data = data_type(0.0, 0.0, 0.0); + return q; + } + + data_type data; +}; + +template +struct quaternion +{ + using this_t = quaternion; + using scalar_type = T; + using data_type = vector; + using vector3_type = vector; + using matrix_type = matrix; + + using AsUint = typename unsigned_integer_of_size::type; + + static this_t create() + { + this_t q; + q.data = data_type(0.0, 0.0, 0.0, 1.0); + return q; + } + + static this_t create(scalar_type x, scalar_type y, scalar_type z, scalar_type w) + { + this_t q; + q.data = data_type(x, y, z, w); + return q; + } + + static this_t create(NBL_CONST_REF_ARG(this_t) other) + { + return other; + } + + // angle: Rotation angle expressed in radians. + // axis: Rotation axis, must be normalized. 
+ static this_t create(scalar_type angle, const vector3_type axis) + { + this_t q; + const scalar_type sinTheta = hlsl::sin(angle * 0.5); + const scalar_type cosTheta = hlsl::cos(angle * 0.5); + q.data = data_type(axis * sinTheta, cosTheta); + return q; + } + + + static this_t create(scalar_type pitch, scalar_type yaw, scalar_type roll) + { + const scalar_type rollDiv2 = roll * scalar_type(0.5); + const scalar_type sr = hlsl::sin(rollDiv2); + const scalar_type cr = hlsl::cos(rollDiv2); + + const scalar_type pitchDiv2 = pitch * scalar_type(0.5); + const scalar_type sp = hlsl::sin(pitchDiv2); + const scalar_type cp = hlsl::cos(pitchDiv2); + + const scalar_type yawDiv2 = yaw * scalar_type(0.5); + const scalar_type sy = hlsl::sin(yawDiv2); + const scalar_type cy = hlsl::cos(yawDiv2); + + this_t output; + output.data[0] = cr * sp * cy + sr * cp * sy; // x + output.data[1] = cr * cp * sy - sr * sp * cy; // y + output.data[2] = sr * cp * cy - cr * sp * sy; // z + output.data[3] = cr * cp * cy + sr * sp * sy; // w + + return output; + } + + static this_t create(NBL_CONST_REF_ARG(matrix_type) m) + { + const scalar_type m00 = m[0][0], m11 = m[1][1], m22 = m[2][2]; + const scalar_type neg_m00 = bit_cast(bit_cast(m00)^0x80000000u); + const scalar_type neg_m11 = bit_cast(bit_cast(m11)^0x80000000u); + const scalar_type neg_m22 = bit_cast(bit_cast(m22)^0x80000000u); + const data_type Qx = data_type(m00, m00, neg_m00, neg_m00); + const data_type Qy = data_type(m11, neg_m11, m11, neg_m11); + const data_type Qz = data_type(m22, neg_m22, neg_m22, m22); + + const data_type tmp = hlsl::promote(1.0) + Qx + Qy + Qz; + const data_type invscales = hlsl::promote(0.5) / hlsl::sqrt(tmp); + const data_type scales = tmp * invscales * hlsl::promote(0.5); + + // TODO: speed this up + this_t retval; + if (tmp.x > scalar_type(0.0)) + { + retval.data.x = (m[2][1] - m[1][2]) * invscales.x; + retval.data.y = (m[0][2] - m[2][0]) * invscales.x; + retval.data.z = (m[1][0] - m[0][1]) * invscales.x; + 
retval.data.w = scales.x; + } + else + { + if (tmp.y > scalar_type(0.0)) + { + retval.data.x = scales.y; + retval.data.y = (m[0][1] + m[1][0]) * invscales.y; + retval.data.z = (m[2][0] + m[0][2]) * invscales.y; + retval.data.w = (m[2][1] - m[1][2]) * invscales.y; + } + else if (tmp.z > scalar_type(0.0)) + { + retval.data.x = (m[0][1] + m[1][0]) * invscales.z; + retval.data.y = scales.z; + retval.data.z = (m[0][2] - m[2][0]) * invscales.z; + retval.data.w = (m[1][2] + m[2][1]) * invscales.z; + } + else + { + retval.data.x = (m[0][2] + m[2][0]) * invscales.w; + retval.data.y = (m[1][2] + m[2][1]) * invscales.w; + retval.data.z = scales.w; + retval.data.w = (m[1][0] - m[0][1]) * invscales.w; + } + } + + retval.data = hlsl::normalize(retval.data); + return retval; + } + + static this_t create(NBL_CONST_REF_ARG(truncated_quaternion) first3Components) + { + this_t retval; + retval.data.xyz = first3Components.data; + retval.data.w = hlsl::sqrt(scalar_type(1.0) - hlsl::dot(first3Components.data, first3Components.data)); + return retval; + } + + this_t operator*(scalar_type scalar) + { + this_t output; + output.data = data * scalar; + return output; + } + + this_t operator*(NBL_CONST_REF_ARG(this_t) other) + { + return this_t::create( + data.w * other.data.w - data.x * other.x - data.y * other.data.y - data.z * other.data.z, + data.w * other.data.x + data.x * other.w + data.y * other.data.z - data.z * other.data.y, + data.w * other.data.y - data.x * other.z + data.y * other.data.w + data.z * other.data.x, + data.w * other.data.z + data.x * other.y - data.y * other.data.x + data.z * other.data.w + ); + } + + static this_t lerp(const this_t start, const this_t end, const scalar_type fraction, const scalar_type totalPseudoAngle) + { + const AsUint negationMask = hlsl::bit_cast(totalPseudoAngle) & AsUint(0x80000000u); + const data_type adjEnd = hlsl::bit_cast(hlsl::bit_cast(end.data) ^ negationMask); + + this_t retval; + retval.data = hlsl::mix(start.data, adjEnd, fraction); + 
return retval; + } + + static this_t lerp(const this_t start, const this_t end, const scalar_type fraction) + { + return lerp(start, end, fraction, hlsl::dot(start.data, end.data)); + } + + static scalar_type __adj_interpolant(const scalar_type angle, const scalar_type fraction, const scalar_type interpolantPrecalcTerm2, const scalar_type interpolantPrecalcTerm3) + { + const scalar_type A = scalar_type(1.0904) + angle * (scalar_type(-3.2452) + angle * (scalar_type(3.55645) - angle * scalar_type(1.43519))); + const scalar_type B = scalar_type(0.848013) + angle * (scalar_type(-1.06021) + angle * scalar_type(0.215638)); + const scalar_type k = A * interpolantPrecalcTerm2 + B; + return fraction + interpolantPrecalcTerm3 * k; + } + + static this_t flerp(const this_t start, const this_t end, const scalar_type fraction) + { + const scalar_type pseudoAngle = hlsl::dot(start.data,end.data); + const scalar_type interpolantPrecalcTerm = fraction - scalar_type(0.5); + const scalar_type interpolantPrecalcTerm3 = fraction * interpolantPrecalcTerm * (fraction - scalar_type(1.0)); + const scalar_type adjFrac = __adj_interpolant(hlsl::abs(pseudoAngle),fraction,interpolantPrecalcTerm*interpolantPrecalcTerm,interpolantPrecalcTerm3); + + this_t retval = lerp(start,end,adjFrac,pseudoAngle); + retval.data = hlsl::normalize(retval.data); + return retval; + } + + vector3_type transformVector(const vector3_type v) + { + scalar_type scale = hlsl::length(data); + vector3_type direction = data.xyz; + return v * scale + hlsl::cross(direction, v * data.w + hlsl::cross(direction, v)) * scalar_type(2.0); + } + + matrix_type constructMatrix() + { + matrix_type mat; + mat[0] = data.yzx * data.ywz + data.zxy * data.zyw * vector3_type( 1.0, 1.0,-1.0); + mat[1] = data.yzx * data.xzw + data.zxy * data.wxz * vector3_type(-1.0, 1.0, 1.0); + mat[2] = data.yzx * data.wyx + data.zxy * data.xwy * vector3_type( 1.0,-1.0, 1.0); + mat[0][0] = scalar_type(0.5) - mat[0][0]; + mat[1][1] = scalar_type(0.5) - 
mat[1][1]; + mat[2][2] = scalar_type(0.5) - mat[2][2]; + mat *= scalar_type(2.0); + return hlsl::transpose(mat); // TODO: double check transpose? + } + + static vector3_type slerp_delta(const vector3_type start, const vector3_type preScaledWaypoint, scalar_type cosAngleFromStart) + { + vector3_type planeNormal = hlsl::cross(start,preScaledWaypoint); + + cosAngleFromStart *= scalar_type(0.5); + const scalar_type sinAngle = hlsl::sqrt(scalar_type(0.5) - cosAngleFromStart); + const scalar_type cosAngle = hlsl::sqrt(scalar_type(0.5) + cosAngleFromStart); + + planeNormal *= sinAngle; + const vector3_type precompPart = hlsl::cross(planeNormal, start) * scalar_type(2.0); + + return precompPart * cosAngle + hlsl::cross(planeNormal, precompPart); + } + + this_t inverse() + { + this_t retval; + retval.data.x = bit_cast(bit_cast(data.x)^0x80000000u); + retval.data.y = bit_cast(bit_cast(data.y)^0x80000000u); + retval.data.z = bit_cast(bit_cast(data.z)^0x80000000u); + retval.data.w = data.w; + return retval; + } + + static this_t normalize(NBL_CONST_REF_ARG(this_t) q) + { + this_t retval; + retval.data = hlsl::normalize(q.data); + return retval; + } + + data_type data; +}; + +} + +namespace impl +{ + +template +struct static_cast_helper, math::truncated_quaternion > +{ + static inline math::quaternion cast(math::truncated_quaternion q) + { + return math::quaternion::create(q); + } +}; + +template +struct static_cast_helper, math::quaternion > +{ + static inline math::truncated_quaternion cast(math::quaternion q) + { + math::truncated_quaternion t; + t.data.x = t.data.x; + t.data.y = t.data.y; + t.data.z = t.data.z; + return t; + } +}; + +template +struct static_cast_helper, math::quaternion > +{ + static inline matrix cast(math::quaternion q) + { + return q.constructMatrix(); + } +}; +} + +} +} + +#endif diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index e8798499f9..37c5d2e43e 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ 
b/src/nbl/builtin/CMakeLists.txt @@ -225,6 +225,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/geometry.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/intutil.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/polar.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/angle_adding.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/quaternions.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quadratic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/cubic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quartic.hlsl") From 23292bd47b9ef9b5d1d4bae7f97be0fa19b68b2b Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 3 Dec 2025 13:58:02 +0700 Subject: [PATCH 179/472] Fix morton code constraint --- include/nbl/builtin/hlsl/morton.hlsl | 30 +++++++++++++++++----------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index e6deaf5be4..41461a0841 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -108,18 +108,21 @@ NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS template && Dim * Bits <= 64 && 8 * sizeof(encode_t) == mpl::max_v, uint64_t(16)>) struct Transcoder { - template 16), vector, vector > - NBL_FUNC_REQUIRES(concepts::IntVector && 8 * sizeof(typename vector_traits::scalar_type) >= Bits) + using decode_t = conditional_t < (Bits > 16), vector, vector >; + + template ) /** * @brief Interleaves each coordinate with `Dim - 1` zeros inbetween each bit, and left-shifts each by their coordinate index * * @param [in] decodedValue Cartesian coordinates to interleave and shift */ - NBL_CONSTEXPR_STATIC portable_vector_t interleaveShift(NBL_CONST_REF_ARG(decode_t) decodedValue) + NBL_CONSTEXPR_STATIC portable_vector_t interleaveShift(NBL_CONST_REF_ARG(T) decodedValue) { left_shift_operator > leftShift; 
portable_vector_t interleaved = _static_cast >(decodedValue) & coding_mask_v; + // Read this to understand how interleaving and spreading bits works https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/ #define ENCODE_LOOP_ITERATION(I) NBL_IF_CONSTEXPR(Bits > (uint16_t(1) << I))\ {\ interleaved = interleaved | leftShift(interleaved, (uint16_t(1) << I) * (Dim - 1));\ @@ -137,15 +140,15 @@ struct Transcoder return leftShift(interleaved, truncate >(vector(0, 1, 2, 3))); } - template 16), vector, vector > > + template /** * @brief Encodes a vector of cartesian coordinates as a Morton code * * @param [in] decodedValue Cartesian coordinates to encode */ - NBL_CONSTEXPR_STATIC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) + NBL_CONSTEXPR_STATIC encode_t encode(NBL_CONST_REF_ARG(T) decodedValue) { - const portable_vector_t interleaveShifted = interleaveShift(decodedValue); + const portable_vector_t interleaveShifted = interleaveShift(decodedValue); array_get, encode_t> getter; encode_t encoded = getter(interleaveShifted, 0); @@ -157,8 +160,6 @@ struct Transcoder return encoded; } - template 16), vector, vector > - NBL_FUNC_REQUIRES(concepts::IntVector && 8 * sizeof(typename vector_traits::scalar_type) >= Bits) /** * @brief Decodes a Morton code back to a vector of cartesian coordinates * @@ -231,7 +232,8 @@ struct Equal NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { using right_sign_t = conditional_t, make_unsigned_t >; - const portable_vector_t interleaved = _static_cast >(Transcoder::interleaveShift(rhs)); + using transcoder_t = Transcoder; + const portable_vector_t interleaved = _static_cast >(transcoder_t::interleaveShift(_static_cast(rhs))); return Equal::template __call(value, interleaved); } }; @@ -281,7 +283,8 @@ struct BaseComparison NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { using right_sign_t = conditional_t, make_unsigned_t 
>; - const portable_vector_t interleaved = _static_cast >(Transcoder::interleaveShift(rhs)); + using transcoder_t = Transcoder; + const portable_vector_t interleaved = _static_cast >(transcoder_t::interleaveShift(_static_cast(rhs))); return BaseComparison::template __call(value, interleaved); } }; @@ -309,6 +312,8 @@ struct code using this_signed_t = code; NBL_CONSTEXPR_STATIC uint16_t TotalBitWidth = D * Bits; using storage_t = conditional_t<(TotalBitWidth > 16), conditional_t<(TotalBitWidth > 32), _uint64_t, uint32_t>, uint16_t>; + + using transcoder_t = impl::Transcoder; storage_t value; @@ -326,11 +331,12 @@ struct code * @param [in] cartesian Coordinates to encode. Signedness MUST match the signedness of this Morton code class */ template - NBL_CONSTEXPR_STATIC enable_if_t && is_scalar_v && (is_signed_v == Signed), this_t> + NBL_CONSTEXPR_STATIC enable_if_t && is_scalar_v && (is_signed_v == Signed && sizeof(I) == sizeof(vector_traits::scalar_type)), this_t> create(NBL_CONST_REF_ARG(vector) cartesian) { this_t retVal; - retVal.value = impl::Transcoder::encode(cartesian); + using decode_t = typename transcoder_t::decode_t; + retVal.value = transcoder_t::encode(_static_cast(cartesian)); return retVal; } From 83a83a489cf338d95653653371579e76cb70c6e9 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 3 Dec 2025 10:53:07 +0100 Subject: [PATCH 180/472] update ProfileProperties to tightly pack version, type and symmetry, add sample function overload to sampler.hlsl, create include/nbl/builtin/hlsl/ies/texture.hlsl to share texel write method between C++ & HLSL (the correct one including blending and corner sampling) TODO: need to update IES viewer example to compile again and clean bindings a bit (keep only octahedral image to write and sample from + add new ies::Texture::SInfo) --- include/nbl/builtin/hlsl/ies/profile.hlsl | 64 +++++++++++++--- include/nbl/builtin/hlsl/ies/sampler.hlsl | 14 +++- include/nbl/builtin/hlsl/ies/texture.hlsl | 90 
+++++++++++++++++++++++ src/nbl/asset/utils/CIESProfile.cpp | 35 +-------- src/nbl/asset/utils/CIESProfile.h | 28 ++----- src/nbl/asset/utils/CIESProfileParser.cpp | 29 +++++--- 6 files changed, 186 insertions(+), 74 deletions(-) create mode 100644 include/nbl/builtin/hlsl/ies/texture.hlsl diff --git a/include/nbl/builtin/hlsl/ies/profile.hlsl b/include/nbl/builtin/hlsl/ies/profile.hlsl index 88e212f069..35210fedda 100644 --- a/include/nbl/builtin/hlsl/ies/profile.hlsl +++ b/include/nbl/builtin/hlsl/ies/profile.hlsl @@ -26,14 +26,25 @@ struct ProfileProperties NBL_CONSTEXPR_STATIC_INLINE float32_t MAX_VANGLE = 180.f; NBL_CONSTEXPR_STATIC_INLINE float32_t MAX_HANGLE = 360.f; - enum Version : uint16_t + // TODO: could change to uint8_t once we get implemented + // https://github.com/microsoft/hlsl-specs/pull/538 + using packed_flags_t = uint16_t; + + NBL_CONSTEXPR_STATIC_INLINE packed_flags_t VERSION_BITS = 2u; + NBL_CONSTEXPR_STATIC_INLINE packed_flags_t TYPE_BITS = 2u; + NBL_CONSTEXPR_STATIC_INLINE packed_flags_t SYMM_BITS = 3u; + NBL_CONSTEXPR_STATIC_INLINE packed_flags_t VERSION_MASK = (packed_flags_t(1u) << VERSION_BITS) - packed_flags_t(1u); + NBL_CONSTEXPR_STATIC_INLINE packed_flags_t TYPE_MASK = (packed_flags_t(1u) << TYPE_BITS) - packed_flags_t(1u); + NBL_CONSTEXPR_STATIC_INLINE packed_flags_t SYMM_MASK = (packed_flags_t(1u) << SYMM_BITS) - packed_flags_t(1u); + + enum Version : packed_flags_t { V_1995, V_2002, V_SIZE }; - enum PhotometricType : uint16_t + enum PhotometricType : packed_flags_t { TYPE_NONE, TYPE_C, @@ -41,7 +52,7 @@ struct ProfileProperties TYPE_A }; - enum LuminairePlanesSymmetry : uint16_t + enum LuminairePlanesSymmetry : packed_flags_t { ISOTROPIC, //! Only one horizontal angle present and a luminaire is assumed to be laterally axial symmetric QUAD_SYMETRIC, //! The luminaire is assumed to be symmetric in each quadrant @@ -50,13 +61,48 @@ struct ProfileProperties NO_LATERAL_SYMMET //! 
The luminaire is assumed to exhibit no lateral symmet }; - PhotometricType type; - Version version; - LuminairePlanesSymmetry symmetry; + Version getVersion() const + { + return static_cast( packed & VERSION_MASK ); + } + + PhotometricType getType() const + { + const packed_flags_t shift = VERSION_BITS; + return static_cast( (packed >> shift) & TYPE_MASK ); + } + + LuminairePlanesSymmetry getSymmetry() const + { + const packed_flags_t shift = VERSION_BITS + TYPE_BITS; + return static_cast( (packed >> shift) & SYMM_MASK ); + } + + void setVersion(Version v) + { + packed_flags_t vBits = static_cast(v) & VERSION_MASK; + packed = (packed & ~VERSION_MASK) | vBits; + } + + void setType(PhotometricType t) + { + const packed_flags_t shift = VERSION_BITS; + packed_flags_t tBits = (static_cast(t) & TYPE_MASK) << shift; + packed = (packed & ~(TYPE_MASK << shift)) | tBits; + } + + void setSymmetry(LuminairePlanesSymmetry s) + { + const packed_flags_t shift = VERSION_BITS + TYPE_BITS; + packed_flags_t sBits = (static_cast(s) & SYMM_MASK) << shift; + packed = (packed & ~(SYMM_MASK << shift)) | sBits; + } - float32_t maxCandelaValue; //! Max scalar value from candela data vector - float32_t totalEmissionIntegral; //! Total energy emitted - float32_t avgEmmision; //! totalEmissionIntegral / + float32_t maxCandelaValue; //! Max candela sample value + float32_t totalEmissionIntegral; //! Total emitted intensity (integral over full angular domain) + float32_t fullDomainAvgEmission; //! Mean intensity over full angular domain (including I == 0) + float32_t avgEmmision; //! Mean intensity over emitting solid angle (I > 0) + packed_flags_t packed = 0u; //! 
Packed version, type and symmetry flags }; } diff --git a/include/nbl/builtin/hlsl/ies/sampler.hlsl b/include/nbl/builtin/hlsl/ies/sampler.hlsl index 3f518ff21a..41f273e82c 100644 --- a/include/nbl/builtin/hlsl/ies/sampler.hlsl +++ b/include/nbl/builtin/hlsl/ies/sampler.hlsl @@ -7,6 +7,7 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" #include "nbl/builtin/hlsl/math/polar.hlsl" +#include "nbl/builtin/hlsl/math/octahedral.hlsl" #include "nbl/builtin/hlsl/concepts.hlsl" #include "nbl/builtin/hlsl/ies/profile.hlsl" @@ -37,7 +38,7 @@ NBL_CONCEPT_END( ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.vAnglesCount()), is_same_v, req_key_t)) ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.hAnglesCount()), is_same_v, req_key_t)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.symmetry()), is_same_v, ProfileProperties::LuminairePlanesSymmetry)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.getProperties()), is_same_v, ProfileProperties)) ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.template vAngle((req_key_t)0)), is_same_v, req_value_t)) ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.template hAngle((req_key_t)0)), is_same_v, req_value_t)) ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.template value((req_key_t2)0)), is_same_v, req_value_t)) @@ -58,11 +59,13 @@ struct CandelaSampler using accessor_t = Accessor; using value_t = typename accessor_t::value_t; using symmetry_t = ProfileProperties::LuminairePlanesSymmetry; + using polar_t = math::Polar; + using octahedral_t = math::OctahedralTransform; static value_t sample(NBL_CONST_REF_ARG(accessor_t) accessor, NBL_CONST_REF_ARG(math::Polar) polar) { // TODO: DXC seems to have a bug and cannot use symmetry_t directly with == operator https://godbolt.devsh.eu/z/P9Kc5x - const ProfileProperties::LuminairePlanesSymmetry symmetry = accessor.symmetry(); + const ProfileProperties::LuminairePlanesSymmetry symmetry = accessor.getProperties().getSymmetry(); const float32_t vAngle = degrees(polar.theta); const float32_t hAngle = degrees(wrapPhi(polar.phi, 
symmetry)); @@ -87,6 +90,13 @@ struct CandelaSampler return s0 * (1.f - u) + s1 * u; } + static value_t sample(NBL_CONST_REF_ARG(accessor_t) accessor, NBL_CONST_REF_ARG(float32_t2) uv) + { + const float32_t3 dir = octahedral_t::uvToDir(uv); + const polar_t polar = polar_t::createFromCartesian(dir); + return sample(accessor, polar); + } + static float32_t wrapPhi(const float32_t phi, const symmetry_t symmetry) { switch (symmetry) diff --git a/include/nbl/builtin/hlsl/ies/texture.hlsl b/include/nbl/builtin/hlsl/ies/texture.hlsl new file mode 100644 index 0000000000..4372bb5544 --- /dev/null +++ b/include/nbl/builtin/hlsl/ies/texture.hlsl @@ -0,0 +1,90 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_HLSL_IES_TEXTURE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_IES_TEXTURE_INCLUDED_ + +#include "nbl/builtin/hlsl/ies/sampler.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace ies +{ + +template) +struct Texture +{ + using accessor_t = Accessor; + using value_t = typename accessor_t::value_t; + using sampler_t = CandelaSampler; + using polar_t = math::Polar; + using octahedral_t = math::OctahedralTransform; + + struct SInfo + { + float32_t2 inv; + float32_t flatten; + float32_t maxValueRecip; + float32_t flattenTarget; + float32_t domainLo; + float32_t domainHi; + bool fullDomainFlatten; + }; + + static SInfo createInfo(NBL_CONST_REF_ARG(accessor_t) accessor, NBL_CONST_REF_ARG(uint32_t2) size, float32_t flatten, bool fullDomainFlatten) + { + SInfo retval; + const ProfileProperties props = accessor.getProperties(); + + // There is one huge issue, the IES files love to give us values for degrees 0, 90, 180 an 360 + // So standard octahedral mapping won't work, because for above data points you need corner sampled images. 
+ + retval.inv = float32_t2(1.f, 1.f) / float32_t2(size - 1u); + retval.flatten = flatten; + retval.maxValueRecip = 1.0f / props.maxCandelaValue; // Late Optimization TODO: Modify the Max Value for the UNORM texture to be the Max Value after flatten blending + retval.domainLo = radians(accessor.vAngle(0u)); + retval.domainHi = radians(accessor.vAngle(accessor.vAnglesCount() - 1u)); + retval.fullDomainFlatten = fullDomainFlatten; + + if(fullDomainFlatten) + retval.flattenTarget = props.fullDomainAvgEmission; + else + retval.flattenTarget = props.avgEmmision; + + return retval; + } + + static float32_t eval(NBL_CONST_REF_ARG(accessor_t) accessor, NBL_CONST_REF_ARG(SInfo) info, NBL_CONST_REF_ARG(uint32_t2) position) + { + // We don't currently support generating IES images that exploit symmetries or reduced domains, all are full octahederal mappings of a sphere. + // If we did, we'd rely on MIRROR and CLAMP samplers to do some of the work for us while handling the discontinuity due to corner sampling. + + const float32_t2 uv = float32_t2(position) * info.inv; + const float32_t3 dir = octahedral_t::uvToDir(uv); + const polar_t polar = polar_t::createFromCartesian(dir); + + sampler_t sampler; + const float32_t intensity = sampler.sample(accessor, polar); + + //! 
blend the IES texture with "flatten" + float32_t blendV = intensity * (1.f - info.flatten); + + const bool inDomain = (info.domainLo <= polar.theta) && (polar.theta <= info.domainHi); + + if ((info.fullDomainFlatten && inDomain) || intensity > 0.0f) + blendV += info.flattenTarget * info.flatten; + + blendV *= info.maxValueRecip; + + return blendV; + } +}; + +} +} +} + +#endif // _NBL_BUILTIN_HLSL_IES_TEXTURE_INCLUDED_ diff --git a/src/nbl/asset/utils/CIESProfile.cpp b/src/nbl/asset/utils/CIESProfile.cpp index dc66c9693a..b595f5a415 100644 --- a/src/nbl/asset/utils/CIESProfile.cpp +++ b/src/nbl/asset/utils/CIESProfile.cpp @@ -2,8 +2,6 @@ #include #include "nbl/asset/filters/CBasicImageFilterCommon.h" -#include "nbl/builtin/hlsl/math/octahedral.hlsl" -#include "nbl/builtin/hlsl/math/polar.hlsl" using namespace nbl; using namespace asset; @@ -70,42 +68,15 @@ core::smart_refctd_ptr CIESProfile::createIESTexture(Execu state.outRange.extent = creationParams.extent; const IImageFilter::IState::ColorValue::WriteMemoryInfo wInfo(creationParams.format, outImg->getBuffer()->getPointer()); + const auto tInfo = texture_t::createInfo(accessor, hlsl::uint32_t2(width, height), flatten, fullDomainFlatten); - // Late Optimization TODO: Modify the Max Value for the UNORM texture to be the Max Value after flatten blending - const auto maxValue = accessor.properties.maxCandelaValue; - const auto maxValueRecip = 1.f / maxValue; - - // There is one huge issue, the IES files love to give us values for degrees 0, 90, 180 an 360 - // So standard octahedral mapping won't work, because for above data points you need corner sampled images. 
- const float vertInv = 1.0 / (height-1); - const float horiInv = 1.0 / (width-1); - - const double flattenTarget = getAvgEmmision(fullDomainFlatten); - const double domainLo = core::radians(accessor.vAngles.front()); - const double domainHi = core::radians(accessor.vAngles.back()); auto fill = [&](uint32_t blockArrayOffset, core::vectorSIMDu32 position) -> void { - // We don't currently support generating IES images that exploit symmetries or reduced domains, all are full octahederal mappings of a sphere. - // If we did, we'd rely on MIRROR and CLAMP samplers to do some of the work for us while handling the discontinuity due to corner sampling. - - using Octahedral = hlsl::math::OctahedralTransform; - using Polar = hlsl::math::Polar; - const auto uv = Octahedral::vector2_type(position.x * vertInv, position.y * horiInv); - const auto dir = Octahedral::uvToDir(uv); - const auto polar = Polar::createFromCartesian(dir); - const auto intensity = sampler_t::sample(accessor, polar); - - //! blend the IES texture with "flatten" - float blendV = intensity * (1.f - flatten); - if (fullDomainFlatten && domainLo<= polar.theta && polar.theta<=domainHi || intensity >0.0) - blendV += flattenTarget * flatten; - - blendV *= maxValueRecip; + auto texel = texture_t::eval(accessor, tInfo, hlsl::uint32_t2(position.x, position.y)); asset::IImageFilter::IState::ColorValue color; - //asset::encodePixels(color.asDouble, &blendV); TODO: FIX THIS ENCODE, GIVES ARTIFACTS constexpr float UI16_MAX_D = static_cast(std::numeric_limits::max()); - const uint16_t encodeV = static_cast(std::clamp(blendV * UI16_MAX_D + 0.5f, 0.f, UI16_MAX_D)); + const uint16_t encodeV = static_cast(std::clamp(texel * UI16_MAX_D + 0.5f, 0.f, UI16_MAX_D)); // TODO: use asset::encodePixels when its fixed (no artifacts) *color.asUShort = encodeV; color.writeMemory(wInfo, blockArrayOffset); }; diff --git a/src/nbl/asset/utils/CIESProfile.h b/src/nbl/asset/utils/CIESProfile.h index c09f2fd760..2a063e7b15 100644 --- 
a/src/nbl/asset/utils/CIESProfile.h +++ b/src/nbl/asset/utils/CIESProfile.h @@ -6,7 +6,7 @@ #define __NBL_ASSET_C_IES_PROFILE_H_INCLUDED__ #include "nbl/asset/metadata/CIESProfileMetadata.h" -#include "nbl/builtin/hlsl/ies/sampler.hlsl" +#include "nbl/builtin/hlsl/ies/texture.hlsl" namespace nbl { @@ -15,10 +15,14 @@ namespace asset class CIESProfile { public: + CIESProfile() = default; + ~CIESProfile() = default; + struct properties_t : public nbl::hlsl::ies::ProfileProperties { + using base_t = nbl::hlsl::ies::ProfileProperties; NBL_CONSTEXPR_STATIC_INLINE auto IES_TEXTURE_STORAGE_FORMAT = asset::EF_R16_UNORM; - hlsl::uint32_t2 optimalIESResolution; //! Optimal resolution for IES CDC texture + hlsl::uint32_t2 optimalIESResolution; //! Optimal resolution for IES Octahedral Candela Map texture }; struct accessor_t @@ -45,33 +49,17 @@ class CIESProfile inline key_t vAnglesCount() const { return (key_t)vAngles.size(); } inline key_t hAnglesCount() const { return (key_t)hAngles.size(); } - inline properties_t::LuminairePlanesSymmetry symmetry() const { return properties.symmetry; } + inline const properties_t::base_t& getProperties() const { return static_cast(properties); } core::vector hAngles; //! The angular displacement indegreesfrom straight down, a value represents spherical coordinate "theta" with physics convention. Note that if symmetry is OTHER_HALF_SYMMETRIC then real horizontal angle provided by IES data is (hAngles[index] + 90) - the reason behind it is we patch 1995 IES OTHER_HALF_SYMETRIC case to be HALF_SYMETRIC core::vector vAngles; //! Measurements in degrees of angular displacement measured counterclockwise in a horizontal plane for Type C photometry and clockwise for Type A and B photometry, a value represents spherical coordinate "phi" with physics convention core::vector data; //! Candela scalar values properties_t properties; //! 
Profile properties }; - - using sampler_t = nbl::hlsl::ies::CandelaSampler; - - CIESProfile() = default; - ~CIESProfile() = default; + using texture_t = nbl::hlsl::ies::Texture; inline const accessor_t& getAccessor() const { return accessor; } - inline hlsl::float32_t getAvgEmmision(const bool fullDomain=false) const - { - if (fullDomain) - { - const float cosLo = std::cos(core::radians(accessor.vAngles.front())); - const float cosHi = std::cos(core::radians(accessor.vAngles.back())); - const float dsinTheta = cosLo - cosHi; - return accessor.properties.totalEmissionIntegral*(0.5/core::PI())/dsinTheta; - } - return accessor.properties.avgEmmision; - } - template core::smart_refctd_ptr createIESTexture(ExecutionPolicy&& policy, const float flatten = 0.0, const bool fullDomainFlatten=false, uint32_t width = properties_t::CDC_DEFAULT_TEXTURE_WIDTH, uint32_t height = properties_t::CDC_DEFAULT_TEXTURE_HEIGHT) const; core::smart_refctd_ptr createIESTexture(const float flatten = 0.0, const bool fullDomainFlatten=false, uint32_t width = properties_t::CDC_DEFAULT_TEXTURE_WIDTH, uint32_t height = properties_t::CDC_DEFAULT_TEXTURE_HEIGHT) const; diff --git a/src/nbl/asset/utils/CIESProfileParser.cpp b/src/nbl/asset/utils/CIESProfileParser.cpp index 96285b6a6d..4a5bc89969 100644 --- a/src/nbl/asset/utils/CIESProfileParser.cpp +++ b/src/nbl/asset/utils/CIESProfileParser.cpp @@ -101,8 +101,8 @@ bool CIESProfileParser::parse(CIESProfile& result) { CIESProfile::properties_t init; - init.type = type; - init.version = iesVersion; + init.setType(type); + init.setVersion(iesVersion); init.maxCandelaValue = 0.f; init.totalEmissionIntegral = 0.f; init.avgEmmision = 0.f; @@ -142,24 +142,24 @@ bool CIESProfileParser::parse(CIESProfile& result) const auto lastHAngle = hAngles.back(); if (lastHAngle == 0.f) - result.accessor.properties.symmetry = CIESProfile::properties_t::ISOTROPIC; + result.accessor.properties.setSymmetry(CIESProfile::properties_t::ISOTROPIC); else if (lastHAngle == 90.f) 
{ - result.accessor.properties.symmetry = CIESProfile::properties_t::QUAD_SYMETRIC; + result.accessor.properties.setSymmetry(CIESProfile::properties_t::QUAD_SYMETRIC); fluxMultiplier = 4.f; } else if (lastHAngle == 180.f) { - result.accessor.properties.symmetry = CIESProfile::properties_t::HALF_SYMETRIC; + result.accessor.properties.setSymmetry(CIESProfile::properties_t::HALF_SYMETRIC); fluxMultiplier = 2.0; } else if (lastHAngle == 360.f) - result.accessor.properties.symmetry = CIESProfile::properties_t::NO_LATERAL_SYMMET; + result.accessor.properties.setSymmetry(CIESProfile::properties_t::NO_LATERAL_SYMMET); else { - if (firstHAngle == 90.f && lastHAngle == 270.f && result.accessor.properties.version == CIESProfile::properties_t::V_1995) + if (firstHAngle == 90.f && lastHAngle == 270.f && iesVersion == CIESProfile::properties_t::V_1995) { - result.accessor.properties.symmetry = CIESProfile::properties_t::OTHER_HALF_SYMMETRIC; + result.accessor.properties.setSymmetry(CIESProfile::properties_t::OTHER_HALF_SYMMETRIC); fluxMultiplier = 2.f; for (auto& angle : hAngles) @@ -169,6 +169,7 @@ bool CIESProfileParser::parse(CIESProfile& result) return false; } } + const auto symmetry = result.accessor.properties.getSymmetry(); { const double factor = ballastFactor * candelaMultiplier; @@ -181,7 +182,7 @@ bool CIESProfileParser::parse(CIESProfile& result) constexpr auto FULL_SOLID_ANGLE = 4.0f * core::PI(); // TODO: this code could have two separate inner for loops for `result.symmetry != CIESProfile::ISOTROPIC` cases - const auto H_ANGLES_I_RANGE = result.accessor.properties.symmetry != CIESProfile::properties_t::ISOTROPIC ? result.accessor.hAngles.size() - 1 : 1; + const auto H_ANGLES_I_RANGE = symmetry != CIESProfile::properties_t::ISOTROPIC ? 
result.accessor.hAngles.size() - 1 : 1; const auto V_ANGLES_I_RANGE = result.accessor.vAngles.size() - 1; float smallestRangeSolidAngle = FULL_SOLID_ANGLE; @@ -196,7 +197,7 @@ bool CIESProfileParser::parse(CIESProfile& result) float nonZeroStripDomain = 0.f; for (size_t i = 0; i < H_ANGLES_I_RANGE; i++) { - const float dPhiRad = result.accessor.properties.symmetry != CIESProfile::properties_t::ISOTROPIC ? core::radians(hAngles[i + 1] - hAngles[i]) : (core::PI() * 2.0f); + const float dPhiRad = symmetry != CIESProfile::properties_t::ISOTROPIC ? core::radians(hAngles[i + 1] - hAngles[i]) : (core::PI() * 2.0f); // TODO: in reality one should transform the 4 vertices (or 3) into octahedral map, work out the dUV/dPhi and dUV/dTheta vectors as-if for Anisotropic Filtering // then choose the minor axis length, and use that as a pixel size (since looking for smallest thing, dont have to worry about handling discont) const float solidAngle = dsinTheta * dPhiRad; @@ -206,7 +207,7 @@ bool CIESProfileParser::parse(CIESProfile& result) const auto candelaValue = result.accessor.value(hlsl::uint32_t2(i, j)); // interpolate candela value spanned onto a solid angle - const auto candelaAverage = result.accessor.properties.symmetry != CIESProfile::properties_t::ISOTROPIC ? + const auto candelaAverage = symmetry != CIESProfile::properties_t::ISOTROPIC ? 
0.25f * (candelaValue + result.accessor.value(hlsl::uint32_t2(i + 1, j)) + result.accessor.value(hlsl::uint32_t2(i, j + 1)) + result.accessor.value(hlsl::uint32_t2(i + 1, j + 1))) : 0.5f * (candelaValue + result.accessor.value(hlsl::uint32_t2(i, j + 1))); @@ -235,6 +236,12 @@ bool CIESProfileParser::parse(CIESProfile& result) result.accessor.properties.avgEmmision = totalEmissionIntegral / static_cast(nonZeroEmissionDomainSize); result.accessor.properties.totalEmissionIntegral = totalEmissionIntegral * fluxMultiplier; // we use fluxMultiplier to calculate final total emission for case where we have some symmetry between planes (fluxMultiplier is 1.0f if ISOTROPIC or NO_LATERAL_SYMMET because they already have correct total emission integral calculated), also note it doesn't affect average emission at all + { + const float cosLo = std::cos(core::radians(result.accessor.vAngles.front())); + const float cosHi = std::cos(core::radians(result.accessor.vAngles.back())); + const float dsinTheta = cosLo - cosHi; + result.accessor.properties.fullDomainAvgEmission = result.accessor.properties.totalEmissionIntegral*(0.5f/core::PI())/dsinTheta; + } return !error; } \ No newline at end of file From b855e1c0761cdd4992f2a163e72aeb3aaddc8327 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 3 Dec 2025 15:03:59 +0100 Subject: [PATCH 181/472] NBL_HLSL_DEFINE_STRUCT for IESTextureInfo, update ies/profile.hlsl casts & use NBL_CONST_MEMBER_FUNC, update examples_tests submodule --- examples_tests | 2 +- include/nbl/builtin/hlsl/ies/profile.hlsl | 20 +++++++------- include/nbl/builtin/hlsl/ies/texture.hlsl | 33 ++++++++++++++--------- 3 files changed, 31 insertions(+), 24 deletions(-) diff --git a/examples_tests b/examples_tests index 9c83531c63..92784f38d7 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 9c83531c63490bf743dee9bddcfbd5d729e1c916 +Subproject commit 92784f38d708b3577cfdff39341cd519052cfe9d diff --git 
a/include/nbl/builtin/hlsl/ies/profile.hlsl b/include/nbl/builtin/hlsl/ies/profile.hlsl index 35210fedda..50c370947e 100644 --- a/include/nbl/builtin/hlsl/ies/profile.hlsl +++ b/include/nbl/builtin/hlsl/ies/profile.hlsl @@ -61,40 +61,40 @@ struct ProfileProperties NO_LATERAL_SYMMET //! The luminaire is assumed to exhibit no lateral symmet }; - Version getVersion() const + Version getVersion() NBL_CONST_MEMBER_FUNC { - return static_cast( packed & VERSION_MASK ); + return (Version)( packed & VERSION_MASK ); } - PhotometricType getType() const + PhotometricType getType() NBL_CONST_MEMBER_FUNC { const packed_flags_t shift = VERSION_BITS; - return static_cast( (packed >> shift) & TYPE_MASK ); + return (PhotometricType)((packed >> shift) & TYPE_MASK); } - LuminairePlanesSymmetry getSymmetry() const + LuminairePlanesSymmetry getSymmetry() NBL_CONST_MEMBER_FUNC { const packed_flags_t shift = VERSION_BITS + TYPE_BITS; - return static_cast( (packed >> shift) & SYMM_MASK ); + return (LuminairePlanesSymmetry)((packed >> shift) & SYMM_MASK); } void setVersion(Version v) { - packed_flags_t vBits = static_cast(v) & VERSION_MASK; + packed_flags_t vBits = (packed_flags_t)(v) & VERSION_MASK; packed = (packed & ~VERSION_MASK) | vBits; } void setType(PhotometricType t) { const packed_flags_t shift = VERSION_BITS; - packed_flags_t tBits = (static_cast(t) & TYPE_MASK) << shift; + packed_flags_t tBits = ((packed_flags_t)(t) & TYPE_MASK) << shift; packed = (packed & ~(TYPE_MASK << shift)) | tBits; } void setSymmetry(LuminairePlanesSymmetry s) { const packed_flags_t shift = VERSION_BITS + TYPE_BITS; - packed_flags_t sBits = (static_cast(s) & SYMM_MASK) << shift; + packed_flags_t sBits = ((packed_flags_t)(s) & SYMM_MASK) << shift; packed = (packed & ~(SYMM_MASK << shift)) | sBits; } @@ -102,7 +102,7 @@ struct ProfileProperties float32_t totalEmissionIntegral; //! Total emitted intensity (integral over full angular domain) float32_t fullDomainAvgEmission; //! 
Mean intensity over full angular domain (including I == 0) float32_t avgEmmision; //! Mean intensity over emitting solid angle (I > 0) - packed_flags_t packed = 0u; //! Packed version, type and symmetry flags + packed_flags_t packed; //! Packed version, type and symmetry flags }; } diff --git a/include/nbl/builtin/hlsl/ies/texture.hlsl b/include/nbl/builtin/hlsl/ies/texture.hlsl index 4372bb5544..7f02290506 100644 --- a/include/nbl/builtin/hlsl/ies/texture.hlsl +++ b/include/nbl/builtin/hlsl/ies/texture.hlsl @@ -6,11 +6,28 @@ #define _NBL_BUILTIN_HLSL_IES_TEXTURE_INCLUDED_ #include "nbl/builtin/hlsl/ies/sampler.hlsl" +#include "nbl/builtin/hlsl/bda/struct_declare.hlsl" namespace nbl { namespace hlsl { + +// TODO(?): should be in nbl::hlsl::ies (or in the Texutre struct) but I get +// error GA3909C62: class template specialization of 'member_count' not in a namespace enclosing 'bda' +// which I don't want to deal with rn to not (eventually) break stuff + +struct IESTextureInfo; +NBL_HLSL_DEFINE_STRUCT((IESTextureInfo), + ((inv, float32_t2)) + ((flatten, float32_t)) + ((maxValueRecip, float32_t)) + ((flattenTarget, float32_t)) + ((domainLo, float32_t)) + ((domainHi, float32_t)) + ((fullDomainFlatten, uint16_t)) // bool +); + namespace ies { @@ -22,19 +39,9 @@ struct Texture using sampler_t = CandelaSampler; using polar_t = math::Polar; using octahedral_t = math::OctahedralTransform; + using SInfo = nbl::hlsl::IESTextureInfo; - struct SInfo - { - float32_t2 inv; - float32_t flatten; - float32_t maxValueRecip; - float32_t flattenTarget; - float32_t domainLo; - float32_t domainHi; - bool fullDomainFlatten; - }; - - static SInfo createInfo(NBL_CONST_REF_ARG(accessor_t) accessor, NBL_CONST_REF_ARG(uint32_t2) size, float32_t flatten, bool fullDomainFlatten) + static inline SInfo createInfo(NBL_CONST_REF_ARG(accessor_t) accessor, NBL_CONST_REF_ARG(uint32_t2) size, float32_t flatten, bool fullDomainFlatten) { SInfo retval; const ProfileProperties props = 
accessor.getProperties(); @@ -57,7 +64,7 @@ struct Texture return retval; } - static float32_t eval(NBL_CONST_REF_ARG(accessor_t) accessor, NBL_CONST_REF_ARG(SInfo) info, NBL_CONST_REF_ARG(uint32_t2) position) + static inline float32_t eval(NBL_CONST_REF_ARG(accessor_t) accessor, NBL_CONST_REF_ARG(SInfo) info, NBL_CONST_REF_ARG(uint32_t2) position) { // We don't currently support generating IES images that exploit symmetries or reduced domains, all are full octahederal mappings of a sphere. // If we did, we'd rely on MIRROR and CLAMP samplers to do some of the work for us while handling the discontinuity due to corner sampling. From 33a324721ab1282432b2a0e854a579358092417b Mon Sep 17 00:00:00 2001 From: Mateusz Kielan Date: Wed, 3 Dec 2025 15:11:21 +0100 Subject: [PATCH 182/472] Make sure NBL_VALID_EXPRESSION works outside the `nbl::hlsl` namespace --- include/nbl/builtin/hlsl/concepts.hlsl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/concepts.hlsl b/include/nbl/builtin/hlsl/concepts.hlsl index 6e0f380d01..3c40b3e6c6 100644 --- a/include/nbl/builtin/hlsl/concepts.hlsl +++ b/include/nbl/builtin/hlsl/concepts.hlsl @@ -128,9 +128,9 @@ NBL_CONSTEXPR bool NBL_CONCEPT_NAME = BOOST_PP_SEQ_FOR_EACH_I(NBL_IMPL_CONCEPT_E namespace impl\ {\ template\ -struct CONCEPT_NAME : false_type {};\ +struct CONCEPT_NAME : ::nbl::hlsl::false_type {};\ template\ -struct CONCEPT_NAME > : true_type {};\ +struct CONCEPT_NAME > : ::nbl::hlsl::true_type {};\ }\ template\ NBL_BOOL_CONCEPT CONCEPT_NAME = impl::CONCEPT_NAME::value\ @@ -139,4 +139,4 @@ NBL_BOOL_CONCEPT CONCEPT_NAME = impl::CONCEPT_NAME Date: Wed, 3 Dec 2025 16:14:03 +0100 Subject: [PATCH 183/472] "Fix" `SelectIsCallable` its really HLSL's fault as a language --- include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl 
b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl index a7614469dd..b71558c49d 100644 --- a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl +++ b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl @@ -382,7 +382,8 @@ template && (!conc [[vk::ext_instruction(spv::OpSelect)]] T select(U a, T x, T y); -NBL_VALID_EXPRESSION(SelectIsCallable, (T)(U), select(experimental::declval(),experimental::declval(),experimental::declval())); +// need to use `spirv::` even in the namespace because it matches the HLSL intrinsic which is not namespaced at all, and will happily match anything +NBL_VALID_EXPRESSION(SelectIsCallable, (T)(U), spirv::select(experimental::declval(),experimental::declval(),experimental::declval())); } From 5da522e319acdf93c9f0bfd581791e2c25826354 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 3 Dec 2025 23:17:45 +0700 Subject: [PATCH 184/472] Add assert in morton code creation --- include/nbl/builtin/hlsl/cpp_compat/basic.h | 2 ++ include/nbl/builtin/hlsl/morton.hlsl | 35 +++++++++++++++++++-- 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index a5715efa15..3ca499c567 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -21,6 +21,7 @@ #define NBL_CONSTEXPR_OOL_MEMBER constexpr #define NBL_CONSTEXPR_INLINE_OOL_MEMBER constexpr inline #define NBL_IF_CONSTEXPR(...) if constexpr (__VA_ARGS__) +#define NBL_ASSERT(...) assert(__VA_ARGS__) namespace nbl::hlsl { @@ -57,6 +58,7 @@ namespace nbl::hlsl #define NBL_CONSTEXPR_OOL_MEMBER const #define NBL_CONSTEXPR_INLINE_OOL_MEMBER const #define NBL_IF_CONSTEXPR(...) if (__VA_ARGS__) +#define NBL_ASSERT(...) 
namespace nbl { diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 41461a0841..9ee59b7e78 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -25,7 +25,31 @@ namespace impl template NBL_BOOL_CONCEPT Dimension = 1 < D && D < 5; -// --------------------------------------------------------- MORTON ENCODE/DECODE MASKS --------------------------------------------------- +template && concepts::Scalar) +NBL_CONSTEXPR_FUNC bool verifyAnyBitIntegral(T val) +{ + NBL_CONSTEXPR_FUNC_SCOPE_VAR T mask = (~((T(1) << Bits) - 1)); + const bool allZero = ((val & mask) == 0); + NBL_IF_CONSTEXPR(is_signed_v) + { + const bool allOne = ((val & mask) == mask); + return allZero || allOne; + } + return allZero; +} + +template && concepts::Scalar) +NBL_CONSTEXPR_FUNC bool verifyAnyBitIntegralVec(vector vec) +{ + array_get, T> getter; + [[unroll]] + for (uint16_t i = 0; i < Dim; i++) + if (!verifyAnyBitIntegral(getter(vec, i))) return false; + return true; +} + + +// --------------------------------------------------------- MORTON ENCOE/DECODE MASKS --------------------------------------------------- NBL_CONSTEXPR uint16_t CodingStages = 5; @@ -108,7 +132,8 @@ NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS template && Dim * Bits <= 64 && 8 * sizeof(encode_t) == mpl::max_v, uint64_t(16)>) struct Transcoder { - using decode_t = conditional_t < (Bits > 16), vector, vector >; + using decode_component_t = conditional_t<(Bits > 16), uint32_t, uint16_t>; + using decode_t = vector; template ) @@ -314,6 +339,9 @@ struct code using storage_t = conditional_t<(TotalBitWidth > 16), conditional_t<(TotalBitWidth > 32), _uint64_t, uint32_t>, uint16_t>; using transcoder_t = impl::Transcoder; + using decode_component_t = conditional_t, + typename transcoder_t::decode_component_t>; storage_t value; @@ -331,10 +359,11 @@ struct code * @param [in] cartesian Coordinates to encode. 
Signedness MUST match the signedness of this Morton code class */ template - NBL_CONSTEXPR_STATIC enable_if_t && is_scalar_v && (is_signed_v == Signed && sizeof(I) == sizeof(vector_traits::scalar_type)), this_t> + NBL_CONSTEXPR_STATIC enable_if_t , this_t> create(NBL_CONST_REF_ARG(vector) cartesian) { this_t retVal; + NBL_ASSERT((impl::verifyAnyBitIntegralVec(cartesian) == true)); using decode_t = typename transcoder_t::decode_t; retVal.value = transcoder_t::encode(_static_cast(cartesian)); return retVal; From 812ae7b580ef4e283dc8b8c45e331a5ac85f08d9 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 3 Dec 2025 23:30:37 +0700 Subject: [PATCH 185/472] Fix is_emulating concepts --- include/nbl/builtin/hlsl/concepts/core.hlsl | 8 ++++---- include/nbl/builtin/hlsl/morton.hlsl | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/nbl/builtin/hlsl/concepts/core.hlsl b/include/nbl/builtin/hlsl/concepts/core.hlsl index 4a8b848cb8..e3ff3f611f 100644 --- a/include/nbl/builtin/hlsl/concepts/core.hlsl +++ b/include/nbl/builtin/hlsl/concepts/core.hlsl @@ -72,23 +72,23 @@ namespace impl template struct is_emulating_floating_point_scalar { - NBL_CONSTEXPR_STATIC_INLINE bool value = FloatingPointScalar; + NBL_CONSTEXPR_STATIC_INLINE bool value = false; }; template struct is_emulating_integral_scalar { - NBL_CONSTEXPR_STATIC_INLINE bool value = IntegralScalar; + NBL_CONSTEXPR_STATIC_INLINE bool value = false; }; } //! Floating point types are native floating point types or types that imitate native floating point types (for example emulated_float64_t) template -NBL_BOOL_CONCEPT FloatingPointLikeScalar = impl::is_emulating_floating_point_scalar::value; +NBL_BOOL_CONCEPT FloatingPointLikeScalar = FloatingPointScalar || impl::is_emulating_floating_point_scalar::value; //! 
Integral-like types are native integral types or types that imitate native integral types (for example emulated_uint64_t) template -NBL_BOOL_CONCEPT IntegralLikeScalar = impl::is_emulating_integral_scalar::value; +NBL_BOOL_CONCEPT IntegralLikeScalar = IntegralScalar || impl::is_emulating_integral_scalar::value; } } diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 9ee59b7e78..67e83f6169 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -28,7 +28,7 @@ NBL_BOOL_CONCEPT Dimension = 1 < D && D < 5; template && concepts::Scalar) NBL_CONSTEXPR_FUNC bool verifyAnyBitIntegral(T val) { - NBL_CONSTEXPR_FUNC_SCOPE_VAR T mask = (~((T(1) << Bits) - 1)); + NBL_CONSTEXPR_FUNC_SCOPE_VAR T mask = ~((T(1) << Bits) - 1); const bool allZero = ((val & mask) == 0); NBL_IF_CONSTEXPR(is_signed_v) { From 341d6cd033969efcd214a2bb495d1612a591eb14 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 00:10:17 +0700 Subject: [PATCH 186/472] Move storage_t to common_inc --- include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl | 2 +- include/nbl/builtin/hlsl/emulated/int64_t.hlsl | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl b/include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl index 2dd7bafa41..3818814a49 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl @@ -1,4 +1,4 @@ - +using storage_t = vector; storage_t data; /** diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 0b890fb2b2..b44709bc01 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -19,7 +19,6 @@ struct emulated_int64_t; struct emulated_uint64_t { - using storage_t = vector; using this_t = emulated_uint64_t; 
NBL_CONSTEXPR_STATIC_INLINE bool Signed = false; @@ -38,7 +37,6 @@ struct emulated_uint64_t struct emulated_int64_t { - using storage_t = vector; using this_t = emulated_int64_t; NBL_CONSTEXPR_STATIC_INLINE bool Signed = true; From 2fd2cbaaedea4d20233d0869d8f2e8125398b46f Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 00:10:49 +0700 Subject: [PATCH 187/472] Rename ImitationIntegral64Scalar to EmulatedIntegral64Scalar --- .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index b44709bc01..4c950859e6 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -68,7 +68,7 @@ namespace concepts { template -NBL_BOOL_CONCEPT ImitationIntegral64Scalar = same_as || same_as; +NBL_BOOL_CONCEPT EmulatedIntegralScalar64 = same_as || same_as; namespace impl { @@ -93,8 +93,8 @@ struct is_emulating_integral_scalar namespace impl { -template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar && concepts::ImitationIntegral64Scalar && !concepts::same_as) -struct static_cast_helper && concepts::ImitationIntegral64Scalar && !concepts::same_as) > +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64 && concepts::EmulatedIntegralScalar64 && !concepts::same_as) +struct static_cast_helper && concepts::EmulatedIntegralScalar64 && !concepts::same_as) > { NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) other) @@ -105,8 +105,8 @@ struct static_cast_helper NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(To) <= sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) -struct static_cast_helper && (sizeof(To) <= sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(To) <= sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) +struct static_cast_helper 
&& (sizeof(To) <= sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) > { // Return only the lowest bits NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) val) @@ -115,8 +115,8 @@ struct static_cast_helper NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(To) > sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) -struct static_cast_helper && (sizeof(To) > sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(To) > sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) +struct static_cast_helper && (sizeof(To) > sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) > { NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) val) { @@ -124,8 +124,8 @@ struct static_cast_helper NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(From) <= sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) -struct static_cast_helper && (sizeof(From) <= sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(From) <= sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) +struct static_cast_helper && (sizeof(From) <= sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) > { // Set only lower bits NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) i) @@ -134,8 +134,8 @@ struct static_cast_helper NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(From) > sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) -struct static_cast_helper && (sizeof(From) > sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(From) > sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) +struct static_cast_helper && (sizeof(From) > sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) > { NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) i) { @@ -188,8 +188,8 @@ constexpr emulated_uint64_t::operator I() const noexcept // ---------------------- 
Functional operators ------------------------ -template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar) -struct left_shift_operator) > +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64) +struct left_shift_operator) > { using type_t = T; NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); @@ -302,8 +302,8 @@ constexpr inline emulated_int64_t emulated_int64_t::operator>>(uint32_t bits) co // Specializations of the structs found in functional.hlsl // These all have to be specialized because of the identity that can't be initialized inside the struct definition -template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar) -struct plus) > +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64) +struct plus) > { using type_t = T; @@ -315,8 +315,8 @@ struct plus) > const static type_t identity; }; -template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar) -struct minus) > +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64) +struct minus) > { using type_t = T; @@ -340,8 +340,8 @@ NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_int64_t minus::id // --------------------------------- Compound assignment operators ------------------------------------------ // Specializations of the structs found in functional.hlsl -template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar) -struct plus_assign) > +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64) +struct plus_assign) > { using type_t = T; using base_t = plus; @@ -354,8 +354,8 @@ struct plus_assign) const static type_t identity; }; -template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar) -struct minus_assign) > +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64) +struct minus_assign) > { using type_t = T; using base_t = minus; From 1255d1c30e9f2f1b36c3f71bdad1ff26b5488038 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 00:21:36 +0700 Subject: [PATCH 188/472] Fix extent and remove 
duplicated extent specialization --- include/nbl/builtin/hlsl/type_traits.hlsl | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/include/nbl/builtin/hlsl/type_traits.hlsl b/include/nbl/builtin/hlsl/type_traits.hlsl index b682b8da8b..257a753129 100644 --- a/include/nbl/builtin/hlsl/type_traits.hlsl +++ b/include/nbl/builtin/hlsl/type_traits.hlsl @@ -732,11 +732,11 @@ struct extent : integral_constant::value> {}; template struct extent : integral_constant::value> {}; -template -struct extent, 0> : integral_constant {}; +template +struct extent, I> : extent {}; template -struct extent, I> : integral_constant::value> {}; +struct extent, I> : extent {}; // Template Variables @@ -855,12 +855,6 @@ struct float_of_size<8> template using float_of_size_t = typename float_of_size::type; -template -struct extent, 0> : integral_constant {}; - -template -struct extent, 1> : integral_constant {}; - } } From 527129fa79399008977dfe730cf2d2ed11873fd1 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 00:40:55 +0700 Subject: [PATCH 189/472] Remove redundant extent --- include/nbl/builtin/hlsl/concepts/vector.hlsl | 4 ---- include/nbl/builtin/hlsl/emulated/vector_t.hlsl | 2 ++ 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/include/nbl/builtin/hlsl/concepts/vector.hlsl b/include/nbl/builtin/hlsl/concepts/vector.hlsl index 3ea3199951..f132531cb9 100644 --- a/include/nbl/builtin/hlsl/concepts/vector.hlsl +++ b/include/nbl/builtin/hlsl/concepts/vector.hlsl @@ -46,10 +46,6 @@ NBL_BOOL_CONCEPT SignedIntVectorial = concepts::Vectorial && concepts::Signed } -template -NBL_PARTIAL_REQ_TOP(concepts::Vectorial) -struct extent) > : integral_constant::Dimension> {}; - } } #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index f153fb1062..82a1360b49 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ 
b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -627,6 +627,8 @@ NBL_EMULATED_VEC_TRUNCATION(4, 4) } //namespace impl +template +struct extent, I> : extent {}; } } #endif \ No newline at end of file From ed696efd6ea9e715591c2b2d7e98e7f0f1a1eada Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 01:24:34 +0700 Subject: [PATCH 190/472] Fix unary_minus_operator --- .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 27 ++++--------------- include/nbl/builtin/hlsl/functional.hlsl | 12 ++++++++- 2 files changed, 16 insertions(+), 23 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 4c950859e6..30c23d8693 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -51,7 +51,11 @@ struct emulated_int64_t constexpr explicit emulated_int64_t(const emulated_uint64_t& other); #endif - NBL_CONSTEXPR_FUNC emulated_int64_t operator-() NBL_CONST_MEMBER_FUNC; + NBL_CONSTEXPR_FUNC emulated_int64_t operator-() NBL_CONST_MEMBER_FUNC + { + storage_t inverted = ~data; + return create(_static_cast(inverted)) + _static_cast(1); + } }; @@ -377,27 +381,6 @@ NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_uint64_t minus_assign NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_int64_t minus_assign::identity = minus::identity; -// --------------------------------- Unary operators ------------------------------------------ -// Specializations of the structs found in functional.hlsl -template<> -struct unary_minus_operator -{ - using type_t = emulated_int64_t; - - NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand) - { - using storage_t = type_t::storage_t; - storage_t inverted = ~operand.data; - return type_t::create(_static_cast(inverted)) + _static_cast(1); - } -}; - -NBL_CONSTEXPR_INLINE_FUNC emulated_int64_t emulated_int64_t::operator-() NBL_CONST_MEMBER_FUNC -{ - unary_minus_operator unaryMinus; - return unaryMinus(NBL_DEREF_THIS); -} - } 
//namespace nbl } //namespace hlsl diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index dc718e5928..4d5889fe05 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -495,7 +495,17 @@ struct unary_minus_operator NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand) { - return -operand; + return operand.operator-(); + } +}; + +template NBL_PARTIAL_REQ_TOP(is_fundamental_v) +struct unary_minus_operator) > +{ + using type_t = T; + NBL_CONSTEXPR_FUNC T operator()(const T operand) + { + return -operand; } }; From 4da1fb8ea99a06b39b4bb2c85c534bf538c3e78b Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 01:24:58 +0700 Subject: [PATCH 191/472] Fix redundant extent specialization --- include/nbl/builtin/hlsl/emulated/vector_t.hlsl | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index 82a1360b49..4eb8b7bf06 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -491,6 +491,15 @@ DEFINE_SCALAR_OF_SPECIALIZATION(3) DEFINE_SCALAR_OF_SPECIALIZATION(4) #undef DEFINE_SCALAR_OF_SPECIALIZATION +#define DEFINE_EXTENT_SPECIALIZATION(DIMENSION)\ +template\ +struct extent, I> : extent {}; + +DEFINE_EXTENT_SPECIALIZATION(2) +DEFINE_EXTENT_SPECIALIZATION(3) +DEFINE_EXTENT_SPECIALIZATION(4) +#undef DEFINE_EXTENT_SPECIALIZATION + namespace impl { template @@ -627,8 +636,6 @@ NBL_EMULATED_VEC_TRUNCATION(4, 4) } //namespace impl -template -struct extent, I> : extent {}; } } #endif \ No newline at end of file From 14636d30b1f08c0f87a25dea12e2c900ad726981 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Wed, 3 Dec 2025 23:29:35 +0300 Subject: [PATCH 192/472] update examples submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests 
b/examples_tests index 1508702f27..f18160276e 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 1508702f27dbd4c7fa9642e26b1047b0cd8889c9 +Subproject commit f18160276e78f860f64c45111c874e3351b44ffb From 402b8231a3c53090cfa5db751ed63fd2e328473f Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 07:40:54 +0700 Subject: [PATCH 193/472] Replace [[unroll]] with NBL_UNROLL --- include/nbl/builtin/hlsl/morton.hlsl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 67e83f6169..d7a781fad9 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -42,7 +42,7 @@ template vec) { array_get, T> getter; - [[unroll]] + NBL_UNROLL for (uint16_t i = 0; i < Dim; i++) if (!verifyAnyBitIntegral(getter(vec, i))) return false; return true; @@ -178,7 +178,7 @@ struct Transcoder array_get, encode_t> getter; encode_t encoded = getter(interleaveShifted, 0); - [[unroll]] + NBL_UNROLL for (uint16_t i = 1; i < Dim; i++) encoded = encoded | getter(interleaveShifted, i); @@ -196,7 +196,7 @@ struct Transcoder portable_vector_t decoded; array_set, encode_t> setter; // Write initial values into decoded - [[unroll]] + NBL_UNROLL for (uint16_t i = 0; i < Dim; i++) setter(decoded, i, encodedRightShift(encodedValue, i)); @@ -363,7 +363,7 @@ struct code create(NBL_CONST_REF_ARG(vector) cartesian) { this_t retVal; - NBL_ASSERT((impl::verifyAnyBitIntegralVec(cartesian) == true)); + NBL_ASSERT((impl::verifyAnyBitIntegralVec(cartesian))); using decode_t = typename transcoder_t::decode_t; retVal.value = transcoder_t::encode(_static_cast(cartesian)); return retVal; @@ -466,7 +466,7 @@ struct code array_get, storage_t> getter; this_t retVal; retVal.value = getter(interleaveShiftedResult, 0); - [[unroll]] + NBL_UNROLL for (uint16_t i = 1; i < D; i++) retVal.value = retVal.value | getter(interleaveShiftedResult, i); return retVal; 
@@ -486,7 +486,7 @@ struct code array_get, storage_t> getter; this_t retVal; retVal.value = getter(interleaveShiftedResult, 0); - [[unroll]] + NBL_UNROLL for (uint16_t i = 1; i < D; i++) retVal.value = retVal.value | getter(interleaveShiftedResult, i); From 99b25ffd97c9b5ace8213d5c5ca334361ea3ff75 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 08:18:24 +0700 Subject: [PATCH 194/472] Allow promote to work between emulated type and non emulated type --- include/nbl/builtin/hlsl/cpp_compat/promote.hlsl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl index 9f2b58047f..6a8476e644 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl @@ -21,8 +21,7 @@ struct Promote } }; -// TODO(kevinyu): Should we enable truncation from uint64_t to emulated_vector? -template NBL_PARTIAL_REQ_TOP(concepts::Vectorial && is_scalar_v && is_same_v::scalar_type, From>) +template NBL_PARTIAL_REQ_TOP(concepts::Vectorial && (concepts::IntegralLikeScalar || concepts::FloatingPointLikeScalar) && is_same_v::scalar_type, From>) struct Promote && is_scalar_v && is_same_v::scalar_type, From>) > { NBL_CONSTEXPR_FUNC To operator()(const From v) From 37d5c5a776b6667d08262374e3ee849e649942e6 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 4 Dec 2025 11:17:02 +0700 Subject: [PATCH 195/472] removed temp fix for mix_helper require --- include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 0c595bb0e2..cd89ce45d1 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -256,8 +256,8 @@ struct mix_helper) > }; template 
-NBL_PARTIAL_REQ_TOP(spirv::SelectIsCallable && concepts::Boolean) -struct mix_helper && concepts::Boolean) > +NBL_PARTIAL_REQ_TOP(spirv::SelectIsCallable) +struct mix_helper) > { using return_t = conditional_t, vector::scalar_type, vector_traits::Dimension>, T>; // for a component of a that is false, the corresponding component of x is returned From 70a88fa975b91bad0d141e30b9b5ee9476c59f29 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 13:54:11 +0700 Subject: [PATCH 196/472] Refactor unary operator in hlsl functionals --- include/nbl/builtin/hlsl/functional.hlsl | 82 +++++++++--------------- 1 file changed, 29 insertions(+), 53 deletions(-) diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 4d5889fe05..da416a538f 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -134,41 +134,6 @@ ALIAS_STD(divides,/) NBL_CONSTEXPR_STATIC_INLINE T identity = T(1); }; -#ifndef __HLSL_VERSION - -template -struct bit_not : std::bit_not -{ - using type_t = T; -}; - -#else - -template -struct bit_not -{ - using type_t = T; - - T operator()(NBL_CONST_REF_ARG(T) operand) - { - return ~operand; - } -}; - -// The default version above only works for fundamental scalars, vectors and matrices. This is because you can't call `~x` unless `x` is one of the former. -// Similarly, calling `x.operator~()` is not valid for the aforementioned, and only for types overriding this operator. So, we need a specialization. 
-template NBL_PARTIAL_REQ_TOP(!(concepts::Scalar || concepts::Vector || concepts::Matrix)) -struct bit_not || concepts::Vector || concepts::Matrix)) > -{ - using type_t = T; - - T operator()(NBL_CONST_REF_ARG(T) operand) - { - return operand.operator~(); - } -}; - -#endif ALIAS_STD(equal_to, ==) }; ALIAS_STD(not_equal_to, !=) }; @@ -488,27 +453,38 @@ struct logical_right_shift_operator }; // ----------------------------------------------------------------- UNARY OPERATORS -------------------------------------------------------------------- -template -struct unary_minus_operator -{ - using type_t = T; - - NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand) - { - return operand.operator-(); - } +#ifndef __HLSL_VERSION +#define NBL_UNARY_OP_SPECIALIZATION(NAME, OP) template \ +struct NAME : std::NAME { \ + using type_t = T; \ }; +#else +#define NBL_UNARY_OP_SPECIALIZATION(NAME, OP) template \ +struct NAME \ +{ \ + using type_t = T; \ + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand) \ + { \ + return operand.operator OP(); \ + } \ +}; \ +template NBL_PARTIAL_REQ_TOP(concepts::Scalar || concepts::Vector || concepts::Matrix ) \ +struct NAME || concepts::Vector || concepts::Matrix ) > \ +{ \ + using type_t = T; \ + NBL_CONSTEXPR_FUNC T operator()(const T operand) \ + { \ + return (OP operand); \ + } \ +}; +#endif + +NBL_UNARY_OP_SPECIALIZATION(bit_not, ~) +NBL_UNARY_OP_SPECIALIZATION(negate, -) -template NBL_PARTIAL_REQ_TOP(is_fundamental_v) -struct unary_minus_operator) > -{ - using type_t = T; - NBL_CONSTEXPR_FUNC T operator()(const T operand) - { - return -operand; - } -}; + +#endif } //namespace nbl } //namespace hlsl From ded5d8fcd8aa348b9934f02a786b740d68c5b7a7 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 15:19:21 +0700 Subject: [PATCH 197/472] Fix misplaced #endif in functional.hlsl --- include/nbl/builtin/hlsl/functional.hlsl | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git 
a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index da416a538f..757ad7294d 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -482,10 +482,7 @@ struct NAME || concepts::Vector || NBL_UNARY_OP_SPECIALIZATION(bit_not, ~) NBL_UNARY_OP_SPECIALIZATION(negate, -) - - -#endif } //namespace nbl } //namespace hlsl -#endif \ No newline at end of file +#endif From 17d07177d87f38e7beff6cf8881e548ee670aa29 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 17:50:22 +0700 Subject: [PATCH 198/472] Fix ternary_operation --- .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 10 +++--- include/nbl/builtin/hlsl/functional.hlsl | 33 +++++++++++++++---- 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 30c23d8693..1324998d1a 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -208,8 +208,7 @@ struct left_shift_operator(0, operand.__getLSB() << shift) : vector(operand.__getLSB() << bits, (operand.__getMSB() << bits) | (operand.__getLSB() >> shift))); - ternary_operator ternary; - return ternary(bool(bits), shifted, operand); + return select(bool(bits), shifted, operand); } // If `_bits > 63` or `_bits < 0` the result is undefined @@ -235,8 +234,8 @@ struct arithmetic_right_shift_operator const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; const type_t shifted = type_t::create(bigShift ? vector(operand.__getMSB() >> shift, 0) : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), operand.__getMSB() >> bits)); - ternary_operator ternary; - return ternary(bool(bits), shifted, operand); + + return select(bool(bits), shifted, operand); } // If `_bits > 63` the result is undefined @@ -262,8 +261,7 @@ struct arithmetic_right_shift_operator const uint32_t shift = bigShift ? 
bits - ComponentBitWidth : ComponentBitWidth - bits; const type_t shifted = type_t::create(bigShift ? vector(uint32_t(int32_t(operand.__getMSB()) >> shift), int32_t(operand.__getMSB()) < 0 ? ~uint32_t(0) : uint32_t(0)) : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), uint32_t(int32_t(operand.__getMSB()) >> bits))); - ternary_operator ternary; - return ternary(bool(bits), shifted, operand); + return select(bool(bits), shifted, operand); } // If `_bits > 63` or `_bits < 0` the result is undefined diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 757ad7294d..7531c5cdb9 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -235,16 +235,35 @@ struct maximum NBL_CONSTEXPR_STATIC_INLINE T identity = numeric_limits::lowest; // TODO: `all_components` }; -template +#ifndef __HLSL_VERSION +template requires(is_same_v, std::invoke_result_t()> ) struct ternary_operator { - using type_t = T; - - NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(bool) condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) - { - return select(condition, lhs, rhs); - } + using type_t = std::invoke_result_t; + + constexpr inline type_t operator()(const bool condition, const F1& lhs, const F2& rhs) + { + if (condition) + return std::invoke(lhs); + else + return std::invoke(rhs); + } }; +#else +template()()),decltype(experimental::declval()())> ) +struct ternary_operator +{ + using type_t = decltype(experimental::declval().operator()); + + NBL_CONSTEXPR_FUNC type_t operator()(const bool condition, NBL_CONST_REF_ARG(F1) lhs, NBL_CONST_REF_ARG(F2) rhs) + { + if (condition) + return lhs(); + else + return rhs(); + } +}; +#endif // ----------------------------------------------------------------- SHIFT OPERATORS -------------------------------------------------------------------- From 791b2b917d785f51616a13ea24510a5ae30b602e Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 
Dec 2025 17:59:08 +0700 Subject: [PATCH 199/472] Improve some comment --- include/nbl/builtin/hlsl/morton.hlsl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index d7a781fad9..e72ec9a76b 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -59,9 +59,7 @@ struct coding_mask; template NBL_CONSTEXPR T coding_mask_v = _static_cast(coding_mask::value); -// It's a complete cointoss whether template variables work or not, since it's a C++14 feature (not supported in HLSL2021). Most of the ones we use in Nabla work, -// but this one will only work for some parameters and not for others. Therefore, this was made into a macro to inline where used - +// constexpr vector is not supported since it is not a fundamental type, which means it cannot be stored or leaked outside of constexpr context, it can only exist transiently. So the only way to return vector is to make the function consteval. Thus, we use macro to inline where it is used. 
#define NBL_MORTON_INTERLEAVE_MASKS(STORAGE_T, DIM, BITS, NAMESPACE_PREFIX) _static_cast >(\ truncate >(\ vector(NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0>,\ From 8f548f6ba32baba77c76996124ad13a1680d78e3 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Thu, 4 Dec 2025 13:23:42 +0100 Subject: [PATCH 200/472] Updated DXC --- 3rdparty/dxc/dxc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/dxc/dxc b/3rdparty/dxc/dxc index 1e5414bcc2..ecd3f93521 160000 --- a/3rdparty/dxc/dxc +++ b/3rdparty/dxc/dxc @@ -1 +1 @@ -Subproject commit 1e5414bcc21b002d795f97075dff63e387fc668f +Subproject commit ecd3f93521f1aceabff64b14857f47f9a32c9958 From ac2070e0998a977d87ac524412f63efa6f560ea4 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 19:38:54 +0700 Subject: [PATCH 201/472] Remove NBL_ASSERT --- include/nbl/builtin/hlsl/cpp_compat/basic.h | 2 -- include/nbl/builtin/hlsl/morton.hlsl | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index 3ca499c567..a5715efa15 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -21,7 +21,6 @@ #define NBL_CONSTEXPR_OOL_MEMBER constexpr #define NBL_CONSTEXPR_INLINE_OOL_MEMBER constexpr inline #define NBL_IF_CONSTEXPR(...) if constexpr (__VA_ARGS__) -#define NBL_ASSERT(...) assert(__VA_ARGS__) namespace nbl::hlsl { @@ -58,7 +57,6 @@ namespace nbl::hlsl #define NBL_CONSTEXPR_OOL_MEMBER const #define NBL_CONSTEXPR_INLINE_OOL_MEMBER const #define NBL_IF_CONSTEXPR(...) if (__VA_ARGS__) -#define NBL_ASSERT(...) 
namespace nbl { diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index e72ec9a76b..08b2b1ccfb 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -362,6 +362,7 @@ struct code { this_t retVal; NBL_ASSERT((impl::verifyAnyBitIntegralVec(cartesian))); + assert((impl::verifyAnyBitIntegralVec(cartesian))); using decode_t = typename transcoder_t::decode_t; retVal.value = transcoder_t::encode(_static_cast(cartesian)); return retVal; From a4dabdf9f267c93da1a340aa51600ed9443004e2 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 20:45:21 +0700 Subject: [PATCH 202/472] Simplify mix helper by using select_helper in some specialization --- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 50 ++----------------- 1 file changed, 3 insertions(+), 47 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 7850fd7cf3..67a9f67d8f 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -268,20 +268,6 @@ struct mix_helper) > } }; -template -NBL_PARTIAL_REQ_TOP(spirv::SelectIsCallable) -struct mix_helper) > -{ - using return_t = conditional_t, vector::scalar_type, vector_traits::Dimension>, T>; - // for a component of a that is false, the corresponding component of x is returned - // for a component of a that is true, the corresponding component of y is returned - // so we make sure this is correct when calling the operation - static inline return_t __call(const T x, const T y, const U a) - { - return spirv::select(a, y, x); - } -}; - template NBL_PARTIAL_REQ_TOP(matrix_traits::Square) struct determinant_helper::Square) > { @@ -980,43 +966,13 @@ struct mix_helper -NBL_PARTIAL_REQ_TOP(VECTOR_SPECIALIZATION_CONCEPT && !impl::MixCallingBuiltins && concepts::BooleanScalar) -struct mix_helper && 
concepts::BooleanScalar) > +template NBL_PARTIAL_REQ_TOP(concepts::Vectorial && concepts::BooleanScalar) +struct mix_helper && concepts::BooleanScalar) > { using return_t = T; static return_t __call(NBL_CONST_REF_ARG(T) x, NBL_CONST_REF_ARG(T) y, NBL_CONST_REF_ARG(U) a) { - using traitsT = hlsl::vector_traits; - array_get getterT; - array_set setter; - - return_t output; - for (uint32_t i = 0; i < traitsT::Dimension; ++i) - setter(output, i, mix_helper::__call(getterT(x, i), getterT(y, i), a)); - - return output; - } -}; - -template -NBL_PARTIAL_REQ_TOP(VECTOR_SPECIALIZATION_CONCEPT && !impl::MixCallingBuiltins && concepts::Boolean && concepts::Vectorial && vector_traits::Dimension == vector_traits::Dimension) -struct mix_helper && concepts::Boolean && concepts::Vectorial && vector_traits::Dimension == vector_traits::Dimension) > -{ - using return_t = T; - static return_t __call(NBL_CONST_REF_ARG(T) x, NBL_CONST_REF_ARG(T) y, NBL_CONST_REF_ARG(U) a) - { - using traitsT = hlsl::vector_traits; - using traitsU = hlsl::vector_traits; - array_get getterT; - array_get getterU; - array_set setter; - - return_t output; - for (uint32_t i = 0; i < traitsT::Dimension; ++i) - setter(output, i, mix_helper::__call(getterT(x, i), getterT(y, i), getterU(a, i))); - - return output; + return select_helper(a, y, x); } }; From 2c5974e994203aa00c6755e7a636a9142336e695 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 4 Dec 2025 15:32:18 +0100 Subject: [PATCH 203/472] fix optimal resolution bug, update examples_tests submodule --- examples_tests | 2 +- include/nbl/builtin/hlsl/ies/profile.hlsl | 5 +++++ src/nbl/asset/utils/CIESProfile.cpp | 7 ++----- src/nbl/asset/utils/CIESProfileParser.cpp | 4 +++- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/examples_tests b/examples_tests index 92784f38d7..49b18aa4ec 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 92784f38d708b3577cfdff39341cd519052cfe9d +Subproject commit 
49b18aa4ec453fb53cf9bab6f28f83860818ff67 diff --git a/include/nbl/builtin/hlsl/ies/profile.hlsl b/include/nbl/builtin/hlsl/ies/profile.hlsl index 50c370947e..a85141aebd 100644 --- a/include/nbl/builtin/hlsl/ies/profile.hlsl +++ b/include/nbl/builtin/hlsl/ies/profile.hlsl @@ -20,6 +20,11 @@ struct ProfileProperties NBL_CONSTEXPR_STATIC_INLINE uint32_t CDC_MAX_TEXTURE_WIDTH = 15360u; NBL_CONSTEXPR_STATIC_INLINE uint32_t CDC_MAX_TEXTURE_HEIGHT = 8640u; + // TODO: This constraint is hack because the mitsuba loader and its material compiler use Virtual Texturing, and there's some bug with IES not sampling sub 128x128 mip levels + // don't want to spend time to fix this since we'll be using descriptor indexing for the next iteration + NBL_CONSTEXPR_STATIC_INLINE uint32_t CDC_MIN_TEXTURE_WIDTH = 128u; + NBL_CONSTEXPR_STATIC_INLINE uint32_t CDC_MIN_TEXTURE_HEIGHT = 128u; + NBL_CONSTEXPR_STATIC_INLINE uint32_t CDC_DEFAULT_TEXTURE_WIDTH = 1024u; NBL_CONSTEXPR_STATIC_INLINE uint32_t CDC_DEFAULT_TEXTURE_HEIGHT = 1024u; diff --git a/src/nbl/asset/utils/CIESProfile.cpp b/src/nbl/asset/utils/CIESProfile.cpp index b595f5a415..d4289191f6 100644 --- a/src/nbl/asset/utils/CIESProfile.cpp +++ b/src/nbl/asset/utils/CIESProfile.cpp @@ -19,11 +19,8 @@ core::smart_refctd_ptr CIESProfile::createIESTexture(Execu height = properties_t::CDC_MAX_TEXTURE_HEIGHT; // TODO: If no symmetry (no folding in half and abuse of mirror sampler) make dimensions odd-sized so middle texel taps the south pole - - // TODO: This is hack because the mitsuba loader and its material compiler use Virtual Texturing, and there's some bug with IES not sampling sub 128x128 mip levels - // don't want to spend time to fix this since we'll be using descriptor indexing for the next iteration - width = core::max(width,128); - height = core::max(height,128); + width = core::max(width,properties_t::CDC_MIN_TEXTURE_WIDTH); + height = core::max(height,properties_t::CDC_MIN_TEXTURE_HEIGHT); asset::ICPUImage::SCreationParams 
imgInfo; imgInfo.type = asset::ICPUImage::ET_2D; diff --git a/src/nbl/asset/utils/CIESProfileParser.cpp b/src/nbl/asset/utils/CIESProfileParser.cpp index 4a5bc89969..6d9bf1ea32 100644 --- a/src/nbl/asset/utils/CIESProfileParser.cpp +++ b/src/nbl/asset/utils/CIESProfileParser.cpp @@ -226,7 +226,9 @@ bool CIESProfileParser::parse(CIESProfile& result) { const uint32_t maxDimMeasureSize = core::sqrt(FULL_SOLID_ANGLE/smallestRangeSolidAngle); result.accessor.properties.optimalIESResolution = decltype(result.accessor.properties.optimalIESResolution){ maxDimMeasureSize, maxDimMeasureSize }; - result.accessor.properties.optimalIESResolution *= 2u; // safe bias for our bilinear interpolation to work nicely and increase resolution of a profile + auto& res = result.accessor.properties.optimalIESResolution *= 2u; // safe bias for our bilinear interpolation to work nicely and increase resolution of a profile + res.x = core::max(res.x,CIESProfile::properties_t::CDC_MIN_TEXTURE_WIDTH); + res.y = core::max(res.y,CIESProfile::properties_t::CDC_MIN_TEXTURE_HEIGHT); } assert(nonZeroEmissionDomainSize >= 0.f); From 777e443de6bdaff29c08d70b7265d60d16c12aaa Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 4 Dec 2025 16:28:30 +0100 Subject: [PATCH 204/472] update examples_tests submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 49b18aa4ec..57bcf32016 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 49b18aa4ec453fb53cf9bab6f28f83860818ff67 +Subproject commit 57bcf320167405e32c4ad54f9e37106c1cd3a428 From 697190629696ff85ead679e38ee293c922380eb0 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 5 Dec 2025 14:04:25 +0700 Subject: [PATCH 205/472] fixes more nan problems + a few bugs in iridescent fresnel --- examples_tests | 2 +- include/nbl/builtin/hlsl/bxdf/fresnel.hlsl | 71 +++++++++++++--------- 2 files changed, 44 insertions(+), 29 deletions(-) diff --git 
a/examples_tests b/examples_tests index dd7de7a89c..c0eda4b4ab 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit dd7de7a89cfa5a59970dde4d4744ecf746d77a4a +Subproject commit c0eda4b4ab50f8a7ad56bb32c98088d59c711b46 diff --git a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl index f7655e9978..0f2b3486ab 100644 --- a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl @@ -508,25 +508,26 @@ struct iridescent_helper using scalar_type = typename vector_traits::scalar_type; using vector_type = T; - // returns reflectance R = (rp, rs), phi is the phase shift for each plane of polarization (p,s) - static void phase_shift(const vector_type orientedEta, const vector_type orientedEtak, const vector_type cosTheta, NBL_REF_ARG(vector_type) phiS, NBL_REF_ARG(vector_type) phiP) + // returns phi, the phase shift for each plane of polarization (p,s) + static void phase_shift(const vector_type ior1, const vector_type ior2, const vector_type iork2, const vector_type cosTheta, NBL_REF_ARG(vector_type) phiS, NBL_REF_ARG(vector_type) phiP) { - vector_type cosTheta_2 = cosTheta * cosTheta; - vector_type sinTheta2 = hlsl::promote(1.0) - cosTheta_2; - const vector_type eta2 = orientedEta*orientedEta; - const vector_type etak2 = orientedEtak*orientedEtak; + const vector_type cosTheta2 = cosTheta * cosTheta; + const vector_type sinTheta2 = hlsl::promote(1.0) - cosTheta2; + const vector_type ior1_2 = ior1*ior1; + const vector_type ior2_2 = ior2*ior2; + const vector_type iork2_2 = iork2*iork2; - vector_type z = eta2 - etak2 - sinTheta2; - vector_type w = hlsl::sqrt(z * z + scalar_type(4.0) * eta2 * eta2 * etak2); - vector_type a2 = (z + w) * hlsl::promote(0.5); - vector_type b2 = (w - z) * hlsl::promote(0.5); - vector_type b = hlsl::sqrt(b2); + const vector_type z = ior2_2 * (hlsl::promote(1.0) - iork2_2) - ior1_2 * sinTheta2; + const vector_type w = hlsl::sqrt(z*z + scalar_type(4.0) * 
ior2_2 * ior2_2 * iork2_2); + const vector_type a2 = hlsl::max(z + w, hlsl::promote(0.0)) * hlsl::promote(0.5); + const vector_type b2 = hlsl::max(w - z, hlsl::promote(0.0)) * hlsl::promote(0.5); + const vector_type a = hlsl::sqrt(a2); + const vector_type b = hlsl::sqrt(b2); - const vector_type t0 = eta2 + etak2; - const vector_type t1 = t0 * cosTheta_2; - - phiS = hlsl::atan2(hlsl::promote(2.0) * b * cosTheta, a2 + b2 - cosTheta_2); - phiP = hlsl::atan2(hlsl::promote(2.0) * eta2 * cosTheta * (hlsl::promote(2.0) * orientedEtak * hlsl::sqrt(a2) - etak2 * b), t1 - a2 + b2); + phiS = hlsl::atan2(scalar_type(2.0) * ior1 * b * cosTheta, a2 + b2 - ior1_2*cosTheta2); + const vector_type k2_plus_one = hlsl::promote(1.0) + iork2_2; + phiP = hlsl::atan2(scalar_type(2.0) * ior1 * ior2_2 * cosTheta * (scalar_type(2.0) * iork2 * a - (hlsl::promote(1.0) - iork2_2) * b), + ior2_2 * cosTheta2 * k2_plus_one * k2_plus_one - ior1_2*(a2+b2)); } // Evaluation XYZ sensitivity curves in Fourier space @@ -544,7 +545,8 @@ struct iridescent_helper } template - static T __call(const vector_type _D, const vector_type eta12, const vector_type eta23, const vector_type etak23, const scalar_type clampedCosTheta) + static T __call(const vector_type _D, const vector_type ior1, const vector_type ior2, const vector_type ior3, const vector_type iork3, + const vector_type eta12, const vector_type eta23, const vector_type etak23, const scalar_type clampedCosTheta) { const vector_type wavelengths = vector_type(Colorspace::wavelength_R, Colorspace::wavelength_G, Colorspace::wavelength_B); @@ -593,8 +595,8 @@ struct iridescent_helper vector_type I = hlsl::promote(0.0); // Evaluate the phase shift - phase_shift(eta12, hlsl::promote(0.0), hlsl::promote(cosTheta_1), phi21p, phi21s); - phase_shift(eta23, etak23, cosTheta_2, phi23p, phi23s); + phase_shift(ior1, ior2, hlsl::promote(0.0), hlsl::promote(cosTheta_1), phi21s, phi21p); + phase_shift(ior2, ior3, iork3, cosTheta_2, phi23s, phi23p); phi21p = 
hlsl::promote(numbers::pi) - phi21p; phi21s = hlsl::promote(numbers::pi) - phi21s; @@ -633,7 +635,7 @@ struct iridescent_helper I += Cm*Sm; } - return hlsl::max(colorspace::scRGB::FromXYZ(I), hlsl::promote(0.0)) * hlsl::promote(0.5); + return hlsl::max(colorspace::scRGB::FromXYZ(I) * hlsl::promote(0.5), hlsl::promote(0.0)); } }; @@ -643,11 +645,11 @@ struct iridescent_base using scalar_type = typename vector_traits::scalar_type; using vector_type = T; - vector_type getD() NBL_CONST_MEMBER_FUNC { return D; } - vector_type getEta12() NBL_CONST_MEMBER_FUNC { return eta12; } - vector_type getEta23() NBL_CONST_MEMBER_FUNC { return eta23; } - vector_type D; + vector_type ior1; + vector_type ior2; + vector_type ior3; + vector_type iork3; vector_type eta12; // outside (usually air 1.0) -> thin-film IOR vector_type eta23; // thin-film -> base material IOR }; @@ -679,6 +681,10 @@ struct Iridescent(2.0 * params.Dinc) * params.ior2; + retval.ior1 = params.ior1; + retval.ior2 = params.ior2; + retval.ior3 = params.ior3; + retval.iork3 = params.iork3; retval.eta12 = params.ior2/params.ior1; retval.eta23 = params.ior3/params.ior2; retval.etak23 = params.iork3/params.ior2; @@ -687,7 +693,8 @@ struct Iridescent::template __call(base_type::getD(), base_type::getEta12(), base_type::getEta23(), getEtak23(), clampedCosTheta); + return impl::iridescent_helper::template __call(base_type::D, base_type::ior1, base_type::ior2, base_type::ior3, base_type::iork3, + base_type::eta12, base_type::eta23, getEtak23(), clampedCosTheta); } OrientedEtaRcps getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC @@ -731,6 +738,10 @@ struct Iridescent(2.0 * params.Dinc) * params.ior2; + retval.ior1 = params.ior1; + retval.ior2 = params.ior2; + retval.ior3 = params.ior3; + retval.iork3 = params.iork3; retval.eta12 = params.ior2/params.ior1; retval.eta23 = params.ior3/params.ior2; return retval; @@ -738,7 +749,8 @@ struct Iridescent::template __call(base_type::getD(), base_type::getEta12(), base_type::getEta23(), 
getEtak23(), clampedCosTheta); + return impl::iridescent_helper::template __call(base_type::D, base_type::ior1, base_type::ior2, base_type::ior3, getEtak23(), + base_type::eta12, base_type::eta23, getEtak23(), clampedCosTheta); } scalar_type getRefractionOrientedEta() NBL_CONST_MEMBER_FUNC { return base_type::eta23[0]; } @@ -755,8 +767,11 @@ struct Iridescent(1.0)/base_type::eta12, flip); - orientedFresnel.eta23 = hlsl::mix(base_type::eta23, hlsl::promote(1.0)/base_type::eta23, flip); + orientedFresnel.ior1 = base_type::ior3; + orientedFresnel.ior2 = base_type::ior2; + orientedFresnel.ior3 = base_type::ior1; + orientedFresnel.eta12 = hlsl::mix(base_type::eta12, hlsl::promote(1.0)/base_type::eta23, flip); + orientedFresnel.eta23 = hlsl::mix(base_type::eta23, hlsl::promote(1.0)/base_type::eta12, flip); return orientedFresnel; } From cb689283e3b3ff3ddf12e4ec16961b3b3293ca9f Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 5 Dec 2025 16:13:32 +0700 Subject: [PATCH 206/472] fixes iridescent fresnel under transmission --- include/nbl/builtin/hlsl/bxdf/fresnel.hlsl | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl index 0f2b3486ab..ad83da5cf7 100644 --- a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl @@ -563,7 +563,7 @@ struct iridescent_helper if (hlsl::any(notTIR)) { - Dielectric::__polarized(eta12, hlsl::promote(cosTheta_1), R12p, R12s); + Dielectric::__polarized(eta12 * eta12, hlsl::promote(cosTheta_1), R12p, R12s); // Reflected part by the base // if kappa==0, base material is dielectric @@ -741,7 +741,6 @@ struct Iridescent getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC { OrientedEtaRcps rcpEta; - rcpEta.value = hlsl::promote(1.0) / base_type::eta23[0]; + rcpEta.value = base_type::ior1[0] / base_type::ior3[0]; rcpEta.value2 = rcpEta.value * rcpEta.value; return rcpEta; } @@ -767,9 +766,9 @@ struct 
Iridescent(1.0)/base_type::eta23, flip); orientedFresnel.eta23 = hlsl::mix(base_type::eta23, hlsl::promote(1.0)/base_type::eta12, flip); return orientedFresnel; From 76ed66ca21dadddb6c1cd3576dd6a2cf423dca7d Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 5 Dec 2025 16:34:58 +0700 Subject: [PATCH 207/472] fix wrong get refraction eta in iridescent transmission --- include/nbl/builtin/hlsl/bxdf/fresnel.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl index ad83da5cf7..0c498efb79 100644 --- a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl @@ -752,7 +752,7 @@ struct Iridescent getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC { OrientedEtaRcps rcpEta; From 265100c26324b88cbcac5727862c0b14cac84847 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Fri, 5 Dec 2025 13:02:04 +0100 Subject: [PATCH 208/472] Extended NBL_CREATE_NSC_COMPILE_RULES function, now it creates precompiled shaders per build configuration --- cmake/common.cmake | 113 +++++++++++++++++++++++++++++---------------- examples_tests | 2 +- 2 files changed, 75 insertions(+), 40 deletions(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index ab215a59e3..010c7409dc 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1144,6 +1144,12 @@ define_property(TARGET PROPERTY NBL_MOUNT_POINT_DEFINES BRIEF_DOCS "List of preprocessor defines with mount points" ) +option(NSC_DEBUG_EDIF_FILE_BIT "Add \"-fspv-debug=file\" to NSC Debug CLI" ON) +option(NSC_DEBUG_EDIF_SOURCE_BIT "Add \"-fspv-debug=source\" to NSC Debug CLI" OFF) +option(NSC_DEBUG_EDIF_LINE_BIT "Add \"-fspv-debug=line\" to NSC Debug CLI" OFF) +option(NSC_DEBUG_EDIF_TOOL_BIT "Add \"-fspv-debug=tool\" to NSC Debug CLI" ON) +option(NSC_DEBUG_EDIF_NON_SEMANTIC_BIT "Add \"-fspv-debug=vulkan-with-source\" to NSC Debug CLI" OFF) + function(NBL_CREATE_NSC_COMPILE_RULES) set(COMMENT "this code has been 
autogenerated with Nabla CMake NBL_CREATE_HLSL_COMPILE_RULES utility") set(DEVICE_CONFIG_VIEW @@ -1178,9 +1184,34 @@ struct DeviceConfigCaps -enable-16bit-types -Zpr -spirv - -fspv-target-env=vulkan1.3 + -fspv-target-env=vulkan1.3 + -WShadow + -WConversion + $<$:-O0> + $<$:-O3> + $<$:-O3> ) + if(NSC_DEBUG_EDIF_FILE_BIT) + list(APPEND REQUIRED_OPTIONS $<$:-fspv-debug=file>) + endif() + + if(NSC_DEBUG_EDIF_SOURCE_BIT) + list(APPEND REQUIRED_OPTIONS $<$:-fspv-debug=source>) + endif() + + if(NSC_DEBUG_EDIF_LINE_BIT) + list(APPEND REQUIRED_OPTIONS $<$:-fspv-debug=line>) + endif() + + if(NSC_DEBUG_EDIF_TOOL_BIT) + list(APPEND REQUIRED_OPTIONS $<$:-fspv-debug=tool>) + endif() + + if(NSC_DEBUG_EDIF_NON_SEMANTIC_BIT) + list(APPEND REQUIRED_OPTIONS $<$:-fspv-debug=vulkan-with-source>) + endif() + if(NOT NBL_EMBED_BUILTIN_RESOURCES) list(APPEND REQUIRED_OPTIONS -I "${NBL_ROOT_PATH}/include" @@ -1210,12 +1241,12 @@ struct DeviceConfigCaps get_target_property(HEADER_RULE_GENERATED ${IMPL_TARGET} NBL_HEADER_GENERATED_RULE) if(NOT HEADER_RULE_GENERATED) - set(INCLUDE_DIR "$/${IMPL_TARGET}/.cmake/include") + set(INCLUDE_DIR "$/${IMPL_TARGET}/.cmake/include/$") set(INCLUDE_FILE "${INCLUDE_DIR}/$") set(INCLUDE_CONTENT $) file(GENERATE OUTPUT ${INCLUDE_FILE} - CONTENT ${INCLUDE_CONTENT} + CONTENT $ TARGET ${IMPL_TARGET} ) @@ -1420,7 +1451,7 @@ namespace @IMPL_NAMESPACE@ { nbl::core::string retval = "@BASE_KEY@"; @RETVAL_EVAL@ retval += ".spv"; - return retval; + return "$/" + retval; } } @@ -1444,46 +1475,50 @@ namespace @IMPL_NAMESPACE@ { function(GENERATE_KEYS PREFIX CAP_INDEX CAPS_EVAL_PART) if(NUM_CAPS EQUAL 0 OR CAP_INDEX EQUAL ${NUM_CAPS}) + # generate .config file set(FINAL_KEY "${BASE_KEY}${PREFIX}.spv") # always add ext even if its already there to make sure asset loader always is able to load as IShader - - set(TARGET_OUTPUT "${IMPL_BINARY_DIR}/${FINAL_KEY}") - set(CONFIG_FILE "${TARGET_OUTPUT}.config") + set(CONFIG_FILE_TARGET_OUTPUT "${IMPL_BINARY_DIR}/${FINAL_KEY}") + 
set(CONFIG_FILE "${CONFIG_FILE_TARGET_OUTPUT}.config") set(CAPS_EVAL "${CAPS_EVAL_PART}") - string(CONFIGURE "${DEVICE_CONFIG_VIEW}" CONFIG_CONTENT @ONLY) file(WRITE "${CONFIG_FILE}" "${CONFIG_CONTENT}") - set(NBL_NSC_COMPILE_COMMAND - "$" - -Fc "${TARGET_OUTPUT}" - ${COMPILE_OPTIONS} ${REQUIRED_OPTIONS} ${IMPL_COMMON_OPTIONS} - "${CONFIG_FILE}" - ) - - add_custom_command(OUTPUT "${TARGET_OUTPUT}" - COMMAND ${NBL_NSC_COMPILE_COMMAND} - DEPENDS ${DEPENDS_ON} - COMMENT "Creating \"${TARGET_OUTPUT}\"" - VERBATIM - COMMAND_EXPAND_LISTS - ) - - set(HEADER_ONLY_LIKE "${CONFIG_FILE}" "${TARGET_INPUT}" "${TARGET_OUTPUT}") - target_sources(${IMPL_TARGET} PRIVATE ${HEADER_ONLY_LIKE}) - - set_source_files_properties(${HEADER_ONLY_LIKE} PROPERTIES - HEADER_FILE_ONLY ON - VS_TOOL_OVERRIDE None - ) - - set_source_files_properties("${TARGET_OUTPUT}" PROPERTIES - NBL_SPIRV_REGISTERED_INPUT "${TARGET_INPUT}" - NBL_SPIRV_PERMUTATION_CONFIG "${CONFIG_FILE}" - NBL_SPIRV_BINARY_DIR "${IMPL_BINARY_DIR}" - NBL_SPIRV_ACCESS_KEY "${FINAL_KEY}" - ) - - set_property(TARGET ${IMPL_TARGET} APPEND PROPERTY NBL_SPIRV_OUTPUTS "${TARGET_OUTPUT}") + # generate keys and commands for compiling shaders + foreach(BUILD_CONFIGURATION ${CMAKE_CONFIGURATION_TYPES}) + set(TARGET_OUTPUT "${IMPL_BINARY_DIR}/${BUILD_CONFIGURATION}/${FINAL_KEY}") + + set(NBL_NSC_COMPILE_COMMAND + "$" + -Fc "${TARGET_OUTPUT}" + ${COMPILE_OPTIONS} ${REQUIRED_OPTIONS} ${IMPL_COMMON_OPTIONS} + "${CONFIG_FILE}" + ) + + add_custom_command(OUTPUT "${TARGET_OUTPUT}" + COMMAND ${NBL_NSC_COMPILE_COMMAND} + DEPENDS ${DEPENDS_ON} + COMMENT "Creating \"${TARGET_OUTPUT}\"" + VERBATIM + COMMAND_EXPAND_LISTS + ) + + set(HEADER_ONLY_LIKE "${CONFIG_FILE}" "${TARGET_INPUT}" "${TARGET_OUTPUT}") + target_sources(${IMPL_TARGET} PRIVATE ${HEADER_ONLY_LIKE}) + + set_source_files_properties(${HEADER_ONLY_LIKE} PROPERTIES + HEADER_FILE_ONLY ON + VS_TOOL_OVERRIDE None + ) + + set_source_files_properties("${TARGET_OUTPUT}" PROPERTIES + 
NBL_SPIRV_REGISTERED_INPUT "${TARGET_INPUT}" + NBL_SPIRV_PERMUTATION_CONFIG "${CONFIG_FILE}" + NBL_SPIRV_BINARY_DIR "${IMPL_BINARY_DIR}" + NBL_SPIRV_ACCESS_KEY "${FINAL_KEY}" + ) + + set_property(TARGET ${IMPL_TARGET} APPEND PROPERTY NBL_SPIRV_OUTPUTS "${TARGET_OUTPUT}") + endforeach() return() endif() diff --git a/examples_tests b/examples_tests index e1e8dd6fb0..eb7d4fe788 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit e1e8dd6fb0c46612defeea46c960a6b85f4b4155 +Subproject commit eb7d4fe788fb5e88b8b475c979586e050e202b00 From cab896a44474809f31b8711a7deec91def50b64b Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 5 Dec 2025 14:53:25 +0100 Subject: [PATCH 209/472] update examples_tests submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 57bcf32016..fa797e4e17 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 57bcf320167405e32c4ad54f9e37106c1cd3a428 +Subproject commit fa797e4e17eb02c203227c58f805896cd65997c4 From 6887419b3d5a6b95851235fdbb2a1bae9c1f335f Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Sat, 6 Dec 2025 21:03:27 +0300 Subject: [PATCH 210/472] updated examples submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index f18160276e..93861bd59f 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit f18160276e78f860f64c45111c874e3351b44ffb +Subproject commit 93861bd59f85721993472e3de67f23bec6170363 From f32ddd2c45088bd715fe411b9d0ee3f5e93654fe Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Sun, 7 Dec 2025 00:53:04 +0300 Subject: [PATCH 211/472] Update examples submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 93861bd59f..008e2ee154 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 
93861bd59f85721993472e3de67f23bec6170363 +Subproject commit 008e2ee154b6cf5ba725752a3f1b4dac5d37ff42 From 4a8f4dcf0f7defba037565da99a7999c5e757c4d Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 8 Dec 2025 16:04:44 +0700 Subject: [PATCH 212/472] quantized sequence packing data --- .../hlsl/sampling/quantized_sequence.hlsl | 166 ++++++++++++++++++ src/nbl/builtin/CMakeLists.txt | 1 + 2 files changed, 167 insertions(+) create mode 100644 include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl new file mode 100644 index 0000000000..788a38d499 --- /dev/null +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -0,0 +1,166 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_HLSL_SAMPLING_QUANTIZED_SEQUENCE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SAMPLING_QUANTIZED_SEQUENCE_INCLUDED_ + +#include "nbl/builtin/hlsl/concepts/vector.hlsl" +#include "nbl/builtin/hlsl/vector_utils/vector_traits.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace sampling +{ + +template +struct QuantizedSequence; + +// byteslog2 = 1,2; dim = 1 +template NBL_PARTIAL_REQ_TOP(BytesLog2 > 0 && BytesLog2 < 3) +struct QuantizedSequence 0 && BytesLog2 < 3) > +{ + NBL_CONSTEXPR_STATIC_INLINE uint16_t base_store_bytes = uint16_t(1u) << BytesLog2; + using base_store_type = typename unsigned_integer_of_size::type; + + base_store_type getX() { return data; } + void setX(const base_store_type value) { data = value; } + + base_store_type data; +}; + +// byteslog2 = 3,4; dim = 1 +template NBL_PARTIAL_REQ_TOP(BytesLog2 > 2 && BytesLog2 < 5) +struct QuantizedSequence 2 && BytesLog2 < 5) > +{ + NBL_CONSTEXPR_STATIC_INLINE uint16_t base_bytes_log2 = uint16_t(2u); + NBL_CONSTEXPR_STATIC_INLINE uint16_t 
base_store_bytes = uint16_t(1u) << base_bytes_log2; + using base_store_type = typename unsigned_integer_of_size::type; + NBL_CONSTEXPR_STATIC_INLINE uint16_t num_components = uint16_t(1u) << (BytesLog2 - base_bytes_log2); + using store_type = vector; + + store_type getX() { return data; } + void setX(const store_type value) { data = value; } + + store_type data; +}; + +// byteslog2 = 2,3; dim = 2 +template NBL_PARTIAL_REQ_TOP(BytesLog2 > 1 && BytesLog2 < 4) +struct QuantizedSequence 2 && BytesLog2 < 5) > +{ + NBL_CONSTEXPR_STATIC_INLINE uint16_t base_bytes_log2 = BytesLog2 - uint16_t(1u); + NBL_CONSTEXPR_STATIC_INLINE uint16_t base_store_bytes = uint16_t(1u) << base_bytes_log2; + using base_store_type = typename unsigned_integer_of_size::type; + using store_type = vector; + + base_store_type getX() { return data[0]; } + base_store_type getY() { return data[1]; } + void setX(const base_store_type value) { data[0] = value; } + void setY(const base_store_type value) { data[1] = value; } + + store_type data; +}; + +// byteslog2 = 1; dim = 2,3,4 +template NBL_PARTIAL_REQ_TOP(Dim > 1 && Dim < 5) +struct QuantizedSequence<1, Dim NBL_PARTIAL_REQ_BOT(Dim > 1 && Dim < 5) > +{ + NBL_CONSTEXPR_STATIC_INLINE uint16_t base_store_bytes = uint16_t(1u) << uint16_t(1u); + NBL_CONSTEXPR_STATIC_INLINE uint16_t store_bits = uint16_t(8u) * base_store_bytes; + NBL_CONSTEXPR_STATIC_INLINE uint16_t bits_per_component = store_bits / Dim; + NBL_CONSTEXPR_STATIC_INLINE uint16_t MASK = (uint16_t(1u) << bits_per_component) - uint16_t(1u); + using base_store_type = uint16_t; + + base_store_type getX() { return data & MASK; } + base_store_type getY() { return (data >> bits_per_component) & MASK; } + template NBL_FUNC_REQUIRES(C::value && 2 < Dim) + base_store_type getZ() { return (data >> (bits_per_component * uint16_t(2u))) & MASK; } + template NBL_FUNC_REQUIRES(C::value && 3 < Dim) + base_store_type getW() { return (data >> (bits_per_component * uint16_t(3u))) & MASK; } + + void setX(const 
base_store_type value) + { + data &= ~MASK; + data |= value & MASK; + } + void setY(const base_store_type value) + { + const uint16_t mask = MASK << bits_per_component; + data &= ~mask; + data |= (value & MASK) << bits_per_component; + } + template NBL_FUNC_REQUIRES(C::value && 2 < Dim) + void setZ(const base_store_type value) + { + const uint16_t bits = (bits_per_component * uint16_t(2u)); + const uint16_t mask = MASK << bits; + data &= ~mask; + data |= (value & MASK) << bits; + } + template NBL_FUNC_REQUIRES(C::value && 3 < Dim) + void setW(const base_store_type value) + { + const uint16_t bits = (bits_per_component * uint16_t(3u)); + const uint16_t mask = MASK << bits; + data &= ~mask; + data |= (value & MASK) << bits; + } + + base_store_type data; +}; + +// byteslog2 = 2,3; dim = 3 +template NBL_PARTIAL_REQ_TOP(BytesLog2 > 1 && BytesLog2 < 4) +struct QuantizedSequence 2 && BytesLog2 < 5) > +{ + NBL_CONSTEXPR_STATIC_INLINE uint16_t base_bytes_log2 = BytesLog2 - uint16_t(1u); + NBL_CONSTEXPR_STATIC_INLINE uint16_t base_store_bytes = uint16_t(1u) << base_bytes_log2; + NBL_CONSTEXPR_STATIC_INLINE uint16_t store_bits = uint16_t(8u) * base_store_bytes; + NBL_CONSTEXPR_STATIC_INLINE uint16_t bits_per_component = store_bits / uint16_t(3u); + NBL_CONSTEXPR_STATIC_INLINE uint16_t MASK = (uint16_t(1u) << bits_per_component) - uint16_t(1u); + using base_store_type = typename unsigned_integer_of_size::type; + using store_type = vector; + + base_store_type getX() { return data[0] & MASK; } + base_store_type getY() + { + base_store_type y = data[0] >> bits_per_component; + y |= (data[1] >> bits_per_component) << (store_bits-bits_per_component); + return y; + } + base_store_type getZ() { return data[1] & MASK; } + + void setX(base_store_type x) + { + data[0] &= ~MASK; + data[0] |= x & MASK; + } + void setY(base_store_type y) + { + const uint16_t ybits = store_bits-bits_per_component; + const uint16_t ymask = uint16_t(1u) << ybits; + data[0] &= MASK; + data[1] &= MASK; + 
data[0] |= (y & ymask) << bits_per_component; + data[1] |= (y >> (ybits) & ymask) << bits_per_component; + } + void setZ(base_store_type z) + { + data[1] &= ~MASK; + data[1] |= z & MASK; + } + + store_type data; +}; + +// not complete because we're changing the template params next commit + +} + +} +} + +#endif diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 736148fb21..30c8cdd8df 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -256,6 +256,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/aabb.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/basic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/linear.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/bilinear.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/quantized_sequence.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/concentric_mapping.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/box_muller_transform.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/spherical_triangle.hlsl") From 1a32ed257d717b57ba4a51b1a7f529a21c3cec1c Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 8 Dec 2025 20:58:54 +0700 Subject: [PATCH 213/472] Remove NBL_ASSERT usage --- include/nbl/builtin/hlsl/morton.hlsl | 1 - 1 file changed, 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 08b2b1ccfb..4e90fd4c91 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -361,7 +361,6 @@ struct code create(NBL_CONST_REF_ARG(vector) cartesian) { this_t retVal; - NBL_ASSERT((impl::verifyAnyBitIntegralVec(cartesian))); assert((impl::verifyAnyBitIntegralVec(cartesian))); using decode_t = typename transcoder_t::decode_t; retVal.value = transcoder_t::encode(_static_cast(cartesian)); From 183205914eaed37bacb146ee7c0d987ac09265c1 Mon Sep 17 00:00:00 
2001 From: Karim Mohamed Date: Tue, 9 Dec 2025 00:21:01 +0300 Subject: [PATCH 214/472] update examples submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 008e2ee154..91ae8657de 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 008e2ee154b6cf5ba725752a3f1b4dac5d37ff42 +Subproject commit 91ae8657dee9b4de82c81b97b23b83d3824a6011 From b79bf8f7f44b913766a4fedaf2b887912d766e7a Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Tue, 9 Dec 2025 00:30:59 +0300 Subject: [PATCH 215/472] update examples submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 91ae8657de..0124cc9c0a 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 91ae8657dee9b4de82c81b97b23b83d3824a6011 +Subproject commit 0124cc9c0ad83d4a38f1e8ac3ddcdf56125740ac From 49a017afca6718faac8b4bc08e55fe2d473f2d43 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Tue, 9 Dec 2025 00:45:05 +0300 Subject: [PATCH 216/472] update examples submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 0124cc9c0a..a35eddd1bd 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 0124cc9c0ad83d4a38f1e8ac3ddcdf56125740ac +Subproject commit a35eddd1bd83fbf636e820b59c6eef939ed09668 From b8688bef70d2316a982a5caafa6ab065e7430cd6 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 9 Dec 2025 15:08:09 +0700 Subject: [PATCH 217/472] templated quantized sequence --- .../hlsl/sampling/quantized_sequence.hlsl | 295 ++++++++++++------ 1 file changed, 199 insertions(+), 96 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index 788a38d499..5738dfec8c 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ 
b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -15,31 +15,16 @@ namespace hlsl namespace sampling { -template +template struct QuantizedSequence; -// byteslog2 = 1,2; dim = 1 -template NBL_PARTIAL_REQ_TOP(BytesLog2 > 0 && BytesLog2 < 3) -struct QuantizedSequence 0 && BytesLog2 < 3) > -{ - NBL_CONSTEXPR_STATIC_INLINE uint16_t base_store_bytes = uint16_t(1u) << BytesLog2; - using base_store_type = typename unsigned_integer_of_size::type; - - base_store_type getX() { return data; } - void setX(const base_store_type value) { data = value; } - - base_store_type data; -}; +#define SEQUENCE_SPECIALIZATION_CONCEPT concepts::UnsignedIntegral::scalar_type> && size_of_v::scalar_type> <= 4 -// byteslog2 = 3,4; dim = 1 -template NBL_PARTIAL_REQ_TOP(BytesLog2 > 2 && BytesLog2 < 5) -struct QuantizedSequence 2 && BytesLog2 < 5) > +// all Dim=1 +template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT) +struct QuantizedSequence { - NBL_CONSTEXPR_STATIC_INLINE uint16_t base_bytes_log2 = uint16_t(2u); - NBL_CONSTEXPR_STATIC_INLINE uint16_t base_store_bytes = uint16_t(1u) << base_bytes_log2; - using base_store_type = typename unsigned_integer_of_size::type; - NBL_CONSTEXPR_STATIC_INLINE uint16_t num_components = uint16_t(1u) << (BytesLog2 - base_bytes_log2); - using store_type = vector; + using store_type = T; store_type getX() { return data; } void setX(const store_type value) { data = value; } @@ -47,116 +32,234 @@ struct QuantizedSequence 2 && Bytes store_type data; }; -// byteslog2 = 2,3; dim = 2 -template NBL_PARTIAL_REQ_TOP(BytesLog2 > 1 && BytesLog2 < 4) -struct QuantizedSequence 2 && BytesLog2 < 5) > +// uint16_t, uint32_t; Dim=2,3,4 +template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 1 && Dim > 1 && Dim < 5) +struct QuantizedSequence::Dimension == 1 && Dim > 1 && Dim < 5) > { - NBL_CONSTEXPR_STATIC_INLINE uint16_t base_bytes_log2 = BytesLog2 - uint16_t(1u); - NBL_CONSTEXPR_STATIC_INLINE uint16_t base_store_bytes = 
uint16_t(1u) << base_bytes_log2; - using base_store_type = typename unsigned_integer_of_size::type; - using store_type = vector; + using store_type = T; + NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; + NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); - base_store_type getX() { return data[0]; } - base_store_type getY() { return data[1]; } - void setX(const base_store_type value) { data[0] = value; } - void setY(const base_store_type value) { data[1] = value; } + store_type getX() { return data & Mask; } + store_type getY() { return (data >> (BitsPerComponent * uint16_t(1u))) & Mask; } + template NBL_FUNC_REQUIRES(C::value && 2 < Dim) + store_type getZ() { return (data >> (BitsPerComponent * uint16_t(2u))) & Mask; } + template NBL_FUNC_REQUIRES(C::value && 3 < Dim) + store_type getW() { return (data >> (BitsPerComponent * uint16_t(3u))) & Mask; } + + void setX(const store_type value) + { + data &= ~Mask; + data |= value & Mask; + } + void setY(const store_type value) + { + data &= ~(Mask << BitsPerComponent); + data |= (value & Mask) << BitsPerComponent; + } + template NBL_FUNC_REQUIRES(C::value && 2 < Dim) + void setZ(const store_type value) + { + const uint16_t bits = (BitsPerComponent * uint16_t(2u)); + data &= ~(Mask << bits); + data |= (value & Mask) << bits; + } + template NBL_FUNC_REQUIRES(C::value && 3 < Dim) + void setW(const store_type value) + { + const uint16_t bits = (BitsPerComponent * uint16_t(3u)); + data &= ~(Mask << bits); + data |= (value & Mask) << bits; + } store_type data; }; -// byteslog2 = 1; dim = 2,3,4 -template NBL_PARTIAL_REQ_TOP(Dim > 1 && Dim < 5) -struct QuantizedSequence<1, Dim NBL_PARTIAL_REQ_BOT(Dim > 1 && Dim < 5) > +// Dim 2,3,4 matches vector dim +template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == Dim && Dim > 1 && Dim < 5) +struct 
QuantizedSequence::Dimension == Dim && Dim > 1 && Dim < 5) > { - NBL_CONSTEXPR_STATIC_INLINE uint16_t base_store_bytes = uint16_t(1u) << uint16_t(1u); - NBL_CONSTEXPR_STATIC_INLINE uint16_t store_bits = uint16_t(8u) * base_store_bytes; - NBL_CONSTEXPR_STATIC_INLINE uint16_t bits_per_component = store_bits / Dim; - NBL_CONSTEXPR_STATIC_INLINE uint16_t MASK = (uint16_t(1u) << bits_per_component) - uint16_t(1u); - using base_store_type = uint16_t; - - base_store_type getX() { return data & MASK; } - base_store_type getY() { return (data >> bits_per_component) & MASK; } + using store_type = T; + using scalar_type = typename vector_traits::scalar_type; + + scalar_type getX() { return data[0]; } + scalar_type getY() { return data[1]; } + template NBL_FUNC_REQUIRES(C::value && 2 < Dim) + scalar_type getZ() { return data[2]; } + template NBL_FUNC_REQUIRES(C::value && 3 < Dim) + scalar_type getW() { return data[3]; } + + void setX(const scalar_type value) { data[0] = value; } + void setY(const scalar_type value) { data[1] = value; } template NBL_FUNC_REQUIRES(C::value && 2 < Dim) - base_store_type getZ() { return (data >> (bits_per_component * uint16_t(2u))) & MASK; } + void setZ(const scalar_type value) { data[2] = value; } template NBL_FUNC_REQUIRES(C::value && 3 < Dim) - base_store_type getW() { return (data >> (bits_per_component * uint16_t(3u))) & MASK; } + void setW(const scalar_type value) { data[3] = value; } + + store_type data; +}; - void setX(const base_store_type value) +// uint16_t2, uint32_t2; Dim=3 +template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 2 && Dim == 3) +struct QuantizedSequence::Dimension == 2 && Dim == 3) > +{ + using store_type = T; + using scalar_type = typename vector_traits::scalar_type; + NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; + NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << BitsPerComponent) 
- uint16_t(1u); + + scalar_type getX() { return data[0] & Mask; } + scalar_type getY() { - data &= ~MASK; - data |= value & MASK; + scalar_type y = data[0] >> BitsPerComponent; + y |= (data[1] >> BitsPerComponent) << (StoreBits-BitsPerComponent); + return y; } - void setY(const base_store_type value) + scalar_type getZ() { return data[1] & Mask; } + + void setX(const scalar_type value) { - const uint16_t mask = MASK << bits_per_component; - data &= ~mask; - data |= (value & MASK) << bits_per_component; + data[0] &= ~Mask; + data[0] |= value & Mask; } - template NBL_FUNC_REQUIRES(C::value && 2 < Dim) - void setZ(const base_store_type value) + void setY(const scalar_type value) { - const uint16_t bits = (bits_per_component * uint16_t(2u)); - const uint16_t mask = MASK << bits; - data &= ~mask; - data |= (value & MASK) << bits; + const uint16_t ybits = StoreBits-BitsPerComponent; + const uint16_t ymask = uint16_t(1u) << ybits; + data[0] &= Mask; + data[1] &= Mask; + data[0] |= (value & ymask) << BitsPerComponent; + data[1] |= (value >> (ybits) & ymask) << BitsPerComponent; } - template NBL_FUNC_REQUIRES(C::value && 3 < Dim) - void setW(const base_store_type value) + void setZ(const scalar_type value) { - const uint16_t bits = (bits_per_component * uint16_t(3u)); - const uint16_t mask = MASK << bits; - data &= ~mask; - data |= (value & MASK) << bits; + data[1] &= ~Mask; + data[1] |= value & Mask; } - base_store_type data; + store_type data; +}; + +// uint16_t2, uint32_t2; Dim=4 +template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 2 && Dim == 4) +struct QuantizedSequence::Dimension == 2 && Dim == 4) > +{ + using store_type = T; + using scalar_type = typename vector_traits::scalar_type; + NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; + NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); + + scalar_type 
getX() { return data[0] & Mask; } + scalar_type getY() { return data[0] >> BitsPerComponent; } + scalar_type getZ() { return data[1] & Mask; } + scalar_type getW() { return data[1] >> BitsPerComponent; } + + void setX(const scalar_type value) + { + data[0] &= ~Mask; + data[0] |= value & Mask; + } + void setY(const scalar_type value) + { + data[0] &= Mask; + data[0] |= (value & Mask) << BitsPerComponent; + } + void setZ(const scalar_type value) + { + data[1] &= ~Mask; + data[1] |= value & Mask; + } + void setW(const scalar_type value) + { + data[1] &= Mask; + data[1] |= (value & Mask) << BitsPerComponent; + } + + store_type data; +}; + +// uint16_t4, uint32_t4; Dim=2 +template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 4 && Dim == 2) +struct QuantizedSequence::Dimension == 4 && Dim == 2) > +{ + using store_type = T; + using scalar_type = typename vector_traits::scalar_type; + using base_type = vector; + NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; + NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); + + base_type getX() { return data.xy; } + base_type getY() { return data.zw; } + + void setX(const base_type value) { data.xy = value; } + void setY(const base_type value) { data.zw = value; } + + store_type data; }; -// byteslog2 = 2,3; dim = 3 -template NBL_PARTIAL_REQ_TOP(BytesLog2 > 1 && BytesLog2 < 4) -struct QuantizedSequence 2 && BytesLog2 < 5) > +// uint16_t4, uint32_t4; Dim=3 +// uint16_t4 --> returns uint16_t2 - 21 bits per component: 16 in x, 5 in y +// uint16_t4 --> returns uint32_t2 - 42 bits per component: 32 in x, 10 in y +template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 4 && Dim == 3) +struct QuantizedSequence::Dimension == 4 && Dim == 3) > { - NBL_CONSTEXPR_STATIC_INLINE uint16_t base_bytes_log2 = BytesLog2 - uint16_t(1u); - 
NBL_CONSTEXPR_STATIC_INLINE uint16_t base_store_bytes = uint16_t(1u) << base_bytes_log2; - NBL_CONSTEXPR_STATIC_INLINE uint16_t store_bits = uint16_t(8u) * base_store_bytes; - NBL_CONSTEXPR_STATIC_INLINE uint16_t bits_per_component = store_bits / uint16_t(3u); - NBL_CONSTEXPR_STATIC_INLINE uint16_t MASK = (uint16_t(1u) << bits_per_component) - uint16_t(1u); - using base_store_type = typename unsigned_integer_of_size::type; - using store_type = vector; - - base_store_type getX() { return data[0] & MASK; } - base_store_type getY() - { - base_store_type y = data[0] >> bits_per_component; - y |= (data[1] >> bits_per_component) << (store_bits-bits_per_component); + using store_type = T; + using scalar_type = typename vector_traits::scalar_type; + using base_type = vector; + NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; + NBL_CONSTEXPR_STATIC_INLINE uint16_t LeftoverBitsPerComponent = BitsPerComponent - size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << LeftoverBitsPerComponent) - uint16_t(1u); + + base_type getX() + { + base_type x; + x[0] = data[0]; + x[1] = data[3] & Mask; + return x; + } + base_type getY() + { + base_type y; + y[0] = data[1]; + y[1] = (data[3] >> LeftoverBitsPerComponent) & Mask; return y; } - base_store_type getZ() { return data[1] & MASK; } + base_type getZ() + { + base_type z; + z[0] = data[1]; + z[1] = (data[3] >> (LeftoverBitsPerComponent * uint16_t(2u))) & Mask; + return z; + } - void setX(base_store_type x) + void setX(const base_type value) { - data[0] &= ~MASK; - data[0] |= x & MASK; + data[0] = value[0]; + data[3] &= ~Mask; + data[3] |= value[1] & Mask; } - void setY(base_store_type y) + void setY(const base_type value) { - const uint16_t ybits = store_bits-bits_per_component; - const uint16_t ymask = uint16_t(1u) << ybits; - data[0] &= MASK; - data[1] &= MASK; - data[0] |= (y & ymask) << bits_per_component; - data[1] |= (y >> 
(ybits) & ymask) << bits_per_component; + data[1] = value[0]; + data[3] &= ~Mask; + data[3] |= (value[1] & Mask) << LeftoverBitsPerComponent; } - void setZ(base_store_type z) + void setZ(const base_type value) { - data[1] &= ~MASK; - data[1] |= z & MASK; + data[2] = value[0]; + data[3] &= ~Mask; + data[3] |= (value[1] & Mask) << (LeftoverBitsPerComponent * uint16_t(2u)); } store_type data; }; -// not complete because we're changing the template params next commit +#undef SEQUENCE_SPECIALIZATION_CONCEPT } From 38bbf049215dc1c0d801c901de878bff9185ca19 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 9 Dec 2025 15:29:08 +0700 Subject: [PATCH 218/472] latest example --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index c0eda4b4ab..c1c71ee83e 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit c0eda4b4ab50f8a7ad56bb32c98088d59c711b46 +Subproject commit c1c71ee83e9b017d2389022c5a6ecaf305f80bfd From fae7a80c24db5e281fe6a1929f356622ddb527d4 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 9 Dec 2025 16:12:58 +0700 Subject: [PATCH 219/472] quantized sequence decode --- examples_tests | 2 +- .../hlsl/sampling/quantized_sequence.hlsl | 83 +++++++++++++++++++ 2 files changed, 84 insertions(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index c1c71ee83e..fb24a25a44 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit c1c71ee83e9b017d2389022c5a6ecaf305f80bfd +Subproject commit fb24a25a44b85a9cee830a3cafd86894ca137453 diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index 5738dfec8c..fcb2488514 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -18,6 +18,89 @@ namespace sampling template struct QuantizedSequence; + +namespace impl +{ +template +struct decode_helper; + 
+template +struct decode_helper +{ + using scalar_type = typename vector_traits::scalar_type; + using fp_type = typename float_of_size::type; + using return_type = vector; + + static return_type __call(NBL_CONST_REF_ARG(QuantizedSequence) val, const scalar_type scrambleKey) + { + scalar_type seqVal = val.getX(); + seqVal ^= scrambleKey; + return hlsl::promote(seqVal) * bit_cast(0x2f800004u); + } +}; +template +struct decode_helper +{ + using scalar_type = typename vector_traits::scalar_type; + using fp_type = typename float_of_size::type; + using uvec_type = vector; + using return_type = vector; + + static return_type __call(NBL_CONST_REF_ARG(QuantizedSequence) val, const uvec_type scrambleKey) + { + uvec_type seqVal; + seqVal[0] = val.getX(); + seqVal[1] = val.getY(); + seqVal ^= scrambleKey; + return return_type(seqVal) * bit_cast(0x2f800004u); + } +}; +template +struct decode_helper +{ + using scalar_type = typename vector_traits::scalar_type; + using fp_type = typename float_of_size::type; + using uvec_type = vector; + using return_type = vector; + + static return_type __call(NBL_CONST_REF_ARG(QuantizedSequence) val, const uvec_type scrambleKey) + { + uvec_type seqVal; + seqVal[0] = val.getX(); + seqVal[1] = val.getY(); + seqVal[2] = val.getZ(); + seqVal ^= scrambleKey; + return return_type(seqVal) * bit_cast(0x2f800004u); + } +}; +template +struct decode_helper +{ + using scalar_type = typename vector_traits::scalar_type; + using fp_type = typename float_of_size::type; + using uvec_type = vector; + using return_type = vector; + + static return_type __call(NBL_CONST_REF_ARG(QuantizedSequence) val, const uvec_type scrambleKey) + { + uvec_type seqVal; + seqVal[0] = val.getX(); + seqVal[1] = val.getY(); + seqVal[2] = val.getZ(); + seqVal[3] = val.getW(); + seqVal ^= scrambleKey; + return return_type(seqVal) * bit_cast(0x2f800004u); + } +}; +} + +template +vector::scalar_type)>::type, D> decode(NBL_CONST_REF_ARG(QuantizedSequence) val, const vector::scalar_type, D> 
scrambleKey) +{ + return impl::decode_helper::__call(val, scrambleKey); +} + + #define SEQUENCE_SPECIALIZATION_CONCEPT concepts::UnsignedIntegral::scalar_type> && size_of_v::scalar_type> <= 4 // all Dim=1 From 3f43fa6a71deec15d9750fd4f2ac4d0465d33232 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Tue, 9 Dec 2025 16:22:16 +0100 Subject: [PATCH 220/472] Corrections --- include/ICameraSceneNode.h | 13 +- .../nbl/builtin/hlsl/camera/view_matrix.hlsl | 51 ----- .../nbl/builtin/hlsl/cpp_compat/unroll.hlsl | 12 -- include/nbl/builtin/hlsl/macros.h | 2 + .../transformation_matrix_utils.hlsl | 127 +++++++++++ .../builtin/hlsl/math/linalg/transform.hlsl | 50 +++++ .../hlsl/math/quaternion/quaternion_impl.hlsl | 25 --- .../hlsl/math/thin_lens_projection.hlsl | 85 ++++++++ .../hlsl/matrix_utils/matrix_traits.hlsl | 42 +--- .../transformation_matrix_utils.hlsl | 197 ------------------ .../builtin/hlsl/projection/projection.hlsl | 1 - .../hlsl/vector_utils/vector_utils.hlsl | 21 -- include/nbl/core/math/plane3dSIMD.h | 11 +- src/nbl/builtin/CMakeLists.txt | 6 +- 14 files changed, 284 insertions(+), 359 deletions(-) delete mode 100644 include/nbl/builtin/hlsl/camera/view_matrix.hlsl delete mode 100644 include/nbl/builtin/hlsl/cpp_compat/unroll.hlsl create mode 100644 include/nbl/builtin/hlsl/math/linalg/matrix_utils/transformation_matrix_utils.hlsl delete mode 100644 include/nbl/builtin/hlsl/math/quaternion/quaternion_impl.hlsl create mode 100644 include/nbl/builtin/hlsl/math/thin_lens_projection.hlsl delete mode 100644 include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl delete mode 100644 include/nbl/builtin/hlsl/vector_utils/vector_utils.hlsl diff --git a/include/ICameraSceneNode.h b/include/ICameraSceneNode.h index 577b6d0fb6..e3975e3802 100644 --- a/include/ICameraSceneNode.h +++ b/include/ICameraSceneNode.h @@ -6,9 +6,6 @@ #ifndef __NBL_I_CAMERA_SCENE_NODE_H_INCLUDED__ #define __NBL_I_CAMERA_SCENE_NODE_H_INCLUDED__ -#include -#include - #include 
"ISceneNode.h" #include "matrixutil.h" @@ -49,17 +46,17 @@ class ICameraSceneNode : public ISceneNode The function will figure it out if you've set an orthogonal matrix. \param projection The new projection matrix of the camera. */ - virtual void setProjectionMatrix(const hlsl::float32_t4x4& projection) =0; + virtual void setProjectionMatrix(const core::matrix4SIMD& projection) =0; //! Gets the current projection matrix of the camera. /** \return The current projection matrix of the camera. */ - inline const hlsl::float32_t4x4& getProjectionMatrix() const { return projMatrix; } + inline const core::matrix4SIMD& getProjectionMatrix() const { return projMatrix; } //! Gets the current view matrix of the camera. /** \return The current view matrix of the camera. */ - virtual const hlsl::float32_t3x4& getViewMatrix() const =0; + virtual const core::matrix3x4SIMD& getViewMatrix() const =0; - virtual const hlsl::float32_t4x4& getConcatenatedMatrix() const =0; + virtual const core::matrix4SIMD& getConcatenatedMatrix() const =0; #if 0 //! It is possible to send mouse and key events to the camera. /** Most cameras may ignore this input, but camera scene nodes @@ -201,7 +198,7 @@ class ICameraSceneNode : public ISceneNode float ZFar; // Z-value of the far view-plane. 
// actual projection matrix used - hlsl::float32_t4x4 projMatrix; + core::matrix4SIMD projMatrix; bool leftHanded; }; diff --git a/include/nbl/builtin/hlsl/camera/view_matrix.hlsl b/include/nbl/builtin/hlsl/camera/view_matrix.hlsl deleted file mode 100644 index 7752d9b6eb..0000000000 --- a/include/nbl/builtin/hlsl/camera/view_matrix.hlsl +++ /dev/null @@ -1,51 +0,0 @@ -#ifndef _NBL_BUILTIN_HLSL_CAMERA_VIEW_MATRIX_INCLUDED_ -#define _NBL_BUILTIN_HLSL_CAMERA_VIEW_MATRIX_INCLUDED_ - -#include - -namespace nbl -{ -namespace hlsl -{ - -// /Arek: glm:: for normalize till dot product is fixed (ambiguity with glm namespace + linker issues) -template -inline matrix buildCameraLookAtMatrixLH( - const vector& position, - const vector& target, - const vector& upVector) -{ - const vector zaxis = hlsl::normalize(target - position); - const vector xaxis = hlsl::normalize(hlsl::cross(upVector, zaxis)); - const vector yaxis = hlsl::cross(zaxis, xaxis); - - matrix r; - r[0] = vector(xaxis, -hlsl::dot(xaxis, position)); - r[1] = vector(yaxis, -hlsl::dot(yaxis, position)); - r[2] = vector(zaxis, -hlsl::dot(zaxis, position)); - - return r; -} - -template -inline matrix buildCameraLookAtMatrixRH( - const vector& position, - const vector& target, - const vector& upVector) -{ - const vector zaxis = hlsl::normalize(position - target); - const vector xaxis = hlsl::normalize(hlsl::cross(upVector, zaxis)); - const vector yaxis = hlsl::cross(zaxis, xaxis); - - matrix r; - r[0] = vector(xaxis, -hlsl::dot(xaxis, position)); - r[1] = vector(yaxis, -hlsl::dot(yaxis, position)); - r[2] = vector(zaxis, -hlsl::dot(zaxis, position)); - - return r; -} - -} -} - -#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/cpp_compat/unroll.hlsl b/include/nbl/builtin/hlsl/cpp_compat/unroll.hlsl deleted file mode 100644 index 36bcd944c6..0000000000 --- a/include/nbl/builtin/hlsl/cpp_compat/unroll.hlsl +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _NBL_BUILTIN_HLSL_CPP_COMPAT_UNROLL_INCLUDED_ 
-#define _NBL_BUILTIN_HLSL_CPP_COMPAT_UNROLL_INCLUDED_ - -#ifdef __HLSL_VERSION -#define NBL_UNROLL [unroll] -#define NBL_UNROLL_LIMITED(LIMIT) [unroll(LIMIT)] -#else -#define NBL_UNROLL // can't be bothered / TODO -#define NBL_UNROLL_LIMITED(LIMIT) -#endif - -#endif diff --git a/include/nbl/builtin/hlsl/macros.h b/include/nbl/builtin/hlsl/macros.h index 944f06cdc9..70838c93d8 100644 --- a/include/nbl/builtin/hlsl/macros.h +++ b/include/nbl/builtin/hlsl/macros.h @@ -36,8 +36,10 @@ inline auto functionAlias(Args&&... args) -> decltype(origFunctionName(std::forw #ifdef __HLSL_VERSION #define NBL_UNROLL [[unroll]] +#define NBL_UNROLL_LIMITED(LIMIT) [unroll(LIMIT)] #else #define NBL_UNROLL +#define NBL_UNROLL_LIMITED(LIMIT) #endif #ifdef __HLSL_VERSION // cause DXC is insane diff --git a/include/nbl/builtin/hlsl/math/linalg/matrix_utils/transformation_matrix_utils.hlsl b/include/nbl/builtin/hlsl/math/linalg/matrix_utils/transformation_matrix_utils.hlsl new file mode 100644 index 0000000000..6ae938865c --- /dev/null +++ b/include/nbl/builtin/hlsl/math/linalg/matrix_utils/transformation_matrix_utils.hlsl @@ -0,0 +1,127 @@ +#ifndef _NBL_BUILTIN_HLSL_MATH_LINALG_MATRIX_UTILS_TRANSFORMATION_MATRIX_UTILS_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MATH_LINALG_MATRIX_UTILS_TRANSFORMATION_MATRIX_UTILS_INCLUDED_ +#include +// TODO: remove this header when deleting vectorSIMDf.hlsl +#ifndef __HLSL_VERSION +#include +#include "vectorSIMD.h" +#endif +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace math +{ +namespace linalg +{ + +template +MatT diagonal(typename matrix_traits::scalar_type diagonal = 1) +{ + MatT output; + output[0][1] = 124; + using RowT = matrix_traits::row_type; + + NBL_UNROLL for (uint32_t i = 0; i < matrix_traits::RowCount; ++i) + { + output[i] = promote(0.0); + if (matrix_traits::ColumnCount > i) + output[i][i] = diagonal; + } + + return output; +} + +template +MatT identity() +{ + // TODO + // static_assert(MatT::Square); + return diagonal(1); 
+} + +template +inline matrix extractSub3x4From4x4Matrix(NBL_CONST_REF_ARG(matrix) mat) +{ + matrix output; + for (int i = 0; i < 3; ++i) + output[i] = mat[i]; + + return output; +} + +template +inline matrix getSub3x3(NBL_CONST_REF_ARG(matrix) mat) +{ + return matrix(mat); +} + +//! Replaces curent rocation and scale by rotation represented by quaternion `quat`, leaves 4th row and 4th colum unchanged +template +inline void setRotation(NBL_REF_ARG(matrix) outMat, NBL_CONST_REF_ARG(nbl::hlsl::quaternion) quat) +{ + // TODO + //static_assert(N == 3 || N == 4); + + outMat[0] = vector( + 1 - 2 * (quat.data.y * quat.data.y + quat.data.z * quat.data.z), + 2 * (quat.data.x * quat.data.y - quat.data.z * quat.data.w), + 2 * (quat.data.x * quat.data.z + quat.data.y * quat.data.w), + + outMat[0][3] + ); + + outMat[1] = vector( + 2 * (quat.data.x * quat.data.y + quat.data.z * quat.data.w), + 1 - 2 * (quat.data.x * quat.data.x + quat.data.z * quat.data.z), + 2 * (quat.data.y * quat.data.z - quat.data.x * quat.data.w), + outMat[1][3] + ); + + outMat[2] = vector( + 2 * (quat.data.x * quat.data.z - quat.data.y * quat.data.w), + 2 * (quat.data.y * quat.data.z + quat.data.x * quat.data.w), + 1 - 2 * (quat.data.x * quat.data.x + quat.data.y * quat.data.y), + outMat[2][3] + ); +} + +} +} + +namespace impl +{ + /** + * @brief Enables type-safe casting between matrices of identical dimensions + * but different scalar types. 
+ */ + template + struct static_cast_helper, matrix, void> + { + using To = matrix; + using From = matrix; + + static inline To cast(From mat) + { + To retval; + + NBL_UNROLL for (int i = 0; i < N; ++i) + { + NBL_UNROLL for (int j = 0; j < M; ++j) + { + retval[i][j] = hlsl::_static_cast(mat[i][j]); + } + } + + return retval; + } + }; +} + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/math/linalg/transform.hlsl b/include/nbl/builtin/hlsl/math/linalg/transform.hlsl index 59ff142150..236c81a8b1 100644 --- a/include/nbl/builtin/hlsl/math/linalg/transform.hlsl +++ b/include/nbl/builtin/hlsl/math/linalg/transform.hlsl @@ -94,6 +94,56 @@ matrix promote_affine(const matrix inMatrix) return retval; } +// /Arek: glm:: for normalize till dot product is fixed (ambiguity with glm namespace + linker issues) +template +inline matrix lhLookAt( + const vector& position, + const vector& target, + const vector& upVector) +{ + const vector zaxis = hlsl::normalize(target - position); + const vector xaxis = hlsl::normalize(hlsl::cross(upVector, zaxis)); + const vector yaxis = hlsl::cross(zaxis, xaxis); + + matrix r; + r[0] = vector(xaxis, -hlsl::dot(xaxis, position)); + r[1] = vector(yaxis, -hlsl::dot(yaxis, position)); + r[2] = vector(zaxis, -hlsl::dot(zaxis, position)); + + return r; +} + +template +inline matrix rhLookAt( + const vector& position, + const vector& target, + const vector& upVector) +{ + const vector zaxis = hlsl::normalize(position - target); + const vector xaxis = hlsl::normalize(hlsl::cross(upVector, zaxis)); + const vector yaxis = hlsl::cross(zaxis, xaxis); + + matrix r; + r[0] = vector(xaxis, -hlsl::dot(xaxis, position)); + r[1] = vector(yaxis, -hlsl::dot(yaxis, position)); + r[2] = vector(zaxis, -hlsl::dot(zaxis, position)); + + return r; +} + +template +inline void setTranslation(NBL_REF_ARG(matrix) outMat, NBL_CONST_REF_ARG(vector) translation) +{ + // TODO: not sure if it will be compatible with hlsl + static_assert(M > 0 && 
N > 0); + static_assert(M >= VecN); + + NBL_CONSTEXPR int16_t indexOfTheLastRowComponent = M - 1; + + for(int i = 0; i < VecN; ++i) + outMat[i][indexOfTheLastRowComponent] = translation[i]; +} + } } } diff --git a/include/nbl/builtin/hlsl/math/quaternion/quaternion_impl.hlsl b/include/nbl/builtin/hlsl/math/quaternion/quaternion_impl.hlsl deleted file mode 100644 index d00d9ce2c4..0000000000 --- a/include/nbl/builtin/hlsl/math/quaternion/quaternion_impl.hlsl +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2019 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" -// For conditions of distribution and use, see copyright notice in nabla.h -// See the original file in irrlicht source for authors - -#ifndef _NBL_BUILTIN_HLSL_MATH_QUATERNION_IMPL_INCLUDED_ -#define _NBL_BUILTIN_HLSL_MATH_QUATERNION_IMPL_INCLUDED_ - -#include - -namespace nbl -{ -namespace hlsl -{ - -namespace quaternion_impl -{ - -} - -} // end namespace core -} // nbl - -#endif - diff --git a/include/nbl/builtin/hlsl/math/thin_lens_projection.hlsl b/include/nbl/builtin/hlsl/math/thin_lens_projection.hlsl new file mode 100644 index 0000000000..ca43dcc0ba --- /dev/null +++ b/include/nbl/builtin/hlsl/math/thin_lens_projection.hlsl @@ -0,0 +1,85 @@ +#ifndef _NBL_BUILTIN_HLSL_MATH_THIN_LENS_PROJECTION_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MATH_THIN_LENS_PROJECTION_INCLUDED_ + +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace thin_lens +{ + +template) +inline matrix rhPerspectiveFovMatrix(FloatingPoint fieldOfViewRadians, FloatingPoint aspectRatio, FloatingPoint zNear, FloatingPoint zFar) +{ + const FloatingPoint h = core::reciprocal(tan(fieldOfViewRadians * 0.5f)); + _NBL_DEBUG_BREAK_IF(aspectRatio == 0.f); //division by zero + const float w = h / aspectRatio; + + _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero + + matrix m; + m[0] = vector(w, 0.f, 0.f, 0.f); + m[1] = vector(0.f, -h, 0.f, 0.f); + m[2] = 
vector(0.f, 0.f, -zFar / (zFar - zNear), -zNear * zFar / (zFar - zNear)); + m[3] = vector(0.f, 0.f, -1.f, 0.f); + + return m; +} +template) +inline matrix lhPerspectiveFovMatrix(FloatingPoint fieldOfViewRadians, FloatingPoint aspectRatio, FloatingPoint zNear, FloatingPoint zFar) +{ + const FloatingPoint h = core::reciprocal(tan(fieldOfViewRadians * 0.5f)); + _NBL_DEBUG_BREAK_IF(aspectRatio == 0.f); //division by zero + const float w = h / aspectRatio; + + _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero + + matrix m; + m[0] = vector(w, 0.f, 0.f, 0.f); + m[1] = vector(0.f, -h, 0.f, 0.f); + m[2] = vector(0.f, 0.f, zFar / (zFar - zNear), -zNear * zFar / (zFar - zNear)); + m[3] = vector(0.f, 0.f, 1.f, 0.f); + + return m; +} + +template) +inline matrix rhProjectionOrthoMatrix(FloatingPoint widthOfViewVolume, FloatingPoint heightOfViewVolume, FloatingPoint zNear, FloatingPoint zFar) +{ + _NBL_DEBUG_BREAK_IF(widthOfViewVolume == 0.f); //division by zero + _NBL_DEBUG_BREAK_IF(heightOfViewVolume == 0.f); //division by zero + _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero + + matrix m; + m[0] = vector(2.f / widthOfViewVolume, 0.f, 0.f, 0.f); + m[1] = vector(0.f, -2.f / heightOfViewVolume, 0.f, 0.f); + m[2] = vector(0.f, 0.f, -1.f / (zFar - zNear), -zNear / (zFar - zNear)); + m[3] = vector(0.f, 0.f, 0.f, 1.f); + + return m; +} + +template) +inline matrix lhProjectionOrthoMatrix(FloatingPoint widthOfViewVolume, FloatingPoint heightOfViewVolume, FloatingPoint zNear, FloatingPoint zFar) +{ + _NBL_DEBUG_BREAK_IF(widthOfViewVolume == 0.f); //division by zero + _NBL_DEBUG_BREAK_IF(heightOfViewVolume == 0.f); //division by zero + _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero + + matrix m; + m[0] = vector(2.f / widthOfViewVolume, 0.f, 0.f, 0.f); + m[1] = vector(0.f, -2.f / heightOfViewVolume, 0.f, 0.f); + m[2] = vector(0.f, 0.f, 1.f / (zFar - zNear), -zNear / (zFar - zNear)); + m[3] = vector(0.f, 0.f, 0.f, 1.f); + + return m; +} + +} +} +} + +#endif \ No 
newline at end of file diff --git a/include/nbl/builtin/hlsl/matrix_utils/matrix_traits.hlsl b/include/nbl/builtin/hlsl/matrix_utils/matrix_traits.hlsl index f9c031c8e7..f554be7abe 100644 --- a/include/nbl/builtin/hlsl/matrix_utils/matrix_traits.hlsl +++ b/include/nbl/builtin/hlsl/matrix_utils/matrix_traits.hlsl @@ -21,48 +21,18 @@ struct matrix_traits NBL_CONSTEXPR_STATIC_INLINE bool IsMatrix = false; }; -// i choose to implement it this way because of this DXC bug: https://github.com/microsoft/DirectXShaderCompiler/issues/7007 -#define DEFINE_MATRIX_TRAITS_TEMPLATE_SPECIALIZATION(ROW_COUNT, COLUMN_COUNT) \ -template \ -struct matrix_traits > \ -{ \ - using scalar_type = T; \ - using row_type = vector; \ - using transposed_type = matrix; \ - NBL_CONSTEXPR_STATIC_INLINE uint32_t RowCount = ROW_COUNT; \ - NBL_CONSTEXPR_STATIC_INLINE uint32_t ColumnCount = COLUMN_COUNT; \ - NBL_CONSTEXPR_STATIC_INLINE bool Square = RowCount == ColumnCount; \ - NBL_CONSTEXPR_STATIC_INLINE bool IsMatrix = true; \ -}; - -DEFINE_MATRIX_TRAITS_TEMPLATE_SPECIALIZATION(1, 2) -DEFINE_MATRIX_TRAITS_TEMPLATE_SPECIALIZATION(1, 3) -DEFINE_MATRIX_TRAITS_TEMPLATE_SPECIALIZATION(1, 4) -DEFINE_MATRIX_TRAITS_TEMPLATE_SPECIALIZATION(2, 1) -DEFINE_MATRIX_TRAITS_TEMPLATE_SPECIALIZATION(2, 2) -DEFINE_MATRIX_TRAITS_TEMPLATE_SPECIALIZATION(2, 3) -DEFINE_MATRIX_TRAITS_TEMPLATE_SPECIALIZATION(2, 4) -DEFINE_MATRIX_TRAITS_TEMPLATE_SPECIALIZATION(3, 1) -DEFINE_MATRIX_TRAITS_TEMPLATE_SPECIALIZATION(3, 2) -DEFINE_MATRIX_TRAITS_TEMPLATE_SPECIALIZATION(3, 3) -DEFINE_MATRIX_TRAITS_TEMPLATE_SPECIALIZATION(3, 4) -DEFINE_MATRIX_TRAITS_TEMPLATE_SPECIALIZATION(4, 1) -DEFINE_MATRIX_TRAITS_TEMPLATE_SPECIALIZATION(4, 2) -DEFINE_MATRIX_TRAITS_TEMPLATE_SPECIALIZATION(4, 3) -DEFINE_MATRIX_TRAITS_TEMPLATE_SPECIALIZATION(4, 4) - -#undef DEFINE_MATRIX_TRAITS_TEMPLATE_SPECIALIZATION - // TODO: when this bug: https://github.com/microsoft/DirectXShaderCompiler/issues/7007 is fixed, uncomment and delete template specializations 
-/*template +template struct matrix_traits > { using scalar_type = T; - NBL_CONSTEXPR_STATIC_INLINE uint32_t RowCount = ROW_COUNT; - NBL_CONSTEXPR_STATIC_INLINE uint32_t ColumnCount = COLUMN_COUNT; + using row_type = vector; + using transposed_type = matrix; + NBL_CONSTEXPR_STATIC_INLINE uint32_t RowCount = N; + NBL_CONSTEXPR_STATIC_INLINE uint32_t ColumnCount = M; NBL_CONSTEXPR_STATIC_INLINE bool Square = RowCount == ColumnCount; + NBL_CONSTEXPR_STATIC_INLINE bool IsMatrix = true; }; -*/ } } diff --git a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl deleted file mode 100644 index c96a52edea..0000000000 --- a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl +++ /dev/null @@ -1,197 +0,0 @@ -#ifndef _NBL_BUILTIN_HLSL_MATRIX_UTILS_TRANSFORMATION_MATRIX_UTILS_INCLUDED_ -#define _NBL_BUILTIN_HLSL_MATRIX_UTILS_TRANSFORMATION_MATRIX_UTILS_INCLUDED_ -#include -// TODO: remove this header when deleting vectorSIMDf.hlsl -#ifndef __HLSL_VERSION -#include -#include "vectorSIMD.h" -#endif -#include -#include "nbl/builtin/hlsl/cpp_compat/unroll.hlsl" - -namespace nbl -{ -namespace hlsl -{ - -template -MatT diagonal(float diagonal = 1) -{ - MatT output; - - NBL_UNROLL_LIMITED(4) - for (uint32_t i = 0; i < matrix_traits::RowCount; ++i) - NBL_UNROLL_LIMITED(4) - for (uint32_t j = 0; j < matrix_traits::ColumnCount; ++j) - output[i][j] = 0; - - NBL_UNROLL_LIMITED(4) - for (uint32_t diag = 0; diag < matrix_traits::RowCount; ++diag) - output[diag][diag] = diagonal; - - return output; -} - -template -MatT identity() -{ - // TODO - // static_assert(MatT::Square); - return diagonal(1); -} - -template -inline matrix getMatrix3x4As4x4(NBL_CONST_REF_ARG(matrix) mat) -{ - matrix output; - for (int i = 0; i < 3; ++i) - output[i] = mat[i]; - output[3] = float32_t4(0.0f, 0.0f, 0.0f, 1.0f); - - return output; -} - -template -inline matrix 
extractSub3x4From4x4Matrix(NBL_CONST_REF_ARG(matrix) mat) -{ - matrix output; - for (int i = 0; i < 3; ++i) - output[i] = mat[i]; - - return output; -} - -template -inline matrix getSub3x3(NBL_CONST_REF_ARG(matrix) mat) -{ - return matrix(mat); -} - -template -inline matrix getAs64BitPrecisionMatrix(NBL_CONST_REF_ARG(matrix) mat) -{ - matrix output; - for (int i = 0; i < N; ++i) - output[i] = mat[i]; - - return output; -} - -namespace transformation_matrix_utils_impl -{ - // This function calculates determinant using the scalar triple product. - template - inline T determinant_helper(NBL_CONST_REF_ARG(matrix) mat, NBL_REF_ARG(vector) r1crossr2) - { - r1crossr2 = hlsl::cross(mat[1], mat[2]); - return hlsl::dot(mat[0], r1crossr2); - } -} - -//! returs adjugate of the cofactor (sub 3x3) matrix -template -inline matrix getSub3x3TransposeCofactors(NBL_CONST_REF_ARG(matrix) mat) -{ - static_assert(N >= 3 && M >= 3); - - matrix output; - vector row0 = vector(mat[0]); - vector row1 = vector(mat[1]); - vector row2 = vector(mat[2]); - output[0] = hlsl::cross(row1, row2); - output[1] = hlsl::cross(row2, row0); - output[2] = hlsl::cross(row0, row1); - - output[0] = hlsl::cross(row0, row1); - - return output; -} - -template -inline bool getSub3x3InverseTranspose(NBL_CONST_REF_ARG(matrix) matIn, NBL_CONST_REF_ARG(matrix) matOut) -{ - matrix matIn3x3 = getSub3x3(matIn); - vector r1crossr2; - T d = transformation_matrix_utils_impl::determinant_helper(matIn3x3, r1crossr2); - if (abs(d) <= FLT_MIN) - return false; - auto rcp = T(1.0f)/d; - - // matrix of cofactors * 1/det - matOut = getSub3x3TransposeCofactors(matIn3x3); - matOut[0] *= rcp; - matOut[1] *= rcp; - matOut[2] *= rcp; - - return true; -} - -// TODO: use portable_float when merged -//! 
multiplies matrices a and b, 3x4 matrices are treated as 4x4 matrices with 4th row set to (0, 0, 0 ,1) -template -inline matrix concatenateBFollowedByA(NBL_CONST_REF_ARG(matrix) a, NBL_CONST_REF_ARG(matrix) b) -{ - // TODO - // static_assert(N == 3 || N == 4); - - const matrix a4x4 = getMatrix3x4As4x4(a); - const matrix b4x4 = getMatrix3x4As4x4(b); - return matrix(mul(a4x4, b4x4)); -} - -template -inline void setScale(NBL_REF_ARG(matrix) outMat, NBL_CONST_REF_ARG(vector) scale) -{ - // TODO - // static_assert(N == 3 || N == 4); - - outMat[0][0] = scale[0]; - outMat[1][1] = scale[1]; - outMat[2][2] = scale[2]; -} - -//! Replaces curent rocation and scale by rotation represented by quaternion `quat`, leaves 4th row and 4th colum unchanged -template -inline void setRotation(NBL_REF_ARG(matrix) outMat, NBL_CONST_REF_ARG(nbl::hlsl::quaternion) quat) -{ - // TODO - //static_assert(N == 3 || N == 4); - - outMat[0] = vector( - 1 - 2 * (quat.data.y * quat.data.y + quat.data.z * quat.data.z), - 2 * (quat.data.x * quat.data.y - quat.data.z * quat.data.w), - 2 * (quat.data.x * quat.data.z + quat.data.y * quat.data.w), - - outMat[0][3] - ); - - outMat[1] = vector( - 2 * (quat.data.x * quat.data.y + quat.data.z * quat.data.w), - 1 - 2 * (quat.data.x * quat.data.x + quat.data.z * quat.data.z), - 2 * (quat.data.y * quat.data.z - quat.data.x * quat.data.w), - outMat[1][3] - ); - - outMat[2] = vector( - 2 * (quat.data.x * quat.data.z - quat.data.y * quat.data.w), - 2 * (quat.data.y * quat.data.z + quat.data.x * quat.data.w), - 1 - 2 * (quat.data.x * quat.data.x + quat.data.y * quat.data.y), - outMat[2][3] - ); -} - -template -inline void setTranslation(NBL_REF_ARG(matrix) outMat, NBL_CONST_REF_ARG(vector) translation) -{ - // TODO - // static_assert(N == 3 || N == 4); - - outMat[0].w = translation.x; - outMat[1].w = translation.y; - outMat[2].w = translation.z; -} - -} -} - -#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/projection/projection.hlsl 
b/include/nbl/builtin/hlsl/projection/projection.hlsl index 94023e2d05..58714e7dab 100644 --- a/include/nbl/builtin/hlsl/projection/projection.hlsl +++ b/include/nbl/builtin/hlsl/projection/projection.hlsl @@ -8,7 +8,6 @@ namespace nbl { namespace hlsl { -// TODO: use glm instead for c++ template) inline matrix buildProjectionMatrixPerspectiveFovRH(FloatingPoint fieldOfViewRadians, FloatingPoint aspectRatio, FloatingPoint zNear, FloatingPoint zFar) { diff --git a/include/nbl/builtin/hlsl/vector_utils/vector_utils.hlsl b/include/nbl/builtin/hlsl/vector_utils/vector_utils.hlsl deleted file mode 100644 index e1fa9dd3a0..0000000000 --- a/include/nbl/builtin/hlsl/vector_utils/vector_utils.hlsl +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef _NBL_BUILTIN_HLSL_VECTOR_UTILS_VECTOR_UTILS_INCLUDED_ -#define _NBL_BUILTIN_HLSL_VECTOR_UTILS_VECTOR_UTILS_INCLUDED_ - -#include - -namespace nbl -{ -namespace hlsl -{ - -// TODO: why cant I NBL_CONST_REF_ARG(vector) -template -inline T lengthsquared(vector vec) -{ - return dot(vec, vec); -} - -} -} - -#endif \ No newline at end of file diff --git a/include/nbl/core/math/plane3dSIMD.h b/include/nbl/core/math/plane3dSIMD.h index 23099f0d61..edad0a1287 100644 --- a/include/nbl/core/math/plane3dSIMD.h +++ b/include/nbl/core/math/plane3dSIMD.h @@ -4,7 +4,8 @@ // See the original file in irrlicht source for authors #include "vectorSIMD.h" -#include +#include +#include #ifndef __NBL_CORE_PLANE_3D_H_INCLUDED__ #define __NBL_CORE_PLANE_3D_H_INCLUDED__ @@ -102,12 +103,14 @@ class plane3dSIMDf : private vectorSIMDf //! 
static inline plane3dSIMDf transform(const plane3dSIMDf& _in, const hlsl::float32_t3x4& _mat) { - hlsl::float32_t4x4 inv = hlsl::getMatrix3x4As4x4(_mat); - hlsl::inverse(inv); + hlsl::float32_t4x4 inv = hlsl::inverse(hlsl::math::linalg::promote_affine<4, 4, 3, 4>(_mat)); vectorSIMDf normal(_in.getNormal()); // transform by inverse transpose - hlsl::float32_t4 planeEq = inv[0] * hlsl::float32_t4(normal.x) + inv[1] * hlsl::float32_t4(normal.y) + inv[2] * hlsl::float32_t4(normal.z) + (hlsl::float32_t4(0, 0, 0, normal.w)); + hlsl::float32_t4 planeEq = inv[0] * hlsl::float32_t4(normal.x, normal.x, normal.x, normal.x) + + inv[1] * hlsl::float32_t4(normal.y, normal.y, normal.y, normal.y) + + inv[2] * hlsl::float32_t4(normal.z, normal.z, normal.z, normal.z) + + (hlsl::float32_t4(0, 0, 0, normal.w)); vectorSIMDf planeEqSIMD; for (int i = 0; i < 4; ++i) planeEqSIMD[i] = planeEq[i]; diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index b0c8a14d2f..d97908e7ae 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -157,11 +157,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ieee754/impl.hlsl") # utility LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/array_accessors.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/vector_utils/vector_traits.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/vector_utils/vector_utils.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/matrix_utils/matrix_traits.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/matrix_utils/transformation_matrix_utils.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/projection/projection.hlsl") - #spirv intrinsics LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/spirv_intrinsics/core.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/spirv_intrinsics/fragment_shader_pixel_interlock.hlsl") @@ -223,6 +219,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/format.hlsl") #linear algebra 
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/fast_affine.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/transform.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/matrix_utils/transformation_matrix_utils.hlsl") # TODO: rename `equations` to `polynomials` probably LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/functions.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/geometry.hlsl") @@ -238,6 +235,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/quaternion/quaternion_im #extra math LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/quadrature/gauss_legendre/impl.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/thin_lens_projection.hlsl") #acceleration structures LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/acceleration_structures.hlsl") #colorspace From df53619f076df0c08be28fee21dce76e82f2271c Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Tue, 9 Dec 2025 16:45:22 +0100 Subject: [PATCH 221/472] Removed old quaternion code --- include/nabla.h | 1 - include/nbl/asset/IAnimationLibrary.h | 6 +- .../nbl/asset/utils/CQuantQuaternionCache.h | 2 +- .../transformation_matrix_utils.hlsl | 4 +- .../hlsl/math/quaternion/quaternion.hlsl | 97 ---- include/quaternion.h | 462 ------------------ src/nbl/builtin/CMakeLists.txt | 3 - 7 files changed, 6 insertions(+), 569 deletions(-) delete mode 100644 include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl delete mode 100644 include/quaternion.h diff --git a/include/nabla.h b/include/nabla.h index 2c63b8629c..fa231e3db7 100644 --- a/include/nabla.h +++ b/include/nabla.h @@ -54,7 +54,6 @@ #include "vectorSIMD.h" #include "line3d.h" #include "position2d.h" -#include "quaternion.h" #include "rect.h" #include "dimension2d.h" diff --git a/include/nbl/asset/IAnimationLibrary.h 
b/include/nbl/asset/IAnimationLibrary.h index d650cb25d9..3ab87e5d32 100644 --- a/include/nbl/asset/IAnimationLibrary.h +++ b/include/nbl/asset/IAnimationLibrary.h @@ -34,7 +34,7 @@ class IAnimationLibrary : public virtual core::IReferenceCounted translation[2] = translation[1] = translation[0] = 0.f; quat = core::vectorSIMDu32(128u,128u,128u,255u); // should be (0,0,0,1) encoded } - Keyframe(const core::vectorSIMDf& _scale, const hlsl::quaternion& _quat, const CQuantQuaternionCache* quantCache, const core::vectorSIMDf& _translation) + Keyframe(const core::vectorSIMDf& _scale, const hlsl::math::quaternion& _quat, const CQuantQuaternionCache* quantCache, const core::vectorSIMDf& _translation) { std::copy(_translation.pointer,_translation.pointer+3,translation); quat = quantCache->template quantize(_quat); @@ -42,13 +42,13 @@ class IAnimationLibrary : public virtual core::IReferenceCounted //scale = ; } - inline hlsl::quaternion getRotation() const + inline hlsl::math::quaternion getRotation() const { const void* _pix[4] = {&quat,nullptr,nullptr,nullptr}; double out[4]; decodePixels(_pix,out,0u,0u); auto q = core::normalize(core::vectorSIMDf(out[0],out[1],out[2],out[3])); - return reinterpret_cast*>(&q)[0]; + return reinterpret_cast*>(&q)[0]; } inline core::vectorSIMDf getScale() const diff --git a/include/nbl/asset/utils/CQuantQuaternionCache.h b/include/nbl/asset/utils/CQuantQuaternionCache.h index a51549d24d..dc8d18545a 100644 --- a/include/nbl/asset/utils/CQuantQuaternionCache.h +++ b/include/nbl/asset/utils/CQuantQuaternionCache.h @@ -60,7 +60,7 @@ class CQuantQuaternionCache : public CDirQuantCacheBase - value_type_t quantize(const hlsl::quaternion& quat) + value_type_t quantize(const hlsl::math::quaternion& quat) { return Base::quantize<4u,CacheFormat>(reinterpret_cast(quat)); } diff --git a/include/nbl/builtin/hlsl/math/linalg/matrix_utils/transformation_matrix_utils.hlsl b/include/nbl/builtin/hlsl/math/linalg/matrix_utils/transformation_matrix_utils.hlsl 
index 6ae938865c..2eb706bf99 100644 --- a/include/nbl/builtin/hlsl/math/linalg/matrix_utils/transformation_matrix_utils.hlsl +++ b/include/nbl/builtin/hlsl/math/linalg/matrix_utils/transformation_matrix_utils.hlsl @@ -1,6 +1,6 @@ #ifndef _NBL_BUILTIN_HLSL_MATH_LINALG_MATRIX_UTILS_TRANSFORMATION_MATRIX_UTILS_INCLUDED_ #define _NBL_BUILTIN_HLSL_MATH_LINALG_MATRIX_UTILS_TRANSFORMATION_MATRIX_UTILS_INCLUDED_ -#include +#include // TODO: remove this header when deleting vectorSIMDf.hlsl #ifndef __HLSL_VERSION #include @@ -61,7 +61,7 @@ inline matrix getSub3x3(NBL_CONST_REF_ARG(matrix) mat) //! Replaces curent rocation and scale by rotation represented by quaternion `quat`, leaves 4th row and 4th colum unchanged template -inline void setRotation(NBL_REF_ARG(matrix) outMat, NBL_CONST_REF_ARG(nbl::hlsl::quaternion) quat) +inline void setRotation(NBL_REF_ARG(matrix) outMat, NBL_CONST_REF_ARG(nbl::hlsl::math::quaternion) quat) { // TODO //static_assert(N == 3 || N == 4); diff --git a/include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl b/include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl deleted file mode 100644 index aba9ebbd57..0000000000 --- a/include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl +++ /dev/null @@ -1,97 +0,0 @@ -// Copyright (C) 2019 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" -// For conditions of distribution and use, see copyright notice in nabla.h -// See the original file in irrlicht source for authors - -#ifndef _NBL_BUILTIN_HLSL_MATH_QUATERNION_INCLUDED_ -#define _NBL_BUILTIN_HLSL_MATH_QUATERNION_INCLUDED_ - -#include - -namespace nbl -{ -namespace hlsl -{ - -//! Quaternion class for representing rotations. -/** It provides cheap combinations and avoids gimbal locks. -Also useful for interpolations. */ - -template -struct quaternion -{ - // i*data[0] + j*data[1] + k*data[2] + data[3] - using vec_t = vector; - vector data; - - //! 
creates identity quaternion - static inline quaternion create() - { - quaternion q; - q.data = vector(0.0f, 0.0f, 0.0f, 1.0f); - - return q; - } - - static inline quaternion create(float_t x, float_t y, float_t z, float_t w) - { - quaternion q; - q.data = vector(x, y, z, w); - - return q; - } - - static inline quaternion create(NBL_CONST_REF_ARG(quaternion) other) - { - return other; - } - - static inline quaternion create(float_t pitch, float_t yaw, float_t roll) - { - const float rollDiv2 = roll * 0.5f; - const float sr = sinf(rollDiv2); - const float cr = cosf(rollDiv2); - - const float pitchDiv2 = pitch * 0.5f; - const float sp = sinf(pitchDiv2); - const float cp = cosf(pitchDiv2); - - const float yawDiv2 = yaw * 0.5f; - const float sy = sinf(yawDiv2); - const float cy = cosf(yawDiv2); - - quaternion output; - output.data[0] = cr * sp * cy + sr * cp * sy; // x - output.data[1] = cr * cp * sy - sr * sp * cy; // y - output.data[2] = sr * cp * cy - cr * sp * sy; // z - output.data[3] = cr * cp * cy + sr * sp * sy; // w - - return output; - } - - // TODO: - //explicit quaternion(NBL_CONST_REF_ARG(float32_t3x4) m) {} - - inline quaternion operator*(float_t scalar) - { - quaternion output; - output.data = data * scalar; - return output; - } - - inline quaternion operator*(NBL_CONST_REF_ARG(quaternion) other) - { - return quaternion::create( - data.w * other.data.w - data.x * other.x - data.y * other.data.y - data.z * other.data.z, - data.w * other.data.x + data.x * other.w + data.y * other.data.z - data.z * other.data.y, - data.w * other.data.y - data.x * other.z + data.y * other.data.w + data.z * other.data.x, - data.w * other.data.z + data.x * other.y - data.y * other.data.x + data.z * other.data.w - ); - } -}; - -} // end namespace core -} // nbl - -#endif - diff --git a/include/quaternion.h b/include/quaternion.h deleted file mode 100644 index c1867235db..0000000000 --- a/include/quaternion.h +++ /dev/null @@ -1,462 +0,0 @@ -// Copyright (C) 2019 - DevSH Graphics 
Programming Sp. z O.O. -// This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" -// For conditions of distribution and use, see copyright notice in nabla.h -// See the original file in irrlicht source for authors - -#ifndef __NBL_QUATERNION_H_INCLUDED__ -#define __NBL_QUATERNION_H_INCLUDED__ - - -#include "vectorSIMD.h" - -#include "nbl/core/math/glslFunctions.h" - - -namespace nbl -{ -namespace core -{ - -class matrix3x4SIMD; - - -//! Quaternion class for representing rotations. -/** It provides cheap combinations and avoids gimbal locks. -Also useful for interpolations. */ -class quaternion : private vectorSIMDf -{ - public: - //! Default Constructor - inline quaternion() : vectorSIMDf(0,0,0,1) {} - - inline quaternion(const quaternion& other) : vectorSIMDf(static_cast(other)) {} - - inline quaternion(const float* data) : vectorSIMDf(data) {} - - //! Constructor - inline quaternion(const float& x, const float& y, const float& z, const float& w) : vectorSIMDf(x,y,z,w) { } - - //! Constructor which converts euler angles (radians) to a quaternion - inline quaternion(const float& pitch, const float& yaw, const float& roll) {set(pitch,yaw,roll);} - - //! Constructor which converts a matrix to a quaternion - explicit quaternion(const matrix3x4SIMD& m); - - inline float* getPointer() {return pointer;} - - //! Equalilty operator - inline vector4db_SIMD operator==(const quaternion& other) const {return vectorSIMDf::operator==(other);} - - //! inequality operator - inline vector4db_SIMD operator!=(const quaternion& other) const {return vectorSIMDf::operator!=(other);} - - //! Assignment operator - inline quaternion& operator=(const quaternion& other) {return reinterpret_cast(vectorSIMDf::operator=(other));} - - //! Multiplication operator with scalar - inline quaternion operator*(const float& s) const - { - quaternion tmp; - reinterpret_cast(tmp) = reinterpret_cast(this)->operator*(s); - return tmp; - } - - //! 
Multiplication operator with scalar - inline quaternion& operator*=(const float& s) - { - *this = (*this)*s; - return *this; - } - - //! Multiplication operator - inline quaternion& operator*=(const quaternion& other) - { - *this = (*this)*other; - return *this; - } - - //! Multiplication operator - //http://momchil-velikov.blogspot.fr/2013/10/fast-sse-quternion-multiplication.html - inline quaternion operator*(const quaternion& other) const - { - __m128 xyzw = vectorSIMDf::getAsRegister(); - __m128 abcd = reinterpret_cast(other).getAsRegister(); - - __m128 t0 = FAST_FLOAT_SHUFFLE(abcd, _MM_SHUFFLE (3, 3, 3, 3)); /* 1, 0.5 */ - __m128 t1 = FAST_FLOAT_SHUFFLE(xyzw, _MM_SHUFFLE (2, 3, 0, 1)); /* 1, 0.5 */ - - __m128 t3 = FAST_FLOAT_SHUFFLE(abcd, _MM_SHUFFLE (0, 0, 0, 0)); /* 1, 0.5 */ - __m128 t4 = FAST_FLOAT_SHUFFLE(xyzw, _MM_SHUFFLE (1, 0, 3, 2)); /* 1, 0.5 */ - - __m128 t5 = FAST_FLOAT_SHUFFLE(abcd, _MM_SHUFFLE (1, 1, 1, 1)); /* 1, 0.5 */ - __m128 t6 = FAST_FLOAT_SHUFFLE(xyzw, _MM_SHUFFLE (2, 0, 3, 1)); /* 1, 0.5 */ - - /* [d,d,d,d]*[z,w,x,y] = [dz,dw,dx,dy] */ - __m128 m0 = _mm_mul_ps (t0, t1); /* 5/4, 1 */ - - /* [a,a,a,a]*[y,x,w,z] = [ay,ax,aw,az]*/ - __m128 m1 = _mm_mul_ps (t3, t4); /* 5/4, 1 */ - - /* [b,b,b,b]*[z,x,w,y] = [bz,bx,bw,by]*/ - __m128 m2 = _mm_mul_ps (t5, t6); /* 5/4, 1 */ - - /* [c,c,c,c]*[w,z,x,y] = [cw,cz,cx,cy] */ - __m128 t7 = FAST_FLOAT_SHUFFLE(abcd, _MM_SHUFFLE (2, 2, 2, 2)); /* 1, 0.5 */ - __m128 t8 = FAST_FLOAT_SHUFFLE(xyzw, _MM_SHUFFLE (3, 2, 0, 1)); /* 1, 0.5 */ - - __m128 m3 = _mm_mul_ps (t7, t8); /* 5/4, 1 */ - - /* 1 */ - /* [dz,dw,dx,dy]+-[ay,ax,aw,az] = [dz+ay,dw-ax,dx+aw,dy-az] */ - __m128 e = _mm_addsub_ps (m0, m1); /* 3, 1 */ - - /* 2 */ - /* [dx+aw,dz+ay,dy-az,dw-ax] */ - e = FAST_FLOAT_SHUFFLE(e, _MM_SHUFFLE (1, 3, 0, 2)); /* 1, 0.5 */ - - /* [dx+aw,dz+ay,dy-az,dw-ax]+-[bz,bx,bw,by] = [dx+aw+bz,dz+ay-bx,dy-az+bw,dw-ax-by]*/ - e = _mm_addsub_ps (e, m2); /* 3, 1 */ - - /* 2 */ - /* [dz+ay-bx,dw-ax-by,dy-az+bw,dx+aw+bz] */ - e = 
FAST_FLOAT_SHUFFLE(e, _MM_SHUFFLE (2, 0, 1, 3)); /* 1, 0.5 */ - - /* [dz+ay-bx,dw-ax-by,dy-az+bw,dx+aw+bz]+-[cw,cz,cx,cy] - = [dz+ay-bx+cw,dw-ax-by-cz,dy-az+bw+cx,dx+aw+bz-cy] */ - e = _mm_addsub_ps (e, m3); /* 3, 1 */ - - /* 2 */ - /* [dw-ax-by-cz,dz+ay-bx+cw,dy-az+bw+cx,dx+aw+bz-cy] */ - quaternion tmp; - reinterpret_cast(tmp) = FAST_FLOAT_SHUFFLE(e, _MM_SHUFFLE (2, 3, 1, 0)); /* 1, 0.5 */ - return tmp; - } - - inline vectorSIMDf transformVect(const vectorSIMDf& vec) - { - vectorSIMDf direction = *reinterpret_cast(this); - vectorSIMDf scale = core::length(direction); - direction.makeSafe3D(); - - return scale*vec+cross(direction,vec*W+cross(direction,vec))*2.f; - } - - //! Sets new quaternion - inline quaternion& set(const vectorSIMDf& xyzw) - { - *this = reinterpret_cast(xyzw); - return *this; - } - - //! Sets new quaternion based on euler angles (radians) - inline quaternion& set(const float& roll, const float& pitch, const float& yaw); - - //! Sets new quaternion from other quaternion - inline quaternion& set(const quaternion& quat) - { - *this = quat; - return *this; - } - - //! Inverts this quaternion - inline void makeInverse() - { - reinterpret_cast(*this) ^= _mm_set_epi32(0x0u,0x80000000u,0x80000000u,0x80000000u); - } - - //! Fills an angle (radians) around an axis (unit vector) - void toAngleAxis(float& angle, vector3df_SIMD& axis) const; - - //! Output this quaternion to an euler angle (radians) - void toEuler(vector3df_SIMD& euler) const; - - //! Set quaternion to identity - inline void makeIdentity() {vectorSIMDf::set(0,0,0,1);} - - - vectorSIMDf& getData() {return *((vectorSIMDf*)this);} - -//statics - inline static quaternion normalize(const quaternion& in) - { - quaternion tmp; - reinterpret_cast(tmp) = core::normalize(reinterpret_cast(in)); - return tmp; - } - - //! Helper func - static quaternion lerp(const quaternion &q1, const quaternion &q2, const float& interpolant, const bool& wrongDoubleCover); - - //! 
Set this quaternion to the linear interpolation between two quaternions - /** \param q1 First quaternion to be interpolated. - \param q2 Second quaternion to be interpolated. - \param interpolant Progress of interpolation. For interpolant=0 the result is - q1, for interpolant=1 the result is q2. Otherwise interpolation - between q1 and q2. - */ - static quaternion lerp(const quaternion &q1, const quaternion &q2, const float& interpolant); - - //! Helper func - static inline void flerp_interpolant_terms(float& interpolantPrecalcTerm2, float& interpolantPrecalcTerm3, const float& interpolant) - { - interpolantPrecalcTerm2 = (interpolant - 0.5f) * (interpolant - 0.5f); - interpolantPrecalcTerm3 = interpolant * (interpolant - 0.5f) * (interpolant - 1.f); - } - - static float flerp_adjustedinterpolant(const float& angle, const float& interpolant, const float& interpolantPrecalcTerm2, const float& interpolantPrecalcTerm3); - - //! Set this quaternion to the approximate slerp between two quaternions - /** \param q1 First quaternion to be interpolated. - \param q2 Second quaternion to be interpolated. - \param interpolant Progress of interpolation. For interpolant=0 the result is - q1, for interpolant=1 the result is q2. Otherwise interpolation - between q1 and q2. - */ - static quaternion flerp(const quaternion &q1, const quaternion &q2, const float& interpolant); - - //! Set this quaternion to the result of the spherical interpolation between two quaternions - /** \param q1 First quaternion to be interpolated. - \param q2 Second quaternion to be interpolated. - \param time Progress of interpolation. For interpolant=0 the result is - q1, for interpolant=1 the result is q2. Otherwise interpolation - between q1 and q2. - \param threshold To avoid inaccuracies the - interpolation switches to linear interpolation at some point. - This value defines how much of the interpolation will - be calculated with lerp. 
- */ - static quaternion slerp(const quaternion& q1, const quaternion& q2, - const float& interpolant, const float& threshold=.05f); - - inline static quaternion fromEuler(const vector3df_SIMD& euler) - { - quaternion tmp; - tmp.set(euler.X,euler.Y,euler.Z); - return tmp; - } - - inline static quaternion fromEuler(const vector3df& euler) - { - quaternion tmp; - tmp.set(euler.X,euler.Y,euler.Z); - return tmp; - } - - //! Set quaternion to represent a rotation from one vector to another. - static quaternion rotationFromTo(const vector3df_SIMD& from, const vector3df_SIMD& to); - - //! Create quaternion from rotation angle and rotation axis. - /** Axis must be unit length. - The quaternion representing the rotation is - q = cos(A/2)+sin(A/2)*(x*i+y*j+z*k). - \param angle Rotation Angle in radians. - \param axis Rotation axis. */ - static quaternion fromAngleAxis(const float& angle, const vector3df_SIMD& axis); -}; -static_assert(sizeof(quaternion) == sizeof(vectorSIMDf), "Quaternion not same size as vec4"); - - -// set this quaternion to the result of the linear interpolation between two quaternions -inline quaternion quaternion::lerp(const quaternion &q1, const quaternion &q2, const float& interpolant, const bool& wrongDoubleCover) -{ - vectorSIMDf retval; - if (wrongDoubleCover) - retval = mix(reinterpret_cast(q1),-reinterpret_cast(q2),vectorSIMDf(interpolant)); - else - retval = mix(reinterpret_cast(q1), reinterpret_cast(q2),vectorSIMDf(interpolant)); - return reinterpret_cast(retval); -} - -// set this quaternion to the result of the linear interpolation between two quaternions -inline quaternion quaternion::lerp(const quaternion &q1, const quaternion &q2, const float& interpolant) -{ - const float angle = dot(q1,q2)[0]; - return lerp(q1,q2,interpolant,angle < 0.0f); -} - -// Arseny Kapoulkine -inline float quaternion::flerp_adjustedinterpolant(const float& angle, const float& interpolant, const float& interpolantPrecalcTerm2, const float& interpolantPrecalcTerm3) 
-{ - float A = 1.0904f + angle * (-3.2452f + angle * (3.55645f - angle * 1.43519f)); - float B = 0.848013f + angle * (-1.06021f + angle * 0.215638f); - float k = A * interpolantPrecalcTerm2 + B; - float ot = interpolant + interpolantPrecalcTerm3 * k; - return ot; -} - -// set this quaternion to the result of an approximate slerp -inline quaternion quaternion::flerp(const quaternion &q1, const quaternion &q2, const float& interpolant) -{ - const float angle = dot(q1,q2)[0]; - return lerp(q1,q2,flerp_adjustedinterpolant(fabsf(angle),interpolant,(interpolant - 0.5f) * (interpolant - 0.5f),interpolant * (interpolant - 0.5f) * (interpolant - 1.f)),angle < 0.0f); -} - - -// set this quaternion to the result of the interpolation between two quaternions -inline quaternion quaternion::slerp(const quaternion &q1, const quaternion &q2, const float& interpolant, const float& threshold) -{ - float angle = dot(q1,q2)[0]; - - // make sure we use the short rotation - bool wrongDoubleCover = angle < 0.0f; - if (wrongDoubleCover) - angle *= -1.f; - - if (angle <= (1.f-threshold)) // spherical interpolation - { // acosf + sinf - vectorSIMDf retval; - - const float sinARcp = inversesqrt(1.f-angle*angle); - const float sinAt = sinf(acosf(angle) * interpolant); // could this line be optimized? - //1sqrt 3min/add 5mul from now on - const float sinAt_over_sinA = sinAt*sinARcp; - - const float scale = core::sqrt(1.f-sinAt*sinAt)-angle*sinAt_over_sinA; //cosAt-cos(A)sin(tA)/sin(A) = (sin(A)cos(tA)-cos(A)sin(tA))/sin(A) - if (wrongDoubleCover) // make sure we use the short rotation - retval = reinterpret_cast(q1)*scale - reinterpret_cast(q2)*sinAt_over_sinA; - else - retval = reinterpret_cast(q1)*scale + reinterpret_cast(q2)*sinAt_over_sinA; - - return reinterpret_cast(retval); - } - else - return normalize(lerp(q1,q2,interpolant,wrongDoubleCover)); -} - - -#if !NBL_TEST_BROKEN_QUATERNION_USE -//! 
axis must be unit length, angle in radians -inline quaternion quaternion::fromAngleAxis(const float& angle, const vector3df_SIMD& axis) -{ - const float fHalfAngle = 0.5f*angle; - const float fSin = sinf(fHalfAngle); - quaternion retval; - reinterpret_cast(retval) = axis*fSin; - reinterpret_cast(retval).W = cosf(fHalfAngle); - return retval; -} - - -inline void quaternion::toAngleAxis(float& angle, vector3df_SIMD &axis) const -{ - vectorSIMDf scale = core::length(*reinterpret_cast(this)); - - if (scale.X==0.f) - { - angle = 0.0f; - axis.X = 0.0f; - axis.Y = 1.0f; - axis.Z = 0.0f; - } - else - { - axis = reinterpret_cast(this)->operator/(scale); - angle = 2.f * acosf(axis.W); - - axis.makeSafe3D(); - } -} - -inline void quaternion::toEuler(vector3df_SIMD& euler) const -{ - vectorSIMDf sqr = *reinterpret_cast(this); - sqr *= sqr; - const double test = 2.0 * (Y*W - X*Z); - - if (core::equals(test, 1.0, 0.000001)) - { - // heading = rotation about z-axis - euler.Z = (float) (-2.0*atan2(X, W)); - // bank = rotation about x-axis - euler.X = 0; - // attitude = rotation about y-axis - euler.Y = core::HALF_PI(); - } - else if (core::equals(test, -1.0, 0.000001)) - { - // heading = rotation about z-axis - euler.Z = (float) (2.0*atan2(X, W)); - // bank = rotation about x-axis - euler.X = 0; - // attitude = rotation about y-axis - euler.Y = -core::HALF_PI(); - } - else - { - // heading = rotation about z-axis - euler.Z = (float) atan2(2.0 * (X*Y +Z*W),(sqr.X - sqr.Y - sqr.Z + sqr.W)); - // bank = rotation about x-axis - euler.X = (float) atan2(2.0 * (Y*Z +X*W),(-sqr.X - sqr.Y + sqr.Z + sqr.W)); - // attitude = rotation about y-axis - euler.Y = (float) asin( core::clamp(test, -1.0, 1.0) ); - } -} - -inline quaternion quaternion::rotationFromTo(const vector3df_SIMD& from, const vector3df_SIMD& to) -{ - // Based on Stan Melax's article in Game Programming Gems - // Copy, since cannot modify local - vector3df_SIMD v0 = from; - vector3df_SIMD v1 = to; - v0 = core::normalize(v0); - 
v1 = core::normalize(v1); - - const vectorSIMDf dddd = core::dot(v0,v1); - quaternion tmp; - if (dddd.X >= 1.0f) // If dot == 1, vectors are the same - { - return tmp; - } - else if (dddd.X <= -1.0f) // exactly opposite - { - vector3df_SIMD axis(1.0f, 0.f, 0.f); - axis = cross(axis,v0); - if (length(axis)[0]==0.f) - { - axis.set(0.f,1.f,0.f); - axis = cross(axis,v0); - } - // same as fromAngleAxis(PI, axis).normalize(); - reinterpret_cast(tmp) = axis; - return normalize(tmp); - } - - vectorSIMDf s = core::sqrt(vectorSIMDf(2.f,2.f,2.f,0.f)+dddd*2.f); - reinterpret_cast(tmp) = cross(v0,v1)*reciprocal_approxim(s); - tmp.W = s.X*0.5f; - return normalize(tmp); -} -#endif - -// sets new quaternion based on euler angles -inline quaternion& quaternion::set(const float& roll, const float& pitch, const float& yaw) -{ - float angle; - - angle = roll * 0.5f; - const float sr = sinf(angle); - const float cr = cosf(angle); - - angle = pitch * 0.5f; - const float sp = sinf(angle); - const float cp = cos(angle); - - angle = yaw * 0.5f; - const float sy = sinf(angle); - const float cy = cosf(angle); - - const float cpcy = cp * cy; - const float spcy = sp * cy; - const float cpsy = cp * sy; - const float spsy = sp * sy; - - *reinterpret_cast(this) = vectorSIMDf(sr,cr,cr,cr)*vectorSIMDf(cpcy,spcy,cpsy,cpcy)+vectorSIMDf(-cr,sr,-sr,sr)*vectorSIMDf(spsy,cpsy,spcy,spsy); - - return *this; -} - -} // end namespace core -} // end namespace nbl - -#endif - diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 6af6661eb3..0b8b27c9d1 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -230,9 +230,6 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/quaternions.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quadratic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/cubic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quartic.hlsl") -#quaternions 
-LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/quaternion/quaternion.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/quaternion/quaternion_impl.hlsl") #extra math LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/quadrature/gauss_legendre/impl.hlsl") From 2c787efc1d8dddac83c21f4f97e7a21566401bcb Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Tue, 9 Dec 2025 17:09:31 +0100 Subject: [PATCH 222/472] Reverted changes done to some files --- include/nbl/ext/MitsubaLoader/CElementShape.h | 2 +- include/nbl/ext/MitsubaLoader/CElementTransform.h | 2 +- include/nbl/ext/MitsubaLoader/CMitsubaLoader.h | 8 ++++---- .../CMitsubaMaterialCompilerFrontend.h | 2 +- include/nbl/ext/MitsubaLoader/PropertyElement.h | 15 ++++++++------- include/nbl/ext/MitsubaLoader/SContext.h | 4 ++-- 6 files changed, 17 insertions(+), 16 deletions(-) diff --git a/include/nbl/ext/MitsubaLoader/CElementShape.h b/include/nbl/ext/MitsubaLoader/CElementShape.h index c1725963b2..205023afea 100644 --- a/include/nbl/ext/MitsubaLoader/CElementShape.h +++ b/include/nbl/ext/MitsubaLoader/CElementShape.h @@ -225,7 +225,7 @@ class CElementShape : public IElement std::string getLogName() const override { return "shape"; } - inline hlsl::float32_t3x4 getAbsoluteTransform() const + inline core::matrix3x4SIMD getAbsoluteTransform() const { auto local = transform.matrix.extractSub3x4(); // TODO restore at some point (and make it actually work??) 
diff --git a/include/nbl/ext/MitsubaLoader/CElementTransform.h b/include/nbl/ext/MitsubaLoader/CElementTransform.h index 88864f7365..d518f69e6c 100644 --- a/include/nbl/ext/MitsubaLoader/CElementTransform.h +++ b/include/nbl/ext/MitsubaLoader/CElementTransform.h @@ -35,7 +35,7 @@ class CElementTransform : public IElement } */ - hlsl::float32_t4x4 matrix; + core::matrix4SIMD matrix; }; } diff --git a/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h b/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h index fd28d881db..e61ab3fa87 100644 --- a/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h +++ b/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h @@ -28,7 +28,7 @@ class CMitsubaMaterialCompilerFrontend; //#include "nbl/builtin/glsl/ext/MitsubaLoader/instance_data_struct.glsl" #define uint uint32_t #define uvec2 uint64_t -#define mat4x3 hlsl::float32_t3x4 +#define mat4x3 nbl::core::matrix3x4SIMD #define nbl_glsl_MC_material_data_t asset::material_compiler::material_data_t struct nbl_glsl_ext_Mitsuba_Loader_instance_data_t { @@ -71,13 +71,13 @@ class CMitsubaLoader : public asset::IRenderpassIndependentPipelineLoader // core::vector getMesh(SContext& ctx, uint32_t hierarchyLevel, CElementShape* shape); - core::vector loadShapeGroup(SContext& ctx, uint32_t hierarchyLevel, const CElementShape::ShapeGroup* shapegroup, const hlsl::float32_t3x4& relTform); - SContext::shape_ass_type loadBasicShape(SContext& ctx, uint32_t hierarchyLevel, CElementShape* shape, const hlsl::float32_t3x4& relTform); + core::vector loadShapeGroup(SContext& ctx, uint32_t hierarchyLevel, const CElementShape::ShapeGroup* shapegroup, const core::matrix3x4SIMD& relTform); + SContext::shape_ass_type loadBasicShape(SContext& ctx, uint32_t hierarchyLevel, CElementShape* shape, const core::matrix3x4SIMD& relTform); void cacheTexture(SContext& ctx, uint32_t hierarchyLevel, const CElementTexture* texture, const CMitsubaMaterialCompilerFrontend::E_IMAGE_VIEW_SEMANTIC semantic); void cacheEmissionProfile(SContext& ctx, 
const CElementEmissionProfile* profile); - SContext::bsdf_type getBSDFtreeTraversal(SContext& ctx, const CElementBSDF* bsdf, const CElementEmitter* emitter, hlsl::float32_t4x4 tform); + SContext::bsdf_type getBSDFtreeTraversal(SContext& ctx, const CElementBSDF* bsdf, const CElementEmitter* emitter, core::matrix4SIMD tform); SContext::bsdf_type genBSDFtreeTraversal(SContext& ctx, const CElementBSDF* bsdf); template diff --git a/include/nbl/ext/MitsubaLoader/CMitsubaMaterialCompilerFrontend.h b/include/nbl/ext/MitsubaLoader/CMitsubaMaterialCompilerFrontend.h index 8aaf9083fd..42bad88655 100644 --- a/include/nbl/ext/MitsubaLoader/CMitsubaMaterialCompilerFrontend.h +++ b/include/nbl/ext/MitsubaLoader/CMitsubaMaterialCompilerFrontend.h @@ -43,7 +43,7 @@ class CMitsubaMaterialCompilerFrontend explicit CMitsubaMaterialCompilerFrontend(const SContext* _ctx) : m_loaderContext(_ctx) {} front_and_back_t compileToIRTree(asset::material_compiler::IR* ir, const CElementBSDF* _bsdf); - EmitterNode* createEmitterNode(asset::material_compiler::IR* ir, const CElementEmitter* _emitter, hlsl::float32_t4x4 transform); + EmitterNode* createEmitterNode(asset::material_compiler::IR* ir, const CElementEmitter* _emitter, core::matrix4SIMD transform); private: using tex_ass_type = std::tuple,core::smart_refctd_ptr,float>; diff --git a/include/nbl/ext/MitsubaLoader/PropertyElement.h b/include/nbl/ext/MitsubaLoader/PropertyElement.h index ce2acd967a..ac257bd4b3 100644 --- a/include/nbl/ext/MitsubaLoader/PropertyElement.h +++ b/include/nbl/ext/MitsubaLoader/PropertyElement.h @@ -6,6 +6,7 @@ #define __PROPERTY_ELEMENT_H_INCLUDED__ #include "nbl/core/declarations.h" +#include "matrix4SIMD.h" #include namespace nbl @@ -201,7 +202,7 @@ struct SPropertyElementData bool bvalue; const char* svalue; core::vectorSIMDf vvalue; // rgb, srgb, vector, point - hlsl::float32_t4x4 mvalue; // matrix, translate, rotate, scale, lookat + core::matrix4SIMD mvalue; // matrix, translate, rotate, scale, lookat }; }; 
@@ -301,15 +302,15 @@ template<> struct SPropertyElementData::get_typename struct SPropertyElementData::get_typename { using type = void; }; template<> struct SPropertyElementData::get_typename -{ using type = hlsl::float32_t4x4; }; +{ using type = core::matrix4SIMD; }; template<> struct SPropertyElementData::get_typename -{ using type = hlsl::float32_t4x4; }; +{ using type = core::matrix4SIMD; }; template<> struct SPropertyElementData::get_typename -{ using type = hlsl::float32_t4x4; }; +{ using type = core::matrix4SIMD; }; template<> struct SPropertyElementData::get_typename -{ using type = hlsl::float32_t4x4; }; +{ using type = core::matrix4SIMD; }; template<> struct SPropertyElementData::get_typename -{ using type = hlsl::float32_t4x4; }; +{ using type = core::matrix4SIMD; }; template<> struct SPropertyElementData::get_typename { using type = void; }; @@ -320,7 +321,7 @@ class CPropertyElementManager static std::pair createPropertyData(const char* _el, const char** _atts); static bool retrieveBooleanValue(const std::string& _data, bool& success); - static hlsl::float32_t4x4 retrieveMatrix(const std::string& _data, bool& success); + static core::matrix4SIMD retrieveMatrix(const std::string& _data, bool& success); static core::vectorSIMDf retrieveVector(const std::string& _data, bool& success); static core::vectorSIMDf retrieveHex(const std::string& _data, bool& success); diff --git a/include/nbl/ext/MitsubaLoader/SContext.h b/include/nbl/ext/MitsubaLoader/SContext.h index 9777edf6f0..687f97054d 100644 --- a/include/nbl/ext/MitsubaLoader/SContext.h +++ b/include/nbl/ext/MitsubaLoader/SContext.h @@ -193,7 +193,7 @@ struct SContext struct SInstanceData { - SInstanceData(hlsl::float32_t3x4 _tform, SContext::bsdf_type _bsdf, const std::string& _id, const CElementEmitter& _emitterFront, const CElementEmitter& _emitterBack) : + SInstanceData(core::matrix3x4SIMD _tform, SContext::bsdf_type _bsdf, const std::string& _id, const CElementEmitter& _emitterFront, const 
CElementEmitter& _emitterBack) : tform(_tform), bsdf(_bsdf), #if defined(_NBL_DEBUG) || defined(_NBL_RELWITHDEBINFO) bsdf_id(_id), @@ -201,7 +201,7 @@ struct SContext emitter{_emitterFront, _emitterBack} {} - hlsl::float32_t3x4 tform; + core::matrix3x4SIMD tform; SContext::bsdf_type bsdf; #if defined(_NBL_DEBUG) || defined(_NBL_RELWITHDEBINFO) std::string bsdf_id; From c347c0de380568b8e2111dacee0af20c0fed0748 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Tue, 9 Dec 2025 17:12:15 +0100 Subject: [PATCH 223/472] Reverted changes done to EnvmapImportanceSampling.h --- .../ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h b/include/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h index 440a1ca463..678adf59a9 100644 --- a/include/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h +++ b/include/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h @@ -56,8 +56,8 @@ class EnvmapImportanceSampling float x,y,z; }; #define vec4 core::vectorSIMDf - #define mat4 hlsl::float32_t4x4 - #define mat4x3 hlsl::float32_t3x4 + #define mat4 core::matrix4SIMD + #define mat4x3 core::matrix3x4SIMD #include "nbl/builtin/glsl/ext/EnvmapImportanceSampling/structs.glsl" #undef uint #undef vec4 From a8f31d5af248822b415f288e326d04b93ae4ca23 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Tue, 9 Dec 2025 18:21:11 +0100 Subject: [PATCH 224/472] Added scalar matrix multiplication operation --- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 11 +++++++ .../nbl/builtin/hlsl/cpp_compat/matrix.hlsl | 10 +++++++ .../transformation_matrix_utils.hlsl | 30 ------------------- .../nbl/builtin/hlsl/math/quaternions.hlsl | 4 +-- 4 files changed, 23 insertions(+), 32 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index cd89ce45d1..87922bcb51 
100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -772,6 +772,17 @@ struct mul_helper && conce } }; +template +NBL_PARTIAL_REQ_TOP((concepts::Matrix && concepts::Scalar) || (concepts::Scalar && concepts::Matrix)) +struct mul_helper && concepts::Scalar) || (concepts::Scalar && concepts::Matrix)) > +{ + using return_t = hlsl::conditional_t, LhsT, RhsT>; + static inline return_t __call(LhsT lhs, RhsT rhs) + { + return mul(lhs, rhs); + } +}; + #define AUTO_SPECIALIZE_HELPER_FOR_VECTOR(HELPER_NAME, REQUIREMENT, RETURN_TYPE)\ template\ NBL_PARTIAL_REQ_TOP(REQUIREMENT)\ diff --git a/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl b/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl index 1ee5edf275..b704eef834 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl @@ -44,6 +44,16 @@ struct matrix final : private glm::mat { return glm::operator*(reinterpret_cast(rhs), lhs); } + template + inline friend matrix mul(const ScalarT lhs, matrix const& rhs) + { + return matrix(glm::operator*(lhs, reinterpret_cast(rhs))); + } + template + inline friend matrix mul(matrix const& lhs, const ScalarT rhs) + { + return matrix(glm::operator*(reinterpret_cast(lhs), rhs)); + } }; #endif diff --git a/include/nbl/builtin/hlsl/math/linalg/matrix_utils/transformation_matrix_utils.hlsl b/include/nbl/builtin/hlsl/math/linalg/matrix_utils/transformation_matrix_utils.hlsl index 2eb706bf99..63cc93d899 100644 --- a/include/nbl/builtin/hlsl/math/linalg/matrix_utils/transformation_matrix_utils.hlsl +++ b/include/nbl/builtin/hlsl/math/linalg/matrix_utils/transformation_matrix_utils.hlsl @@ -59,36 +59,6 @@ inline matrix getSub3x3(NBL_CONST_REF_ARG(matrix) mat) return matrix(mat); } -//! 
Replaces curent rocation and scale by rotation represented by quaternion `quat`, leaves 4th row and 4th colum unchanged -template -inline void setRotation(NBL_REF_ARG(matrix) outMat, NBL_CONST_REF_ARG(nbl::hlsl::math::quaternion) quat) -{ - // TODO - //static_assert(N == 3 || N == 4); - - outMat[0] = vector( - 1 - 2 * (quat.data.y * quat.data.y + quat.data.z * quat.data.z), - 2 * (quat.data.x * quat.data.y - quat.data.z * quat.data.w), - 2 * (quat.data.x * quat.data.z + quat.data.y * quat.data.w), - - outMat[0][3] - ); - - outMat[1] = vector( - 2 * (quat.data.x * quat.data.y + quat.data.z * quat.data.w), - 1 - 2 * (quat.data.x * quat.data.x + quat.data.z * quat.data.z), - 2 * (quat.data.y * quat.data.z - quat.data.x * quat.data.w), - outMat[1][3] - ); - - outMat[2] = vector( - 2 * (quat.data.x * quat.data.z - quat.data.y * quat.data.w), - 2 * (quat.data.y * quat.data.z + quat.data.x * quat.data.w), - 1 - 2 * (quat.data.x * quat.data.x + quat.data.y * quat.data.y), - outMat[2][3] - ); -} - } } diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index 834d41cb54..4a06abe836 100644 --- a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -215,7 +215,7 @@ struct quaternion return v * scale + hlsl::cross(direction, v * data.w + hlsl::cross(direction, v)) * scalar_type(2.0); } - matrix_type constructMatrix() + matrix_type constructMatrix() NBL_CONST_MEMBER_FUNC { matrix_type mat; mat[0] = data.yzx * data.ywz + data.zxy * data.zyw * vector3_type( 1.0, 1.0,-1.0); @@ -224,7 +224,7 @@ struct quaternion mat[0][0] = scalar_type(0.5) - mat[0][0]; mat[1][1] = scalar_type(0.5) - mat[1][1]; mat[2][2] = scalar_type(0.5) - mat[2][2]; - mat *= scalar_type(2.0); + nbl::hlsl::mul(mat, scalar_type(2.0)); return hlsl::transpose(mat); // TODO: double check transpose? 
} From e23538cb43f5923c703daa16f346fe14ccfb3d78 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 10 Dec 2025 11:24:14 +0700 Subject: [PATCH 225/472] quantized sequence get/set values by index, simplify decode func specializations --- .../hlsl/sampling/quantized_sequence.hlsl | 272 ++++++------------ 1 file changed, 86 insertions(+), 186 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index fcb2488514..9392a7dab0 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -22,72 +22,19 @@ struct QuantizedSequence; namespace impl { template -struct decode_helper; - -template -struct decode_helper -{ - using scalar_type = typename vector_traits::scalar_type; - using fp_type = typename float_of_size::type; - using return_type = vector; - - static return_type __call(NBL_CONST_REF_ARG(QuantizedSequence) val, const scalar_type scrambleKey) - { - scalar_type seqVal = val.getX(); - seqVal ^= scrambleKey; - return hlsl::promote(seqVal) * bit_cast(0x2f800004u); - } -}; -template -struct decode_helper +struct decode_helper { using scalar_type = typename vector_traits::scalar_type; using fp_type = typename float_of_size::type; - using uvec_type = vector; - using return_type = vector; + using uvec_type = vector; + using sequence_type = QuantizedSequence; + using return_type = vector; - static return_type __call(NBL_CONST_REF_ARG(QuantizedSequence) val, const uvec_type scrambleKey) + static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uvec_type scrambleKey) { uvec_type seqVal; - seqVal[0] = val.getX(); - seqVal[1] = val.getY(); - seqVal ^= scrambleKey; - return return_type(seqVal) * bit_cast(0x2f800004u); - } -}; -template -struct decode_helper -{ - using scalar_type = typename vector_traits::scalar_type; - using fp_type = typename float_of_size::type; - using uvec_type = vector; - using 
return_type = vector; - - static return_type __call(NBL_CONST_REF_ARG(QuantizedSequence) val, const uvec_type scrambleKey) - { - uvec_type seqVal; - seqVal[0] = val.getX(); - seqVal[1] = val.getY(); - seqVal[2] = val.getZ(); - seqVal ^= scrambleKey; - return return_type(seqVal) * bit_cast(0x2f800004u); - } -}; -template -struct decode_helper -{ - using scalar_type = typename vector_traits::scalar_type; - using fp_type = typename float_of_size::type; - using uvec_type = vector; - using return_type = vector; - - static return_type __call(NBL_CONST_REF_ARG(QuantizedSequence) val, const uvec_type scrambleKey) - { - uvec_type seqVal; - seqVal[0] = val.getX(); - seqVal[1] = val.getY(); - seqVal[2] = val.getZ(); - seqVal[3] = val.getW(); + NBL_UNROLL for(uint16_t i = 0; i < D; i++) + seqVal[i] = val.get(i); seqVal ^= scrambleKey; return return_type(seqVal) * bit_cast(0x2f800004u); } @@ -109,8 +56,8 @@ struct QuantizedSequence 0 && idx < 1); return data; } + void set(const uint16_t idx, const store_type value) { assert(idx > 0 && idx < 1); data = value; } store_type data; }; @@ -124,34 +71,16 @@ struct QuantizedSequence> (BitsPerComponent * uint16_t(1u))) & Mask; } - template NBL_FUNC_REQUIRES(C::value && 2 < Dim) - store_type getZ() { return (data >> (BitsPerComponent * uint16_t(2u))) & Mask; } - template NBL_FUNC_REQUIRES(C::value && 3 < Dim) - store_type getW() { return (data >> (BitsPerComponent * uint16_t(3u))) & Mask; } - - void setX(const store_type value) - { - data &= ~Mask; - data |= value & Mask; - } - void setY(const store_type value) - { - data &= ~(Mask << BitsPerComponent); - data |= (value & Mask) << BitsPerComponent; - } - template NBL_FUNC_REQUIRES(C::value && 2 < Dim) - void setZ(const store_type value) + store_type get(const uint16_t idx) { - const uint16_t bits = (BitsPerComponent * uint16_t(2u)); - data &= ~(Mask << bits); - data |= (value & Mask) << bits; + assert(idx > 0 && idx < Dim); + return (data >> (BitsPerComponent * idx)) & Mask; } - template 
NBL_FUNC_REQUIRES(C::value && 3 < Dim) - void setW(const store_type value) + + void set(const uint16_t idx, const store_type value) { - const uint16_t bits = (BitsPerComponent * uint16_t(3u)); + assert(idx > 0 && idx < Dim); + const uint16_t bits = (BitsPerComponent * idx); data &= ~(Mask << bits); data |= (value & Mask) << bits; } @@ -166,19 +95,8 @@ struct QuantizedSequence::scalar_type; - scalar_type getX() { return data[0]; } - scalar_type getY() { return data[1]; } - template NBL_FUNC_REQUIRES(C::value && 2 < Dim) - scalar_type getZ() { return data[2]; } - template NBL_FUNC_REQUIRES(C::value && 3 < Dim) - scalar_type getW() { return data[3]; } - - void setX(const scalar_type value) { data[0] = value; } - void setY(const scalar_type value) { data[1] = value; } - template NBL_FUNC_REQUIRES(C::value && 2 < Dim) - void setZ(const scalar_type value) { data[2] = value; } - template NBL_FUNC_REQUIRES(C::value && 3 < Dim) - void setW(const scalar_type value) { data[3] = value; } + scalar_type get(const uint16_t idx) { assert(idx > 0 && idx < Dim); return data[idx]; } + void set(const uint16_t idx, const scalar_type value) { assert(idx > 0 && idx < Dim); data[idx] = value; } store_type data; }; @@ -193,33 +111,38 @@ struct QuantizedSequence> BitsPerComponent; - y |= (data[1] >> BitsPerComponent) << (StoreBits-BitsPerComponent); - return y; - } - scalar_type getZ() { return data[1] & Mask; } - - void setX(const scalar_type value) - { - data[0] &= ~Mask; - data[0] |= value & Mask; - } - void setY(const scalar_type value) - { - const uint16_t ybits = StoreBits-BitsPerComponent; - const uint16_t ymask = uint16_t(1u) << ybits; - data[0] &= Mask; - data[1] &= Mask; - data[0] |= (value & ymask) << BitsPerComponent; - data[1] |= (value >> (ybits) & ymask) << BitsPerComponent; - } - void setZ(const scalar_type value) - { - data[1] &= ~Mask; - data[1] |= value & Mask; + scalar_type get(const uint16_t idx) + { + assert(idx > 0 && idx < 3); + if (idx < 2) + { + return data[idx] & 
Mask; + } + else + { + scalar_type z = data[0] >> BitsPerComponent; + z |= (data[1] >> BitsPerComponent) << (StoreBits-BitsPerComponent); + return z; + } + } + + void set(const uint16_t idx, const scalar_type value) + { + assert(idx > 0 && idx < 3); + if (idx < 2) + { + data[idx] &= ~Mask; + data[idx] |= value & Mask; + } + else + { + const uint16_t zbits = StoreBits-BitsPerComponent; + const uint16_t zmask = uint16_t(1u) << zbits; + data[0] &= Mask; + data[1] &= Mask; + data[0] |= (value & zmask) << BitsPerComponent; + data[1] |= (value >> (zbits) & zmask) << BitsPerComponent; + } } store_type data; @@ -235,30 +158,20 @@ struct QuantizedSequence> BitsPerComponent; } - scalar_type getZ() { return data[1] & Mask; } - scalar_type getW() { return data[1] >> BitsPerComponent; } - - void setX(const scalar_type value) - { - data[0] &= ~Mask; - data[0] |= value & Mask; - } - void setY(const scalar_type value) + scalar_type get(const uint16_t idx) { - data[0] &= Mask; - data[0] |= (value & Mask) << BitsPerComponent; + assert(idx > 0 && idx < 4); + const uint16_t i = (idx & uint16_t(2u)) >> uint16_t(1u); + return (data[i] >> (BitsPerComponent * (idx & uint16_t(1u)))) & Mask; } - void setZ(const scalar_type value) - { - data[1] &= ~Mask; - data[1] |= value & Mask; - } - void setW(const scalar_type value) + + void set(const uint16_t idx, const scalar_type value) { - data[1] &= Mask; - data[1] |= (value & Mask) << BitsPerComponent; + assert(idx > 0 && idx < 4); + const uint16_t i = (idx & uint16_t(2u)) >> uint16_t(1u); + const uint16_t odd = idx & uint16_t(1u); + data[i] &= hlsl::mix(~Mask, Mask, bool(odd)); + data[i] |= (value & Mask) << (BitsPerComponent * odd); } store_type data; @@ -275,11 +188,22 @@ struct QuantizedSequence 0 && idx < 2); + base_type a; + a[0] = data[uint16_t(2u) * idx]; + a[1] = data[uint16_t(2u) * idx + 1]; + return a; + } - void setX(const base_type value) { data.xy = value; } - void setY(const base_type value) { data.zw = value; } + void set(const 
uint16_t idx, const base_type value) + { + assert(idx > 0 && idx < 2); + base_type a; + data[uint16_t(2u) * idx] = value[0]; + data[uint16_t(2u) * idx + 1] = value[1]; + } store_type data; }; @@ -298,45 +222,21 @@ struct QuantizedSequence; NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << LeftoverBitsPerComponent) - uint16_t(1u); - base_type getX() - { - base_type x; - x[0] = data[0]; - x[1] = data[3] & Mask; - return x; - } - base_type getY() + base_type get(const uint16_t idx) { - base_type y; - y[0] = data[1]; - y[1] = (data[3] >> LeftoverBitsPerComponent) & Mask; - return y; - } - base_type getZ() - { - base_type z; - z[0] = data[1]; - z[1] = (data[3] >> (LeftoverBitsPerComponent * uint16_t(2u))) & Mask; - return z; + assert(idx > 0 && idx < 3); + base_type a; + a[0] = data[idx]; + a[1] = (data[3] >> (LeftoverBitsPerComponent * idx)) & Mask; + return a; } - void setX(const base_type value) - { - data[0] = value[0]; - data[3] &= ~Mask; - data[3] |= value[1] & Mask; - } - void setY(const base_type value) - { - data[1] = value[0]; - data[3] &= ~Mask; - data[3] |= (value[1] & Mask) << LeftoverBitsPerComponent; - } - void setZ(const base_type value) + void set(const uint16_t idx, const base_type value) { - data[2] = value[0]; + assert(idx > 0 && idx < 3); + data[idx] = value[0]; data[3] &= ~Mask; - data[3] |= (value[1] & Mask) << (LeftoverBitsPerComponent * uint16_t(2u)); + data[3] |= (value[1] & Mask) << (LeftoverBitsPerComponent * idx); } store_type data; From 6e9160e7b869a88912652bbbbbf3c672d4736de0 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 10 Dec 2025 16:51:29 +0700 Subject: [PATCH 226/472] quantized sequence encode should right shift input, changed scramble to initialize a pcg hash, added some helpful unorm constants --- .../hlsl/sampling/quantized_sequence.hlsl | 64 +++++++++++++------ 1 file changed, 43 insertions(+), 21 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl 
b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index 9392a7dab0..b70bddf54e 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -7,6 +7,7 @@ #include "nbl/builtin/hlsl/concepts/vector.hlsl" #include "nbl/builtin/hlsl/vector_utils/vector_traits.hlsl" +#include "nbl/builtin/hlsl/random/pcg.hlsl" namespace nbl { @@ -21,6 +22,23 @@ struct QuantizedSequence; namespace impl { +template +struct unorm_constant; +template<> +struct unorm_constant<4> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3d888889u; }; +template<> +struct unorm_constant<5> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3d042108u; }; +template<> +struct unorm_constant<8> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3b808081u; }; +template<> +struct unorm_constant<10> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3a802008u; }; +template<> +struct unorm_constant<16> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x37800080u; }; +template<> +struct unorm_constant<21> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x35000004u; }; +template<> +struct unorm_constant<32> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x2f800004u; }; + template struct decode_helper { @@ -29,25 +47,25 @@ struct decode_helper using uvec_type = vector; using sequence_type = QuantizedSequence; using return_type = vector; + NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant<8u*sizeof(scalar_type)>::value; - static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uvec_type scrambleKey) + static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uint32_t scrambleSeed) { + random::PCG32 pcg = random::PCG32::construct(scrambleSeed); uvec_type seqVal; NBL_UNROLL for(uint16_t i = 0; i < D; i++) - seqVal[i] = val.get(i); - seqVal ^= scrambleKey; - return return_type(seqVal) * bit_cast(0x2f800004u); + seqVal[i] = val.get(i) ^ pcg(); + return return_type(seqVal) * 
bit_cast(UNormConstant); } }; } template -vector::scalar_type)>::type, D> decode(NBL_CONST_REF_ARG(QuantizedSequence) val, const vector::scalar_type, D> scrambleKey) +vector::scalar_type)>::type, D> decode(NBL_CONST_REF_ARG(QuantizedSequence) val, const uint32_t scrambleSeed) { - return impl::decode_helper::__call(val, scrambleKey); + return impl::decode_helper::__call(val, scrambleSeed); } - #define SEQUENCE_SPECIALIZATION_CONCEPT concepts::UnsignedIntegral::scalar_type> && size_of_v::scalar_type> <= 4 // all Dim=1 @@ -55,6 +73,7 @@ template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT) struct QuantizedSequence { using store_type = T; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; store_type get(const uint16_t idx) { assert(idx > 0 && idx < 1); return data; } void set(const uint16_t idx, const store_type value) { assert(idx > 0 && idx < 1); data = value; } @@ -67,9 +86,10 @@ template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_C struct QuantizedSequence::Dimension == 1 && Dim > 1 && Dim < 5) > { using store_type = T; - NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); + NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; store_type get(const uint16_t idx) { @@ -82,7 +102,7 @@ struct QuantizedSequence 0 && idx < Dim); const uint16_t bits = (BitsPerComponent * idx); data &= ~(Mask << bits); - data |= (value & Mask) << bits; + data |= ((value >> DiscardBits) & Mask) << bits; } store_type data; @@ -107,9 +127,11 @@ struct QuantizedSequence::scalar_type; - NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = 
StoreBits / Dim; NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); + NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; scalar_type get(const uint16_t idx) { @@ -132,16 +154,17 @@ struct QuantizedSequence> DiscardBits) & Mask; } else { const uint16_t zbits = StoreBits-BitsPerComponent; const uint16_t zmask = uint16_t(1u) << zbits; + const scalar_type trunc_val = value >> DiscardBits; data[0] &= Mask; data[1] &= Mask; - data[0] |= (value & zmask) << BitsPerComponent; - data[1] |= (value >> (zbits) & zmask) << BitsPerComponent; + data[0] |= (trunc_val & zmask) << BitsPerComponent; + data[1] |= (trunc_val >> (zbits) & zmask) << BitsPerComponent; } } @@ -154,9 +177,10 @@ struct QuantizedSequence::scalar_type; - NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); + NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; scalar_type get(const uint16_t idx) { @@ -171,7 +195,7 @@ struct QuantizedSequence> uint16_t(1u); const uint16_t odd = idx & uint16_t(1u); data[i] &= hlsl::mix(~Mask, Mask, bool(odd)); - data[i] |= (value & Mask) << (BitsPerComponent * odd); + data[i] |= ((value >> DiscardBits) & Mask) << (BitsPerComponent * odd); } store_type data; @@ -184,9 +208,6 @@ struct QuantizedSequence::scalar_type; using base_type = vector; - NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = size_of_v; - NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; - NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); base_type get(const uint16_t idx) { @@ -217,10 +238,11 @@ struct 
QuantizedSequence::scalar_type; using base_type = vector; - NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; - NBL_CONSTEXPR_STATIC_INLINE uint16_t LeftoverBitsPerComponent = BitsPerComponent - size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t LeftoverBitsPerComponent = BitsPerComponent - uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << LeftoverBitsPerComponent) - uint16_t(1u); + NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; base_type get(const uint16_t idx) { @@ -236,7 +258,7 @@ struct QuantizedSequence 0 && idx < 3); data[idx] = value[0]; data[3] &= ~Mask; - data[3] |= (value[1] & Mask) << (LeftoverBitsPerComponent * idx); + data[3] |= ((value[1] >> DiscardBits) & Mask) << (LeftoverBitsPerComponent * idx); } store_type data; From 97a0bad7159ada794598263a7d9926b7c93b3bf9 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Wed, 10 Dec 2025 14:21:21 +0100 Subject: [PATCH 227/472] Fixed quaternion bug, removed old projection.hlsl file --- examples_tests | 2 +- .../nbl/builtin/hlsl/math/quaternions.hlsl | 2 +- .../hlsl/math/thin_lens_projection.hlsl | 3 + .../builtin/hlsl/projection/projection.hlsl | 81 ------------------- 4 files changed, 5 insertions(+), 83 deletions(-) delete mode 100644 include/nbl/builtin/hlsl/projection/projection.hlsl diff --git a/examples_tests b/examples_tests index c256c8dd59..aa8c079d50 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit c256c8dd5984036d35af7a615eb27d9454eda431 +Subproject commit aa8c079d50e4761db67ad97f6e5df10ee754a4d2 diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index 4a06abe836..833601b4b8 100644 --- a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ 
-224,7 +224,7 @@ struct quaternion mat[0][0] = scalar_type(0.5) - mat[0][0]; mat[1][1] = scalar_type(0.5) - mat[1][1]; mat[2][2] = scalar_type(0.5) - mat[2][2]; - nbl::hlsl::mul(mat, scalar_type(2.0)); + mat = nbl::hlsl::mul(mat, scalar_type(2.0)); return hlsl::transpose(mat); // TODO: double check transpose? } diff --git a/include/nbl/builtin/hlsl/math/thin_lens_projection.hlsl b/include/nbl/builtin/hlsl/math/thin_lens_projection.hlsl index ca43dcc0ba..70c46fdb37 100644 --- a/include/nbl/builtin/hlsl/math/thin_lens_projection.hlsl +++ b/include/nbl/builtin/hlsl/math/thin_lens_projection.hlsl @@ -8,6 +8,8 @@ namespace nbl { namespace hlsl { +namespace math +{ namespace thin_lens { @@ -81,5 +83,6 @@ inline matrix lhProjectionOrthoMatrix(FloatingPoint widthOf } } } +} #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/projection/projection.hlsl b/include/nbl/builtin/hlsl/projection/projection.hlsl deleted file mode 100644 index 58714e7dab..0000000000 --- a/include/nbl/builtin/hlsl/projection/projection.hlsl +++ /dev/null @@ -1,81 +0,0 @@ -#ifndef _NBL_BUILTIN_HLSL_PROJECTION_PROJECTION_INCLUDED_ -#define _NBL_BUILTIN_HLSL_PROJECTION_PROJECTION_INCLUDED_ - -#include -#include - -namespace nbl -{ -namespace hlsl -{ -template) -inline matrix buildProjectionMatrixPerspectiveFovRH(FloatingPoint fieldOfViewRadians, FloatingPoint aspectRatio, FloatingPoint zNear, FloatingPoint zFar) -{ - const FloatingPoint h = core::reciprocal(tan(fieldOfViewRadians * 0.5f)); - _NBL_DEBUG_BREAK_IF(aspectRatio == 0.f); //division by zero - const float w = h / aspectRatio; - - _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero - - matrix m; - m[0] = vector(w, 0.f, 0.f, 0.f); - m[1] = vector(0.f, -h, 0.f, 0.f); - m[2] = vector(0.f, 0.f, -zFar / (zFar - zNear), -zNear * zFar / (zFar - zNear)); - m[3] = vector(0.f, 0.f, -1.f, 0.f); - - return m; -} -template) -inline matrix buildProjectionMatrixPerspectiveFovLH(FloatingPoint fieldOfViewRadians, FloatingPoint 
aspectRatio, FloatingPoint zNear, FloatingPoint zFar) -{ - const FloatingPoint h = core::reciprocal(tan(fieldOfViewRadians * 0.5f)); - _NBL_DEBUG_BREAK_IF(aspectRatio == 0.f); //division by zero - const float w = h / aspectRatio; - - _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero - - matrix m; - m[0] = vector(w, 0.f, 0.f, 0.f); - m[1] = vector(0.f, -h, 0.f, 0.f); - m[2] = vector(0.f, 0.f, zFar / (zFar - zNear), -zNear * zFar / (zFar - zNear)); - m[3] = vector(0.f, 0.f, 1.f, 0.f); - - return m; -} - -template) -inline matrix buildProjectionMatrixOrthoRH(FloatingPoint widthOfViewVolume, FloatingPoint heightOfViewVolume, FloatingPoint zNear, FloatingPoint zFar) -{ - _NBL_DEBUG_BREAK_IF(widthOfViewVolume == 0.f); //division by zero - _NBL_DEBUG_BREAK_IF(heightOfViewVolume == 0.f); //division by zero - _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero - - matrix m; - m[0] = vector(2.f / widthOfViewVolume, 0.f, 0.f, 0.f); - m[1] = vector(0.f, -2.f / heightOfViewVolume, 0.f, 0.f); - m[2] = vector(0.f, 0.f, -1.f / (zFar - zNear), -zNear / (zFar - zNear)); - m[3] = vector(0.f, 0.f, 0.f, 1.f); - - return m; -} - -template) -inline matrix buildProjectionMatrixOrthoLH(FloatingPoint widthOfViewVolume, FloatingPoint heightOfViewVolume, FloatingPoint zNear, FloatingPoint zFar) -{ - _NBL_DEBUG_BREAK_IF(widthOfViewVolume == 0.f); //division by zero - _NBL_DEBUG_BREAK_IF(heightOfViewVolume == 0.f); //division by zero - _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero - - matrix m; - m[0] = vector(2.f / widthOfViewVolume, 0.f, 0.f, 0.f); - m[1] = vector(0.f, -2.f / heightOfViewVolume, 0.f, 0.f); - m[2] = vector(0.f, 0.f, 1.f / (zFar - zNear), -zNear / (zFar - zNear)); - m[3] = vector(0.f, 0.f, 0.f, 1.f); - - return m; -} - -} -} - -#endif \ No newline at end of file From 6d6a5b333ed8cbf4354576652c8035b660fda5af Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Wed, 10 Dec 2025 14:35:33 +0100 Subject: [PATCH 228/472] Fixed bug in the `promote_affine` function --- 
include/nbl/builtin/hlsl/math/linalg/transform.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/math/linalg/transform.hlsl b/include/nbl/builtin/hlsl/math/linalg/transform.hlsl index 236c81a8b1..e13a333ade 100644 --- a/include/nbl/builtin/hlsl/math/linalg/transform.hlsl +++ b/include/nbl/builtin/hlsl/math/linalg/transform.hlsl @@ -88,7 +88,7 @@ matrix promote_affine(const matrix inMatrix) NBL_UNROLL for (uint32_t row_i = NIn; row_i < NOut; row_i++) { retval[row_i] = promote(0.0); - if (row_i >= MIn && row_i < MOut) + if (row_i < MOut) retval[row_i][row_i] = T(1.0); } return retval; From 48d1a81cae49650f7233059c2f396b1b0598ea55 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Wed, 10 Dec 2025 17:26:36 +0100 Subject: [PATCH 229/472] Implemented `truncate` for matrices --- examples_tests | 2 +- .../transformation_matrix_utils.hlsl | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/examples_tests b/examples_tests index aa8c079d50..1dec4de5e5 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit aa8c079d50e4761db67ad97f6e5df10ee754a4d2 +Subproject commit 1dec4de5e5e92040150bf529ec311183efff3c8c diff --git a/include/nbl/builtin/hlsl/math/linalg/matrix_utils/transformation_matrix_utils.hlsl b/include/nbl/builtin/hlsl/math/linalg/matrix_utils/transformation_matrix_utils.hlsl index 63cc93d899..934a6adf44 100644 --- a/include/nbl/builtin/hlsl/math/linalg/matrix_utils/transformation_matrix_utils.hlsl +++ b/include/nbl/builtin/hlsl/math/linalg/matrix_utils/transformation_matrix_utils.hlsl @@ -38,19 +38,19 @@ MatT diagonal(typename matrix_traits::scalar_type diagonal = 1) template MatT identity() { - // TODO - // static_assert(MatT::Square); return diagonal(1); } -template -inline matrix extractSub3x4From4x4Matrix(NBL_CONST_REF_ARG(matrix) mat) +template truncate(const NBL_CONST_REF_ARG(matrix) inMatrix) { - matrix output; - for (int i = 0; i < 3; ++i) - output[i] = mat[i]; + 
matrix retval; - return output; + for (uint16_t i = 0; i < NOut; ++i) + for (uint16_t j = 0; j < MOut; ++j) + retval[i][j] = inMatrix[i][j]; + + return retval; } template From 5ac28fb03e80ddebe9b8fcf7e663daf2df8f6812 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Wed, 10 Dec 2025 17:32:58 +0100 Subject: [PATCH 230/472] Removed unroll.hlsl from builtin CMakeLists.txt --- src/nbl/builtin/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 0b8b27c9d1..a92311a5c9 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -178,7 +178,6 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/matrix.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/promote.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/vector.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/impl/intrinsics_impl.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/unroll.hlsl") #glsl compat LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/glsl_compat/core.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/glsl_compat/subgroup_arithmetic.hlsl") From 4a399fbd58000439da6216eb6756edf9c9a84cf5 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 11 Dec 2025 00:27:55 +0700 Subject: [PATCH 231/472] Fix verifyAnyBitIntegral --- include/nbl/builtin/hlsl/morton.hlsl | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 4e90fd4c91..9ba33ffb3d 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -28,14 +28,19 @@ NBL_BOOL_CONCEPT Dimension = 1 < D && D < 5; template && concepts::Scalar) NBL_CONSTEXPR_FUNC bool verifyAnyBitIntegral(T val) { - NBL_CONSTEXPR_FUNC_SCOPE_VAR T mask = ~((T(1) << Bits) - 1); - const bool allZero = ((val & mask) == 0); NBL_IF_CONSTEXPR(is_signed_v) { + 
// include the msb + NBL_CONSTEXPR_FUNC_SCOPE_VAR T mask = ~((uint64_t(1) << (Bits-1)) - 1); + const bool allZero = ((val & mask) == 0); const bool allOne = ((val & mask) == mask); return allZero || allOne; + } else + { + NBL_CONSTEXPR_FUNC_SCOPE_VAR T mask = ~((uint64_t(1) << Bits) - 1); + const bool allZero = ((val & mask) == 0); + return allZero; } - return allZero; } template && concepts::Scalar) From 84569361d6dce701e396284012bfb5d9d7ff445e Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Wed, 10 Dec 2025 14:35:33 +0100 Subject: [PATCH 232/472] Fixed bug in the `promote_affine` function --- include/nbl/builtin/hlsl/math/linalg/transform.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/math/linalg/transform.hlsl b/include/nbl/builtin/hlsl/math/linalg/transform.hlsl index 59ff142150..070f1e7af5 100644 --- a/include/nbl/builtin/hlsl/math/linalg/transform.hlsl +++ b/include/nbl/builtin/hlsl/math/linalg/transform.hlsl @@ -88,7 +88,7 @@ matrix promote_affine(const matrix inMatrix) NBL_UNROLL for (uint32_t row_i = NIn; row_i < NOut; row_i++) { retval[row_i] = promote(0.0); - if (row_i >= MIn && row_i < MOut) + if (row_i < MOut) retval[row_i][row_i] = T(1.0); } return retval; From 6f0d0120f438637f054a43c6c0dfffe24b66a931 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 11 Dec 2025 11:26:37 +0700 Subject: [PATCH 233/472] added decode variant for scramble before decode --- examples_tests | 2 +- .../hlsl/sampling/quantized_sequence.hlsl | 50 +++++++++++++++++-- 2 files changed, 47 insertions(+), 5 deletions(-) diff --git a/examples_tests b/examples_tests index fb24a25a44..456f9e2fb0 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit fb24a25a44b85a9cee830a3cafd86894ca137453 +Subproject commit 456f9e2fb0bffa0d599366bc4a0616730615ac93 diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index b70bddf54e..08f23eb170 100644 
--- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -39,8 +39,11 @@ struct unorm_constant<21> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x35000 template<> struct unorm_constant<32> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x2f800004u; }; +template +struct decode_helper; + template -struct decode_helper +struct decode_helper { using scalar_type = typename vector_traits::scalar_type; using fp_type = typename float_of_size::type; @@ -58,12 +61,46 @@ struct decode_helper return return_type(seqVal) * bit_cast(UNormConstant); } }; +template +struct decode_helper +{ + using scalar_type = typename vector_traits::scalar_type; + using fp_type = typename float_of_size::type; + using uvec_type = vector; + using sequence_type = QuantizedSequence; + using sequence_store_type = typename sequence_type::store_type; + using sequence_scalar_type = typename vector_traits::scalar_type; + using return_type = vector; + NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = sequence_type::UNormConstant; + + static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uint32_t scrambleSeed) + { + random::PCG32 pcg = random::PCG32::construct(scrambleSeed); + + sequence_store_type scrambleKey; + NBL_UNROLL for(uint16_t i = 0; i < vector_traits::Dimension; i++) + scrambleKey[i] = sequence_scalar_type(pcg()); + + sequence_type scramble; + scramble.data = scrambleKey ^ val.data; + + // sequence_type scramble; + // NBL_UNROLL for(uint16_t i = 0; i < D; i++) + // scramble.set(i, pcg()); + // scramble.data ^= val.data; + + uvec_type seqVal; + NBL_UNROLL for(uint16_t i = 0; i < D; i++) + seqVal[i] = scramble.get(i); + return return_type(seqVal) * bit_cast(UNormConstant); + } +}; } -template +template vector::scalar_type)>::type, D> decode(NBL_CONST_REF_ARG(QuantizedSequence) val, const uint32_t scrambleSeed) { - return impl::decode_helper::__call(val, scrambleSeed); + return 
impl::decode_helper::__call(val, scrambleSeed); } #define SEQUENCE_SPECIALIZATION_CONCEPT concepts::UnsignedIntegral::scalar_type> && size_of_v::scalar_type> <= 4 @@ -73,7 +110,7 @@ template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT) struct QuantizedSequence { using store_type = T; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(store_type)>::value; store_type get(const uint16_t idx) { assert(idx > 0 && idx < 1); return data; } void set(const uint16_t idx, const store_type value) { assert(idx > 0 && idx < 1); data = value; } @@ -90,6 +127,7 @@ struct QuantizedSequence::value; store_type get(const uint16_t idx) { @@ -114,6 +152,7 @@ struct QuantizedSequence::scalar_type; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(scalar_type)>::value; scalar_type get(const uint16_t idx) { assert(idx > 0 && idx < Dim); return data[idx]; } void set(const uint16_t idx, const scalar_type value) { assert(idx > 0 && idx < Dim); data[idx] = value; } @@ -181,6 +220,7 @@ struct QuantizedSequence::value; scalar_type get(const uint16_t idx) { @@ -208,6 +248,7 @@ struct QuantizedSequence::scalar_type; using base_type = vector; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(scalar_type)>::value; base_type get(const uint16_t idx) { @@ -243,6 +284,7 @@ struct QuantizedSequence; NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << LeftoverBitsPerComponent) - uint16_t(1u); NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(scalar_type)>::value; base_type get(const uint16_t idx) { From c5c1dc2e2c520cf795568b4667414bf1c08ad0b1 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 11 Dec 2025 14:13:37 +0700 Subject: [PATCH 234/472] Remove const specifier for parameters in 
ternary operator --- include/nbl/builtin/hlsl/functional.hlsl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 7531c5cdb9..5af6c98008 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -241,7 +241,7 @@ struct ternary_operator { using type_t = std::invoke_result_t; - constexpr inline type_t operator()(const bool condition, const F1& lhs, const F2& rhs) + constexpr inline type_t operator()(const bool condition, F1& lhs, F2& rhs) { if (condition) return std::invoke(lhs); @@ -255,7 +255,7 @@ struct ternary_operator { using type_t = decltype(experimental::declval().operator()); - NBL_CONSTEXPR_FUNC type_t operator()(const bool condition, NBL_CONST_REF_ARG(F1) lhs, NBL_CONST_REF_ARG(F2) rhs) + NBL_CONSTEXPR_FUNC type_t operator()(const bool condition, NBL_REF_ARG(F1) lhs, NBL_REF_ARG(F2) rhs) { if (condition) return lhs(); From 9b2780fe5dbb8ce427eba699f929f0fd6d1b86fb Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 11 Dec 2025 14:27:18 +0700 Subject: [PATCH 235/472] Improve select implementation to use spirv intrinsics instead of branch --- include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 67a9f67d8f..66ed29f1ad 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -661,7 +661,11 @@ struct select_helper) > { NBL_CONSTEXPR_STATIC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) { + #ifdef __HLSL_VERSION + return spirv::select(condition, object1, object2); + #else return condition ? 
object1 : object2; + #endif } }; From 887117537e9b59cebdf27eb3cd792e6a43efbaac Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 11 Dec 2025 15:48:01 +0700 Subject: [PATCH 236/472] Refactor select_helper to use intrinsics if possible --- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 72 ++++++++++--------- 1 file changed, 40 insertions(+), 32 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 66ed29f1ad..2856871a02 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -52,6 +52,14 @@ NBL_BOOL_CONCEPT MixCallingBuiltins = #else MixIsCallable; #endif + +template +NBL_BOOL_CONCEPT SelectCallingBuiltins = +#ifdef __HLSL_VERSION +spirv::SelectIsCallable; +#else +false; +#endif } template @@ -476,6 +484,17 @@ struct mix_helper } }; +template +requires(concepts::BooleanScalar) +struct select_helper +{ + using return_t = T; + static inline return_t __call(const B condition, const T& object1, const T& object2) + { + return condition ? object1 : object2; + } +}; + template requires concepts::FloatingPointScalar || concepts::IntegralScalar struct sign_helper @@ -655,38 +674,6 @@ struct subBorrow_helper } }; -template -NBL_PARTIAL_REQ_TOP(concepts::BooleanScalar) -struct select_helper) > -{ - NBL_CONSTEXPR_STATIC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) - { - #ifdef __HLSL_VERSION - return spirv::select(condition, object1, object2); - #else - return condition ? 
object1 : object2; - #endif - } -}; - -template -NBL_PARTIAL_REQ_TOP(concepts::Boolean&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) -struct select_helper&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) > -{ - NBL_CONSTEXPR_STATIC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) - { - using traits = hlsl::vector_traits; - array_get conditionGetter; - array_get objectGetter; - array_set setter; - - T selected; - for (uint32_t i = 0; i < traits::Dimension; ++i) - setter(selected, i, conditionGetter(condition, i) ? objectGetter(object1, i) : objectGetter(object2, i)); - - return selected; - } -}; template struct undef_helper @@ -980,6 +967,27 @@ struct mix_helper && concepts::B } }; + +template +NBL_PARTIAL_REQ_TOP(concepts::Boolean && concepts::Vector && concepts::Vector && (extent_v == extent_v) && !impl::SelectCallingBuiltins) +struct select_helper && concepts::Vector && concepts::Vector && (extent_v == extent_v) && !impl::SelectCallingBuiltins) > +{ + using return_t = T; + NBL_CONSTEXPR_STATIC return_t __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) + { + using traits = hlsl::vector_traits; + array_get conditionGetter; + array_get objectGetter; + array_set setter; + + T selected; + for (uint32_t i = 0; i < traits::Dimension; ++i) + setter(selected, i, conditionGetter(condition, i) ? 
objectGetter(object1, i) : objectGetter(object2, i)); + + return selected; + } +}; + template NBL_PARTIAL_REQ_TOP(VECTOR_SPECIALIZATION_CONCEPT) struct fma_helper From 1bf0616246f047c9be399181b319e1c5611e4617 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 11 Dec 2025 15:52:35 +0700 Subject: [PATCH 237/472] removed redundant constructors, some reorganize quaternions --- .../nbl/builtin/hlsl/math/quaternions.hlsl | 63 ++++++++++--------- 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index 834d41cb54..6114949572 100644 --- a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -48,22 +48,10 @@ struct quaternion q.data = data_type(0.0, 0.0, 0.0, 1.0); return q; } - - static this_t create(scalar_type x, scalar_type y, scalar_type z, scalar_type w) - { - this_t q; - q.data = data_type(x, y, z, w); - return q; - } - - static this_t create(NBL_CONST_REF_ARG(this_t) other) - { - return other; - } // angle: Rotation angle expressed in radians. // axis: Rotation axis, must be normalized. 
- static this_t create(scalar_type angle, const vector3_type axis) + static this_t create(const vector3_type axis, scalar_type angle) { this_t q; const scalar_type sinTheta = hlsl::sin(angle * 0.5); @@ -72,28 +60,39 @@ struct quaternion return q; } - - static this_t create(scalar_type pitch, scalar_type yaw, scalar_type roll) + template NBL_FUNC_REQUIRES(is_same_v,U>) + static this_t create(const U halfPitchCosSin, const U halfYawCosSin, const U halfRollCosSin) { - const scalar_type rollDiv2 = roll * scalar_type(0.5); - const scalar_type sr = hlsl::sin(rollDiv2); - const scalar_type cr = hlsl::cos(rollDiv2); + const scalar_type cp = halfPitchCosSin.x; + const scalar_type sp = halfPitchCosSin.y; - const scalar_type pitchDiv2 = pitch * scalar_type(0.5); - const scalar_type sp = hlsl::sin(pitchDiv2); - const scalar_type cp = hlsl::cos(pitchDiv2); + const scalar_type cy = halfYawCosSin.x; + const scalar_type sy = halfYawCosSin.y; - const scalar_type yawDiv2 = yaw * scalar_type(0.5); - const scalar_type sy = hlsl::sin(yawDiv2); - const scalar_type cy = hlsl::cos(yawDiv2); + const scalar_type cr = halfRollCosSin.x; + const scalar_type sr = halfRollCosSin.y; - this_t output; - output.data[0] = cr * sp * cy + sr * cp * sy; // x - output.data[1] = cr * cp * sy - sr * sp * cy; // y - output.data[2] = sr * cp * cy - cr * sp * sy; // z - output.data[3] = cr * cp * cy + sr * sp * sy; // w + this_t q; + q.data[0] = cr * sp * cy + sr * cp * sy; // x + q.data[1] = cr * cp * sy - sr * sp * cy; // y + q.data[2] = sr * cp * cy - cr * sp * sy; // z + q.data[3] = cr * cp * cy + sr * sp * sy; // w - return output; + return q; + } + + template) + static this_t create(const U pitch, const U yaw, const U roll) + { + const scalar_type halfPitch = pitch * scalar_type(0.5); + const scalar_type halfYaw = yaw * scalar_type(0.5); + const scalar_type halfRoll = roll * scalar_type(0.5); + + return create( + vector(hlsl::cos(halfPitch), hlsl::sin(halfPitch)), + vector(hlsl::cos(halfYaw), 
hlsl::sin(halfYaw)), + vector(hlsl::cos(halfRoll), hlsl::sin(halfRoll)) + ); } static this_t create(NBL_CONST_REF_ARG(matrix_type) m) @@ -165,12 +164,14 @@ struct quaternion this_t operator*(NBL_CONST_REF_ARG(this_t) other) { - return this_t::create( + this_t retval; + retval.data = data_type( data.w * other.data.w - data.x * other.x - data.y * other.data.y - data.z * other.data.z, data.w * other.data.x + data.x * other.w + data.y * other.data.z - data.z * other.data.y, data.w * other.data.y - data.x * other.z + data.y * other.data.w + data.z * other.data.x, data.w * other.data.z + data.x * other.y - data.y * other.data.x + data.z * other.data.w ); + return retval; } static this_t lerp(const this_t start, const this_t end, const scalar_type fraction, const scalar_type totalPseudoAngle) From bbbeeea7471ed742fdd5ca15769d9f1bbb9d1983 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 11 Dec 2025 15:58:16 +0700 Subject: [PATCH 238/472] Revert previous changes, instead make it clear that the function is implemented only in cpp --- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 70 ++++++++----------- 1 file changed, 30 insertions(+), 40 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 2856871a02..9fe3ddc21b 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -52,14 +52,6 @@ NBL_BOOL_CONCEPT MixCallingBuiltins = #else MixIsCallable; #endif - -template -NBL_BOOL_CONCEPT SelectCallingBuiltins = -#ifdef __HLSL_VERSION -spirv::SelectIsCallable; -#else -false; -#endif } template @@ -484,17 +476,6 @@ struct mix_helper } }; -template -requires(concepts::BooleanScalar) -struct select_helper -{ - using return_t = T; - static inline return_t __call(const B condition, const T& object1, const T& object2) - { - return condition ? 
object1 : object2; - } -}; - template requires concepts::FloatingPointScalar || concepts::IntegralScalar struct sign_helper @@ -674,6 +655,36 @@ struct subBorrow_helper } }; +template +requires (concepts::BooleanScalar) +struct select_helper +{ + using return_t = T; + constexpr static return_t __call(const B& condition, const T& object1, const T& object2) + { + return condition ? object1 : object2; + } +}; + +template +requires (concepts::Boolean&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) +struct select_helper +{ + using return_t = T; + constexpr static T __call(const B& condition, const T& object1, const T& object2) + { + using traits = vector_traits; + array_get conditionGetter; + array_get objectGetter; + array_set setter; + + T selected; + for (uint32_t i = 0; i < traits::Dimension; ++i) + setter(selected, i, conditionGetter(condition, i) ? objectGetter(object1, i) : objectGetter(object2, i)); + + return selected; + } +}; template struct undef_helper @@ -967,27 +978,6 @@ struct mix_helper && concepts::B } }; - -template -NBL_PARTIAL_REQ_TOP(concepts::Boolean && concepts::Vector && concepts::Vector && (extent_v == extent_v) && !impl::SelectCallingBuiltins) -struct select_helper && concepts::Vector && concepts::Vector && (extent_v == extent_v) && !impl::SelectCallingBuiltins) > -{ - using return_t = T; - NBL_CONSTEXPR_STATIC return_t __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) - { - using traits = hlsl::vector_traits; - array_get conditionGetter; - array_get objectGetter; - array_set setter; - - T selected; - for (uint32_t i = 0; i < traits::Dimension; ++i) - setter(selected, i, conditionGetter(condition, i) ? 
objectGetter(object1, i) : objectGetter(object2, i)); - - return selected; - } -}; - template NBL_PARTIAL_REQ_TOP(VECTOR_SPECIALIZATION_CONCEPT) struct fma_helper From 258e491cb3377f1b87a9c8850d1f4f3a69ccfa11 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 11 Dec 2025 16:36:03 +0700 Subject: [PATCH 239/472] Reverse the order of template argument of select and select_helper --- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 14 +++++++------- .../nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl | 4 ++-- include/nbl/builtin/hlsl/emulated/int64_t.hlsl | 6 +++--- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 9fe3ddc21b..8a745fc4ef 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -90,7 +90,7 @@ template struct all_helper; template struct any_helper; -template +template struct select_helper; template struct bitReverseAs_helper; @@ -166,7 +166,7 @@ template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(rsqrt_helper, inverseSq template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(fract_helper, fract, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(all_helper, all, (T), (T), bool) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(any_helper, any, (T), (T), bool) -template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(select_helper, select, (B)(T), (B)(T)(T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(select_helper, select, (T)(B), (B)(T)(T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sign_helper, fSign, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sign_helper, sSign, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(radians_helper, radians, (T), (T), T) @@ -655,9 +655,9 @@ struct subBorrow_helper } }; -template +template requires (concepts::BooleanScalar) -struct select_helper +struct select_helper { using return_t = T; 
constexpr static return_t __call(const B& condition, const T& object1, const T& object2) @@ -666,9 +666,9 @@ struct select_helper } }; -template +template requires (concepts::Boolean&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) -struct select_helper +struct select_helper { using return_t = T; constexpr static T __call(const B& condition, const T& object1, const T& object2) @@ -974,7 +974,7 @@ struct mix_helper && concepts::B using return_t = T; static return_t __call(NBL_CONST_REF_ARG(T) x, NBL_CONST_REF_ARG(T) y, NBL_CONST_REF_ARG(U) a) { - return select_helper(a, y, x); + return select_helper(a, y, x); } }; diff --git a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl index 27518222b3..78367f7924 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl @@ -156,10 +156,10 @@ inline bool any(Vector vec) return cpp_compat_intrinsics_impl::any_helper::__call(vec); } -template +template NBL_CONSTEXPR_FUNC ResultType select(Condition condition, ResultType object1, ResultType object2) { - return cpp_compat_intrinsics_impl::select_helper::__call(condition, object1, object2); + return cpp_compat_intrinsics_impl::select_helper::__call(condition, object1, object2); } /** diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 1324998d1a..4fa2014607 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -208,7 +208,7 @@ struct left_shift_operator(0, operand.__getLSB() << shift) : vector(operand.__getLSB() << bits, (operand.__getMSB() << bits) | (operand.__getLSB() >> shift))); - return select(bool(bits), shifted, operand); + return select(bool(bits), shifted, operand); } // If `_bits > 63` or `_bits < 0` the result is undefined @@ -235,7 +235,7 @@ struct arithmetic_right_shift_operator const type_t shifted = 
type_t::create(bigShift ? vector(operand.__getMSB() >> shift, 0) : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), operand.__getMSB() >> bits)); - return select(bool(bits), shifted, operand); + return select(bool(bits), shifted, operand); } // If `_bits > 63` the result is undefined @@ -261,7 +261,7 @@ struct arithmetic_right_shift_operator const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; const type_t shifted = type_t::create(bigShift ? vector(uint32_t(int32_t(operand.__getMSB()) >> shift), int32_t(operand.__getMSB()) < 0 ? ~uint32_t(0) : uint32_t(0)) : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), uint32_t(int32_t(operand.__getMSB()) >> bits))); - return select(bool(bits), shifted, operand); + return select(bool(bits), shifted, operand); } // If `_bits > 63` or `_bits < 0` the result is undefined From 8745a33514602e3a3089f588d2988dcb027fe733 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 11 Dec 2025 16:46:12 +0700 Subject: [PATCH 240/472] added some checks to create from rot mat --- .../nbl/builtin/hlsl/math/quaternions.hlsl | 75 ++++++++++++++----- 1 file changed, 56 insertions(+), 19 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index 6114949572..49ad0dde96 100644 --- a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -95,8 +95,43 @@ struct quaternion ); } - static this_t create(NBL_CONST_REF_ARG(matrix_type) m) + static bool __isEqual(const scalar_type a, const scalar_type b) { + return hlsl::max(a/b, b/a) <= scalar_type(1e-4); + } + static bool __dotIsZero(const vector3_type a, const vector3_type b) + { + const scalar_type ab = hlsl::dot(a, b); + return hlsl::abs(ab) <= scalar_type(1e-4); + } + + static this_t create(NBL_CONST_REF_ARG(matrix_type) m, const bool dontAssertValidMatrix=false) + { + { + // only orthogonal and uniform scale mats can 
be converted + bool valid = __dotIsZero(m[0], m[1]); + valid = __dotIsZero(m[1], m[2]) && valid; + valid = __dotIsZero(m[0], m[2]) && valid; + + const matrix_type m_T = hlsl::transpose(m); + const scalar_type dotCol0 = hlsl::dot(m_T[0],m_T[0]); + const scalar_type dotCol1 = hlsl::dot(m_T[1],m_T[1]); + const scalar_type dotCol2 = hlsl::dot(m_T[2],m_T[2]); + valid = __isEqual(dotCol0, dotCol1) && valid; + valid = __isEqual(dotCol1, dotCol2) && valid; + valid = __isEqual(dotCol0, dotCol2) && valid; + + if (dontAssertValidMatrix) + if (!valid) + { + this_t retval; + retval.data = hlsl::promote(bit_cast(numeric_limits::quiet_NaN)); + return retval; + } + else + assert(valid); + } + const scalar_type m00 = m[0][0], m11 = m[1][1], m22 = m[2][2]; const scalar_type neg_m00 = bit_cast(bit_cast(m00)^0x80000000u); const scalar_type neg_m11 = bit_cast(bit_cast(m11)^0x80000000u); @@ -106,40 +141,42 @@ struct quaternion const data_type Qz = data_type(m22, neg_m22, neg_m22, m22); const data_type tmp = hlsl::promote(1.0) + Qx + Qy + Qz; - const data_type invscales = hlsl::promote(0.5) / hlsl::sqrt(tmp); - const data_type scales = tmp * invscales * hlsl::promote(0.5); // TODO: speed this up this_t retval; if (tmp.x > scalar_type(0.0)) { - retval.data.x = (m[2][1] - m[1][2]) * invscales.x; - retval.data.y = (m[0][2] - m[2][0]) * invscales.x; - retval.data.z = (m[1][0] - m[0][1]) * invscales.x; - retval.data.w = scales.x; + const scalar_type invscales = scalar_type(0.5) / hlsl::sqrt(tmp.x); + retval.data.x = (m[2][1] - m[1][2]) * invscales; + retval.data.y = (m[0][2] - m[2][0]) * invscales; + retval.data.z = (m[1][0] - m[0][1]) * invscales; + retval.data.w = tmp.x * invscales * scalar_type(0.5); } else { if (tmp.y > scalar_type(0.0)) { - retval.data.x = scales.y; - retval.data.y = (m[0][1] + m[1][0]) * invscales.y; - retval.data.z = (m[2][0] + m[0][2]) * invscales.y; - retval.data.w = (m[2][1] - m[1][2]) * invscales.y; + const scalar_type invscales = scalar_type(0.5) / 
hlsl::sqrt(tmp.y); + retval.data.x = tmp.y * invscales * scalar_type(0.5); + retval.data.y = (m[0][1] + m[1][0]) * invscales; + retval.data.z = (m[2][0] + m[0][2]) * invscales; + retval.data.w = (m[2][1] - m[1][2]) * invscales; } else if (tmp.z > scalar_type(0.0)) { - retval.data.x = (m[0][1] + m[1][0]) * invscales.z; - retval.data.y = scales.z; - retval.data.z = (m[0][2] - m[2][0]) * invscales.z; - retval.data.w = (m[1][2] + m[2][1]) * invscales.z; + const scalar_type invscales = scalar_type(0.5) / hlsl::sqrt(tmp.z); + retval.data.x = (m[0][1] + m[1][0]) * invscales; + retval.data.y = tmp.z * invscales * scalar_type(0.5); + retval.data.z = (m[0][2] - m[2][0]) * invscales; + retval.data.w = (m[1][2] + m[2][1]) * invscales; } else { - retval.data.x = (m[0][2] + m[2][0]) * invscales.w; - retval.data.y = (m[1][2] + m[2][1]) * invscales.w; - retval.data.z = scales.w; - retval.data.w = (m[1][0] - m[0][1]) * invscales.w; + const scalar_type invscales = scalar_type(0.5) / hlsl::sqrt(tmp.w); + retval.data.x = (m[0][2] + m[2][0]) * invscales; + retval.data.y = (m[1][2] + m[2][1]) * invscales; + retval.data.z = tmp.w * invscales * scalar_type(0.5); + retval.data.w = (m[1][0] - m[0][1]) * invscales; } } From 2a8451d73fab71fe283563cbcaff631c07f181e5 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 12 Dec 2025 11:15:26 +0700 Subject: [PATCH 241/472] moved normalize, static_cast to helper specializations, norm and unnorm variants for lerp/flerp --- .../nbl/builtin/hlsl/math/quaternions.hlsl | 85 ++++++++++++------- 1 file changed, 53 insertions(+), 32 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index 49ad0dde96..73dc977d62 100644 --- a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -184,14 +184,6 @@ struct quaternion return retval; } - static this_t create(NBL_CONST_REF_ARG(truncated_quaternion) first3Components) - { - this_t retval; - 
retval.data.xyz = first3Components.data; - retval.data.w = hlsl::sqrt(scalar_type(1.0) - hlsl::dot(first3Components.data, first3Components.data)); - return retval; - } - this_t operator*(scalar_type scalar) { this_t output; @@ -211,19 +203,26 @@ struct quaternion return retval; } - static this_t lerp(const this_t start, const this_t end, const scalar_type fraction, const scalar_type totalPseudoAngle) + static this_t unnormLerp(const this_t start, const this_t end, const scalar_type fraction, const scalar_type totalPseudoAngle) { - const AsUint negationMask = hlsl::bit_cast(totalPseudoAngle) & AsUint(0x80000000u); - const data_type adjEnd = hlsl::bit_cast(hlsl::bit_cast(end.data) ^ negationMask); + // TODO: benchmark uint sign flip vs just *sign(totalPseudoAngle) + const data_type adjEnd = ieee754::flipSignIfRHSNegative(end.data, totalPseudoAngle); this_t retval; retval.data = hlsl::mix(start.data, adjEnd, fraction); return retval; } + static this_t unnormLerp(const this_t start, const this_t end, const scalar_type fraction) + { + return unnormLerp(start, end, fraction, hlsl::dot(start.data, end.data)); + } + static this_t lerp(const this_t start, const this_t end, const scalar_type fraction) { - return lerp(start, end, fraction, hlsl::dot(start.data, end.data)); + this_t retval = unnormLerp(start, end, fraction); + retval.data = hlsl::normalize(retval.data); + return retval; } static scalar_type __adj_interpolant(const scalar_type angle, const scalar_type fraction, const scalar_type interpolantPrecalcTerm2, const scalar_type interpolantPrecalcTerm3) @@ -234,26 +233,32 @@ struct quaternion return fraction + interpolantPrecalcTerm3 * k; } - static this_t flerp(const this_t start, const this_t end, const scalar_type fraction) + static this_t unnormFlerp(const this_t start, const this_t end, const scalar_type fraction) { const scalar_type pseudoAngle = hlsl::dot(start.data,end.data); const scalar_type interpolantPrecalcTerm = fraction - scalar_type(0.5); const 
scalar_type interpolantPrecalcTerm3 = fraction * interpolantPrecalcTerm * (fraction - scalar_type(1.0)); const scalar_type adjFrac = __adj_interpolant(hlsl::abs(pseudoAngle),fraction,interpolantPrecalcTerm*interpolantPrecalcTerm,interpolantPrecalcTerm3); - this_t retval = lerp(start,end,adjFrac,pseudoAngle); + this_t retval = unnormLerp(start,end,adjFrac,pseudoAngle); + return retval; + } + + static this_t flerp(const this_t start, const this_t end, const scalar_type fraction) + { + this_t retval = unnormFlerp(start,end,adjFrac,pseudoAngle); retval.data = hlsl::normalize(retval.data); return retval; } - vector3_type transformVector(const vector3_type v) + vector3_type transformVector(const vector3_type v, const bool assumeNoScale=false) NBL_CONST_MEMBER_FUNC { - scalar_type scale = hlsl::length(data); + scalar_type scale = hlsl::mix(hlsl::length(data), scalar_type(1.0), assumeNoScale); vector3_type direction = data.xyz; return v * scale + hlsl::cross(direction, v * data.w + hlsl::cross(direction, v)) * scalar_type(2.0); } - matrix_type constructMatrix() + matrix_type constructMatrix() NBL_CONST_MEMBER_FUNC { matrix_type mat; mat[0] = data.yzx * data.ywz + data.zxy * data.zyw * vector3_type( 1.0, 1.0,-1.0); @@ -280,23 +285,14 @@ struct quaternion return precompPart * cosAngle + hlsl::cross(planeNormal, precompPart); } - this_t inverse() + this_t inverse() NBL_CONST_MEMBER_FUNC { this_t retval; - retval.data.x = bit_cast(bit_cast(data.x)^0x80000000u); - retval.data.y = bit_cast(bit_cast(data.y)^0x80000000u); - retval.data.z = bit_cast(bit_cast(data.z)^0x80000000u); + retval.data.xyz = -retval.data.xyz; retval.data.w = data.w; return retval; } - static this_t normalize(NBL_CONST_REF_ARG(this_t) q) - { - this_t retval; - retval.data = hlsl::normalize(q.data); - return retval; - } - data_type data; }; @@ -305,19 +301,44 @@ struct quaternion namespace impl { +template +struct normalize_helper > +{ + static inline math::truncated_quaternion __call(const 
math::truncated_quaternion q) + { + math::truncated_quaternion retval; + retval.data = hlsl::normalize(q.data); + return retval; + } +} + +template +struct normalize_helper > +{ + static inline math::quaternion __call(const math::quaternion q) + { + math::quaternion retval; + retval.data = hlsl::normalize(q.data); + return retval; + } +} + template struct static_cast_helper, math::truncated_quaternion > { - static inline math::quaternion cast(math::truncated_quaternion q) + static inline math::quaternion cast(const math::truncated_quaternion q) { - return math::quaternion::create(q); + math::quaternion retval; + retval.data.xyz = q.data; + retval.data.w = hlsl::sqrt(scalar_type(1.0) - hlsl::dot(q.data, q.data)); + return retval; } }; template struct static_cast_helper, math::quaternion > { - static inline math::truncated_quaternion cast(math::quaternion q) + static inline math::truncated_quaternion cast(const math::quaternion q) { math::truncated_quaternion t; t.data.x = t.data.x; @@ -330,7 +351,7 @@ struct static_cast_helper, math::quaternion > template struct static_cast_helper, math::quaternion > { - static inline matrix cast(math::quaternion q) + static inline matrix cast(const math::quaternion q) { return q.constructMatrix(); } From a93fa2608f608574e17937bf42bdcdc75e17e291 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 12 Dec 2025 15:39:32 +0700 Subject: [PATCH 242/472] fix some quaternion bugs --- include/nbl/builtin/hlsl/math/quaternions.hlsl | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index 73dc977d62..91ee4975e3 100644 --- a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -246,7 +246,7 @@ struct quaternion static this_t flerp(const this_t start, const this_t end, const scalar_type fraction) { - this_t retval = unnormFlerp(start,end,adjFrac,pseudoAngle); + this_t 
retval = unnormFlerp(start,end,fraction); retval.data = hlsl::normalize(retval.data); return retval; } @@ -267,8 +267,10 @@ struct quaternion mat[0][0] = scalar_type(0.5) - mat[0][0]; mat[1][1] = scalar_type(0.5) - mat[1][1]; mat[2][2] = scalar_type(0.5) - mat[2][2]; - mat *= scalar_type(2.0); - return hlsl::transpose(mat); // TODO: double check transpose? + mat[0] = mat[0] * scalar_type(2.0); + mat[1] = mat[1] * scalar_type(2.0); + mat[2] = mat[2] * scalar_type(2.0); + return mat;// hlsl::transpose(mat); // TODO: double check transpose? } static vector3_type slerp_delta(const vector3_type start, const vector3_type preScaledWaypoint, scalar_type cosAngleFromStart) @@ -298,9 +300,9 @@ struct quaternion } -namespace impl -{ +namespace cpp_compat_intrinsics_impl +{ template struct normalize_helper > { @@ -310,7 +312,7 @@ struct normalize_helper > retval.data = hlsl::normalize(q.data); return retval; } -} +}; template struct normalize_helper > @@ -321,8 +323,11 @@ struct normalize_helper > retval.data = hlsl::normalize(q.data); return retval; } +}; } +namespace impl +{ template struct static_cast_helper, math::truncated_quaternion > { From c6462d1834d9b8781adcb2a7e67942b2cecdf77c Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 12 Dec 2025 17:32:00 +0700 Subject: [PATCH 243/472] some minor fixes to quantized sequence set, decode --- .../hlsl/sampling/quantized_sequence.hlsl | 23 +++++++------------ 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index 08f23eb170..27588dd9e0 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -71,23 +71,16 @@ struct decode_helper using sequence_store_type = typename sequence_type::store_type; using sequence_scalar_type = typename vector_traits::scalar_type; using return_type = vector; - NBL_CONSTEXPR_STATIC_INLINE 
scalar_type UNormConstant = sequence_type::UNormConstant; + NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant<8u*sizeof(scalar_type)>::value; static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uint32_t scrambleSeed) { random::PCG32 pcg = random::PCG32::construct(scrambleSeed); - sequence_store_type scrambleKey; - NBL_UNROLL for(uint16_t i = 0; i < vector_traits::Dimension; i++) - scrambleKey[i] = sequence_scalar_type(pcg()); - sequence_type scramble; - scramble.data = scrambleKey ^ val.data; - - // sequence_type scramble; - // NBL_UNROLL for(uint16_t i = 0; i < D; i++) - // scramble.set(i, pcg()); - // scramble.data ^= val.data; + NBL_UNROLL for(uint16_t i = 0; i < D; i++) + scramble.set(i, pcg()); + scramble.data ^= val.data; uvec_type seqVal; NBL_UNROLL for(uint16_t i = 0; i < D; i++) @@ -197,13 +190,13 @@ struct QuantizedSequence> DiscardBits; + const scalar_type zbits = StoreBits-BitsPerComponent; + const scalar_type zmask = (uint16_t(1u) << zbits) - uint16_t(1u); + const scalar_type trunc_val = value >> (DiscardBits-1u); data[0] &= Mask; data[1] &= Mask; data[0] |= (trunc_val & zmask) << BitsPerComponent; - data[1] |= (trunc_val >> (zbits) & zmask) << BitsPerComponent; + data[1] |= ((trunc_val >> zbits) & zmask) << BitsPerComponent; } } From ba6e1ec941fd5bb694c4b161ff236ced0f41ffb2 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 12 Dec 2025 23:15:27 +0700 Subject: [PATCH 244/472] Fix complex identity macro --- include/nbl/builtin/hlsl/complex.hlsl | 36 +++++++++++++-------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/include/nbl/builtin/hlsl/complex.hlsl b/include/nbl/builtin/hlsl/complex.hlsl index 7f59d103fa..7e8f6526ec 100644 --- a/include/nbl/builtin/hlsl/complex.hlsl +++ b/include/nbl/builtin/hlsl/complex.hlsl @@ -238,28 +238,28 @@ struct divides< complex_t > // Out of line generic initialization of static member data not yet supported so we X-Macro identities for Scalar types we want to 
support // (left X-Macro here since it's pretty readable) -#define COMPLEX_ARITHMETIC_IDENTITIES(SCALAR) \ +#define COMPLEX_ARITHMETIC_IDENTITIES(SCALAR, COMPONENT) \ template<> \ -const static complex_t< SCALAR > plus< complex_t< SCALAR > >::identity = { promote< SCALAR , uint32_t>(0), promote< SCALAR , uint32_t>(0)}; \ +const static complex_t< SCALAR > plus< complex_t< SCALAR > >::identity = { promote< SCALAR, COMPONENT>(0), promote< SCALAR, COMPONENT>(0)}; \ template<> \ -const static complex_t< SCALAR > minus< complex_t< SCALAR > >::identity = { promote< SCALAR , uint32_t>(0), promote< SCALAR , uint32_t>(0)}; \ +const static complex_t< SCALAR > minus< complex_t< SCALAR > >::identity = { promote< SCALAR, COMPONENT>(0), promote< SCALAR, COMPONENT>(0)}; \ template<> \ -const static complex_t< SCALAR > multiplies< complex_t< SCALAR > >::identity = { promote< SCALAR , uint32_t>(1), promote< SCALAR , uint32_t>(0)}; \ +const static complex_t< SCALAR > multiplies< complex_t< SCALAR > >::identity = { promote< SCALAR, COMPONENT>(1), promote< SCALAR, COMPONENT>(0)}; \ template<> \ -const static complex_t< SCALAR > divides< complex_t< SCALAR > >::identity = { promote< SCALAR , uint32_t>(1), promote< SCALAR , uint32_t>(0)}; - -COMPLEX_ARITHMETIC_IDENTITIES(float16_t) -COMPLEX_ARITHMETIC_IDENTITIES(float16_t2) -COMPLEX_ARITHMETIC_IDENTITIES(float16_t3) -COMPLEX_ARITHMETIC_IDENTITIES(float16_t4) -COMPLEX_ARITHMETIC_IDENTITIES(float32_t) -COMPLEX_ARITHMETIC_IDENTITIES(float32_t2) -COMPLEX_ARITHMETIC_IDENTITIES(float32_t3) -COMPLEX_ARITHMETIC_IDENTITIES(float32_t4) -COMPLEX_ARITHMETIC_IDENTITIES(float64_t) -COMPLEX_ARITHMETIC_IDENTITIES(float64_t2) -COMPLEX_ARITHMETIC_IDENTITIES(float64_t3) -COMPLEX_ARITHMETIC_IDENTITIES(float64_t4) +const static complex_t< SCALAR > divides< complex_t< SCALAR > >::identity = { promote< SCALAR, COMPONENT>(1), promote< SCALAR, COMPONENT>(0)}; + +COMPLEX_ARITHMETIC_IDENTITIES(float16_t, float16_t) +COMPLEX_ARITHMETIC_IDENTITIES(float16_t2, 
float16_t) +COMPLEX_ARITHMETIC_IDENTITIES(float16_t3, float16_t) +COMPLEX_ARITHMETIC_IDENTITIES(float16_t4, float16_t) +COMPLEX_ARITHMETIC_IDENTITIES(float32_t, float32_t) +COMPLEX_ARITHMETIC_IDENTITIES(float32_t2, float32_t) +COMPLEX_ARITHMETIC_IDENTITIES(float32_t3, float32_t) +COMPLEX_ARITHMETIC_IDENTITIES(float32_t4, float32_t) +COMPLEX_ARITHMETIC_IDENTITIES(float64_t, float64_t) +COMPLEX_ARITHMETIC_IDENTITIES(float64_t2, float64_t) +COMPLEX_ARITHMETIC_IDENTITIES(float64_t3, float64_t) +COMPLEX_ARITHMETIC_IDENTITIES(float64_t4, float64_t) #undef COMPLEX_ARITHMETIC_IDENTITIES From 87e2cff9efd25500101ba58479b1dfdf7a70c318 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Fri, 12 Dec 2025 17:26:19 +0100 Subject: [PATCH 245/472] Fixed keys --- cmake/common.cmake | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index 010c7409dc..c61c13714f 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1485,7 +1485,8 @@ namespace @IMPL_NAMESPACE@ { # generate keys and commands for compiling shaders foreach(BUILD_CONFIGURATION ${CMAKE_CONFIGURATION_TYPES}) - set(TARGET_OUTPUT "${IMPL_BINARY_DIR}/${BUILD_CONFIGURATION}/${FINAL_KEY}") + set(FINAL_KEY_REL_PATH "${BUILD_CONFIGURATION}/${FINAL_KEY}") + set(TARGET_OUTPUT "${IMPL_BINARY_DIR}/${FINAL_KEY_REL_PATH}") set(NBL_NSC_COMPILE_COMMAND "$" @@ -1514,7 +1515,7 @@ namespace @IMPL_NAMESPACE@ { NBL_SPIRV_REGISTERED_INPUT "${TARGET_INPUT}" NBL_SPIRV_PERMUTATION_CONFIG "${CONFIG_FILE}" NBL_SPIRV_BINARY_DIR "${IMPL_BINARY_DIR}" - NBL_SPIRV_ACCESS_KEY "${FINAL_KEY}" + NBL_SPIRV_ACCESS_KEY "${FINAL_KEY_REL_PATH}" ) set_property(TARGET ${IMPL_TARGET} APPEND PROPERTY NBL_SPIRV_OUTPUTS "${TARGET_OUTPUT}") From 34b1de46a0e75eb15a7b1e178808741fbb3d80bc Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 12 Dec 2025 23:27:51 +0700 Subject: [PATCH 246/472] Fix default blit_blit.comp.hlsl --- include/nbl/builtin/hlsl/blit/default_blit.comp.hlsl | 1 + 1 file changed, 1 insertion(+) 
diff --git a/include/nbl/builtin/hlsl/blit/default_blit.comp.hlsl b/include/nbl/builtin/hlsl/blit/default_blit.comp.hlsl index 1407d7fc77..4b97bbc08f 100644 --- a/include/nbl/builtin/hlsl/blit/default_blit.comp.hlsl +++ b/include/nbl/builtin/hlsl/blit/default_blit.comp.hlsl @@ -59,6 +59,7 @@ using namespace nbl::hlsl::blit; // TODO: push constants [numthreads(ConstevalParameters::WorkGroupSize,1,1)] +[shader("compute")] void main() { InImgAccessor inImgA; From 67e6e5031b2875ca605e532c5dfb31a71cd247ac Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 12 Dec 2025 23:35:25 +0700 Subject: [PATCH 247/472] Fix ternary operator --- include/nbl/builtin/hlsl/functional.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 5af6c98008..3f1043a5e2 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -236,7 +236,7 @@ struct maximum }; #ifndef __HLSL_VERSION -template requires(is_same_v, std::invoke_result_t()> ) +template requires(is_same_v, std::invoke_result_t > ) struct ternary_operator { using type_t = std::invoke_result_t; From 5698cf050362bcfed179d62ec9390d5f08446a6d Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 13 Dec 2025 00:16:12 +0700 Subject: [PATCH 248/472] Add missing include --- include/nbl/builtin/hlsl/functional.hlsl | 1 + 1 file changed, 1 insertion(+) diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 3f1043a5e2..118fe07c63 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -8,6 +8,7 @@ #include "nbl/builtin/hlsl/glsl_compat/core.hlsl" #include "nbl/builtin/hlsl/limits.hlsl" #include "nbl/builtin/hlsl/concepts/vector.hlsl" +#include "nbl/builtin/hlsl/array_accessors.hlsl" namespace nbl From 7d9611d8e40e5f2b0a2e9339a3aa654bf739a08d Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 13 Dec 2025 02:23:04 
+0700 Subject: [PATCH 249/472] Fix mix partial specialization --- .../nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 8a745fc4ef..a5e48debbf 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -48,7 +48,7 @@ NBL_VALID_EXPRESSION(MixIsCallable, (T)(U), glm::mix(declval(),declval(),d template NBL_BOOL_CONCEPT MixCallingBuiltins = #ifdef __HLSL_VERSION -(spirv::FMixIsCallable && is_same_v) || spirv::SelectIsCallable; +(spirv::FMixIsCallable && is_same_v); #else MixIsCallable; #endif @@ -968,13 +968,13 @@ struct mix_helper NBL_PARTIAL_REQ_TOP(concepts::Vectorial && concepts::BooleanScalar) -struct mix_helper && concepts::BooleanScalar) > +template NBL_PARTIAL_REQ_TOP((concepts::Vectorial || concepts::Scalar) && concepts::BooleanScalar && !impl::MixCallingBuiltins) +struct mix_helper || concepts::Scalar) && concepts::BooleanScalar && !impl::MixCallingBuiltins) > { using return_t = T; static return_t __call(NBL_CONST_REF_ARG(T) x, NBL_CONST_REF_ARG(T) y, NBL_CONST_REF_ARG(U) a) { - return select_helper(a, y, x); + return select_helper::__call(a, y, x); } }; From e44a8fbc95792a1dcefe2440db1f18e3e525cf79 Mon Sep 17 00:00:00 2001 From: Przemog1 <32484732+Przemog1@users.noreply.github.com> Date: Fri, 12 Dec 2025 21:33:17 +0100 Subject: [PATCH 250/472] Fixed nsc command line arguments --- cmake/common.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index c61c13714f..16ea1aee06 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1185,8 +1185,8 @@ struct DeviceConfigCaps -Zpr -spirv -fspv-target-env=vulkan1.3 - -WShadow - -WConversion + -Wshadow + -Wconversion $<$:-O0> $<$:-O3> $<$:-O3> @@ -1599,4 
+1599,4 @@ function(NBL_CREATE_RESOURCE_ARCHIVE) if(IMPL_LINK_TO) LINK_BUILTIN_RESOURCES_TO_TARGET(${IMPL_LINK_TO} ${IMPL_TARGET}) endif() -endfunction() \ No newline at end of file +endfunction() From b803f838a329f7c2eff9a9ce3d89e81868e9637b Mon Sep 17 00:00:00 2001 From: Mateusz Kielan Date: Fri, 12 Dec 2025 23:19:24 +0100 Subject: [PATCH 251/472] fix missing built-in --- src/nbl/builtin/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 6562fbb69b..75cb681d36 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -145,6 +145,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/indirect_commands.hlsl") # emulated LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/float64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/float64_t_impl.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/int64_common_member_inc.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/int64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/vector_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/matrix_t.hlsl") @@ -357,4 +358,4 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/box_sampler.h #morton codes LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/morton.hlsl") -ADD_CUSTOM_BUILTIN_RESOURCES(nblBuiltinResourceData NBL_RESOURCES_TO_EMBED "${NBL_ROOT_PATH}/include" "nbl/builtin" "nbl::builtin" "${NBL_ROOT_PATH_BINARY}/include" "${NBL_ROOT_PATH_BINARY}/src" "STATIC" "INTERNAL") \ No newline at end of file +ADD_CUSTOM_BUILTIN_RESOURCES(nblBuiltinResourceData NBL_RESOURCES_TO_EMBED "${NBL_ROOT_PATH}/include" "nbl/builtin" "nbl::builtin" "${NBL_ROOT_PATH_BINARY}/include" "${NBL_ROOT_PATH_BINARY}/src" "STATIC" "INTERNAL") From d61989fb86f790f76805939519f2a467b34beea8 Mon Sep 17 00:00:00 2001 From: devsh Date: Sun, 14 Dec 2025 12:29:44 +0100 
Subject: [PATCH 252/472] post merge submodule pointer update also prepare new release --- CMakeLists.txt | 2 +- examples_tests | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bedb9f1dc2..2235512d1f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,7 +28,7 @@ include(ExternalProject) include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/toolchains/android/build.cmake) project(Nabla - VERSION 0.8.0.1 + VERSION 0.9.0.0 HOMEPAGE_URL "https://www.devsh.eu/nabla" LANGUAGES CXX C ) diff --git a/examples_tests b/examples_tests index eb7d4fe788..4ab1de2235 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit eb7d4fe788fb5e88b8b475c979586e050e202b00 +Subproject commit 4ab1de2235365833db2d089259000bec2bcce3e3 From a30f08314c92245e5c6761012da9e767fef8c912 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 15 Dec 2025 12:20:05 +0700 Subject: [PATCH 253/472] fix quantized sequence mask being too small, assert conditions --- .../hlsl/sampling/quantized_sequence.hlsl | 32 ++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index 27588dd9e0..8ea31cbe71 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -71,7 +71,8 @@ struct decode_helper using sequence_store_type = typename sequence_type::store_type; using sequence_scalar_type = typename vector_traits::scalar_type; using return_type = vector; - NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant<8u*sizeof(scalar_type)>::value; + // NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant<8u*sizeof(scalar_type)>::value; + NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant<21>::value; static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uint32_t 
scrambleSeed) { @@ -118,7 +119,7 @@ struct QuantizedSequence; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; - NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); + NBL_CONSTEXPR_STATIC_INLINE store_type Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; @@ -161,13 +162,13 @@ struct QuantizedSequence::scalar_type; NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; - NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); + NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; scalar_type get(const uint16_t idx) { - assert(idx > 0 && idx < 3); + assert(idx >= 0 && idx < 3); if (idx < 2) { return data[idx] & Mask; @@ -182,15 +183,16 @@ struct QuantizedSequence 0 && idx < 3); + assert(idx >= 0 && idx < 3); if (idx < 2) { + const scalar_type trunc_val = value >> DiscardBits; data[idx] &= ~Mask; - data[idx] |= (value >> DiscardBits) & Mask; + data[idx] |= trunc_val &Mask; } else { - const scalar_type zbits = StoreBits-BitsPerComponent; + const uint16_t zbits = StoreBits-BitsPerComponent; const scalar_type zmask = (uint16_t(1u) << zbits) - uint16_t(1u); const scalar_type trunc_val = value >> (DiscardBits-1u); data[0] &= Mask; @@ -211,20 +213,20 @@ struct QuantizedSequence::scalar_type; NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; - NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << 
BitsPerComponent) - uint16_t(1u); + NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; scalar_type get(const uint16_t idx) { - assert(idx > 0 && idx < 4); + assert(idx >= 0 && idx < 4); const uint16_t i = (idx & uint16_t(2u)) >> uint16_t(1u); return (data[i] >> (BitsPerComponent * (idx & uint16_t(1u)))) & Mask; } void set(const uint16_t idx, const scalar_type value) { - assert(idx > 0 && idx < 4); + assert(idx >= 0 && idx < 4); const uint16_t i = (idx & uint16_t(2u)) >> uint16_t(1u); const uint16_t odd = idx & uint16_t(1u); data[i] &= hlsl::mix(~Mask, Mask, bool(odd)); @@ -245,7 +247,7 @@ struct QuantizedSequence 0 && idx < 2); + assert(idx >= 0 && idx < 2); base_type a; a[0] = data[uint16_t(2u) * idx]; a[1] = data[uint16_t(2u) * idx + 1]; @@ -254,7 +256,7 @@ struct QuantizedSequence 0 && idx < 2); + assert(idx >= 0 && idx < 2); base_type a; data[uint16_t(2u) * idx] = value[0]; data[uint16_t(2u) * idx + 1] = value[1]; @@ -275,13 +277,13 @@ struct QuantizedSequence; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; NBL_CONSTEXPR_STATIC_INLINE uint16_t LeftoverBitsPerComponent = BitsPerComponent - uint16_t(8u) * size_of_v; - NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << LeftoverBitsPerComponent) - uint16_t(1u); + NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << LeftoverBitsPerComponent) - uint16_t(1u); NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(scalar_type)>::value; base_type get(const uint16_t idx) { - assert(idx > 0 && idx < 3); + assert(idx >= 0 && idx < 3); base_type a; a[0] = data[idx]; a[1] = (data[3] >> (LeftoverBitsPerComponent * idx)) & Mask; @@ -290,7 +292,7 @@ struct 
QuantizedSequence 0 && idx < 3); + assert(idx >= 0 && idx < 3); data[idx] = value[0]; data[3] &= ~Mask; data[3] |= ((value[1] >> DiscardBits) & Mask) << (LeftoverBitsPerComponent * idx); From 7d16cb26736dc326c03a37d7d18c8a696e41f6d0 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 15 Dec 2025 14:02:09 +0700 Subject: [PATCH 254/472] fixed problems from merging master --- include/nbl/builtin/hlsl/bxdf/fresnel.hlsl | 4 ++-- include/nbl/builtin/hlsl/cpp_compat/promote.hlsl | 2 +- include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl | 5 ----- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl index 0c498efb79..b13abc6632 100644 --- a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl @@ -617,7 +617,7 @@ struct iridescent_helper NBL_UNROLL for (int m=1; m<=2; ++m) { Cm *= r123p; - Sm = hlsl::promote(2.0) * evalSensitivity(hlsl::promote(m)*D, hlsl::promote(m)*(phi23p+phi21p)); + Sm = hlsl::promote(2.0) * evalSensitivity(hlsl::promote(scalar_type(m))*D, hlsl::promote(scalar_type(m))*(phi23p+phi21p)); I += Cm*Sm; } @@ -631,7 +631,7 @@ struct iridescent_helper NBL_UNROLL for (int m=1; m<=2; ++m) { Cm *= r123s; - Sm = hlsl::promote(2.0) * evalSensitivity(hlsl::promote(m)*D, hlsl::promote(m) *(phi23s+phi21s)); + Sm = hlsl::promote(2.0) * evalSensitivity(hlsl::promote(scalar_type(m))*D, hlsl::promote(scalar_type(m)) *(phi23s+phi21s)); I += Cm*Sm; } diff --git a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl index 6a8476e644..1887f4b51f 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl @@ -22,7 +22,7 @@ struct Promote }; template NBL_PARTIAL_REQ_TOP(concepts::Vectorial && (concepts::IntegralLikeScalar || concepts::FloatingPointLikeScalar) && is_same_v::scalar_type, From>) -struct Promote && is_scalar_v && 
is_same_v::scalar_type, From>) > +struct Promote && (concepts::IntegralLikeScalar || concepts::FloatingPointLikeScalar) && is_same_v::scalar_type, From>) > { NBL_CONSTEXPR_FUNC To operator()(const From v) { diff --git a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl index 02495e2f2e..9190a4ec73 100644 --- a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl +++ b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl @@ -347,11 +347,6 @@ template [[vk::ext_instruction(spv::OpAny)]] enable_if_t&& is_same_v::scalar_type, bool>, bool> any(BooleanVector vec); -// If Condition is a vector, ResultType must be a vector with the same number of components. Using (p -> q) = (~p v q) -template && (! concepts::Vector || (concepts::Vector && (extent_v == extent_v)))) -[[vk::ext_instruction(spv::OpSelect)]] -ResultType select(Condition condition, ResultType object1, ResultType object2); - template) [[vk::ext_instruction(spv::OpIAddCarry)]] AddCarryOutput addCarry(T operand1, T operand2); From 965e028a96a18e4b89fc4597504281b49093bf42 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 15 Dec 2025 15:54:29 +0700 Subject: [PATCH 255/472] fix decode scramble key, shifting discard bits in quantization --- .../hlsl/sampling/quantized_sequence.hlsl | 38 +++++++++---------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index 8ea31cbe71..24ca8eb66d 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -52,12 +52,11 @@ struct decode_helper using return_type = vector; NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant<8u*sizeof(scalar_type)>::value; - static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uint32_t scrambleSeed) + static return_type 
__call(NBL_CONST_REF_ARG(sequence_type) val, const uvec_type scrambleKey) { - random::PCG32 pcg = random::PCG32::construct(scrambleSeed); uvec_type seqVal; NBL_UNROLL for(uint16_t i = 0; i < D; i++) - seqVal[i] = val.get(i) ^ pcg(); + seqVal[i] = val.get(i) ^ scrambleKey[i]; return return_type(seqVal) * bit_cast(UNormConstant); } }; @@ -71,16 +70,13 @@ struct decode_helper using sequence_store_type = typename sequence_type::store_type; using sequence_scalar_type = typename vector_traits::scalar_type; using return_type = vector; - // NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant<8u*sizeof(scalar_type)>::value; - NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant<21>::value; + NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = sequence_type::UNormConstant; - static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uint32_t scrambleSeed) + static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uvec_type scrambleKey) { - random::PCG32 pcg = random::PCG32::construct(scrambleSeed); - sequence_type scramble; NBL_UNROLL for(uint16_t i = 0; i < D; i++) - scramble.set(i, pcg()); + scramble.set(i, scrambleKey[i]); scramble.data ^= val.data; uvec_type seqVal; @@ -92,9 +88,9 @@ struct decode_helper } template -vector::scalar_type)>::type, D> decode(NBL_CONST_REF_ARG(QuantizedSequence) val, const uint32_t scrambleSeed) +vector::scalar_type)>::type, D> decode(NBL_CONST_REF_ARG(QuantizedSequence) val, const vector::scalar_type, D> scrambleKey) { - return impl::decode_helper::__call(val, scrambleSeed); + return impl::decode_helper::__call(val, scrambleKey); } #define SEQUENCE_SPECIALIZATION_CONCEPT concepts::UnsignedIntegral::scalar_type> && size_of_v::scalar_type> <= 4 @@ -162,13 +158,13 @@ struct QuantizedSequence::scalar_type; NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; - NBL_CONSTEXPR_STATIC_INLINE 
scalar_type Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); - NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; + NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (scalar_type(1u) << BitsPerComponent) - scalar_type(1u); + NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v) - BitsPerComponent; NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; scalar_type get(const uint16_t idx) { - assert(idx >= 0 && idx < 3); + // assert(idx >= 0 && idx < 3); if (idx < 2) { return data[idx] & Mask; @@ -176,24 +172,24 @@ struct QuantizedSequence> BitsPerComponent; - z |= (data[1] >> BitsPerComponent) << (StoreBits-BitsPerComponent); + z |= (data[1] >> BitsPerComponent) << DiscardBits; return z; } } void set(const uint16_t idx, const scalar_type value) { - assert(idx >= 0 && idx < 3); + // assert(idx >= 0 && idx < 3); if (idx < 2) { const scalar_type trunc_val = value >> DiscardBits; data[idx] &= ~Mask; - data[idx] |= trunc_val &Mask; + data[idx] |= trunc_val & Mask; } else { - const uint16_t zbits = StoreBits-BitsPerComponent; - const scalar_type zmask = (uint16_t(1u) << zbits) - uint16_t(1u); + const scalar_type zbits = scalar_type(DiscardBits); + const scalar_type zmask = (scalar_type(1u) << zbits) - scalar_type(1u); const scalar_type trunc_val = value >> (DiscardBits-1u); data[0] &= Mask; data[1] &= Mask; @@ -214,7 +210,7 @@ struct QuantizedSequence; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); - NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; + NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v) - BitsPerComponent; NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; scalar_type get(const uint16_t idx) @@ -278,7 +274,7 @@ struct QuantizedSequence; 
NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << LeftoverBitsPerComponent) - uint16_t(1u); - NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; + NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v) - BitsPerComponent; NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(scalar_type)>::value; base_type get(const uint16_t idx) From cfd55006be6e34deb69ce891f8145c78230c7e68 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 15 Dec 2025 17:03:08 +0700 Subject: [PATCH 256/472] fix z component storing too many bits in quantized sequence in vec2 data type for dim 3 --- examples_tests | 2 +- .../builtin/hlsl/sampling/quantized_sequence.hlsl | 12 +++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/examples_tests b/examples_tests index 6f9bdc3b18..ea3ec9e728 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 6f9bdc3b182f8bcd2cd699a4c6d092240e5c7f9f +Subproject commit ea3ec9e7282d2911c12f261bcc404255570eb870 diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index 24ca8eb66d..8929609c34 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -164,22 +164,24 @@ struct QuantizedSequence= 0 && idx < 3); + assert(idx >= 0 && idx < 3); if (idx < 2) { return data[idx] & Mask; } else { - scalar_type z = data[0] >> BitsPerComponent; - z |= (data[1] >> BitsPerComponent) << DiscardBits; + const scalar_type zbits = scalar_type(DiscardBits); + const scalar_type zmask = (scalar_type(1u) << zbits) - scalar_type(1u); + scalar_type z = (data[0] >> BitsPerComponent) & zmask; + z |= ((data[1] >> BitsPerComponent) & zmask) << DiscardBits; return z; } } void set(const uint16_t idx, const scalar_type value) { - // assert(idx >= 0 && idx < 3); + assert(idx >= 0 && idx < 3); if (idx < 2) 
{ const scalar_type trunc_val = value >> DiscardBits; @@ -190,7 +192,7 @@ struct QuantizedSequence> (DiscardBits-1u); + const scalar_type trunc_val = value >> DiscardBits; data[0] &= Mask; data[1] &= Mask; data[0] |= (trunc_val & zmask) << BitsPerComponent; From bacdacffc71945732bbf8bc8d4b25daaede489ba Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 15 Dec 2025 16:09:17 +0100 Subject: [PATCH 257/472] Updated examples_tests --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 0995b6797a..cc4f871dce 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 0995b6797adc8c7bd1af9fded71098a035a04ffc +Subproject commit cc4f871dce0ccf56b54118c4e90ecf2b3107d19e From 2e820cbd145aae0b1d7306c793456ab0bd108feb Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 15 Dec 2025 17:08:31 +0100 Subject: [PATCH 258/472] Updated examples --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index cc4f871dce..ab5e466db4 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit cc4f871dce0ccf56b54118c4e90ecf2b3107d19e +Subproject commit ab5e466db43ff94e748bae478d0c0e28a492dfc8 From 7a0c325eea7575c76d81c1cec23ddfaa47e22755 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 15 Dec 2025 17:19:21 +0100 Subject: [PATCH 259/472] Updated examples --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index ab5e466db4..c593979c42 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit ab5e466db43ff94e748bae478d0c0e28a492dfc8 +Subproject commit c593979c42627b49524690ea7a7717a2d7ca5fdf From 41466f7cfada59e4c0536568b7319f6d709d211e Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 15 Dec 2025 19:44:12 +0100 Subject: [PATCH 260/472] Updated examples --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/examples_tests b/examples_tests index c593979c42..8114cb0740 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit c593979c42627b49524690ea7a7717a2d7ca5fdf +Subproject commit 8114cb0740323bbde03375c731bce34d6eeeb8d9 From 7da0bdab2f7f9c78dcef7b398f10f255b1eefb27 Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 15 Dec 2025 20:15:57 +0100 Subject: [PATCH 261/472] get latest glm --- 3rdparty/glm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/glm b/3rdparty/glm index 2d4c4b4dd3..8f6213d379 160000 --- a/3rdparty/glm +++ b/3rdparty/glm @@ -1 +1 @@ -Subproject commit 2d4c4b4dd31fde06cfffad7915c2b3006402322f +Subproject commit 8f6213d379a904f5ae910e09a114e066e25faf57 From 290478d0d6dcee1ef64804954c58053f94f303e6 Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 15 Dec 2025 20:28:22 +0100 Subject: [PATCH 262/472] update gli now --- 3rdparty/gli | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/gli b/3rdparty/gli index c4e6446d3b..2749a197e8 160000 --- a/3rdparty/gli +++ b/3rdparty/gli @@ -1 +1 @@ -Subproject commit c4e6446d3b646538026fd5a95533daed952878d4 +Subproject commit 2749a197e88f94858f4108732824b3790064f6ec From 5b634dd927a0f1606dbfb4218202f4672dc60eeb Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 15 Dec 2025 20:32:32 +0100 Subject: [PATCH 263/472] fixed example 22 --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 4ab1de2235..4425ec1454 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 4ab1de2235365833db2d089259000bec2bcce3e3 +Subproject commit 4425ec1454acd2e7771f290d7b5f08fd9dbcb07b From 778f7c104dd77b7c1660c75cca796fc9a3e9288f Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 15 Dec 2025 20:32:48 +0100 Subject: [PATCH 264/472] Extended matrix --- .../nbl/builtin/hlsl/cpp_compat/matrix.hlsl | 9 +++++++++ include/nbl/system/to_string.h | 18 ++++++++++++++++++ 2 files changed, 27 
insertions(+) diff --git a/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl b/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl index 1ee5edf275..712ce5e979 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl @@ -44,6 +44,15 @@ struct matrix final : private glm::mat { return glm::operator*(reinterpret_cast(rhs), lhs); } + + inline friend bool operator==(matrix const& lhs, matrix const& rhs) + { + return glm::operator==(reinterpret_cast(lhs), reinterpret_cast(rhs)); + } + inline friend bool operator!=(matrix const& lhs, matrix const& rhs) + { + return glm::operator!=(reinterpret_cast(lhs), reinterpret_cast(rhs)); + } }; #endif diff --git a/include/nbl/system/to_string.h b/include/nbl/system/to_string.h index 67be417df2..2a06ace5e5 100644 --- a/include/nbl/system/to_string.h +++ b/include/nbl/system/to_string.h @@ -59,6 +59,24 @@ struct to_string_helper> } }; +template +struct to_string_helper> +{ + static std::string __call(const hlsl::matrix& matrix) + { + std::stringstream output; + output << '\n'; + for (int i = 0; i < N; ++i) + { + output << "{ "; + for (int j = 0; j < M; ++j) + output << matrix[i][j] << ", "; + output << "}\n"; + } + return output.str(); + } +}; + template struct to_string_helper> { From 53ff444984a4b55da8cbcaa2070e8ddb4c9dc079 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 10:54:43 +0700 Subject: [PATCH 265/472] mix_helper requirements include bool vectors --- include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index a5e48debbf..ad53bad2e8 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -968,8 +968,8 @@ struct mix_helper NBL_PARTIAL_REQ_TOP((concepts::Vectorial || 
concepts::Scalar) && concepts::BooleanScalar && !impl::MixCallingBuiltins) -struct mix_helper || concepts::Scalar) && concepts::BooleanScalar && !impl::MixCallingBuiltins) > +template NBL_PARTIAL_REQ_TOP((concepts::Vectorial || concepts::Scalar) && concepts::Boolean && !impl::MixCallingBuiltins) +struct mix_helper || concepts::Scalar) && concepts::Boolean && !impl::MixCallingBuiltins) > { using return_t = T; static return_t __call(NBL_CONST_REF_ARG(T) x, NBL_CONST_REF_ARG(T) y, NBL_CONST_REF_ARG(U) a) From 5055713ae307adcb844d24dbc348448776a7d977 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 10:55:28 +0700 Subject: [PATCH 266/472] fix iridescent oriented eta bug --- include/nbl/builtin/hlsl/bxdf/fresnel.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl index b13abc6632..954022e216 100644 --- a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl @@ -756,7 +756,7 @@ struct Iridescent getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC { OrientedEtaRcps rcpEta; - rcpEta.value = base_type::ior1[0] / base_type::ior3[0]; + rcpEta.value = hlsl::promote(base_type::ior1[0] / base_type::ior3[0]); rcpEta.value2 = rcpEta.value * rcpEta.value; return rcpEta; } From 3f3b5c991675f89d3d22f76ad99c2ade3d0b6c12 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 11:27:07 +0700 Subject: [PATCH 267/472] partitionRandVar stores leftProb, fix minor bugs --- .../builtin/hlsl/bxdf/base/cook_torrance_base.hlsl | 3 ++- .../hlsl/bxdf/transmission/smooth_dielectric.hlsl | 6 ++++-- include/nbl/builtin/hlsl/sampling/basic.hlsl | 12 +++++++----- .../hlsl/sampling/projected_spherical_triangle.hlsl | 2 +- 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl b/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl index 5e5e543791..c3de375678 100644 --- 
a/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl @@ -304,7 +304,8 @@ struct SCookTorrance scalar_type rcpChoiceProb; scalar_type z = u.z; sampling::PartitionRandVariable partitionRandVariable; - bool transmitted = partitionRandVariable(reflectance, z, rcpChoiceProb); + partitionRandVariable.leftProb = reflectance; + bool transmitted = partitionRandVariable(z, rcpChoiceProb); const scalar_type LdotH = hlsl::mix(VdotH, ieee754::copySign(hlsl::sqrt(rcpEta.value2[0]*VdotH*VdotH + scalar_type(1.0) - rcpEta.value2[0]), -VdotH), transmitted); bool valid; diff --git a/include/nbl/builtin/hlsl/bxdf/transmission/smooth_dielectric.hlsl b/include/nbl/builtin/hlsl/bxdf/transmission/smooth_dielectric.hlsl index 712b614755..6d5744fb49 100644 --- a/include/nbl/builtin/hlsl/bxdf/transmission/smooth_dielectric.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/transmission/smooth_dielectric.hlsl @@ -41,7 +41,8 @@ struct SSmoothDielectric scalar_type rcpChoiceProb; sampling::PartitionRandVariable partitionRandVariable; - bool transmitted = partitionRandVariable(reflectance, u.z, rcpChoiceProb); + partitionRandVariable.leftProb = reflectance; + bool transmitted = partitionRandVariable(u.z, rcpChoiceProb); ray_dir_info_type V = interaction.getV(); Refract r = Refract::create(V.getDirection(), interaction.getN()); @@ -128,7 +129,8 @@ struct SThinSmoothDielectric scalar_type rcpChoiceProb; scalar_type z = u.z; sampling::PartitionRandVariable partitionRandVariable; - const bool transmitted = partitionRandVariable(reflectionProb, z, rcpChoiceProb); + partitionRandVariable.leftProb = reflectionProb; + const bool transmitted = partitionRandVariable(z, rcpChoiceProb); remainderMetadata = hlsl::mix(reflectance, hlsl::promote(1.0) - reflectance, transmitted) * rcpChoiceProb; ray_dir_info_type V = interaction.getV(); diff --git a/include/nbl/builtin/hlsl/sampling/basic.hlsl b/include/nbl/builtin/hlsl/sampling/basic.hlsl index 
d0738dd930..9c575a22ce 100644 --- a/include/nbl/builtin/hlsl/sampling/basic.hlsl +++ b/include/nbl/builtin/hlsl/sampling/basic.hlsl @@ -19,14 +19,14 @@ template) struct PartitionRandVariable { using floating_point_type = T; - using uint_type = typename unsigned_integer_of_size::type; + using uint_type = unsigned_integer_of_size_t; - bool operator()(floating_point_type leftProb, NBL_REF_ARG(floating_point_type) xi, NBL_REF_ARG(floating_point_type) rcpChoiceProb) + bool operator()(NBL_REF_ARG(floating_point_type) xi, NBL_REF_ARG(floating_point_type) rcpChoiceProb) { - const floating_point_type NEXT_ULP_AFTER_UNITY = bit_cast(bit_cast(floating_point_type(1.0)) + uint_type(1u)); - const bool pickRight = xi >= leftProb * NEXT_ULP_AFTER_UNITY; + const floating_point_type NextULPAfterUnity = bit_cast(bit_cast(floating_point_type(1.0)) + uint_type(1u)); + const bool pickRight = xi >= leftProb * NextULPAfterUnity; - // This is all 100% correct taking into account the above NEXT_ULP_AFTER_UNITY + // This is all 100% correct taking into account the above NextULPAfterUnity xi -= pickRight ? leftProb : floating_point_type(0.0); rcpChoiceProb = floating_point_type(1.0) / (pickRight ? 
(floating_point_type(1.0) - leftProb) : leftProb); @@ -34,6 +34,8 @@ struct PartitionRandVariable return pickRight; } + + floating_point_type leftProb; }; diff --git a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl index f2f29ed12b..0578af5b19 100644 --- a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl @@ -49,7 +49,7 @@ struct ProjectedSphericalTriangle // pre-warp according to proj solid angle approximation vector4_type patch = computeBilinearPatch(receiverNormal, isBSDF); Bilinear bilinear = Bilinear::create(patch); - u = bilinear.generate(rcpPdf, u); + u = bilinear.generate(rcpPdf, _u); // now warp the points onto a spherical triangle const vector3_type L = sphtri.generate(solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, u); From db454c0a3aef25044294e926504ab057f443a992 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 11:46:51 +0700 Subject: [PATCH 268/472] plain const for vector types --- include/nbl/builtin/hlsl/sampling/bilinear.hlsl | 12 ++++++------ .../builtin/hlsl/sampling/box_muller_transform.hlsl | 2 +- .../builtin/hlsl/sampling/concentric_mapping.hlsl | 2 +- .../builtin/hlsl/sampling/cos_weighted_spheres.hlsl | 10 +++++----- include/nbl/builtin/hlsl/sampling/linear.hlsl | 4 ++-- .../hlsl/sampling/projected_spherical_triangle.hlsl | 12 ++++++------ .../builtin/hlsl/sampling/spherical_rectangle.hlsl | 2 +- .../builtin/hlsl/sampling/spherical_triangle.hlsl | 8 ++++---- .../nbl/builtin/hlsl/sampling/uniform_spheres.hlsl | 4 ++-- .../nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl | 6 +++--- .../nbl/builtin/hlsl/shapes/spherical_triangle.hlsl | 6 +++--- 11 files changed, 34 insertions(+), 34 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/bilinear.hlsl b/include/nbl/builtin/hlsl/sampling/bilinear.hlsl index 
746713e4c4..a74869990f 100644 --- a/include/nbl/builtin/hlsl/sampling/bilinear.hlsl +++ b/include/nbl/builtin/hlsl/sampling/bilinear.hlsl @@ -24,7 +24,7 @@ struct Bilinear using vector3_type = vector; using vector4_type = vector; - static Bilinear create(NBL_CONST_REF_ARG(vector4_type) bilinearCoeffs) + static Bilinear create(const vector4_type bilinearCoeffs) { Bilinear retval; retval.bilinearCoeffs = bilinearCoeffs; @@ -32,22 +32,22 @@ struct Bilinear return retval; } - vector2_type generate(NBL_REF_ARG(scalar_type) rcpPdf, NBL_CONST_REF_ARG(vector2_type) _u) + vector2_type generate(NBL_REF_ARG(scalar_type) rcpPdf, const vector2_type _u) { - vector2_type u = _u; + vector2_type u; Linear lineary = Linear::create(twiceAreasUnderXCurve); - u.y = lineary.generate(u.y); + u.y = lineary.generate(_u.y); const vector2_type ySliceEndPoints = vector2_type(nbl::hlsl::mix(bilinearCoeffs[0], bilinearCoeffs[2], u.y), nbl::hlsl::mix(bilinearCoeffs[1], bilinearCoeffs[3], u.y)); Linear linearx = Linear::create(ySliceEndPoints); - u.x = linearx.generate(u.x); + u.x = linearx.generate(_u.x); rcpPdf = (twiceAreasUnderXCurve[0] + twiceAreasUnderXCurve[1]) / (4.0 * nbl::hlsl::mix(ySliceEndPoints[0], ySliceEndPoints[1], u.x)); return u; } - scalar_type pdf(NBL_CONST_REF_ARG(vector2_type) u) + scalar_type pdf(const vector2_type u) { return 4.0 * nbl::hlsl::mix(nbl::hlsl::mix(bilinearCoeffs[0], bilinearCoeffs[1], u.x), nbl::hlsl::mix(bilinearCoeffs[2], bilinearCoeffs[3], u.x), u.y) / (bilinearCoeffs[0] + bilinearCoeffs[1] + bilinearCoeffs[2] + bilinearCoeffs[3]); } diff --git a/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl b/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl index 93cea06ee0..9474642f4c 100644 --- a/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl +++ b/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl @@ -21,7 +21,7 @@ struct BoxMullerTransform using scalar_type = T; using vector2_type = vector; - vector2_type 
operator()(vector2_type xi) + vector2_type operator()(const vector2_type xi) { scalar_type sinPhi, cosPhi; math::sincos(2.0 * numbers::pi * xi.y - numbers::pi, sinPhi, cosPhi); diff --git a/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl b/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl index 1a5c96b6df..841fc9ff2d 100644 --- a/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl +++ b/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl @@ -17,7 +17,7 @@ namespace sampling { template -vector concentricMapping(vector _u) +vector concentricMapping(const vector _u) { //map [0;1]^2 to [-1;1]^2 vector u = 2.0f * _u - hlsl::promote >(1.0); diff --git a/include/nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl b/include/nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl index 9f95bf2ee5..ddbb961300 100644 --- a/include/nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl +++ b/include/nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl @@ -22,26 +22,26 @@ struct ProjectedHemisphere using vector_t2 = vector; using vector_t3 = vector; - static vector_t3 generate(vector_t2 _sample) + static vector_t3 generate(const vector_t2 _sample) { vector_t2 p = concentricMapping(_sample * T(0.99999) + T(0.000005)); T z = hlsl::sqrt(hlsl::max(T(0.0), T(1.0) - p.x * p.x - p.y * p.y)); return vector_t3(p.x, p.y, z); } - static T pdf(T L_z) + static T pdf(const T L_z) { return L_z * numbers::inv_pi; } template > - static sampling::quotient_and_pdf quotient_and_pdf(T L) + static sampling::quotient_and_pdf quotient_and_pdf(const T L) { return sampling::quotient_and_pdf::create(hlsl::promote(1.0), pdf(L)); } template > - static sampling::quotient_and_pdf quotient_and_pdf(vector_t3 L) + static sampling::quotient_and_pdf quotient_and_pdf(const vector_t3 L) { return sampling::quotient_and_pdf::create(hlsl::promote(1.0), pdf(L.z)); } @@ -77,7 +77,7 @@ struct ProjectedSphere } template > - static sampling::quotient_and_pdf quotient_and_pdf(vector_t3 L) + static 
sampling::quotient_and_pdf quotient_and_pdf(const vector_t3 L) { return sampling::quotient_and_pdf::create(hlsl::promote(1.0), pdf(L.z)); } diff --git a/include/nbl/builtin/hlsl/sampling/linear.hlsl b/include/nbl/builtin/hlsl/sampling/linear.hlsl index ddd7bcf8df..6c3cf1fad9 100644 --- a/include/nbl/builtin/hlsl/sampling/linear.hlsl +++ b/include/nbl/builtin/hlsl/sampling/linear.hlsl @@ -21,7 +21,7 @@ struct Linear using scalar_type = T; using vector2_type = vector; - static Linear create(NBL_CONST_REF_ARG(vector2_type) linearCoeffs) // start and end importance values (start, end) + static Linear create(const vector2_type linearCoeffs) // start and end importance values (start, end) { Linear retval; retval.linearCoeffStart = linearCoeffs[0]; @@ -32,7 +32,7 @@ struct Linear return retval; } - scalar_type generate(scalar_type u) + scalar_type generate(const scalar_type u) { return hlsl::mix(u, (linearCoeffStart - hlsl::sqrt(squaredCoeffStart + u * squaredCoeffDiff)) * rcpDiff, hlsl::abs(rcpDiff) < numeric_limits::max); } diff --git a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl index 0578af5b19..e60fe28423 100644 --- a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl @@ -33,17 +33,17 @@ struct ProjectedSphericalTriangle return retval; } - vector4_type computeBilinearPatch(NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool isBSDF) + vector4_type computeBilinearPatch(const vector3_type receiverNormal, bool isBSDF) { const scalar_type minimumProjSolidAngle = 0.0; matrix m = matrix(tri.vertex0, tri.vertex1, tri.vertex2); - const vector3_type bxdfPdfAtVertex = math::conditionalAbsOrMax(isBSDF, nbl::hlsl::mul(m, receiverNormal), (vector3_type)minimumProjSolidAngle); + const vector3_type bxdfPdfAtVertex = math::conditionalAbsOrMax(isBSDF, nbl::hlsl::mul(m, receiverNormal), 
hlsl::promote(minimumProjSolidAngle)); return bxdfPdfAtVertex.yyxz; } - vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, scalar_type solidAngle, NBL_CONST_REF_ARG(vector3_type) cos_vertices, NBL_CONST_REF_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool isBSDF, NBL_CONST_REF_ARG(vector2_type) _u) + vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, scalar_type solidAngle, const vector3_type cos_vertices, const vector3_type sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const vector3_type receiverNormal, bool isBSDF, const vector2_type _u) { vector2_type u; // pre-warp according to proj solid angle approximation @@ -58,7 +58,7 @@ struct ProjectedSphericalTriangle return L; } - vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool isBSDF, NBL_CONST_REF_ARG(vector2_type) u) + vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, const vector3_type receiverNormal, bool isBSDF, const vector2_type u) { scalar_type cos_a, cos_c, csc_b, csc_c; vector3_type cos_vertices, sin_vertices; @@ -66,7 +66,7 @@ struct ProjectedSphericalTriangle return generate(rcpPdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, receiverNormal, isBSDF, u); } - scalar_type pdf(scalar_type solidAngle, NBL_CONST_REF_ARG(vector3_type) cos_vertices, NBL_CONST_REF_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool receiverWasBSDF, NBL_CONST_REF_ARG(vector3_type) L) + scalar_type pdf(scalar_type solidAngle, const vector3_type cos_vertices, const vector3_type sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const vector3_type receiverNormal, bool receiverWasBSDF, const vector3_type L) { scalar_type pdf; const 
vector2_type u = sphtri.generateInverse(pdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, L); @@ -76,7 +76,7 @@ struct ProjectedSphericalTriangle return pdf * bilinear.pdf(u); } - scalar_type pdf(NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool receiverWasBSDF, NBL_CONST_REF_ARG(vector3_type) L) + scalar_type pdf(const vector3_type receiverNormal, bool receiverWasBSDF, const vector3_type L) { scalar_type pdf; const vector2_type u = sphtri.generateInverse(pdf, L); diff --git a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl index f5c19fb864..f9e3d2f7ae 100644 --- a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl @@ -32,7 +32,7 @@ struct SphericalRectangle return retval; } - vector2_type generate(NBL_CONST_REF_ARG(vector2_type) rectangleExtents, NBL_CONST_REF_ARG(vector2_type) uv, NBL_REF_ARG(scalar_type) S) + vector2_type generate(const vector2_type rectangleExtents, const vector2_type uv, NBL_REF_ARG(scalar_type) S) { const vector4_type denorm_n_z = vector4_type(-rect.r0.y, rect.r0.x + rectangleExtents.x, rect.r0.y + rectangleExtents.y, -rect.r0.x); const vector4_type n_z = denorm_n_z / hlsl::sqrt(hlsl::promote(rect.r0.z * rect.r0.z) + denorm_n_z * denorm_n_z); diff --git a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl index c31e194788..5770403cd2 100644 --- a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl @@ -33,7 +33,7 @@ struct SphericalTriangle } // WARNING: can and will return NAN if one or three of the triangle edges are near zero length - vector3_type generate(scalar_type solidAngle, NBL_CONST_REF_ARG(vector3_type) cos_vertices, NBL_CONST_REF_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type 
csc_c, NBL_CONST_REF_ARG(vector2_type) u) + vector3_type generate(scalar_type solidAngle, const vector3_type cos_vertices, const vector3_type sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const vector2_type u) { scalar_type negSinSubSolidAngle,negCosSubSolidAngle; math::sincos(solidAngle * u.x - numbers::pi, negSinSubSolidAngle, negCosSubSolidAngle); @@ -66,7 +66,7 @@ struct SphericalTriangle return retval; } - vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, NBL_CONST_REF_ARG(vector2_type) u) + vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, const vector2_type u) { scalar_type cos_a, cos_c, csc_b, csc_c; vector3_type cos_vertices, sin_vertices; @@ -76,7 +76,7 @@ struct SphericalTriangle return generate(rcpPdf, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, u); } - vector2_type generateInverse(NBL_REF_ARG(scalar_type) pdf, scalar_type solidAngle, NBL_CONST_REF_ARG(vector3_type) cos_vertices, NBL_CONST_REF_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REF_ARG(vector3_type) L) + vector2_type generateInverse(NBL_REF_ARG(scalar_type) pdf, scalar_type solidAngle, const vector3_type cos_vertices, const vector3_type sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const vector3_type L) { pdf = 1.0 / solidAngle; @@ -102,7 +102,7 @@ struct SphericalTriangle return vector2_type(u,v); } - vector2_type generateInverse(NBL_REF_ARG(scalar_type) pdf, NBL_CONST_REF_ARG(vector3_type) L) + vector2_type generateInverse(NBL_REF_ARG(scalar_type) pdf, const vector3_type L) { scalar_type cos_a, cos_c, csc_b, csc_c; vector3_type cos_vertices, sin_vertices; diff --git a/include/nbl/builtin/hlsl/sampling/uniform_spheres.hlsl b/include/nbl/builtin/hlsl/sampling/uniform_spheres.hlsl index df4100db9b..5fc3bc7a0b 100644 --- a/include/nbl/builtin/hlsl/sampling/uniform_spheres.hlsl +++ 
b/include/nbl/builtin/hlsl/sampling/uniform_spheres.hlsl @@ -23,7 +23,7 @@ struct UniformHemisphere using vector_t2 = vector; using vector_t3 = vector; - static vector_t3 generate(vector_t2 _sample) + static vector_t3 generate(const vector_t2 _sample) { T z = _sample.x; T r = hlsl::sqrt(hlsl::max(T(0.0), T(1.0) - z * z)); @@ -49,7 +49,7 @@ struct UniformSphere using vector_t2 = vector; using vector_t3 = vector; - static vector_t3 generate(vector_t2 _sample) + static vector_t3 generate(const vector_t2 _sample) { T z = T(1.0) - T(2.0) * _sample.x; T r = hlsl::sqrt(hlsl::max(T(0.0), T(1.0) - z * z)); diff --git a/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl index daeb3175c3..11442bef7c 100644 --- a/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl @@ -25,14 +25,14 @@ struct SphericalRectangle using vector4_type = vector; using matrix3x3_type = matrix; - static SphericalRectangle create(NBL_CONST_REF_ARG(vector3_type) observer, NBL_CONST_REF_ARG(vector3_type) rectangleOrigin, NBL_CONST_REF_ARG(matrix3x3_type) basis) + static SphericalRectangle create(const vector3_type observer, const vector3_type rectangleOrigin, const matrix3x3_type basis) { SphericalRectangle retval; retval.r0 = nbl::hlsl::mul(basis, rectangleOrigin - observer); return retval; } - static SphericalRectangle create(NBL_CONST_REF_ARG(vector3_type) observer, NBL_CONST_REF_ARG(vector3_type) rectangleOrigin, NBL_CONST_REF_ARG(vector3_type) T, NBL_CONST_REF_ARG(vector3_type) B, NBL_CONST_REF_ARG(vector3_type) N) + static SphericalRectangle create(const vector3_type observer, const vector3_type rectangleOrigin, const vector3_type T, vector3_type B, const vector3_type N) { SphericalRectangle retval; matrix3x3_type TBN = nbl::hlsl::transpose(matrix3x3_type(T, B, N)); @@ -40,7 +40,7 @@ struct SphericalRectangle return retval; } - scalar_type 
solidAngleOfRectangle(NBL_CONST_REF_ARG(vector) rectangleExtents) + scalar_type solidAngleOfRectangle(const vector rectangleExtents) { const vector4_type denorm_n_z = vector4_type(-r0.y, r0.x + rectangleExtents.x, r0.y + rectangleExtents.y, -r0.x); const vector4_type n_z = denorm_n_z / nbl::hlsl::sqrt((vector4_type)(r0.z * r0.z) + denorm_n_z * denorm_n_z); diff --git a/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl index f0b184d057..7304fa72e9 100644 --- a/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl @@ -25,7 +25,7 @@ struct SphericalTriangle using scalar_type = T; using vector3_type = vector; - static SphericalTriangle create(NBL_CONST_REF_ARG(vector3_type) vertex0, NBL_CONST_REF_ARG(vector3_type) vertex1, NBL_CONST_REF_ARG(vector3_type) vertex2, NBL_CONST_REF_ARG(vector3_type) origin) + static SphericalTriangle create(const vector3_type vertex0, const vector3_type vertex1, const vector3_type vertex2, const vector3_type origin) { SphericalTriangle retval; retval.vertex0 = nbl::hlsl::normalize(vertex0 - origin); @@ -72,7 +72,7 @@ struct SphericalTriangle return solidAngleOfTriangle(dummy0,dummy1,dummy2,dummy3,dummy4,dummy5); } - scalar_type projectedSolidAngleOfTriangle(NBL_CONST_REF_ARG(vector3_type) receiverNormal, NBL_REF_ARG(vector3_type) cos_sides, NBL_REF_ARG(vector3_type) csc_sides, NBL_REF_ARG(vector3_type) cos_vertices) + scalar_type projectedSolidAngleOfTriangle(const vector3_type receiverNormal, NBL_REF_ARG(vector3_type) cos_sides, NBL_REF_ARG(vector3_type) csc_sides, NBL_REF_ARG(vector3_type) cos_vertices) { if (pyramidAngles()) return 0.f; @@ -106,7 +106,7 @@ namespace util { // Use this convetion e_i = v_{i+2}-v_{i+1}. vertex index is modulo by 3. 
template - vector compInternalAngle(NBL_CONST_REF_ARG(vector) e0, NBL_CONST_REF_ARG(vector) e1, NBL_CONST_REF_ARG(vector) e2) + vector compInternalAngle(const vector e0, vector e1, const vector e2) { // Calculate this triangle's weight for each of its three m_vertices // start by calculating the lengths of its sides From c353ab3247444fc8fd3cc53a6e97b353c868e5a2 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 14:11:16 +0700 Subject: [PATCH 269/472] fixes to iridescent fresnel, moved getOrientedEtaRcp to dielectric fresnels only --- .../hlsl/bxdf/base/cook_torrance_base.hlsl | 2 +- include/nbl/builtin/hlsl/bxdf/fresnel.hlsl | 51 ++++++++++--------- 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl b/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl index c3de375678..d70e8823da 100644 --- a/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl @@ -280,7 +280,7 @@ struct SCookTorrance const scalar_type NdotV = localV.z; fresnel_type _f = __getOrientedFresnel(fresnel, NdotV); - fresnel::OrientedEtaRcps rcpEta = _f.getOrientedEtaRcps(); + fresnel::OrientedEtaRcps rcpEta = _f.getRefractionOrientedEtaRcps(); const vector3_type upperHemisphereV = ieee754::flipSignIfRHSNegative(localV, hlsl::promote(NdotV)); const vector3_type localH = ndf.generateH(upperHemisphereV, u.xy); diff --git a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl index 954022e216..d32d3de16c 100644 --- a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl @@ -313,9 +313,7 @@ NBL_CONCEPT_BEGIN(2) NBL_CONCEPT_END( ((NBL_CONCEPT_REQ_TYPE)(T::scalar_type)) ((NBL_CONCEPT_REQ_TYPE)(T::vector_type)) - ((NBL_CONCEPT_REQ_TYPE)(T::eta_type)) ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((fresnel(cosTheta)), ::nbl::hlsl::is_same_v, typename T::vector_type)) - 
((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((fresnel.getOrientedEtaRcps()), ::nbl::hlsl::is_same_v, OrientedEtaRcps)) ); #undef cosTheta #undef fresnel @@ -331,7 +329,9 @@ NBL_CONCEPT_BEGIN(2) #define cosTheta NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 NBL_CONCEPT_END( ((NBL_CONCEPT_REQ_TYPE_ALIAS_CONCEPT)(Fresnel, T)) + ((NBL_CONCEPT_REQ_TYPE)(T::eta_type)) ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((fresnel.getRefractionOrientedEta()), ::nbl::hlsl::is_same_v, typename T::scalar_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((fresnel.getRefractionOrientedEtaRcps()), ::nbl::hlsl::is_same_v, OrientedEtaRcps)) ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((fresnel.getReorientedFresnel(cosTheta)), ::nbl::hlsl::is_same_v, T)) ); #undef cosTheta @@ -362,7 +362,7 @@ struct Schlick return F0 + (1.0 - F0) * x*x*x*x*x; } - OrientedEtaRcps getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC + OrientedEtaRcps getRefractionOrientedEtaRcps() NBL_CONST_MEMBER_FUNC { const eta_type sqrtF0 = hlsl::sqrt(F0); OrientedEtaRcps rcpEta; @@ -424,13 +424,13 @@ struct Conductor return (rs2 + rp2) * hlsl::promote(0.5); } - OrientedEtaRcps getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC - { - OrientedEtaRcps rcpEta; - rcpEta.value = hlsl::promote(1.0) / eta; - rcpEta.value2 = rcpEta.value * rcpEta.value; - return rcpEta; - } + // OrientedEtaRcps getRefractionOrientedEtaRcps() NBL_CONST_MEMBER_FUNC + // { + // OrientedEtaRcps rcpEta; + // rcpEta.value = hlsl::promote(1.0) / eta; + // rcpEta.value2 = rcpEta.value * rcpEta.value; + // return rcpEta; + // } T eta; T etak2; @@ -484,7 +484,7 @@ struct Dielectric // default to monochrome, but it is possible to have RGB fresnel without dispersion fixing the refraction Eta // to be something else than the etas used to compute RGB reflectance or some sort of interpolation of them scalar_type getRefractionOrientedEta() NBL_CONST_MEMBER_FUNC { return orientedEta.value[0]; } - OrientedEtaRcps getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC { return orientedEta.getReciprocals(); } + OrientedEtaRcps 
getRefractionOrientedEtaRcps() NBL_CONST_MEMBER_FUNC { return orientedEta.getReciprocals(); } Dielectric getReorientedFresnel(const scalar_type NdotI) NBL_CONST_MEMBER_FUNC { @@ -548,8 +548,6 @@ struct iridescent_helper static T __call(const vector_type _D, const vector_type ior1, const vector_type ior2, const vector_type ior3, const vector_type iork3, const vector_type eta12, const vector_type eta23, const vector_type etak23, const scalar_type clampedCosTheta) { - const vector_type wavelengths = vector_type(Colorspace::wavelength_R, Colorspace::wavelength_G, Colorspace::wavelength_B); - const scalar_type cosTheta_1 = clampedCosTheta; vector_type R12p, R23p, R12s, R23s; vector_type cosTheta_2; @@ -589,7 +587,6 @@ struct iridescent_helper // Optical Path Difference const vector_type D = _D * cosTheta_2; - const vector_type Dphi = hlsl::promote(2.0 * numbers::pi) * D / wavelengths; vector_type phi21p, phi21s, phi23p, phi23s, r123s, r123p, Rs; vector_type I = hlsl::promote(0.0); @@ -635,7 +632,7 @@ struct iridescent_helper I += Cm*Sm; } - return hlsl::max(colorspace::scRGB::FromXYZ(I) * hlsl::promote(0.5), hlsl::promote(0.0)); + return hlsl::max(Colorspace::FromXYZ(I) * hlsl::promote(0.5), hlsl::promote(0.0)); } }; @@ -652,6 +649,7 @@ struct iridescent_base vector_type iork3; vector_type eta12; // outside (usually air 1.0) -> thin-film IOR vector_type eta23; // thin-film -> base material IOR + vector_type eta13; }; } @@ -688,6 +686,7 @@ struct Iridescent getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC - { - OrientedEtaRcps rcpEta; - rcpEta.value = hlsl::promote(1.0) / base_type::eta23; - rcpEta.value2 = rcpEta.value * rcpEta.value; - return rcpEta; - } + // OrientedEtaRcps getRefractionOrientedEtaRcps() NBL_CONST_MEMBER_FUNC + // { + // OrientedEtaRcps rcpEta; + // rcpEta.value = hlsl::promote(1.0) / base_type::eta13; + // rcpEta.value2 = rcpEta.value * rcpEta.value; + // return rcpEta; + // } vector_type getEtak23() NBL_CONST_MEMBER_FUNC { @@ -743,6 +742,7 @@ struct 
Iridescent getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC + scalar_type getRefractionOrientedEta() NBL_CONST_MEMBER_FUNC { return base_type::eta13[0]; } + OrientedEtaRcps getRefractionOrientedEtaRcps() NBL_CONST_MEMBER_FUNC { OrientedEtaRcps rcpEta; - rcpEta.value = hlsl::promote(base_type::ior1[0] / base_type::ior3[0]); + rcpEta.value = hlsl::promote(1.0) / hlsl::promote(base_type::eta13[0]); rcpEta.value2 = rcpEta.value * rcpEta.value; return rcpEta; } @@ -771,6 +771,7 @@ struct Iridescent(1.0)/base_type::eta23, flip); orientedFresnel.eta23 = hlsl::mix(base_type::eta23, hlsl::promote(1.0)/base_type::eta12, flip); + orientedFresnel.eta13 = hlsl::mix(base_type::eta13, hlsl::promote(1.0)/base_type::eta13, flip); return orientedFresnel; } From 04f1c7637638cdefdfb3bddc3574f4298d8bd7f3 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 15:22:23 +0700 Subject: [PATCH 270/472] adjust mix_helper calling select requirements --- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index ad53bad2e8..5a19a1d529 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -968,8 +968,19 @@ struct mix_helper NBL_PARTIAL_REQ_TOP((concepts::Vectorial || concepts::Scalar) && concepts::Boolean && !impl::MixCallingBuiltins) -struct mix_helper || concepts::Scalar) && concepts::Boolean && !impl::MixCallingBuiltins) > +namespace impl +{ +template +NBL_BOOL_CONCEPT MixCallingSelect = +#ifdef __HLSL_VERSION +spirv::SelectIsCallable; +#else +concepts::Boolean && (concepts::Scalar || (concepts::Vector && vector_traits::Dimension==vector_traits::Dimension)) && !MixCallingBuiltins; +#endif +} + +template NBL_PARTIAL_REQ_TOP(impl::MixCallingSelect) +struct mix_helper) > { using return_t = T; static 
return_t __call(NBL_CONST_REF_ARG(T) x, NBL_CONST_REF_ARG(T) y, NBL_CONST_REF_ARG(U) a) From 94a778fef8127a4e0ad0c75ade54a1f13593b015 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 15:35:50 +0700 Subject: [PATCH 271/472] restore regular triangle stuff, refactor usage --- .../hlsl/shapes/spherical_triangle.hlsl | 23 ---------- include/nbl/builtin/hlsl/shapes/triangle.hlsl | 46 +++++++++++++++++++ .../asset/utils/CSmoothNormalGenerator.cpp | 4 +- src/nbl/builtin/CMakeLists.txt | 1 + 4 files changed, 49 insertions(+), 25 deletions(-) create mode 100644 include/nbl/builtin/hlsl/shapes/triangle.hlsl diff --git a/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl index 7304fa72e9..f574b106ce 100644 --- a/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl @@ -102,29 +102,6 @@ struct SphericalTriangle vector3_type csc_sides; }; -namespace util -{ - // Use this convetion e_i = v_{i+2}-v_{i+1}. vertex index is modulo by 3. 
- template - vector compInternalAngle(const vector e0, vector e1, const vector e2) - { - // Calculate this triangle's weight for each of its three m_vertices - // start by calculating the lengths of its sides - const float_t a = hlsl::dot(e0, e0); - const float_t asqrt = hlsl::sqrt(a); - const float_t b = hlsl::dot(e1, e1); - const float_t bsqrt = hlsl::sqrt(b); - const float_t c = hlsl::dot(e2, e2); - const float_t csqrt = hlsl::sqrt(c); - - const float_t angle0 = hlsl::acos((b + c - a) / (2.f * bsqrt * csqrt)); - const float_t angle1 = hlsl::acos((-b + c + a) / (2.f * asqrt * csqrt)); - const float_t angle2 = hlsl::numbers::pi - (angle0 + angle1); - // use them to find the angle at each vertex - return vector(angle0, angle1, angle2); - } -} - } } } diff --git a/include/nbl/builtin/hlsl/shapes/triangle.hlsl b/include/nbl/builtin/hlsl/shapes/triangle.hlsl new file mode 100644 index 0000000000..d64fc9d29d --- /dev/null +++ b/include/nbl/builtin/hlsl/shapes/triangle.hlsl @@ -0,0 +1,46 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_HLSL_SHAPES_TRIANGLE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SHAPES_TRIANGLE_INCLUDED_ + +#include +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace shapes +{ + +namespace util +{ + // Use this convetion e_i = v_{i+2}-v_{i+1}. vertex index is modulo by 3. 
+ template + vector anglesFromTriangleEdge(const vector e0, vector e1, const vector e2) + { + // Calculate this triangle's weight for each of its three m_vertices + // start by calculating the lengths of its sides + const float_t a = hlsl::dot(e0, e0); + const float_t asqrt = hlsl::sqrt(a); + const float_t b = hlsl::dot(e1, e1); + const float_t bsqrt = hlsl::sqrt(b); + const float_t c = hlsl::dot(e2, e2); + const float_t csqrt = hlsl::sqrt(c); + + const float_t angle0 = hlsl::acos((b + c - a) / (2.f * bsqrt * csqrt)); + const float_t angle1 = hlsl::acos((-b + c + a) / (2.f * asqrt * csqrt)); + const float_t angle2 = hlsl::numbers::pi - (angle0 + angle1); + // use them to find the angle at each vertex + return vector(angle0, angle1, angle2); + } +} + +} +} +} + +#endif \ No newline at end of file diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp index 43413152a8..2ed1d4e19e 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp @@ -5,7 +5,7 @@ #include "CSmoothNormalGenerator.h" #include "nbl/core/declarations.h" -#include "nbl/builtin/hlsl/shapes/spherical_triangle.hlsl" +#include "nbl/builtin/hlsl/shapes/triangle.hlsl" #include @@ -58,7 +58,7 @@ CSmoothNormalGenerator::VertexHashMap CSmoothNormalGenerator::setupData(const as const auto faceNormal = normalize(cross(v1 - v0, v2 - v0)); //set data for m_vertices - const auto angleWages = hlsl::shapes::util::compInternalAngle(v2 - v1, v0 - v2, v1 - v2); + const auto angleWages = hlsl::shapes::util::anglesFromTriangleEdge(v2 - v1, v0 - v2, v1 - v2); vertices.add({ i, 0, faceNormal * angleWages.x, v0}); vertices.add({ i + 1, 0, faceNormal * angleWages.y,v1}); diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 343bfa31a6..cdafa522ab 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -253,6 +253,7 @@ 
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/circle.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/ellipse.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/line.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/beziers.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/triangle.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/spherical_triangle.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/spherical_rectangle.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/aabb.hlsl") From 9e4a16c5118d0684cdfa82ea3b374ddb57de25af Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 16:13:14 +0700 Subject: [PATCH 272/472] minor changes to rwmc usage --- .../builtin/hlsl/rwmc/CascadeAccumulator.hlsl | 9 +++++---- .../builtin/hlsl/rwmc/SplattingParameters.hlsl | 17 +++++++++++++---- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl index 593e267a26..2ab953b469 100644 --- a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl @@ -69,12 +69,9 @@ struct CascadeAccumulator // most of this code is stolen from https://cg.ivd.kit.edu/publications/2018/rwmc/tool/split.cpp void addSample(uint32_t sampleCount, input_sample_type _sample) { - const float32_t2 unpackedParams = hlsl::unpackHalf2x16(splattingParameters.packedLog2); - const cascade_layer_scalar_type log2Start = unpackedParams[0]; - const cascade_layer_scalar_type log2Base = unpackedParams[1]; const cascade_layer_scalar_type luma = getLuma(_sample); const cascade_layer_scalar_type log2Luma = log2(luma); - const cascade_layer_scalar_type cascade = log2Luma * 1.f / log2Base - log2Start / log2Base; + const cascade_layer_scalar_type cascade = log2Luma * splattingParameters.rcpLog2Base - 
splattingParameters.baseRootOfStart; const cascade_layer_scalar_type clampedCascade = clamp(cascade, 0, CascadeCount - 1); // c<=0 -> 0, c>=Count-1 -> Count-1 uint32_t lowerCascadeIndex = floor(cascade); @@ -85,7 +82,11 @@ struct CascadeAccumulator // handle super bright sample case if (cascade > CascadeCount - 1) + { + const cascade_layer_scalar_type log2Base = cascade_layer_scalar_type(1.0) / splattingParameters.rcpLog2Base; + const cascade_layer_scalar_type log2Start = splattingParameters.baseRootOfStart * log2Base; lowerCascadeWeight = exp2(log2Start + log2Base * (CascadeCount - 1) - log2Luma); + } accumulation.addSampleIntoCascadeEntry(_sample, lowerCascadeIndex, lowerCascadeWeight, higherCascadeWeight, sampleCount); } diff --git a/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl b/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl index c549d83be6..df39660d95 100644 --- a/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl @@ -2,6 +2,7 @@ #define _NBL_BUILTIN_HLSL_RWMC_SPLATTING_PARAMETERS_HLSL_INCLUDED_ #include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/tgmath.hlsl" namespace nbl { @@ -12,10 +13,18 @@ namespace rwmc struct SplattingParameters { - // float16_t log2Start; 0 - // float16_t log2Base; 1 - // pack as Half2x16 - int32_t packedLog2; + using scalar_t = float; + + static SplattingParameters create(const scalar_t base, const scalar_t start) + { + SplattingParameters retval; + retval.rcpLog2Base = scalar_t(1.0) / hlsl::log2(base); + retval.baseRootOfStart = hlsl::log2(start) * retval.rcpLog2Base; + return retval; + } + + scalar_t baseRootOfStart; + scalar_t rcpLog2Base; }; } From 62c79b40e00edcda8a3599c149198c2fb14fe88c Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 16:26:12 +0700 Subject: [PATCH 273/472] remove storing texture inside local var of ResolveAccessorAdaptor --- include/nbl/builtin/hlsl/rwmc/resolve.hlsl | 2 -- 1 file changed, 2 
deletions(-) diff --git a/include/nbl/builtin/hlsl/rwmc/resolve.hlsl b/include/nbl/builtin/hlsl/rwmc/resolve.hlsl index d8f777d277..6c2b8b3230 100644 --- a/include/nbl/builtin/hlsl/rwmc/resolve.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/resolve.hlsl @@ -48,8 +48,6 @@ struct ResolveAccessorAdaptor using output_type = vector; NBL_CONSTEXPR int32_t image_dimension = 2; - RWTexture2DArray cascade; - float32_t calcLuma(NBL_REF_ARG(float32_t3) col) { return hlsl::dot(colorspace::scRGB::ToXYZ()[1], col); From 5075c6385b93a0ca4f29c22a36a4f3ba026d13c5 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 16:53:31 +0700 Subject: [PATCH 274/472] removed accessor, user should provide accessor that matches concept --- include/nbl/builtin/hlsl/rwmc/resolve.hlsl | 26 ---------------------- 1 file changed, 26 deletions(-) diff --git a/include/nbl/builtin/hlsl/rwmc/resolve.hlsl b/include/nbl/builtin/hlsl/rwmc/resolve.hlsl index 6c2b8b3230..906cad512b 100644 --- a/include/nbl/builtin/hlsl/rwmc/resolve.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/resolve.hlsl @@ -41,32 +41,6 @@ NBL_CONCEPT_END( template NBL_BOOL_CONCEPT ResolveAccessor = ResolveAccessorBase && concepts::accessors::LoadableImage; -template -struct ResolveAccessorAdaptor -{ - using output_scalar_type = OutputScalar; - using output_type = vector; - NBL_CONSTEXPR int32_t image_dimension = 2; - - float32_t calcLuma(NBL_REF_ARG(float32_t3) col) - { - return hlsl::dot(colorspace::scRGB::ToXYZ()[1], col); - } - - template - output_type get(vector uv, uint16_t layer) - { - uint32_t imgWidth, imgHeight, layers; - cascade.GetDimensions(imgWidth, imgHeight, layers); - int16_t2 cascadeImageDimension = int16_t2(imgWidth, imgHeight); - - if (any(uv < int16_t2(0, 0)) || any(uv > cascadeImageDimension)) - return vector(0, 0, 0, 0); - - return cascade.Load(int32_t3(uv, int32_t(layer))); - } -}; - template && ResolveAccessor) struct Resolver { From 90007e68b7b3778f5532f4df35854283a7316515 Mon Sep 17 00:00:00 2001 From: keptsecret 
Date: Tue, 16 Dec 2025 17:02:49 +0700 Subject: [PATCH 275/472] fix formatting, name --- include/nbl/builtin/hlsl/shapes/triangle.hlsl | 36 +++++++++---------- .../asset/utils/CSmoothNormalGenerator.cpp | 2 +- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/include/nbl/builtin/hlsl/shapes/triangle.hlsl b/include/nbl/builtin/hlsl/shapes/triangle.hlsl index d64fc9d29d..b2f4170f70 100644 --- a/include/nbl/builtin/hlsl/shapes/triangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/triangle.hlsl @@ -18,25 +18,25 @@ namespace shapes namespace util { - // Use this convetion e_i = v_{i+2}-v_{i+1}. vertex index is modulo by 3. - template - vector anglesFromTriangleEdge(const vector e0, vector e1, const vector e2) - { - // Calculate this triangle's weight for each of its three m_vertices - // start by calculating the lengths of its sides - const float_t a = hlsl::dot(e0, e0); - const float_t asqrt = hlsl::sqrt(a); - const float_t b = hlsl::dot(e1, e1); - const float_t bsqrt = hlsl::sqrt(b); - const float_t c = hlsl::dot(e2, e2); - const float_t csqrt = hlsl::sqrt(c); +// Use this convetion e_i = v_{i+2}-v_{i+1}. vertex index is modulo by 3. 
+template +vector anglesFromTriangleEdges(const vector e0, vector e1, const vector e2) +{ + // Calculate this triangle's weight for each of its three m_vertices + // start by calculating the lengths of its sides + const float_t a = hlsl::dot(e0, e0); + const float_t asqrt = hlsl::sqrt(a); + const float_t b = hlsl::dot(e1, e1); + const float_t bsqrt = hlsl::sqrt(b); + const float_t c = hlsl::dot(e2, e2); + const float_t csqrt = hlsl::sqrt(c); - const float_t angle0 = hlsl::acos((b + c - a) / (2.f * bsqrt * csqrt)); - const float_t angle1 = hlsl::acos((-b + c + a) / (2.f * asqrt * csqrt)); - const float_t angle2 = hlsl::numbers::pi - (angle0 + angle1); - // use them to find the angle at each vertex - return vector(angle0, angle1, angle2); - } + const float_t angle0 = hlsl::acos((b + c - a) / (2.f * bsqrt * csqrt)); + const float_t angle1 = hlsl::acos((-b + c + a) / (2.f * asqrt * csqrt)); + const float_t angle2 = hlsl::numbers::pi - (angle0 + angle1); + // use them to find the angle at each vertex + return vector(angle0, angle1, angle2); +} } } diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp index 2ed1d4e19e..f8bc45a317 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp @@ -58,7 +58,7 @@ CSmoothNormalGenerator::VertexHashMap CSmoothNormalGenerator::setupData(const as const auto faceNormal = normalize(cross(v1 - v0, v2 - v0)); //set data for m_vertices - const auto angleWages = hlsl::shapes::util::anglesFromTriangleEdge(v2 - v1, v0 - v2, v1 - v2); + const auto angleWages = hlsl::shapes::util::anglesFromTriangleEdges(v2 - v1, v0 - v2, v1 - v2); vertices.add({ i, 0, faceNormal * angleWages.x, v0}); vertices.add({ i + 1, 0, faceNormal * angleWages.y,v1}); From 3d36c1392cd9830857500cda363baa7d2df83300 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 17:09:48 +0700 Subject: [PATCH 276/472] added more things to precompute --- 
include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl | 6 +----- include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl | 10 +++++++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl index 2ab953b469..1ed8884206 100644 --- a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl @@ -82,11 +82,7 @@ struct CascadeAccumulator // handle super bright sample case if (cascade > CascadeCount - 1) - { - const cascade_layer_scalar_type log2Base = cascade_layer_scalar_type(1.0) / splattingParameters.rcpLog2Base; - const cascade_layer_scalar_type log2Start = splattingParameters.baseRootOfStart * log2Base; - lowerCascadeWeight = exp2(log2Start + log2Base * (CascadeCount - 1) - log2Luma); - } + lowerCascadeWeight = exp2(splattingParameters.log2Start + splattingParameters.log2Base * (CascadeCount - 1) - log2Luma); accumulation.addSampleIntoCascadeEntry(_sample, lowerCascadeIndex, lowerCascadeWeight, higherCascadeWeight, sampleCount); } diff --git a/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl b/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl index df39660d95..a6c479a8e2 100644 --- a/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl @@ -18,11 +18,15 @@ struct SplattingParameters static SplattingParameters create(const scalar_t base, const scalar_t start) { SplattingParameters retval; - retval.rcpLog2Base = scalar_t(1.0) / hlsl::log2(base); - retval.baseRootOfStart = hlsl::log2(start) * retval.rcpLog2Base; + retval.log2Base = hlsl::log2(base); + retval.log2Start = hlsl::log2(start); + retval.rcpLog2Base = scalar_t(1.0) / retval.log2Base; + retval.baseRootOfStart = retval.log2Start * retval.rcpLog2Base; return retval; } - + + scalar_t log2Base; + scalar_t log2Start; scalar_t baseRootOfStart; scalar_t rcpLog2Base; }; From 
62f2c99d01166ea2e077bc73856727d5af016d43 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 17:27:55 +0700 Subject: [PATCH 277/472] changes to splatting params precompute for the last time --- .../nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl | 2 +- .../nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl index 1ed8884206..9413bcee98 100644 --- a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl @@ -82,7 +82,7 @@ struct CascadeAccumulator // handle super bright sample case if (cascade > CascadeCount - 1) - lowerCascadeWeight = exp2(splattingParameters.log2Start + splattingParameters.log2Base * (CascadeCount - 1) - log2Luma); + lowerCascadeWeight = splattingParameters.lastCascadeLuma / luma; accumulation.addSampleIntoCascadeEntry(_sample, lowerCascadeIndex, lowerCascadeWeight, higherCascadeWeight, sampleCount); } diff --git a/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl b/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl index a6c479a8e2..a3a3520415 100644 --- a/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl @@ -15,18 +15,18 @@ struct SplattingParameters { using scalar_t = float; - static SplattingParameters create(const scalar_t base, const scalar_t start) + static SplattingParameters create(const scalar_t base, const scalar_t start, const uint32_t cascadeCount) { SplattingParameters retval; - retval.log2Base = hlsl::log2(base); - retval.log2Start = hlsl::log2(start); - retval.rcpLog2Base = scalar_t(1.0) / retval.log2Base; - retval.baseRootOfStart = retval.log2Start * retval.rcpLog2Base; + const scalar_t log2Base = hlsl::log2(base); + const scalar_t log2Start = hlsl::log2(start); + retval.lastCascadeLuma = 
hlsl::exp2(log2Start + log2Base * (cascadeCount - 1)); + retval.rcpLog2Base = scalar_t(1.0) / log2Base; + retval.baseRootOfStart = log2Start * retval.rcpLog2Base; return retval; } - scalar_t log2Base; - scalar_t log2Start; + scalar_t lastCascadeLuma; scalar_t baseRootOfStart; scalar_t rcpLog2Base; }; From f3f60c64da31c852aea6118200c6f5c86db3d829 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 17:41:18 +0700 Subject: [PATCH 278/472] check that eta type of dielectric fresnels should be monochrome --- include/nbl/builtin/hlsl/bxdf/fresnel.hlsl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl index d32d3de16c..33faa79efc 100644 --- a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl @@ -319,6 +319,12 @@ NBL_CONCEPT_END( #undef fresnel #include +namespace impl +{ +template +NBL_BOOL_CONCEPT VectorIsMonochrome = vector_traits::Dimension == 1; +} + #define NBL_CONCEPT_NAME TwoSidedFresnel #define NBL_CONCEPT_TPLT_PRM_KINDS (typename) #define NBL_CONCEPT_TPLT_PRM_NAMES (T) @@ -333,6 +339,7 @@ NBL_CONCEPT_END( ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((fresnel.getRefractionOrientedEta()), ::nbl::hlsl::is_same_v, typename T::scalar_type)) ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((fresnel.getRefractionOrientedEtaRcps()), ::nbl::hlsl::is_same_v, OrientedEtaRcps)) ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((fresnel.getReorientedFresnel(cosTheta)), ::nbl::hlsl::is_same_v, T)) + ((NBL_CONCEPT_REQ_TYPE_ALIAS_CONCEPT)(impl::VectorIsMonochrome, typename T::eta_type)) ); #undef cosTheta #undef fresnel From b4d957d40bf3ad1967b20d65cbe929b2965eb50a Mon Sep 17 00:00:00 2001 From: Mateusz Kielan Date: Tue, 16 Dec 2025 13:02:12 +0100 Subject: [PATCH 279/472] Change `Compile flag error` to `Compile flag warning` so CI logs are easier to Ctrl+F --- src/nbl/asset/utils/CHLSLCompiler.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git 
a/src/nbl/asset/utils/CHLSLCompiler.cpp b/src/nbl/asset/utils/CHLSLCompiler.cpp index 306d2f60de..d36ecfa1cb 100644 --- a/src/nbl/asset/utils/CHLSLCompiler.cpp +++ b/src/nbl/asset/utils/CHLSLCompiler.cpp @@ -115,11 +115,11 @@ static bool fixup_spirv_target_ver(std::vector& arguments, system: const auto found = AllowedSuffices.find(suffix); if (found!=AllowedSuffices.end()) return true; - logger.log("Compile flag error: Required compile flag not found -fspv-target-env=. Force enabling -fspv-target-env= found but with unsupported value `%s`.", system::ILogger::ELL_ERROR, "TODO: write wchar to char convert usage"); + logger.log("Compile flag warning: Required compile flag not found -fspv-target-env=. Force enabling -fspv-target-env= found but with unsupported value `%s`.", system::ILogger::ELL_ERROR, "TODO: write wchar to char convert usage"); return false; } - logger.log("Compile flag error: Required compile flag not found -fspv-target-env=. Force enabling -fspv-target-env=vulkan1.3, as it is required by Nabla.", system::ILogger::ELL_WARNING); + logger.log("Compile flag warning: Required compile flag not found -fspv-target-env=. Force enabling -fspv-target-env=vulkan1.3, as it is required by Nabla.", system::ILogger::ELL_WARNING); arguments.push_back(L"-fspv-target-env=vulkan1.3"); return true; } @@ -148,7 +148,7 @@ static void try_upgrade_hlsl_version(std::vector& arguments, syste } else { - logger.log("Compile flag error: Required compile flag not found -HV. Force enabling -HV 202x, as it is required by Nabla.", system::ILogger::ELL_WARNING); + logger.log("Compile flag warning: Required compile flag not found -HV. 
Force enabling -HV 202x, as it is required by Nabla.", system::ILogger::ELL_WARNING); arguments.push_back(L"-HV"); arguments.push_back(L"202x"); } @@ -254,7 +254,7 @@ static void add_required_arguments_if_not_present(std::vector& arg { bool missing = set.find(required[j]) == set.end(); if (missing) { - logger.log("Compile flag error: Required compile flag not found %ls. This flag will be force enabled, as it is required by Nabla.", system::ILogger::ELL_WARNING, required[j]); + logger.log("Compile flag warning: Required compile flag not found %ls. This flag will be force enabled, as it is required by Nabla.", system::ILogger::ELL_WARNING, required[j]); arguments.push_back(required[j]); } } @@ -534,4 +534,4 @@ void CHLSLCompiler::insertIntoStart(std::string& code, std::ostringstream&& ins) code.insert(0u, ins.str()); } -#endif \ No newline at end of file +#endif From ed3352a293d0ea6d0e5106a2f9aaafecb4054aea Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Wed, 17 Dec 2025 18:38:00 +0100 Subject: [PATCH 280/472] Implemented relative approx compare --- examples_tests | 2 +- include/nbl/builtin/hlsl/ieee754.hlsl | 18 ++++ .../hlsl/testing/relative_approx_compare.hlsl | 94 +++++++++++++++++++ .../hlsl/vector_utils/vector_traits.hlsl | 21 ++--- src/nbl/builtin/CMakeLists.txt | 2 + 5 files changed, 122 insertions(+), 15 deletions(-) create mode 100644 include/nbl/builtin/hlsl/testing/relative_approx_compare.hlsl diff --git a/examples_tests b/examples_tests index 8114cb0740..2a7a800195 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 8114cb0740323bbde03375c731bce34d6eeeb8d9 +Subproject commit 2a7a800195f945981ce8ade4f07c31f14925cfb5 diff --git a/include/nbl/builtin/hlsl/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754.hlsl index 29c48a79d1..a3930a362a 100644 --- a/include/nbl/builtin/hlsl/ieee754.hlsl +++ b/include/nbl/builtin/hlsl/ieee754.hlsl @@ -251,6 +251,24 @@ NBL_CONSTEXPR_FUNC T flipSignIfRHSNegative(T val, T flip) return 
impl::flipSignIfRHSNegative_helper::__call(val, flip); } +template ) +NBL_CONSTEXPR_FUNC bool isSubnormal(T val) +{ + const uint32_t biasedExponent = extractBiasedExponent(val); + const typename unsigned_integer_of_size::type mantissa = extractMantissa(val); + return biasedExponent == 0 && mantissa != 0u; +} + +template ) +NBL_CONSTEXPR_FUNC bool isZero(T val) +{ + using traits_t = traits; + using AsUint = typename unsigned_integer_of_size::type; + + const AsUint exponentAndMantissaMask = ~traits_t::signMask; + return !(ieee754::impl::bitCastToUintType(val) & exponentAndMantissaMask); +} + } } } diff --git a/include/nbl/builtin/hlsl/testing/relative_approx_compare.hlsl b/include/nbl/builtin/hlsl/testing/relative_approx_compare.hlsl new file mode 100644 index 0000000000..8d32780f93 --- /dev/null +++ b/include/nbl/builtin/hlsl/testing/relative_approx_compare.hlsl @@ -0,0 +1,94 @@ +#ifndef _NBL_BUILTIN_HLSL_TESTING_RELATIVE_APPROX_COMPARE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_TESTING_RELATIVE_APPROX_COMPARE_INCLUDED_ + +#include +#include +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace testing +{ +namespace impl +{ + +template +struct RelativeApproxCompareHelper; + +template +NBL_PARTIAL_REQ_TOP(concepts::FloatingPointLikeScalar) +struct RelativeApproxCompareHelper) > +{ + static bool __call(NBL_CONST_REF_ARG(FloatingPoint) lhs, NBL_CONST_REF_ARG(FloatingPoint) rhs, const float64_t maxAllowedDifference) + { + const bool bothAreNaN = nbl::hlsl::isnan(lhs) && nbl::hlsl::isnan(rhs); + const bool bothAreInf = nbl::hlsl::isinf(lhs) && nbl::hlsl::isinf(rhs); + const bool bothHaveSameSign = nbl::hlsl::ieee754::extractSign(lhs) == nbl::hlsl::ieee754::extractSign(rhs); + const bool lhsIsSubnormalOrZero = ieee754::isSubnormal(lhs) || ieee754::isZero(lhs); + const bool rhsIsSubnormalOrZero = ieee754::isSubnormal(rhs) || ieee754::isZero(rhs); + + if (bothAreNaN) + return true; + if (bothAreInf && bothHaveSameSign) + return true; + if (lhsIsSubnormalOrZero && 
rhsIsSubnormalOrZero) + return true; + if (!lhsIsSubnormalOrZero && rhsIsSubnormalOrZero) + return false; + if (lhsIsSubnormalOrZero && !rhsIsSubnormalOrZero) + return false; + + return hlsl::max(hlsl::abs(lhs / rhs), hlsl::abs(rhs / lhs)) <= 1.f + maxAllowedDifference; + } +}; + +template +NBL_PARTIAL_REQ_TOP(concepts::FloatingPointLikeVectorial) +struct RelativeApproxCompareHelper) > +{ + static bool __call(NBL_CONST_REF_ARG(FloatingPointVector) lhs, NBL_CONST_REF_ARG(FloatingPointVector) rhs, const float64_t maxAllowedDifference) + { + using traits = nbl::hlsl::vector_traits; + for (uint32_t i = 0; i < traits::Dimension; ++i) + { + if (!RelativeApproxCompareHelper::__call(lhs[i], rhs[i], maxAllowedDifference)) + return false; + } + + return true; + } +}; + +template +NBL_PARTIAL_REQ_TOP(concepts::Matricial && concepts::FloatingPointLikeScalar::scalar_type>) +struct RelativeApproxCompareHelper && concepts::FloatingPointLikeScalar::scalar_type>) > +{ + static bool __call(NBL_CONST_REF_ARG(FloatingPointMatrix) lhs, NBL_CONST_REF_ARG(FloatingPointMatrix) rhs, const float64_t maxAllowedDifference) + { + using traits = nbl::hlsl::matrix_traits; + for (uint32_t i = 0; i < traits::RowCount; ++i) + { + if (!RelativeApproxCompareHelper::__call(lhs[i], rhs[i], maxAllowedDifference)) + return false; + } + + return true; + } +}; + +} + +template +bool relativeApproxCompare(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs, const float64_t maxAllowedDifference) +{ + return impl::RelativeApproxCompareHelper::__call(lhs, rhs, maxAllowedDifference); +} + +} +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/vector_utils/vector_traits.hlsl b/include/nbl/builtin/hlsl/vector_utils/vector_traits.hlsl index 652cabd7c7..95315f6e3c 100644 --- a/include/nbl/builtin/hlsl/vector_utils/vector_traits.hlsl +++ b/include/nbl/builtin/hlsl/vector_utils/vector_traits.hlsl @@ -18,20 +18,13 @@ struct vector_traits NBL_CONSTEXPR_STATIC_INLINE bool IsVector = false; 
}; -// i choose to implement it this way because of this DXC bug: https://github.com/microsoft/DirectXShaderCom0piler/issues/7007 -#define DEFINE_VECTOR_TRAITS_TEMPLATE_SPECIALIZATION(DIMENSION)\ -template \ -struct vector_traits >\ -{\ - using scalar_type = T;\ - NBL_CONSTEXPR_STATIC_INLINE uint32_t Dimension = DIMENSION;\ - NBL_CONSTEXPR_STATIC_INLINE bool IsVector = true;\ -};\ - -DEFINE_VECTOR_TRAITS_TEMPLATE_SPECIALIZATION(1) -DEFINE_VECTOR_TRAITS_TEMPLATE_SPECIALIZATION(2) -DEFINE_VECTOR_TRAITS_TEMPLATE_SPECIALIZATION(3) -DEFINE_VECTOR_TRAITS_TEMPLATE_SPECIALIZATION(4) +template +struct vector_traits > +{ + using scalar_type = T; + NBL_CONSTEXPR_STATIC_INLINE uint32_t Dimension = N; + NBL_CONSTEXPR_STATIC_INLINE bool IsVector = true; +}; } } diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 75cb681d36..6549a2b691 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -357,5 +357,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/blur.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/box_sampler.hlsl") #morton codes LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/morton.hlsl") +#testing +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/testing/relative_approx_compare.hlsl") ADD_CUSTOM_BUILTIN_RESOURCES(nblBuiltinResourceData NBL_RESOURCES_TO_EMBED "${NBL_ROOT_PATH}/include" "nbl/builtin" "nbl::builtin" "${NBL_ROOT_PATH_BINARY}/include" "${NBL_ROOT_PATH_BINARY}/src" "STATIC" "INTERNAL") From 669bdb25e57b1995120a21bf02e9c575a449bf62 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Wed, 17 Dec 2025 19:44:53 +0100 Subject: [PATCH 281/472] Updated examples --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 2a7a800195..ab4ae7d2ac 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 2a7a800195f945981ce8ade4f07c31f14925cfb5 +Subproject commit 
ab4ae7d2ac92030437477e3172866804587b6c14 From e714c2469357633bd17a26b693e9157c94116dd8 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Wed, 17 Dec 2025 22:25:41 +0300 Subject: [PATCH 282/472] RandomSampler can give floats now, ranged and [0, 1), also update examples submodule --- examples_tests | 2 +- include/nbl/core/sampling/RandomSampler.h | 21 ++++++++++++++++++--- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/examples_tests b/examples_tests index a35eddd1bd..1c6458d81b 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit a35eddd1bd83fbf636e820b59c6eef939ed09668 +Subproject commit 1c6458d81b83aea176ac7ebda7450a9b395a85bd diff --git a/include/nbl/core/sampling/RandomSampler.h b/include/nbl/core/sampling/RandomSampler.h index 39832dc8f1..b692ef5e08 100644 --- a/include/nbl/core/sampling/RandomSampler.h +++ b/include/nbl/core/sampling/RandomSampler.h @@ -11,8 +11,8 @@ namespace nbl::core { -class RandomSampler -{ + class RandomSampler + { public: RandomSampler(uint32_t _seed) { @@ -25,9 +25,24 @@ class RandomSampler return mersenneTwister(); } + // Returns a float in [0, 1) + inline float nextFloat() + { + // 1 / 2^32 + constexpr float norm = 1.0f / 4294967296.0f; + return mersenneTwister() * norm; + } + + // Returns a float in [min, max) + inline float nextFloat(float min, float max) + { + constexpr float norm = 1.0f / 4294967296.0f; + return min + (mersenneTwister() * norm) * (max - min); + } + protected: std::mt19937 mersenneTwister; -}; + }; } From 6741c756172abb1e8095e9c153cecc3207622313 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Thu, 18 Dec 2025 01:11:29 +0300 Subject: [PATCH 283/472] update examples submodules --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 1c6458d81b..2e306fc96b 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 1c6458d81b83aea176ac7ebda7450a9b395a85bd +Subproject commit 
2e306fc96bfae85a9669ad552751cece33d1b383 From 92545a557f6231d8a84275e75228f235ea7b4e41 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Thu, 18 Dec 2025 02:25:03 +0300 Subject: [PATCH 284/472] update examples submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 2e306fc96b..12486d4670 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 2e306fc96bfae85a9669ad552751cece33d1b383 +Subproject commit 12486d4670f0453722351814996d91f198a16749 From 5aee002beb5181fe3a92a32abfcf0934e937ec5f Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 18 Dec 2025 11:25:49 +0700 Subject: [PATCH 285/472] roll constructor params into own struct, fix assert in validation --- include/nbl/ext/DebugDraw/CDrawAABB.h | 48 ++++++++++++++++++--------- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 29 ++++++---------- 2 files changed, 42 insertions(+), 35 deletions(-) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index e7af675324..22a7a1e595 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -40,10 +40,9 @@ namespace nbl::ext::debug_draw core::smart_refctd_ptr streamingBuffer = nullptr; }; - // only used to make the 24 element index buffer and instanced pipeline on create struct SCreationParameters : SCachedCreationParameters { - video::IQueue* transfer = nullptr; + video::IQueue* transfer = nullptr; // only used to make the 24 element index buffer and instanced pipeline on create core::smart_refctd_ptr assetManager = nullptr; core::smart_refctd_ptr singlePipelineLayout; @@ -52,18 +51,24 @@ namespace nbl::ext::debug_draw inline bool validate() const { - assert(bool(assetManager)); - assert(bool(assetManager->getSystem())); - assert(bool(utilities)); - assert(bool(transfer)); - assert(bool(renderpass)); + const auto validation = std::to_array + ({ + std::make_pair(bool(assetManager), "Invalid 
`creationParams.assetManager` is nullptr!"), + std::make_pair(bool(utilities), "Invalid `creationParams.utilities` is nullptr!"), + std::make_pair(bool(transfer), "Invalid `creationParams.transfer` is nullptr!"), + std::make_pair(bool(renderpass), "Invalid `creationParams.renderpass` is nullptr!"), + std::make_pair(bool(utilities->getLogicalDevice()->getPhysicalDevice()->getQueueFamilyProperties()[transfer->getFamilyIndex()].queueFlags.hasFlags(video::IQueue::FAMILY_FLAGS::TRANSFER_BIT)), "Invalid `creationParams.transfer` is not capable of transfer operations!") + }); system::logger_opt_ptr logger = utilities->getLogger(); - if (!bool(utilities->getLogicalDevice()->getPhysicalDevice()->getQueueFamilyProperties()[transfer->getFamilyIndex()].queueFlags.hasFlags(video::IQueue::FAMILY_FLAGS::TRANSFER_BIT))) - { - logger.log("Invalid `creationParams.transfer` is not capable of transfer operations!", system::ILogger::ELL_ERROR); - return false; - } + for (const auto& [ok, error] : validation) + if (!ok) + { + logger.log(error, system::ILogger::ELL_ERROR); + return false; + } + + assert(bool(assetManager->getSystem())); return true; } @@ -168,12 +173,23 @@ namespace nbl::ext::debug_draw static hlsl::float32_t3x4 getTransformFromAABB(const hlsl::shapes::AABB<3, float>& aabb); protected: - DrawAABB(SCreationParameters&& _params, core::smart_refctd_ptr singlePipeline, core::smart_refctd_ptr batchPipeline, - core::smart_refctd_ptr indicesBuffer); - ~DrawAABB() override; + struct ConstructorParams + { + SCachedCreationParameters creationParams; + core::smart_refctd_ptr singlePipeline = nullptr; + core::smart_refctd_ptr batchPipeline = nullptr; + core::smart_refctd_ptr indicesBuffer = nullptr; + }; + + DrawAABB(ConstructorParams&& params) : + m_cachedCreationParams(std::move(params.creationParams)), + m_singlePipeline(std::move(params.singlePipeline)), + m_batchPipeline(std::move(params.batchPipeline)), + m_indicesBuffer(std::move(params.indicesBuffer)) + {} + ~DrawAABB() 
override {} private: - //static bool validateCreationParameters(SCreationParameters& params); static core::smart_refctd_ptr createPipeline(SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout, const std::string& vsPath, const std::string& fsPath); static bool createStreamingBuffer(SCreationParameters& params); static core::smart_refctd_ptr createIndicesBuffer(SCreationParameters& params); diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index fa07d9805f..d2bf60849b 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -28,28 +28,28 @@ core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) return nullptr; } - smart_refctd_ptr singlePipeline = nullptr; + ConstructorParams constructorParams; + if (params.drawMode & ADM_DRAW_SINGLE) { auto pipelineLayout = params.singlePipelineLayout; if (!pipelineLayout) pipelineLayout = createDefaultPipelineLayout(params.utilities->getLogicalDevice(), ADM_DRAW_SINGLE); - singlePipeline = createPipeline(params, pipelineLayout.get(), "single.vertex.hlsl", "aabb_instances.fragment.hlsl"); - if (!singlePipeline) + constructorParams.singlePipeline = createPipeline(params, pipelineLayout.get(), "single.vertex.hlsl", "aabb_instances.fragment.hlsl"); + if (!constructorParams.singlePipeline) { logger->log("Failed to create pipeline!", ILogger::ELL_ERROR); return nullptr; } } - smart_refctd_ptr batchPipeline = nullptr; if (params.drawMode & ADM_DRAW_BATCH) { auto pipelineLayout = params.batchPipelineLayout; if (!pipelineLayout) pipelineLayout = createDefaultPipelineLayout(params.utilities->getLogicalDevice(), ADM_DRAW_BATCH); - batchPipeline = createPipeline(params, pipelineLayout.get(), "aabb_instances.vertex.hlsl", "aabb_instances.fragment.hlsl"); - if (!batchPipeline) + constructorParams.batchPipeline = createPipeline(params, pipelineLayout.get(), "aabb_instances.vertex.hlsl", "aabb_instances.fragment.hlsl"); + if 
(!constructorParams.batchPipeline) { logger->log("Failed to create pipeline!", ILogger::ELL_ERROR); return nullptr; @@ -62,24 +62,15 @@ core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) return nullptr; } - auto indicesBuffer = createIndicesBuffer(params); - if (!indicesBuffer) + constructorParams.indicesBuffer = createIndicesBuffer(params); + if (!constructorParams.indicesBuffer) { logger->log("Failed to create indices buffer!", ILogger::ELL_ERROR); return nullptr; } - return core::smart_refctd_ptr(new DrawAABB(std::move(params), singlePipeline, batchPipeline, indicesBuffer)); -} - -DrawAABB::DrawAABB(SCreationParameters&& params, core::smart_refctd_ptr singlePipeline, smart_refctd_ptr batchPipeline, smart_refctd_ptr indicesBuffer) - : m_cachedCreationParams(std::move(params)), m_singlePipeline(std::move(singlePipeline)), m_batchPipeline(std::move(batchPipeline)), - m_indicesBuffer(std::move(indicesBuffer)) -{ -} - -DrawAABB::~DrawAABB() -{ + constructorParams.creationParams = std::move(params); + return core::smart_refctd_ptr(new DrawAABB(std::move(constructorParams))); } // note we use archive entry explicitly for temporary compiler include search path & asset cwd to use keys directly From a1bd02605f505068c97646fb8eff10c67cf86901 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 18 Dec 2025 13:56:45 +0700 Subject: [PATCH 286/472] adds a check against double mounting same archive --- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index d2bf60849b..c32c591410 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -83,6 +83,12 @@ const smart_refctd_ptr DrawAABB::mount(smart_refctd_ptr l if (!system) return nullptr; + if (system->isDirectory(path(NBL_ARCHIVE_ENTRY.data()))) + { + logger->log("CDrawAABB directory is already mounted!", ILogger::ELL_WARNING); + return nullptr; + } + 
// extension should mount everything for you, regardless if content goes from virtual filesystem // or disk directly - and you should never rely on application framework to expose extension data @@ -135,6 +141,12 @@ smart_refctd_ptr DrawAABB::createPipeline(SCreationParamet return params.utilities->getLogicalDevice()->compileShader({ shaderSrc.get() }); }; + if (!system->areBuiltinsMounted()) + { + params.utilities->getLogger()->log("Nabla builtins are not mounted!", ILogger::ELL_ERROR); + return nullptr; + } + if (!system->exists(path(NBL_ARCHIVE_ENTRY) / "common.hlsl", {})) mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), NBL_ARCHIVE_ENTRY); From c5f3f8922a0e1006a0291d8190a7dd00422b5afa Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Thu, 18 Dec 2025 15:13:13 +0100 Subject: [PATCH 287/472] Updated examples --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index ab4ae7d2ac..ec1f5a5a6f 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit ab4ae7d2ac92030437477e3172866804587b6c14 +Subproject commit ec1f5a5a6f805c5213499d6611a7e7785ee60aaf From 32de44d2d31f0ee80292a255b3df5bd824f218f2 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 18 Dec 2025 16:23:48 +0100 Subject: [PATCH 288/472] Create docs for NSC prebuilds --- docs/nsc-prebuilds.md | 386 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 386 insertions(+) create mode 100644 docs/nsc-prebuilds.md diff --git a/docs/nsc-prebuilds.md b/docs/nsc-prebuilds.md new file mode 100644 index 0000000000..4d57d7a8de --- /dev/null +++ b/docs/nsc-prebuilds.md @@ -0,0 +1,386 @@ +# NSC prebuilds (build-time HLSL -> SPIR-V) + +This document explains how to use `NBL_CREATE_NSC_COMPILE_RULES` together with `NBL_CREATE_RESOURCE_ARCHIVE` to: + +- Compile HLSL to SPIR-V at **build time** (via the `nsc` tool). +- Optionally generate **device-cap permutations** (limits/features "CAPS"). 
+- Generate a small C++ header with **type-safe key getters** (`get_spirv_key<...>()`). +- Make the same code work with `NBL_EMBED_BUILTIN_RESOURCES` **ON** (embedded virtual archive) and **OFF** (mounted build directory) when loading your precompiled SPIR-V at runtime. + +Definitions live in `cmake/common.cmake` (`NBL_CREATE_NSC_COMPILE_RULES`, `NBL_CREATE_RESOURCE_ARCHIVE`). + +## Runtime mounting requirement (important) + +All of this assumes your app mounts the directory/archive containing the NSC outputs (i.e. `BINARY_DIR`) into Nabla's virtual filesystem, then loads files via keys that are relative to that mounted root (the examples use `app_resources`). + +The examples "just work" because they inherit from `nbl::examples::BuiltinResourcesApplication`, which mounts: + +- `NBL_EMBED_BUILTIN_RESOURCES=OFF`: `system::CMountDirectoryArchive(NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT, ...)` at `app_resources` +- `NBL_EMBED_BUILTIN_RESOURCES=ON`: the generated embedded archive (e.g. `nbl::this_example::builtin::build::CArchive`) at `app_resources` + +If you're writing your own app/extension and don't use `BuiltinResourcesApplication`, you must mount equivalently yourself (split by `NBL_EMBED_BUILTIN_RESOURCES`). Optionally set `IAssetLoader::SAssetLoadParams::workingDirectory` to whatever virtual root you want to load from. + +The `MOUNT_POINT_DEFINE` argument of `NBL_CREATE_NSC_COMPILE_RULES` defines a C/C++ macro whose value is the absolute path to the NSC output directory (`BINARY_DIR`) that you mount when builtins are off (in examples it's `NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT`). + +See `examples_tests/common/include/nbl/examples/common/BuiltinResourcesApplication.hpp` for the exact mounting logic. + +## Why build-time NSC instead of runtime compilation? + +Build-time compilation is usually preferable because it: + +- Uses your build system's parallelism (Ninja/MSBuild jobs) to compile shaders quickly. 
+- Writes **only into the build tree** (no source tree pollution, easy clean/reconfigure). +- Lets CI validate "shaders compile" as part of a normal build. +- Enables fast runtime iteration: at runtime you only **pick** the right SPIR-V, you don't compile it. +- Makes shader compilation deterministic and reproducible (toolchain + flags captured by the build). + +Runtime compilation is still useful for prototyping, but (assuming you don't use a runtime shader cache) it can make startup slower and shift failures to runtime instead of CI/build (a cache can hide the repeated cost on subsequent runs; our current one has some rough edges: it writes into the source tree and has issues when compiling many inputs from the same source directory). + +## What `NBL_CREATE_NSC_COMPILE_RULES` produces + +For each registered input it generates: + +- One `.spv` output **per CMake configuration** (`Debug/`, `Release/`, `RelWithDebInfo/`). +- If you use `CAPS`, it generates a **cartesian product** of permutations and emits a `.spv` for each. +- A generated header (you choose the path via `INCLUDE`) containing: + - a primary template `get_spirv_key<KEY>(limits, features)` and `get_spirv_key<KEY>(device)` + - explicit specializations for each registered base `KEY` + - the returned key already includes the build config prefix (compiled into the header). + +Keys are strings that match the output layout: + +``` +<CONFIG>/<KEY>(.<name>_<value>)(.<name>_<value>)....spv +``` + +## The JSON "INPUTS" format + +`INPUTS` is a JSON array of objects. Each object supports: + +- `INPUT` (string, required): path to `.hlsl` (relative to `CMAKE_CURRENT_SOURCE_DIR` or absolute). +- `KEY` (string, required): base key (prefer without `.spv`; it is always appended, so using `foo.spv` will result in `foo.spv.spv`). +- `COMPILE_OPTIONS` (array of strings, optional): per-input extra options (e.g. `["-T","cs_6_8"]`). +- `DEPENDS` (array of strings, optional): per-input dependencies (extra files that should trigger rebuild). 
+- `CAPS` (array, optional): permutation caps (see below). + +You can register many rules in a single call, and you can call the function multiple times to append rules to the same `TARGET`. + +## Compile options (generator expressions, defaults, debug info) + +`NBL_CREATE_NSC_COMPILE_RULES` combines options from multiple sources: + +- Built-in defaults from the helper (see `cmake/common.cmake`): HLSL version, Vulkan SPIR-V target env, scalar layout, warnings, and per-config optimization flags (e.g. `-O0` for Debug, `-O3` for Release) implemented via CMake generator expressions. +- Global extra options via `COMMON_OPTIONS` (CMake list). +- Per-input extra options via JSON `COMPILE_OPTIONS` (array of strings). + +Both `COMMON_OPTIONS` and JSON `COMPILE_OPTIONS` support CMake generator expressions like `$<$<CONFIG:Debug>:...>` (the helper uses them itself), so you can make flags configuration-dependent when needed. + +### Debug info for RenderDoc + +The helper also exposes CMake options that append NSC debug flags **only for Debug config** (via generator expressions). Enable them if you want RenderDoc to show source/line information instead of just raw disassembly: + +- `NSC_DEBUG_EDIF_FILE_BIT` (default `ON`) -> `-fspv-debug=file` +- `NSC_DEBUG_EDIF_TOOL_BIT` (default `ON`) -> `-fspv-debug=tool` +- `NSC_DEBUG_EDIF_SOURCE_BIT` (default `OFF`) -> `-fspv-debug=source` +- `NSC_DEBUG_EDIF_LINE_BIT` (default `OFF`) -> `-fspv-debug=line` +- `NSC_DEBUG_EDIF_NON_SEMANTIC_BIT` (default `OFF`) -> `-fspv-debug=vulkan-with-source` + +## Source files and rebuild dependencies (important) + +Make sure shader inputs and includes are: + +1. Marked as header-only on your target (so the IDE shows them, but the build system doesn't try to compile them with default HLSL rules like `fxc`): + +```cmake +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) +``` + +2. 
Listed as dependencies of the NSC custom commands (so editing any of them triggers a rebuild of the `.spv` outputs). + +This is what the `DEPENDS` argument of `NBL_CREATE_NSC_COMPILE_RULES` (and/or per-input JSON `DEPENDS`) is for. Always include the main `INPUT` file itself and any files it includes; otherwise the build system might not re-run `nsc` when you change them. + +## Minimal usage (no permutations) + +Example pattern (as in `examples_tests/27_MPMCScheduler/CMakeLists.txt`): + +```cmake +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/common.hlsl + app_resources/shader.comp.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(JSON [=[ +[ + { + "INPUT": "app_resources/shader.comp.hlsl", + "KEY": "shader", + "COMPILE_OPTIONS": ["-T", "cs_6_8"], + "DEPENDS": [], + "CAPS": [] + } +] +]=]) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS -I ${CMAKE_CURRENT_SOURCE_DIR} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) +``` + +Then include the generated header and use the key to load the SPIR-V: + +```cpp +#include "nbl/this_example/builtin/build/spirv/keys.hpp" +// ... +auto key = nbl::this_example::builtin::build::get_spirv_key<"shader">(device); +auto bundle = assetMgr->getAsset(key.c_str(), loadParams); +``` + +`OUTPUT_VAR` (here: `KEYS`) is assigned the list of **all** produced access keys (all configurations + all permutations). This list is intended to be fed into `NBL_CREATE_RESOURCE_ARCHIVE(BUILTINS ${KEYS})`. + +## Permutations via `CAPS` + +`CAPS` lets you prebuild multiple SPIR-V variants parameterized by device limits or features. 
+ +Each `CAPS` entry looks like: + +- `kind` (string, optional): `"limits"` or `"features"` (defaults to `"limits"` if omitted/invalid). +- `name` (string, required): identifier used in both generated HLSL config and C++ key (must be a valid C/C++ identifier). +- `type` (string, required): `bool`, `uint16_t`, `uint32_t`, `uint64_t`. +- `values` (array of numbers, required): the values you want to prebuild. + - for `bool`, values must be `0` or `1`. + +At build time, NSC compiles each combination of values (cartesian product). At runtime, `get_spirv_key` appends suffixes using the `limits`/`features` you pass in. + +### Example: mixing `limits` and `features` + +This example permutes over one device limit and one device feature (order matters: the suffix order matches the `CAPS` array order): + +```cmake +set(JSON [=[ +[ + { + "INPUT": "app_resources/shader.hlsl", + "KEY": "shader", + "COMPILE_OPTIONS": ["-T", "lib_6_8"], + "DEPENDS": ["app_resources/common.hlsl"], + "CAPS": [ + { + "kind": "limits", + "name": "maxComputeSharedMemorySize", + "type": "uint32_t", + "values": [16384, 32768, 65536] + }, + { + "kind": "features", + "name": "shaderFloat64", + "type": "bool", + "values": [0, 1] + } + ] + } +] +]=]) + +NBL_CREATE_NSC_COMPILE_RULES( + # ... + OUTPUT_VAR KEYS + INPUTS ${JSON} +) +``` + +This produces `3 * 2 = 6` permutations per build configuration, and `KEYS` contains all of them (for example): + +``` +Debug/shader.maxComputeSharedMemorySize_16384.shaderFloat64_0.spv +Debug/shader.maxComputeSharedMemorySize_16384.shaderFloat64_1.spv +... +``` + +Practical tip: for numeric limits you often want to "bucket" real device values into one of the prebuilt values. 
The CountingSort example does exactly that: + +- CMake definition: `examples_tests/10_CountingSort/CMakeLists.txt` +- Runtime bucketing: `examples_tests/10_CountingSort/main.cpp` + +```cpp +auto limits = m_physicalDevice->getLimits(); +constexpr std::array AllowedMaxComputeSharedMemorySizes = { 16384, 32768, 65536 }; + +auto upperBoundSharedMemSize = std::upper_bound( + AllowedMaxComputeSharedMemorySizes.begin(), AllowedMaxComputeSharedMemorySizes.end(), limits.maxComputeSharedMemorySize +); +// devices which support less than 16KB of max compute shared memory size are not supported +if (upperBoundSharedMemSize == AllowedMaxComputeSharedMemorySizes.begin()) +{ + m_logger->log("maxComputeSharedMemorySize is too low (%u)", ILogger::E_LOG_LEVEL::ELL_ERROR, limits.maxComputeSharedMemorySize); + exit(0); +} + +limits.maxComputeSharedMemorySize = *(upperBoundSharedMemSize - 1); + +auto key = nbl::this_example::builtin::build::get_spirv_key<"prefix_sum_shader">(limits, m_physicalDevice->getFeatures()); +``` + +## Pairing with `NBL_CREATE_RESOURCE_ARCHIVE` (works with builtins ON/OFF) + +The recommended pattern is to always call `NBL_CREATE_RESOURCE_ARCHIVE` right after the NSC rules, using the produced `KEYS` list: + +```cmake +NBL_CREATE_RESOURCE_ARCHIVE( + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} + NAMESPACE nbl::this_example::builtin::build +) +``` + +### How `BINARY_DIR`, `MOUNT_POINT_DEFINE`, and `BIND` fit together + +- In `NBL_CREATE_NSC_COMPILE_RULES`, `BINARY_DIR` is the output directory where NSC writes the compiled files: + - `${BINARY_DIR}/<CONFIG>/<KEY>....spv` +- In `NBL_CREATE_NSC_COMPILE_RULES`, `MOUNT_POINT_DEFINE` is the *name* of a C/C++ preprocessor define whose value is set to the **absolute path** of `BINARY_DIR`. + - Example: `MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT` results in something like `-DNBL_THIS_EXAMPLE_BUILD_MOUNT_POINT="C:/.../auto-gen"` on the target. 
+ - Keys returned by `get_spirv_key<...>()` are relative to that directory; the full path on disk is: + - `${NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT}/<key>` +- In `NBL_CREATE_RESOURCE_ARCHIVE`, `BIND` should point at the same directory as `BINARY_DIR`. + - The `BUILTINS` list entries must be relative to `BIND`. + - This is why pairing it with `OUTPUT_VAR KEYS` works: `KEYS` is exactly the list of relative paths under `BINARY_DIR` that were generated by the NSC rules, so the archive generator knows what to serialize/embed. + +This is designed to work in both modes: + +- `NBL_EMBED_BUILTIN_RESOURCES=OFF`: + - `NBL_CREATE_RESOURCE_ARCHIVE` becomes a no-op (creates a dummy interface target). + - You load SPIR-V from the **build directory** mounted into the virtual filesystem. + - `MOUNT_POINT_DEFINE` provides an absolute path (e.g. `NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT`) for mounting. +- `NBL_EMBED_BUILTIN_RESOURCES=ON`: + - `NBL_CREATE_RESOURCE_ARCHIVE` generates a small library that embeds the listed files into a virtual archive and emits `.../CArchive.h` under the requested `NAMESPACE`. + - You mount the embedded archive instead of a directory; runtime loading code stays the same (keys don't change). + +## Notes / gotchas + +- `INCLUDE` must be a **relative** path (it is emitted under the build tree and added to include dirs automatically). +- Prefer not to include `.spv` in `KEY` (the extension is appended unconditionally); if you do, you'll just get `.spv.spv` in the final filename/key (not an error, just not what you want). +- You can mix: + - per-input `COMPILE_OPTIONS` (inside JSON), and + - global `COMMON_OPTIONS` (CMake list after `COMMON_OPTIONS`). + +## Troubleshooting (no logs / silent NSC failures) + +Sometimes an NSC compile rule fails during the build, but the build output doesn't show a useful log. In that case, run the failing command under a debugger: + +1. Open the generated Visual Studio solution and set the `nsc` project/target as the Startup Project. +2. 
Open the `nsc` project properties and set **Debugging -> Command Arguments**. +3. Copy the exact CLI from the failing "NSC Rules" custom command (the one that calls `nsc.exe`) into the Command Arguments field. +4. Start debugging (`F5`) and reproduce; if needed, put a breakpoint in the HLSL compiler/preprocessor codepath and step until you find the root cause. + +If the error looks like a preprocessing issue, note that we use Boost.Wave as the preprocessor; it can have quirky edge cases (e.g. needing a trailing newline/whitespace at the end of a file for correct parsing). + +## Best practices + +- Prefer compiling to a shader library (`-T lib_6_x`) and using multiple entry points when possible: fewer inputs means fewer compile rules and less build overhead; at runtime you still choose the entry point from the same `.spv`. +- Treat `CAPS` as a build-time cost multiplier (cartesian product). If the permutation count gets too large (thousands+), prebuilding usually stops paying off; an example of such workload is `examples_tests/23_Arithmetic2UnitTest`. + +## Complete example (expand) + +
+NSC rules + archive + runtime key usage + +### CMake (`CMakeLists.txt`) + +```cmake +include(common) + +nbl_create_executable_project("" "" "" "") + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/common.hlsl + app_resources/shader.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(JSON [=[ +[ + { + "INPUT": "app_resources/shader.hlsl", + "KEY": "shader", + "COMPILE_OPTIONS": ["-T", "lib_6_8"], + "DEPENDS": [], + "CAPS": [ + { + "kind": "limits", + "name": "maxComputeSharedMemorySize", + "type": "uint32_t", + "values": [16384, 32768, 65536] + }, + { + "kind": "features", + "name": "shaderFloat64", + "type": "bool", + "values": [0, 1] + } + ] + } +] +]=]) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS -I ${CMAKE_CURRENT_SOURCE_DIR} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +# Works for both NBL_EMBED_BUILTIN_RESOURCES=ON/OFF +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) +``` + +### Runtime usage (C++) + +```cpp +#include "nbl/this_example/builtin/build/spirv/keys.hpp" + +// Load relative to the VFS mount (examples mount it at "app_resources") +asset::IAssetLoader::SAssetLoadParams lp = {}; +lp.workingDirectory = "app_resources"; + +auto limits = device->getPhysicalDevice()->getLimits(); +limits.maxComputeSharedMemorySize = 32768; // one of the prebuilt values; real code should bucket/clamp with std::upper_bound (see the CountingSort snippet above) + +auto key = 
nbl::this_example::builtin::build::get_spirv_key<"shader">(limits, device->getEnabledFeatures()); +auto bundle = assetMgr->getAsset(key.c_str(), lp); +const auto assets = bundle.getContents(); +auto spvShader = asset::IAsset::castDown<asset::IShader>(assets[0]); + +// params.shader.shader = spvShader.get(); + +// If you compiled with `-T lib_6_x`, pick the entry point at pipeline creation time (e.g. `params.shader.entryPoint = "main";`). +``` + +
From fcee6ed616f720bb717387063e23c7e92009cddf Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 19 Dec 2025 10:41:55 +0700 Subject: [PATCH 289/472] return false if the streaming buffer is too small --- include/nbl/ext/DebugDraw/CDrawAABB.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 22a7a1e595..99f0fa9223 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -137,6 +137,8 @@ namespace nbl::ext::debug_draw const uint32_t numInstances = aabbInstances.size(); const uint32_t instancesPerIter = streaming->getBuffer()->getSize() / sizeof(InstanceData); + if (numInstances > instancesPerIter) + return false; using suballocator_t = core::LinearAddressAllocatorST; uint32_t beginOffset = 0; while (beginOffset < numInstances) From 25370497736a02f5b175f34b38b91930ad3a7eba Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 19 Dec 2025 18:15:57 +0700 Subject: [PATCH 290/472] Remove duplicate partial specialization for truncate and emulated_vec --- .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 47 ------------------- 1 file changed, 47 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index 4eb8b7bf06..25b033c30e 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -587,53 +587,6 @@ NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST(4) #undef NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST -#define NBL_EMULATED_VEC_PROMOTION(N) template\ -struct Promote, ComponentType>\ -{\ - using VecType = emulated_vector_t##N ;\ - NBL_CONSTEXPR_FUNC VecType operator()(NBL_CONST_REF_ARG(ComponentType) v)\ - {\ - array_set setter;\ - VecType promoted;\ - [[unroll]]\ - for (int i = 0; i < N; ++i)\ - setter(promoted, i, v);\ - return promoted;\ - }\ -}; - -NBL_EMULATED_VEC_PROMOTION(2) -NBL_EMULATED_VEC_PROMOTION(3) 
-NBL_EMULATED_VEC_PROMOTION(4) - -#undef NBL_EMULATED_VEC_PROMOTION - -#define NBL_EMULATED_VEC_TRUNCATION(N, M) template\ -struct Truncate, emulated_vector_t##M >\ -{\ - using OutputVecType = emulated_vector_t##N ;\ - using InputVecType = emulated_vector_t##M ;\ - NBL_CONSTEXPR_FUNC OutputVecType operator()(NBL_CONST_REF_ARG(InputVecType) vec)\ - {\ - array_get getter;\ - array_set setter;\ - OutputVecType output;\ - [[unroll]]\ - for (int i = 0; i < N; ++i)\ - setter(output, i, getter(vec, i));\ - return output;\ - }\ -}; - -NBL_EMULATED_VEC_TRUNCATION(2, 2) -NBL_EMULATED_VEC_TRUNCATION(2, 3) -NBL_EMULATED_VEC_TRUNCATION(2, 4) -NBL_EMULATED_VEC_TRUNCATION(3, 3) -NBL_EMULATED_VEC_TRUNCATION(3, 4) -NBL_EMULATED_VEC_TRUNCATION(4, 4) - -#undef NBL_EMULATED_VEC_TRUNCATION - } //namespace impl } From 34af0ecc75d8461a86f0acc3a3e1f1f105c39347 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Fri, 19 Dec 2025 12:43:16 +0100 Subject: [PATCH 291/472] Fixed bug in CStdoutLogger --- include/nbl/system/CStdoutLogger.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/system/CStdoutLogger.h b/include/nbl/system/CStdoutLogger.h index 24693edd61..a63b8cf567 100644 --- a/include/nbl/system/CStdoutLogger.h +++ b/include/nbl/system/CStdoutLogger.h @@ -15,7 +15,7 @@ class CStdoutLogger : public IThreadsafeLogger protected: virtual void threadsafeLog_impl(const std::string_view& fmt, E_LOG_LEVEL logLevel, va_list args) override { - printf(constructLogString(fmt, logLevel, args).data()); + printf("%s", constructLogString(fmt, logLevel, args).data()); fflush(stdout); } From cd7197ae49d5a149042c394b3494ef6bb744a0f1 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Fri, 19 Dec 2025 13:52:37 +0100 Subject: [PATCH 292/472] Every ILogger::log call now uses string literal as its first argument --- examples_tests | 2 +- src/nbl/asset/interchange/CGraphicsPipelineLoaderMTL.cpp | 2 +- src/nbl/asset/interchange/CImageLoaderJPG.cpp | 2 +- 
src/nbl/asset/utils/CSPIRVIntrospector.cpp | 2 +- src/nbl/system/CColoredStdoutLoggerWin32.cpp | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/examples_tests b/examples_tests index ec1f5a5a6f..04627c7bb7 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit ec1f5a5a6f805c5213499d6611a7e7785ee60aaf +Subproject commit 04627c7bb708cfed00ddb6de3f289a37bd7a1ff1 diff --git a/src/nbl/asset/interchange/CGraphicsPipelineLoaderMTL.cpp b/src/nbl/asset/interchange/CGraphicsPipelineLoaderMTL.cpp index d4b9a3e394..b538f75eb3 100644 --- a/src/nbl/asset/interchange/CGraphicsPipelineLoaderMTL.cpp +++ b/src/nbl/asset/interchange/CGraphicsPipelineLoaderMTL.cpp @@ -894,7 +894,7 @@ auto CGraphicsPipelineLoaderMTL::readMaterials(system::IFile* _file, const syste case 'f': // Tf - Transmitivity currMaterial->params.transmissionFilter = readRGB(); sprintf(tmpbuf, "%s, %s: Detected Tf parameter, it won't be used in generated shader - fallback to alpha=0.5 instead", _file->getFileName().string().c_str(), currMaterial->name.c_str()); - logger.log(tmpbuf, system::ILogger::ELL_WARNING); + logger.log("%s", system::ILogger::ELL_WARNING, tmpbuf); break; case 'r': // Tr, transparency = 1.0-d currMaterial->params.opacity = (1.f - readFloat()); diff --git a/src/nbl/asset/interchange/CImageLoaderJPG.cpp b/src/nbl/asset/interchange/CImageLoaderJPG.cpp index 45677ff5cf..1db5e16ac2 100644 --- a/src/nbl/asset/interchange/CImageLoaderJPG.cpp +++ b/src/nbl/asset/interchange/CImageLoaderJPG.cpp @@ -93,7 +93,7 @@ namespace jpeg std::string errMsg("JPEG FATAL ERROR in "); auto ctx = reinterpret_cast(cinfo->client_data); errMsg += ctx->filename; - ctx->logger.log(errMsg + temp1, system::ILogger::ELL_ERROR); + ctx->logger.log("%s", system::ILogger::ELL_ERROR, errMsg + temp1); } /* Initialize source. 
This is called by jpeg_read_header() before any diff --git a/src/nbl/asset/utils/CSPIRVIntrospector.cpp b/src/nbl/asset/utils/CSPIRVIntrospector.cpp index 4ac78066a7..818fbc584b 100644 --- a/src/nbl/asset/utils/CSPIRVIntrospector.cpp +++ b/src/nbl/asset/utils/CSPIRVIntrospector.cpp @@ -1054,7 +1054,7 @@ void CSPIRVIntrospector::CStageIntrospectionData::debugPrint(system::ILogger* lo } } - logger->log(debug.str() + '\n'); + logger->log("%s", system::ILogger::ELL_DEBUG, debug.str() + '\n'); } } \ No newline at end of file diff --git a/src/nbl/system/CColoredStdoutLoggerWin32.cpp b/src/nbl/system/CColoredStdoutLoggerWin32.cpp index e664ae84bc..f2690a81b4 100644 --- a/src/nbl/system/CColoredStdoutLoggerWin32.cpp +++ b/src/nbl/system/CColoredStdoutLoggerWin32.cpp @@ -15,7 +15,7 @@ CColoredStdoutLoggerWin32::CColoredStdoutLoggerWin32(core::bitflag void CColoredStdoutLoggerWin32::threadsafeLog_impl(const std::string_view& fmt, E_LOG_LEVEL logLevel, va_list args) { SetConsoleTextAttribute(m_native_console, getConsoleColor(logLevel)); - printf(constructLogString(fmt, logLevel, args).data()); + printf("%s", constructLogString(fmt, logLevel, args).data()); fflush(stdout); SetConsoleTextAttribute(m_native_console, 15); // restore to white } From 6295aa80656e51b5089aa412352e85c7328bc7f7 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Fri, 19 Dec 2025 14:38:37 +0100 Subject: [PATCH 293/472] Updated examples --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 04627c7bb7..5acd059641 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 04627c7bb708cfed00ddb6de3f289a37bd7a1ff1 +Subproject commit 5acd05964180897127d63d68b3db504ea8e46cc2 From 993032c01e2890934021af8a0525eda310cd984e Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Sat, 20 Dec 2025 10:19:17 +0300 Subject: [PATCH 294/472] update examples submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/examples_tests b/examples_tests index 12486d4670..1961a898fd 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 12486d4670f0453722351814996d91f198a16749 +Subproject commit 1961a898fd0a91c8e4d5c1a3fcb02df9142e8388 From 6ab99fac0405cda68d5e7a05d9dc8a9a99ca556b Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Sat, 20 Dec 2025 11:44:51 +0100 Subject: [PATCH 295/472] Updated DXC --- 3rdparty/dxc/dxc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/dxc/dxc b/3rdparty/dxc/dxc index ecd3f93521..d76c7890b1 160000 --- a/3rdparty/dxc/dxc +++ b/3rdparty/dxc/dxc @@ -1 +1 @@ -Subproject commit ecd3f93521f1aceabff64b14857f47f9a32c9958 +Subproject commit d76c7890b19ce0b344ee0ce116dbc1c92220ccea From 85c3b5a1afc61f35b36cdb93135c9c129a242021 Mon Sep 17 00:00:00 2001 From: devsh Date: Sun, 21 Dec 2025 17:15:38 +0100 Subject: [PATCH 296/472] make CElementFilm work --- include/nbl/ext/MitsubaLoader/CElementFilm.h | 64 +++- src/nbl/ext/MitsubaLoader/CElementFilm.cpp | 344 ++++++------------ .../ext/MitsubaLoader/CElementIntegrator.cpp | 8 + src/nbl/ext/MitsubaLoader/ElementMacros.h | 9 +- src/nbl/ext/MitsubaLoader/ParserUtil.cpp | 7 +- 5 files changed, 188 insertions(+), 244 deletions(-) diff --git a/include/nbl/ext/MitsubaLoader/CElementFilm.h b/include/nbl/ext/MitsubaLoader/CElementFilm.h index 986a5a5cbe..267cd57938 100644 --- a/include/nbl/ext/MitsubaLoader/CElementFilm.h +++ b/include/nbl/ext/MitsubaLoader/CElementFilm.h @@ -22,16 +22,7 @@ class CElementFilm final : public IElement LDR_FILM, MFILM }; - static inline core::unordered_map compStringToTypeMap() - { - return { - {"hdrfilm", Type::HDR_FILM}, - {"tiledhdrfilm",Type::TILED_HDR}, - {"ldrfilm", Type::LDR_FILM}, - {"mfilm", Type::MFILM} - }; - } - + // enum PixelFormat : uint8_t { LUMINANCE, @@ -62,10 +53,14 @@ class CElementFilm final : public IElement }; struct HDR { + constexpr static inline Type VariantType = Type::HDR_FILM; + bool attachLog = true; }; struct LDR 
{ + constexpr static inline Type VariantType = Type::LDR_FILM; + enum TonemapMethod { GAMMA, @@ -79,7 +74,9 @@ class CElementFilm final : public IElement }; struct M { - M() : digits(4) + constexpr static inline Type VariantType = Type::MFILM; + + inline M() : digits(4) { variable[0] = 'd'; variable[1] = 'a'; @@ -92,6 +89,23 @@ class CElementFilm final : public IElement char variable[MaxVarNameLen+1]; }; + // + using variant_list_t = core::type_list< + HDR, + LDR, + M + >; + static inline core::unordered_map compStringToTypeMap() + { + return { + {"hdrfilm", Type::HDR_FILM}, + {"tiledhdrfilm",Type::TILED_HDR}, + {"ldrfilm", Type::LDR_FILM}, + {"mfilm", Type::MFILM} + }; + } + static AddPropertyMap compAddPropertyMap(); + inline CElementFilm(const char* id) : IElement(id), type(Type::HDR_FILM), width(768), height(576), cropOffsetX(0), cropOffsetY(0), cropWidth(INT_MAX), cropHeight(INT_MAX), fileFormat(OPENEXR), pixelFormat(RGB), componentFormat(FLOAT16), @@ -124,6 +138,32 @@ class CElementFilm final : public IElement } } + template + inline void visit(Visitor&& visitor) + { + switch (type) + { + case CElementFilm::Type::LDR_FILM: + visitor(ldrfilm); + break; + case CElementFilm::Type::MFILM: + visitor(mfilm); + break; + default: + visitor(hdrfilm); + break; + } + } + template + inline void visit(Visitor&& visitor) const + { + const_cast(this)->visit([&](T& var)->void + { + visitor(const_cast(var)); + } + ); + } + bool onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) override; constexpr static inline auto ElementType = IElement::Type::FILM; @@ -147,7 +187,7 @@ class CElementFilm final : public IElement Type type; int32_t width,height; int32_t cropOffsetX,cropOffsetY,cropWidth,cropHeight; - FileFormat fileFormat; + FileFormat fileFormat = OPENEXR; PixelFormat pixelFormat; ComponentFormat componentFormat; bool banner; diff --git a/src/nbl/ext/MitsubaLoader/CElementFilm.cpp b/src/nbl/ext/MitsubaLoader/CElementFilm.cpp index 
7f17cbe64d..361fc4fd55 100644 --- a/src/nbl/ext/MitsubaLoader/CElementFilm.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementFilm.cpp @@ -4,266 +4,150 @@ #include "nbl/ext/MitsubaLoader/CElementFilm.h" #include "nbl/ext/MitsubaLoader/ParserUtil.h" +#include "nbl/ext/MitsubaLoader/ElementMacros.h" #include namespace nbl::ext::MitsubaLoader { -bool CElementFilm::addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) + +inline bool setLimitedString(const std::string_view memberName, std::span out, SNamedPropertyElement&& _property, const system::logger_opt_ptr logger) { -#if 0 - bool error = type==Type::INVALID; -#define SET_PROPERTY(MEMBER,PROPERTY_TYPE) [&]() -> void { \ - if (_property.type!=PROPERTY_TYPE) { \ - error = true; \ - return; \ - } \ - MEMBER = _property.getProperty(); \ - } - auto setWidth = SET_PROPERTY(width,SNamedPropertyElement::Type::INTEGER); - auto setHeight = SET_PROPERTY(height,SNamedPropertyElement::Type::INTEGER); - auto setCropOffsetX = SET_PROPERTY(cropOffsetX,SNamedPropertyElement::Type::INTEGER); - auto setCropOffsetY = SET_PROPERTY(cropOffsetY,SNamedPropertyElement::Type::INTEGER); - auto setCropWidth = SET_PROPERTY(cropWidth,SNamedPropertyElement::Type::INTEGER); - auto setCropHeight = SET_PROPERTY(cropHeight,SNamedPropertyElement::Type::INTEGER); - auto setFileFormat = [&]() -> void - { - if (_property.type!=SNamedPropertyElement::Type::STRING) - { - error = true; - return; - } - static const core::unordered_map StringToType = - { - {"openexr", OPENEXR}, - {"png", PNG}, - {"rgbe", RGBE}, - {"pfm", PFM}, - {"matlab", MATLAB}, - {"mathematica", MATHEMATICA}, - {"numpy", NUMPY} - }; - auto found = StringToType.find(_property.svalue); - if (found==StringToType.end()) - { - error = true; - return; - } - fileFormat = found->second; - }; - auto setPixelFormat = [&]() -> void - { - if (_property.type!=SNamedPropertyElement::Type::STRING) - { - error = true; - return; - } - static const core::unordered_map StringToType = - { - 
{"luminance", LUMINANCE}, - {"luminanceAlpha", LUMINANCE_ALPHA}, - {"rgb", RGB}, - {"rgba", RGBA}, - {"xyz", XYZ}, - {"xyza", XYZA}, - {"spectrum", SPECTRUM}, - {"spectrumAlpha", SPECTRUM_ALPHA} - }; - auto found = StringToType.find(_property.svalue); - if (found==StringToType.end()) + auto len = strlen(_property.svalue); + if (len>=out.size()) + logger.log( + "String property assigned to %s is too long, max allowed length %d, is %d, property value: \"%s\"", + system::ILogger::ELL_ERROR,memberName.data(),out.size(),len,_property.svalue + ); + len = std::min(out.size()-1,len); + memcpy(out.data(),_property.svalue,len); + out[len] = 0; + return true; +} + +auto CElementFilm::compAddPropertyMap() -> AddPropertyMap +{ + using this_t = CElementFilm; + AddPropertyMap retval; + + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_PROPERTY(width,INTEGER); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_PROPERTY(height,INTEGER); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_PROPERTY(cropOffsetX,INTEGER); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_PROPERTY(cropOffsetY,INTEGER); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_PROPERTY(cropWidth,INTEGER); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_PROPERTY(cropHeight,INTEGER); + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY("fileFormat",STRING) { - error = true; - return; + static const core::unordered_map StringToType = + { + {"openexr", OPENEXR}, + {"png", PNG}, + {"rgbe", RGBE}, + {"pfm", PFM}, + {"matlab", MATLAB}, + {"mathematica", MATHEMATICA}, + {"numpy", NUMPY} + }; + auto found = StringToType.find(_property.svalue); + if (found==StringToType.end()) + return false; + _this->fileFormat = found->second; + return true; } - pixelFormat = found->second; - }; - auto setComponentFormat = [&]() -> void - { - if (_property.type!=SNamedPropertyElement::Type::STRING || type==Type::LDR_FILM || type==Type::MFILM) + }); + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY("pixelFormat",STRING) { - error = true; - return; + static const 
core::unordered_map StringToType = + { + {"luminance", LUMINANCE}, + {"luminanceAlpha", LUMINANCE_ALPHA}, + {"rgb", RGB}, + {"rgba", RGBA}, + {"xyz", XYZ}, + {"xyza", XYZA}, + {"spectrum", SPECTRUM}, + {"spectrumAlpha", SPECTRUM_ALPHA} + }; + auto found = StringToType.find(_property.svalue); + if (found==StringToType.end()) + return false; + _this->pixelFormat = found->second; + return true; } - static const core::unordered_map StringToType = + }); + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY("setComponentFormat",STRING) { - {"float16", FLOAT16}, - {"float32", FLOAT32}, - {"uint32", UINT32} - }; - auto found = StringToType.find(_property.svalue); - if (found==StringToType.end()) - { - error = true; - return; + static const core::unordered_map StringToType = + { + {"float16", FLOAT16}, + {"float32", FLOAT32}, + {"uint32", UINT32} + }; + auto found = StringToType.find(_property.svalue); + if (found==StringToType.end()) + return false; + _this->componentFormat = found->second; + return true; } - componentFormat = found->second; - }; - auto setBanner = SET_PROPERTY(banner,SNamedPropertyElement::Type::BOOLEAN); - auto setHighQualityEdges= SET_PROPERTY(highQualityEdges,SNamedPropertyElement::Type::BOOLEAN); + }); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_PROPERTY(banner,BOOLEAN); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_PROPERTY(highQualityEdges,BOOLEAN); - - auto dispatch = [&](auto func) -> void - { - switch (type) + + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(attachLog,BOOLEAN,std::is_same,HDR); + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED("tonemapMethod",STRING,std::is_same,LDR) { - case CElementFilm::Type::HDR_FILM: - func(hdrfilm); - break; - case CElementFilm::Type::LDR_FILM: - func(ldrfilm); - break; - case CElementFilm::Type::MFILM: - func(mfilm); - break; - default: - error = true; - break; + static const core::unordered_map StringToType = + { + {"gamma", LDR::GAMMA}, + {"reinhard",LDR::REINHARD} + }; + auto 
found = StringToType.find(_property.svalue); + if (found==StringToType.end()) + return false; + _this->ldrfilm.tonemapMethod = found->second; + return true; } - }; -#define SET_PROPERTY_TEMPLATE(MEMBER,PROPERTY_TYPE, ... ) [&]() -> void { \ - dispatch([&](auto& state) -> void { \ - if constexpr (is_any_of::type,__VA_ARGS__>::value) \ - { \ - if (_property.type!=PROPERTY_TYPE) { \ - error = true; \ - return; \ - } \ - state. ## MEMBER = _property.getProperty(); \ - } \ - }); \ - } + ); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(gamma,FLOAT,std::is_same,LDR); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(exposure,FLOAT,std::is_same,LDR); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(key,FLOAT,std::is_same,LDR); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(burn,FLOAT,std::is_same,LDR); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(attachLog,INTEGER,std::is_same,HDR); - auto setAttachLog = SET_PROPERTY_TEMPLATE(attachLog, SNamedPropertyElement::Type::BOOLEAN, HDR); - auto setTonemapMethod = [&]() -> void - { - if (_property.type != SNamedPropertyElement::Type::STRING || type == Type::LDR_FILM) + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED("variable",STRING,std::is_same,M) { - error = true; - return; + return setLimitedString("variable",_this->outputFilePath,std::move(_property),logger); } - static const core::unordered_map StringToType = + ); + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY("outputFilePath",STRING) { - {"gamma", LDR::GAMMA}, - {"reinhard",LDR::REINHARD} - }; - auto found = StringToType.find(_property.svalue); - if (found != StringToType.end()) - { - error = true; - return; + return setLimitedString("outputFilePath",_this->outputFilePath,std::move(_property),logger); } - ldrfilm.tonemapMethod = found->second; - }; - auto setGamma = SET_PROPERTY_TEMPLATE(gamma, SNamedPropertyElement::Type::FLOAT, LDR); 
- auto setExposure = SET_PROPERTY_TEMPLATE(exposure, SNamedPropertyElement::Type::FLOAT, LDR); - auto setKey = SET_PROPERTY_TEMPLATE(key, SNamedPropertyElement::Type::FLOAT, LDR); - auto setBurn = SET_PROPERTY_TEMPLATE(burn, SNamedPropertyElement::Type::FLOAT, LDR); - auto setDigits = SET_PROPERTY_TEMPLATE(digits, SNamedPropertyElement::Type::INTEGER, M); - auto setVariable = [&]() -> void - { - if (_property.type != SNamedPropertyElement::Type::STRING || type == Type::MFILM) + }); + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY("bloomFilePath",STRING) { - error = true; - return; + return setLimitedString("bloomFilePath",_this->denoiserTonemapperArgs,std::move(_property),logger); } - size_t len = std::min(strlen(_property.svalue),M::MaxVarNameLen); - memcpy(mfilm.variable,_property.svalue,len); - mfilm.variable[len] = 0; - }; - auto setOutputFilePath = [&]() -> void - { - if (_property.type != SNamedPropertyElement::Type::STRING) + }); + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY("tonemapper",STRING) { - error = true; - return; + return setLimitedString("tonemapper",_this->denoiserTonemapperArgs,std::move(_property),logger); } + }); - size_t len = std::min(strlen(_property.svalue),MaxPathLen); - memcpy(outputFilePath,_property.svalue,len); - outputFilePath[len] = 0; - }; - - auto setBloomFilePath = [&]() -> void - { - if (_property.type != SNamedPropertyElement::Type::STRING) - { - error = true; - return; - } - - size_t len = std::min(strlen(_property.svalue),MaxPathLen); - memcpy(denoiserBloomFilePath,_property.svalue,len); - denoiserBloomFilePath[len] = 0; - }; - - auto setTonemapperArgs = [&]() -> void - { - if (_property.type != SNamedPropertyElement::Type::STRING) - { - error = true; - return; - } + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_PROPERTY(cascadeCount,INTEGER); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_PROPERTY(cascadeLuminanceBase,FLOAT); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_PROPERTY(cascadeLuminanceStart,FLOAT); - size_t len = 
std::min(strlen(_property.svalue),MaxTonemapperArgsLen); - memcpy(denoiserTonemapperArgs,_property.svalue,len); - denoiserTonemapperArgs[len] = 0; - }; + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_PROPERTY(denoiserBloomScale,FLOAT); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_PROPERTY(denoiserBloomIntensity,FLOAT); - auto setCascadeCount = SET_PROPERTY(cascadeCount,SNamedPropertyElement::Type::INTEGER); - auto setCascadeLuminanceBase = SET_PROPERTY(cascadeLuminanceBase,SNamedPropertyElement::Type::FLOAT); - auto setCascadeLuminanceStart = SET_PROPERTY(cascadeLuminanceStart,SNamedPropertyElement::Type::FLOAT); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_PROPERTY(envmapRegularizationFactor,FLOAT); - auto setBloomScale = SET_PROPERTY(denoiserBloomScale,SNamedPropertyElement::Type::FLOAT); - auto setBloomIntensity = SET_PROPERTY(denoiserBloomIntensity,SNamedPropertyElement::Type::FLOAT); - - auto setEnvmapRegularizationFactor = SET_PROPERTY(envmapRegularizationFactor,SNamedPropertyElement::Type::FLOAT); - - const core::unordered_map, core::CaseInsensitiveHash, core::CaseInsensitiveEquals> SetPropertyMap = - { - {"width", setWidth}, - {"height", setHeight}, - {"cropOffsetX", setCropOffsetX}, - {"cropOffsetY", setCropOffsetY}, - {"cropWidth", setCropWidth}, - {"cropHeight", setCropHeight}, - {"fileFormat", setFileFormat}, - {"pixelFormat", setPixelFormat}, - {"componentFormat", setComponentFormat}, - {"banner", setBanner}, - {"highQualityEdges", setHighQualityEdges}, - {"attachLog", setAttachLog}, - {"tonemapMethod", setTonemapMethod}, - {"gamma", setGamma}, - {"exposure", setExposure}, - {"key", setKey}, - {"burn", setBurn}, - {"digits", setDigits}, - {"variable", setVariable}, - {"outputFilePath", setOutputFilePath}, - {"bloomFilePath", setBloomFilePath}, - {"cascadeCount", setCascadeCount}, - {"cascadeLuminanceBase", setCascadeLuminanceBase}, - {"cascadeLuminanceStart", setCascadeLuminanceStart}, - {"bloomScale", setBloomScale}, - {"bloomIntensity", 
setBloomIntensity}, - {"tonemapper", setTonemapperArgs}, - {"envmapRegularizationFactor", setEnvmapRegularizationFactor} - }; - - auto found = SetPropertyMap.find(_property.name); - if (found == SetPropertyMap.end()) - { - - invalidXMLFileStructure(logger,"No Film can have such property set with name: " + _property.name+"\nRemember we don't support \"render-time annotations\""); - return false; - } - found->second(); - return !error; -#endif - assert(false); - return false; + return retval; } bool CElementFilm::onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) diff --git a/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp b/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp index 04227927be..b87e4d370c 100644 --- a/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp @@ -13,6 +13,14 @@ namespace nbl::ext::MitsubaLoader { +auto CElementFilm::compAddPropertyMap() -> AddPropertyMap +{ + using this_t = CElementFilm; + AddPropertyMap retval; + + return retval; +} + bool CElementIntegrator::addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) { if (type>=Type::INVALID) diff --git a/src/nbl/ext/MitsubaLoader/ElementMacros.h b/src/nbl/ext/MitsubaLoader/ElementMacros.h index 7a210c3b0a..b214882ff1 100644 --- a/src/nbl/ext/MitsubaLoader/ElementMacros.h +++ b/src/nbl/ext/MitsubaLoader/ElementMacros.h @@ -5,17 +5,24 @@ #include "nbl/ext/MitsubaLoader/ParserUtil.h" +// Return value is if there's no error during the setting once basic checks are done +// For when you want to do custom handling of when property with string NAME and SNamedPropertyElement::Type::PROP_TYPE is getting added to this_t #define NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY(NAME,PROP_TYPE) retval.registerCallback(SNamedPropertyElement::Type::PROP_TYPE,NAME,{\ .func=[](this_t* _this, SNamedPropertyElement&& _property, const system::logger_opt_ptr logger)->bool +// when you know that there's a member of `this_t` 
with identifier equal to NAME #define NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_PROPERTY(NAME,PROP_TYPE) NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY(#NAME,PROP_TYPE) {\ _this->NAME = _property.getProperty(); \ return true;}}) - +// Similar to `NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY` but for `this_t` which declares `variant_list_t` (list of union types) +// this adds a compile-time filter against the constraint, such that only variant types matching the constraint are visited. +// Useful when multiple variants derive from the same base struct, or have the same member. #define NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED(NAME,PROP_TYPE,CONSTRAINT,...) retval.template registerCallback( \ SNamedPropertyElement::Type::PROP_TYPE,NAME,[](this_t* _this, SNamedPropertyElement&& _property, const system::logger_opt_ptr logger)->bool +// This is to `NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED` what `NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_PROPERTY` is to `NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY` +// So basically you know the member is the same across the constraint filtered types #define NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(NAME,PROP_TYPE,CONSTRAINT,...) 
NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED(#NAME,PROP_TYPE,CONSTRAINT,__VA_ARGS__) {\ _this->visit([&_property](auto& state)->void{ \ if constexpr (CONSTRAINT,__VA_ARGS__>::value) \ diff --git a/src/nbl/ext/MitsubaLoader/ParserUtil.cpp b/src/nbl/ext/MitsubaLoader/ParserUtil.cpp index a79fdbb906..0fe612e2ec 100644 --- a/src/nbl/ext/MitsubaLoader/ParserUtil.cpp +++ b/src/nbl/ext/MitsubaLoader/ParserUtil.cpp @@ -426,7 +426,12 @@ ParserManager::ParserManager() : propertyElements({ {"alias", {.create=processAlias,.retvalGoesOnStack=true}}, {"ref", {.create=processRef,.retvalGoesOnStack=true}} }), addPropertyMaps({ - CElementSensor::compAddPropertyMap() + CElementIntegrator::compAddPropertyMap(), + CElementSensor::compAddPropertyMap(), + CElementFilm::compAddPropertyMap(), + CElementRFilter::compAddPropertyMap(), + CElementSampler::compAddPropertyMap(), + CElementEmissionProfile::compAddPropertyMap() }) { } auto ParserManager::processAlias(const char** _atts, SessionContext* ctx) -> SNamedElement From ade7a8a187ca50d43fae637af05659f55e68db74 Mon Sep 17 00:00:00 2001 From: devsh Date: Sun, 21 Dec 2025 23:09:37 +0100 Subject: [PATCH 297/472] refactor CElementIntegrator --- .../ext/MitsubaLoader/CElementIntegrator.h | 85 +++- .../ext/MitsubaLoader/CElementIntegrator.cpp | 371 +++++++----------- 2 files changed, 221 insertions(+), 235 deletions(-) diff --git a/include/nbl/ext/MitsubaLoader/CElementIntegrator.h b/include/nbl/ext/MitsubaLoader/CElementIntegrator.h index 94ea85b7b6..760f87fe5b 100644 --- a/include/nbl/ext/MitsubaLoader/CElementIntegrator.h +++ b/include/nbl/ext/MitsubaLoader/CElementIntegrator.h @@ -61,8 +61,10 @@ class CElementIntegrator final : public IElement }; } - struct AmbientOcclusion + struct AmbientOcclusion final { + constexpr static inline Type VariantType = Type::AO; + int32_t shadingSamples = 1; float rayLength = -1.f; }; @@ -71,8 +73,10 @@ class CElementIntegrator final : public IElement bool hideEmitters = false; bool 
hideEnvironment = false; }; - struct DirectIllumination : EmitterHideableBase + struct DirectIllumination final : EmitterHideableBase { + constexpr static inline Type VariantType = Type::DIRECT; + int32_t emitterSamples = static_cast(0xdeadbeefu); int32_t bsdfSamples = static_cast(0xdeadbeefu); bool strictNormals = false; @@ -84,16 +88,22 @@ class CElementIntegrator final : public IElement }; struct PathTracing : MonteCarloTracingBase,EmitterHideableBase { + constexpr static inline Type VariantType = Type::PATH; + bool strictNormals = false; }; struct SimpleVolumetricPathTracing : PathTracing { + constexpr static inline Type VariantType = Type::VOL_PATH_SIMPLE; }; struct ExtendedVolumetricPathTracing : SimpleVolumetricPathTracing { + constexpr static inline Type VariantType = Type::VOL_PATH; }; - struct BiDirectionalPathTracing : MonteCarloTracingBase + struct BiDirectionalPathTracing final : MonteCarloTracingBase { + constexpr static inline Type VariantType = Type::BDPT; + bool lightImage = true; bool sampleDirect = true; }; @@ -101,26 +111,31 @@ class CElementIntegrator final : public IElement { int32_t granularity = 0; }; - struct PhotonMapping : PhotonMappingBase, EmitterHideableBase + struct PhotonMapping final : PhotonMappingBase, EmitterHideableBase { + constexpr static inline Type VariantType = Type::PHOTONMAPPER; + int32_t directSamples = 16; int32_t glossySamples = 32; int32_t globalPhotons = 250000; int32_t causticPhotons = 250000; int32_t volumePhotons = 250000; - float globalLURadius = 0.05; - float causticLURadius = 0.0125; - int32_t LUSize = 120; + float globalLookupRadius = 0.05; + float causticLookupRadius = 0.0125; + int32_t lookupSize = 120; }; struct ProgressivePhotonMapping : PhotonMappingBase { + constexpr static inline Type VariantType = Type::PPM; + int32_t photonCount = 250000; float initialRadius = 0.f; float alpha = 0.7f; int32_t maxPasses = -1; }; - struct StochasticProgressivePhotonMapping : ProgressivePhotonMapping + struct 
StochasticProgressivePhotonMapping final : ProgressivePhotonMapping { + constexpr static inline Type VariantType = Type::SPPM; }; struct MetropolisLightTransportBase : MonteCarloTracingBase { @@ -128,8 +143,10 @@ class CElementIntegrator final : public IElement int32_t luminanceSamples = 100000; bool twoStage = false; }; - struct PrimarySampleSpaceMetropolisLightTransport : MetropolisLightTransportBase + struct PrimarySampleSpaceMetropolisLightTransport final : MetropolisLightTransportBase { + constexpr static inline Type VariantType = Type::PSSMLT; + bool bidirectional = true; float pLarge = 0.3f; }; @@ -141,30 +158,40 @@ class CElementIntegrator final : public IElement bool manifoldPerturbation = false; float lambda = 50.f; }; - struct PathSpaceMetropolisLightTransport : MetropolisLightTransportBase, PerturbateableBase + struct PathSpaceMetropolisLightTransport final : MetropolisLightTransportBase, PerturbateableBase { + constexpr static inline Type VariantType = Type::MLT; + bool bidirectionalMutation = true; }; - struct EnergyRedistributionPathTracing : MonteCarloTracingBase, PerturbateableBase + struct EnergyRedistributionPathTracing final : MonteCarloTracingBase, PerturbateableBase { + constexpr static inline Type VariantType = Type::ERPT; + float numChains = 1.f; float maxChains = 0.f; int32_t chainLength = 1; int32_t directSamples = 16; }; - struct AdjointParticleTracing : MonteCarloTracingBase + struct AdjointParticleTracing final : MonteCarloTracingBase { + constexpr static inline Type VariantType = Type::ADJ_P_TRACER; + uint32_t granularity = 200000; bool bruteForce = false; }; - struct VirtualPointLights + struct VirtualPointLights final { + constexpr static inline Type VariantType = Type::VPL; + int32_t maxPathDepth = 5; int32_t shadowMap = 512; float clamping = 0.1f; }; struct FieldExtraction { + constexpr static inline Type VariantType = Type::FIELD_EXTRACT; + enum Type { INVALID, @@ -193,14 +220,18 @@ class CElementIntegrator final : public IElement 
size_t childCount = 0u; CElementIntegrator* children[maxChildCount] = { nullptr }; }; - struct AdaptiveIntegrator : MetaIntegrator + struct AdaptiveIntegrator final : MetaIntegrator { + constexpr static inline Type VariantType = Type::ADAPTIVE; + float maxError = 0.05f; float pValue = 0.05f; int32_t maxSampleFactor = 32; }; - struct IrradianceCacheIntegrator : MetaIntegrator + struct IrradianceCacheIntegrator final : MetaIntegrator { + constexpr static inline Type VariantType = Type::IRR_CACHE; + int32_t resolution = 14; float quality = 1.f; bool gradients = true; @@ -211,10 +242,32 @@ class CElementIntegrator final : public IElement bool indirectOnly = false; bool debug = false; }; - struct MultiChannelIntegrator : MetaIntegrator + struct MultiChannelIntegrator final : MetaIntegrator { + constexpr static inline Type VariantType = Type::MULTI_CHANNEL; }; + // + using variant_list_t = core::type_list< + AmbientOcclusion, + DirectIllumination, + SimpleVolumetricPathTracing, + ExtendedVolumetricPathTracing, + PathTracing, + BiDirectionalPathTracing, + PhotonMapping, + ProgressivePhotonMapping, + StochasticProgressivePhotonMapping, + PrimarySampleSpaceMetropolisLightTransport, + PathSpaceMetropolisLightTransport, + EnergyRedistributionPathTracing, + AdjointParticleTracing, + AdaptiveIntegrator, + VirtualPointLights, + IrradianceCacheIntegrator, + MultiChannelIntegrator, + FieldExtraction + >; // static AddPropertyMap compAddPropertyMap(); diff --git a/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp b/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp index b87e4d370c..c487ab3933 100644 --- a/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp @@ -13,233 +13,166 @@ namespace nbl::ext::MitsubaLoader { -auto CElementFilm::compAddPropertyMap() -> AddPropertyMap +auto CElementIntegrator::compAddPropertyMap() -> AddPropertyMap { - using this_t = CElementFilm; - AddPropertyMap retval; + using this_t = CElementIntegrator; + 
AddPropertyMap retval; - return retval; -} - -bool CElementIntegrator::addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) -{ - if (type>=Type::INVALID) - return false; - bool error = false; -#if 0 -#define SET_PROPERTY_TEMPLATE(MEMBER,PROPERTY_TYPE, ... ) [&]() -> void { \ - visit([&](auto& state) -> void { \ - if constexpr (is_any_of::type,__VA_ARGS__>::value) \ - { \ - if (_property.type!=PROPERTY_TYPE) { \ - error = true; \ - return; \ - } \ - state. ## MEMBER = _property.getProperty(); \ - } \ - }); \ - } - - auto processRayLength = SET_PROPERTY_TEMPLATE(rayLength,SNamedPropertyElement::Type::FLOAT,AmbientOcclusion); - auto processEmitterSamples = SET_PROPERTY_TEMPLATE(emitterSamples,SNamedPropertyElement::Type::INTEGER,DirectIllumination); - auto processBSDFSamples = SET_PROPERTY_TEMPLATE(bsdfSamples,SNamedPropertyElement::Type::INTEGER,DirectIllumination); - auto processShadingSamples = [&]() -> void - { - visit([&](auto& state) -> void { - using state_type = std::remove_reference::type; + // ambient + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(shadingSamples,INTEGER,std::is_same,AmbientOcclusion); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(rayLength,FLOAT,std::is_same,AmbientOcclusion); - if constexpr (std::is_same::value) + // emitter hideables + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(hideEmitters,BOOLEAN,derived_from,DirectIllumination); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(hideEnvironment,BOOLEAN,derived_from,DirectIllumination); + + // this one has really funny legacy behaviour which Mitsuba allowed contrary to its PDF docs + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED("shadingSamples",INTEGER,std::is_same,DirectIllumination) + { + _this->direct.emitterSamples = _this->direct.bsdfSamples = _property.ivalue; + return true; + } + ); + // direct + 
NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(emitterSamples,INTEGER,std::is_same,DirectIllumination); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(bsdfSamples,INTEGER,std::is_same,DirectIllumination); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(strictNormals,BOOLEAN,std::is_same,DirectIllumination); + + // monte carlo base + // Not using `NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED` because members have different names than XML names + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED("maxDepth",INTEGER,derived_from,MonteCarloTracingBase) + { + _this->path.maxPathDepth = _property.ivalue; + return true; + } + ); + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED("rrDepth",INTEGER,derived_from,MonteCarloTracingBase) + { + _this->path.russianRouletteDepth = _property.ivalue; + return true; + } + ); + + // path tracing + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(strictNormals,BOOLEAN,derived_from,PathTracing); + + // bidirectional path tracing + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(lightImage,BOOLEAN,std::is_same,BiDirectionalPathTracing); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(sampleDirect,BOOLEAN,std::is_same,BiDirectionalPathTracing); + + // photon mapping base + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(granularity,INTEGER,derived_from,PhotonMappingBase); + + // photon mapping + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(directSamples,INTEGER,std::is_same,PhotonMapping); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(glossySamples,INTEGER,std::is_same,PhotonMapping); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(globalPhotons,INTEGER,std::is_same,PhotonMapping); + 
NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(causticPhotons,INTEGER,std::is_same,PhotonMapping); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(volumePhotons,INTEGER,std::is_same,PhotonMapping); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(globalLookupRadius,FLOAT,std::is_same,PhotonMapping); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(globalLookupRadius,FLOAT,std::is_same,PhotonMapping); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(lookupSize,INTEGER,std::is_same,PhotonMapping); + + // progressive + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(photonCount,INTEGER,derived_from,ProgressivePhotonMapping); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(initialRadius,FLOAT,derived_from,ProgressivePhotonMapping); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(alpha,FLOAT,derived_from,ProgressivePhotonMapping); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(maxPasses,INTEGER,derived_from,ProgressivePhotonMapping); + + // metropolis base + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(directSamples,INTEGER,derived_from,MetropolisLightTransportBase); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(luminanceSamples,INTEGER,derived_from,MetropolisLightTransportBase); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(twoStage,BOOLEAN,derived_from,MetropolisLightTransportBase); + + // primary sample space + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(bidirectional,BOOLEAN,std::is_same,PrimarySampleSpaceMetropolisLightTransport); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(pLarge,FLOAT,std::is_same,PrimarySampleSpaceMetropolisLightTransport); + + // permutable base + 
NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(lensPerturbation,BOOLEAN,derived_from,PerturbateableBase); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(multiChainPerturbation,BOOLEAN,derived_from,PerturbateableBase); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(causticPerturbation,BOOLEAN,derived_from,PerturbateableBase); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(manifoldPerturbation,BOOLEAN,derived_from,PerturbateableBase); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(lambda,FLOAT,derived_from,PerturbateableBase); + + // path space metropolis + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(bidirectionalMutation,BOOLEAN,std::is_same,PathSpaceMetropolisLightTransport); + + // energy redistribution + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(numChains,FLOAT,std::is_same,EnergyRedistributionPathTracing); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(maxChains,FLOAT,std::is_same,EnergyRedistributionPathTracing); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(chainLength,INTEGER,std::is_same,EnergyRedistributionPathTracing); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(directSamples,INTEGER,std::is_same,EnergyRedistributionPathTracing); + + // adjoint + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(granularity,INTEGER,std::is_same,AdjointParticleTracing); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(bruteForce,BOOLEAN,std::is_same,AdjointParticleTracing); + + // vpl + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(maxPathDepth,INTEGER,std::is_same,VirtualPointLights); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(shadowMap,INTEGER,std::is_same,VirtualPointLights); + 
NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(clamping,FLOAT,std::is_same,VirtualPointLights); + + // field extraction + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED("field",STRING,std::is_same,FieldExtraction) + { + static const core::unordered_map StringToType = { - if (_property.type!=SNamedPropertyElement::Type::INTEGER) - { - error = true; - return; - } - state.shadingSamples = _property.getProperty(); - } + {"position",FieldExtraction::Type::POSITION}, + {"relPosition",FieldExtraction::Type::RELATIVE_POSITION}, + {"distance",FieldExtraction::Type::DISTANCE}, + {"geoNormal",FieldExtraction::Type::GEOMETRIC_NORMAL}, + {"shNormal",FieldExtraction::Type::SHADING_NORMAL}, + {"uv",FieldExtraction::Type::UV_COORD}, + {"albedo",FieldExtraction::Type::ALBEDO}, + {"shapeIndex",FieldExtraction::Type::SHAPE_INDEX}, + {"primIndex",FieldExtraction::Type::PRIMITIVE_INDEX} + }; + auto found = StringToType.find(_property.svalue); + if (found!=StringToType.end()) + _this->field.field = found->second; else - { - if constexpr (std::is_same::value) - { - processEmitterSamples(); - processBSDFSamples(); - } - } - }); - }; - auto processStrictNormals = SET_PROPERTY_TEMPLATE(strictNormals,SNamedPropertyElement::Type::BOOLEAN,DirectIllumination,PathTracing); - auto processHideEmitters = SET_PROPERTY_TEMPLATE(hideEmitters,SNamedPropertyElement::Type::BOOLEAN,DirectIllumination,PathTracing,PhotonMapping); - auto processHideEnvironment = SET_PROPERTY_TEMPLATE(hideEnvironment,SNamedPropertyElement::Type::BOOLEAN,DirectIllumination,PathTracing,PhotonMapping); -#define ALL_PHOTONMAPPING_TYPES PhotonMapping,ProgressivePhotonMapping,StochasticProgressivePhotonMapping -#define ALL_MLT_TYPES PrimarySampleSpaceMetropolisLightTransport,PathSpaceMetropolisLightTransport -#define ALL_MC_TYPES PathTracing,SimpleVolumetricPathTracing,ExtendedVolumetricPathTracing,BiDirectionalPathTracing, \ - 
ALL_PHOTONMAPPING_TYPES,ALL_MLT_TYPES,EnergyRedistributionPathTracing,AdjointParticleTracing - auto processMaxDepth = SET_PROPERTY_TEMPLATE(maxPathDepth,SNamedPropertyElement::Type::INTEGER,ALL_MC_TYPES, VirtualPointLights); - auto processRRDepth = SET_PROPERTY_TEMPLATE(russianRouletteDepth,SNamedPropertyElement::Type::INTEGER,ALL_MC_TYPES); -#undef ALL_MC_TYPES - auto processLightImage = SET_PROPERTY_TEMPLATE(lightImage,SNamedPropertyElement::Type::BOOLEAN,BiDirectionalPathTracing); - auto processSampleDirect = SET_PROPERTY_TEMPLATE(sampleDirect,SNamedPropertyElement::Type::BOOLEAN,BiDirectionalPathTracing); - auto processGranularity = SET_PROPERTY_TEMPLATE(granularity,SNamedPropertyElement::Type::INTEGER,ALL_PHOTONMAPPING_TYPES,AdjointParticleTracing); -#undef ALL_PHOTONMAPPING_TYPES - auto processDirectSamples = SET_PROPERTY_TEMPLATE(directSamples,SNamedPropertyElement::Type::INTEGER,PhotonMapping,ALL_MLT_TYPES,EnergyRedistributionPathTracing); - auto processGlossySamples = SET_PROPERTY_TEMPLATE(glossySamples,SNamedPropertyElement::Type::INTEGER,PhotonMapping); - auto processGlobalPhotons = SET_PROPERTY_TEMPLATE(globalPhotons,SNamedPropertyElement::Type::INTEGER,PhotonMapping); - auto processCausticPhotons = SET_PROPERTY_TEMPLATE(causticPhotons,SNamedPropertyElement::Type::INTEGER,PhotonMapping); - auto processVolumePhotons = SET_PROPERTY_TEMPLATE(volumePhotons,SNamedPropertyElement::Type::INTEGER,PhotonMapping); - auto processGlobalLookupRadius = SET_PROPERTY_TEMPLATE(globalLURadius,SNamedPropertyElement::Type::FLOAT,PhotonMapping); - auto processCausticLookupRadius = SET_PROPERTY_TEMPLATE(causticLURadius,SNamedPropertyElement::Type::FLOAT,PhotonMapping); - auto processLookupSize = SET_PROPERTY_TEMPLATE(LUSize,SNamedPropertyElement::Type::INTEGER,PhotonMapping); - auto processPhotonCount = SET_PROPERTY_TEMPLATE(photonCount,SNamedPropertyElement::Type::INTEGER,ProgressivePhotonMapping,StochasticProgressivePhotonMapping); - auto processInitialRadius = 
SET_PROPERTY_TEMPLATE(initialRadius,SNamedPropertyElement::Type::FLOAT,ProgressivePhotonMapping,StochasticProgressivePhotonMapping); - auto processAlpha = SET_PROPERTY_TEMPLATE(alpha,SNamedPropertyElement::Type::FLOAT,ProgressivePhotonMapping,StochasticProgressivePhotonMapping); - auto processMaxPasses = SET_PROPERTY_TEMPLATE(maxPasses,SNamedPropertyElement::Type::INTEGER,ProgressivePhotonMapping,StochasticProgressivePhotonMapping); - auto processLuminanceSamples = SET_PROPERTY_TEMPLATE(luminanceSamples,SNamedPropertyElement::Type::INTEGER,ALL_MLT_TYPES); - auto processTwoStage = SET_PROPERTY_TEMPLATE(twoStage,SNamedPropertyElement::Type::BOOLEAN,ALL_MLT_TYPES); -#undef ALL_MLT_TYPES - auto processBidirectional = SET_PROPERTY_TEMPLATE(bidirectional,SNamedPropertyElement::Type::BOOLEAN,PrimarySampleSpaceMetropolisLightTransport); - auto processPLarge = SET_PROPERTY_TEMPLATE(pLarge,SNamedPropertyElement::Type::FLOAT,PrimarySampleSpaceMetropolisLightTransport); - auto processLensPerturbation = SET_PROPERTY_TEMPLATE(lensPerturbation,SNamedPropertyElement::Type::BOOLEAN,PathSpaceMetropolisLightTransport,EnergyRedistributionPathTracing); - auto processMultiChainPerturbation = SET_PROPERTY_TEMPLATE(multiChainPerturbation,SNamedPropertyElement::Type::BOOLEAN,PathSpaceMetropolisLightTransport,EnergyRedistributionPathTracing); - auto processCausticPerturbation = SET_PROPERTY_TEMPLATE(causticPerturbation,SNamedPropertyElement::Type::BOOLEAN,PathSpaceMetropolisLightTransport,EnergyRedistributionPathTracing); - auto processManifoldPerturbation = SET_PROPERTY_TEMPLATE(manifoldPerturbation,SNamedPropertyElement::Type::BOOLEAN,PathSpaceMetropolisLightTransport,EnergyRedistributionPathTracing); - auto processLambda = SET_PROPERTY_TEMPLATE(lambda,SNamedPropertyElement::Type::FLOAT,PathSpaceMetropolisLightTransport,EnergyRedistributionPathTracing); - auto processBidirectionalMutation = 
SET_PROPERTY_TEMPLATE(bidirectionalMutation,SNamedPropertyElement::Type::BOOLEAN,PathSpaceMetropolisLightTransport); - auto processNumChains = SET_PROPERTY_TEMPLATE(numChains,SNamedPropertyElement::Type::FLOAT,EnergyRedistributionPathTracing); - auto processMaxChains = SET_PROPERTY_TEMPLATE(maxChains,SNamedPropertyElement::Type::FLOAT,EnergyRedistributionPathTracing); - auto processChainLength = SET_PROPERTY_TEMPLATE(chainLength,SNamedPropertyElement::Type::INTEGER,EnergyRedistributionPathTracing); - auto processBruteForce = SET_PROPERTY_TEMPLATE(bruteForce,SNamedPropertyElement::Type::BOOLEAN,AdjointParticleTracing); - auto processShadowMap = SET_PROPERTY_TEMPLATE(shadowMap,SNamedPropertyElement::Type::INTEGER,VirtualPointLights); - auto processClamping = SET_PROPERTY_TEMPLATE(clamping,SNamedPropertyElement::Type::FLOAT,VirtualPointLights); - auto processField = [&]() -> void - { - visit([&](auto& state) -> void + _this->field.field = FieldExtraction::Type::INVALID; + return true; + } + ); + // TODO: redo + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED("undefined",FLOAT,std::is_same,FieldExtraction) { - using state_type = std::remove_reference::type; - if constexpr (std::is_same::value) - { - if (_property.type != SNamedPropertyElement::Type::STRING) - { - error = true; - return; - } - static const core::unordered_map StringToType = - { - {"position",FieldExtraction::Type::POSITION}, - {"relPosition",FieldExtraction::Type::RELATIVE_POSITION}, - {"distance",FieldExtraction::Type::DISTANCE}, - {"geoNormal",FieldExtraction::Type::GEOMETRIC_NORMAL}, - {"shNormal",FieldExtraction::Type::SHADING_NORMAL}, - {"uv",FieldExtraction::Type::UV_COORD}, - {"albedo",FieldExtraction::Type::ALBEDO}, - {"shapeIndex",FieldExtraction::Type::SHAPE_INDEX}, - {"primIndex",FieldExtraction::Type::PRIMITIVE_INDEX} - }; - auto found = StringToType.find(_property.svalue); - if (found!=StringToType.end()) - state.field = found->second; - else - state.field = 
FieldExtraction::Type::INVALID; - } - }); - }; - auto processUndefined = [&]() -> void - { - visit([&](auto& state) -> void { - using state_type = std::remove_reference::type; - - if constexpr (std::is_same::value) - { - if (_property.type != SNamedPropertyElement::Type::FLOAT && _property.type != SNamedPropertyElement::Type::SPECTRUM) - { - error = true; - return; - } - state.undefined = _property; // TODO: redo - } - }); - }; - auto processMaxError = SET_PROPERTY_TEMPLATE(maxError,SNamedPropertyElement::Type::FLOAT,AdaptiveIntegrator); - auto processPValue = SET_PROPERTY_TEMPLATE(pValue,SNamedPropertyElement::Type::FLOAT,AdaptiveIntegrator); - auto processMaxSampleFactor = SET_PROPERTY_TEMPLATE(maxSampleFactor,SNamedPropertyElement::Type::INTEGER,AdaptiveIntegrator); - auto processResolution = SET_PROPERTY_TEMPLATE(resolution,SNamedPropertyElement::Type::INTEGER,IrradianceCacheIntegrator); - auto processQuality = SET_PROPERTY_TEMPLATE(quality,SNamedPropertyElement::Type::FLOAT,IrradianceCacheIntegrator); - auto processGradients = SET_PROPERTY_TEMPLATE(gradients,SNamedPropertyElement::Type::BOOLEAN,IrradianceCacheIntegrator); - auto processClampNeighbour = SET_PROPERTY_TEMPLATE(clampNeighbour,SNamedPropertyElement::Type::BOOLEAN,IrradianceCacheIntegrator); - auto processClampScreen = SET_PROPERTY_TEMPLATE(clampScreen,SNamedPropertyElement::Type::BOOLEAN,IrradianceCacheIntegrator); - auto processOverture = SET_PROPERTY_TEMPLATE(overture,SNamedPropertyElement::Type::BOOLEAN,IrradianceCacheIntegrator); - auto processQualityAdjustment = SET_PROPERTY_TEMPLATE(qualityAdjustment,SNamedPropertyElement::Type::FLOAT,IrradianceCacheIntegrator); - auto processIndirecOnly = SET_PROPERTY_TEMPLATE(indirectOnly,SNamedPropertyElement::Type::BOOLEAN,IrradianceCacheIntegrator); - auto processDebug = SET_PROPERTY_TEMPLATE(debug,SNamedPropertyElement::Type::BOOLEAN,IrradianceCacheIntegrator); + _this->field.undefined = _property; // TODO: redo + return true; + } + ); + 
NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED("undefined",SPECTRUM,std::is_same,FieldExtraction) + { + _this->field.undefined = _property; // TODO: redo + return true; + } + ); + + // Now for the compound/nested integrators + // meta integrator has no members settable via properties + + // adaptive integrator + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(maxError,FLOAT,std::is_same,AdaptiveIntegrator); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(pValue,FLOAT,std::is_same,AdaptiveIntegrator); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(maxSampleFactor,INTEGER,std::is_same,AdaptiveIntegrator); + + // irradiance cache + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(resolution,INTEGER,std::is_same,IrradianceCacheIntegrator); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(quality,FLOAT,std::is_same,IrradianceCacheIntegrator); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(gradients,BOOLEAN,std::is_same,IrradianceCacheIntegrator); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(clampNeighbour,BOOLEAN,std::is_same,IrradianceCacheIntegrator); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(clampScreen,BOOLEAN,std::is_same,IrradianceCacheIntegrator); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(overture,BOOLEAN,std::is_same,IrradianceCacheIntegrator); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(qualityAdjustment,FLOAT,std::is_same,IrradianceCacheIntegrator); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(indirectOnly,BOOLEAN,std::is_same,IrradianceCacheIntegrator); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(debug,BOOLEAN,std::is_same,IrradianceCacheIntegrator); + + // multi channel no extra members - const core::unordered_map, 
core::CaseInsensitiveHash, core::CaseInsensitiveEquals> SetPropertyMap = - { - {"shadingSamples",processShadingSamples}, - {"rayLength",processRayLength}, - {"emitterSamples",processEmitterSamples}, - {"bsdfSamples",processBSDFSamples}, - {"strictNormals",processStrictNormals}, - {"hideEmitters",processHideEmitters}, - {"hideEnvironment",processHideEnvironment}, - {"maxDepth",processMaxDepth}, - {"rrDepth",processRRDepth}, - {"lightImage",processLightImage}, - {"sampleDirect",processSampleDirect}, - {"granularity",processGranularity}, - {"directSamples",processDirectSamples}, - {"glossySamples",processGlossySamples}, - {"globalPhotons",processGlobalPhotons}, - {"causticPhotons",processCausticPhotons}, - {"volumePhotons",processVolumePhotons}, - {"globalLookupRadius",processGlobalLookupRadius}, - {"causticLookupRadius",processCausticLookupRadius}, - {"lookupSize",processLookupSize}, - {"photonCount",processPhotonCount}, - {"initialRadius",processInitialRadius}, - {"alpha",processAlpha}, - {"maxPasses",processMaxPasses}, - {"luminanceSamples",processLuminanceSamples}, - {"twoStage",processTwoStage}, - {"bidirectional",processBidirectional}, - {"pLarge",processPLarge}, - {"lensPerturbation",processLensPerturbation}, - {"multiChainPerturbation",processMultiChainPerturbation}, - {"causticPerturbation",processCausticPerturbation}, - {"manifoldPerturbation",processManifoldPerturbation}, - {"lambda",processLambda}, - {"bidirectionalMutation",processBidirectionalMutation}, - {"numChains",processNumChains}, - {"maxChains",processMaxChains}, - {"chainLength",processChainLength}, - {"bruteForce",processBruteForce}, - {"shadowMap",processShadowMap}, - {"clamping",processClamping}, - {"field",processField}, - {"undefined",processUndefined}, - {"maxError",processMaxError}, - {"pValue",processPValue}, - {"maxSampleFactor",processMaxSampleFactor}, - {"resolution",processResolution}, - {"quality",processQuality}, - {"gradients",processGradients}, - 
{"clampNeighbour",processClampNeighbour}, - {"clampScreen",processClampScreen}, - {"overture",processOverture}, - {"qualityAdjustment",processQualityAdjustment}, - {"indirectOnly",processIndirecOnly}, - {"debug",processDebug}, - }; - - auto found = SetPropertyMap.find(_property.name); - if (found==SetPropertyMap.end()) - { - invalidXMLFileStructure(logger,"No Integrator can have such property set with name: "+_property.name); - return false; - } - - found->second(); -#endif - return !error; + return retval; } bool CElementIntegrator::onEndTag(CMitsubaMetadata* metadata, system::logger_opt_ptr logger) From 8cc46625853959fa653d4ed35a022534e0c54932 Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 22 Dec 2025 00:09:14 +0100 Subject: [PATCH 298/472] refactor CElementEmissionProfile and CElementRFilter, also spot we need as many structs as we have Types for VariantType filtering --- .../MitsubaLoader/CElementEmissionProfile.h | 3 +- include/nbl/ext/MitsubaLoader/CElementFilm.h | 13 +++- .../nbl/ext/MitsubaLoader/CElementRFilter.h | 16 ++++- .../MitsubaLoader/CElementEmissionProfile.cpp | 69 +++++++------------ src/nbl/ext/MitsubaLoader/CElementRFilter.cpp | 59 +++++----------- 5 files changed, 70 insertions(+), 90 deletions(-) diff --git a/include/nbl/ext/MitsubaLoader/CElementEmissionProfile.h b/include/nbl/ext/MitsubaLoader/CElementEmissionProfile.h index 331abf2873..86368a5499 100644 --- a/include/nbl/ext/MitsubaLoader/CElementEmissionProfile.h +++ b/include/nbl/ext/MitsubaLoader/CElementEmissionProfile.h @@ -61,7 +61,8 @@ struct CElementEmissionProfile final : public IElement std::string filename; // TODO: test destructor runs E_NORMALIZE normalization; - float flatten; // TODO: why is this named this way? 
+ // how much we flatten the profile towards a uniform distribution + float flatten; }; } diff --git a/include/nbl/ext/MitsubaLoader/CElementFilm.h b/include/nbl/ext/MitsubaLoader/CElementFilm.h index 267cd57938..8cdd607d79 100644 --- a/include/nbl/ext/MitsubaLoader/CElementFilm.h +++ b/include/nbl/ext/MitsubaLoader/CElementFilm.h @@ -57,6 +57,12 @@ class CElementFilm final : public IElement bool attachLog = true; }; + struct TiledHDR : HDR + { + constexpr static inline Type VariantType = Type::TILED_HDR; + + // TODO: sure we don't have more members? + }; struct LDR { constexpr static inline Type VariantType = Type::LDR_FILM; @@ -92,6 +98,7 @@ class CElementFilm final : public IElement // using variant_list_t = core::type_list< HDR, + TiledHDR, LDR, M >; @@ -195,9 +202,9 @@ class CElementFilm final : public IElement CElementRFilter rfilter; union { - HDR hdrfilm; - LDR ldrfilm; - M mfilm; + HDR hdrfilm; + LDR ldrfilm; + M mfilm; }; constexpr static inline size_t MaxPathLen = 256; diff --git a/include/nbl/ext/MitsubaLoader/CElementRFilter.h b/include/nbl/ext/MitsubaLoader/CElementRFilter.h index b3673c9e35..a84fbbbfec 100644 --- a/include/nbl/ext/MitsubaLoader/CElementRFilter.h +++ b/include/nbl/ext/MitsubaLoader/CElementRFilter.h @@ -29,21 +29,35 @@ class CElementRFilter final : public IElement struct Gaussian { + constexpr static inline Type VariantType = Type::GAUSSIAN; + float sigma = NAN; // can't look at mitsuba source to figure out the default it uses }; struct MitchellNetravali { + constexpr static inline Type VariantType = Type::MITCHELL; + + float B = 1.f / 3.f; + float C = 1.f / 3.f; + }; + struct CatmullRom + { + constexpr static inline Type VariantType = Type::CATMULLROM; + float B = 1.f / 3.f; float C = 1.f / 3.f; }; struct LanczosSinc { + constexpr static inline Type VariantType = Type::LANCZOS; + int32_t lobes = 3; }; using variant_list_t = core::type_list< Gaussian, MitchellNetravali, + CatmullRom, LanczosSinc >; static inline core::unordered_map 
compStringToTypeMap() @@ -112,7 +126,7 @@ class CElementRFilter final : public IElement { Gaussian gaussian; MitchellNetravali mitchell; - MitchellNetravali catmullrom; + CatmullRom catmullrom; LanczosSinc lanczos; }; float kappa = 0.f; diff --git a/src/nbl/ext/MitsubaLoader/CElementEmissionProfile.cpp b/src/nbl/ext/MitsubaLoader/CElementEmissionProfile.cpp index 0f360ccba0..4fbe7b3f4d 100644 --- a/src/nbl/ext/MitsubaLoader/CElementEmissionProfile.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementEmissionProfile.cpp @@ -4,61 +4,40 @@ #include "nbl/ext/MitsubaLoader/CElementEmissionProfile.h" #include "nbl/ext/MitsubaLoader/ParserUtil.h" +#include "nbl/ext/MitsubaLoader/ElementMacros.h" + #include namespace nbl::ext::MitsubaLoader { -bool CElementEmissionProfile::addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) +auto CElementEmissionProfile::compAddPropertyMap() -> AddPropertyMap { - if (_property.name=="filename") - { - if (_property.type!=SPropertyElementData::Type::STRING) - { - invalidXMLFileStructure(logger,"'s `filename` must be a string type, instead it's: "+_property.type); - return false; - } - filename = _property.getProperty(); - return true; - } - else if (_property.name=="normalization") - { - if (_property.type!=SPropertyElementData::Type::STRING) + using this_t = CElementEmissionProfile; + AddPropertyMap retval; + + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_PROPERTY(filename,STRING); + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY("normalization",STRING) { - invalidXMLFileStructure(logger,"'s `normalization` must be a string type, instead it's: "+_property.type); - return false; + const auto normalizeS = std::string(_property.svalue); + if (normalizeS == "UNIT_MAX") + _this->normalization = EN_UNIT_MAX; + else if (normalizeS == "UNIT_AVERAGE_OVER_IMPLIED_DOMAIN") + _this->normalization = EN_UNIT_AVERAGE_OVER_IMPLIED_DOMAIN; + else if (normalizeS == "UNIT_AVERAGE_OVER_FULL_DOMAIN") + _this->normalization = 
EN_UNIT_AVERAGE_OVER_FULL_DOMAIN; + else + { + logger.log("'s `normalization` is unrecognized: \"%s\"",system::ILogger::ELL_ERROR,normalizeS.c_str()); + _this->normalization = EN_NONE; + } + return true; } + }); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_PROPERTY(flatten,FLOAT); - const auto normalizeS = std::string(_property.getProperty()); - - if (normalizeS=="UNIT_MAX") - normalization = EN_UNIT_MAX; - else if(normalizeS=="UNIT_AVERAGE_OVER_IMPLIED_DOMAIN") - normalization = EN_UNIT_AVERAGE_OVER_IMPLIED_DOMAIN; - else if(normalizeS=="UNIT_AVERAGE_OVER_FULL_DOMAIN") - normalization = EN_UNIT_AVERAGE_OVER_FULL_DOMAIN; - else - { - invalidXMLFileStructure(logger,"'s `normalization` is unrecognized: "+ normalizeS); - normalization = EN_NONE; - } - - return true; - } - else if (_property.name=="flatten") - { - if (_property.type!=SPropertyElementData::Type::FLOAT) - return false; - - flatten = _property.getProperty(); - return true; - } - else - { - invalidXMLFileStructure(logger,"No emission profile can have such property set with name: "+_property.name); - return false; - } + return retval; } bool CElementEmissionProfile::processChildData(IElement* _child, const std::string& name) diff --git a/src/nbl/ext/MitsubaLoader/CElementRFilter.cpp b/src/nbl/ext/MitsubaLoader/CElementRFilter.cpp index de5bdf2255..ef7ebce9f8 100644 --- a/src/nbl/ext/MitsubaLoader/CElementRFilter.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementRFilter.cpp @@ -11,48 +11,27 @@ namespace nbl::ext::MitsubaLoader { - -bool CElementRFilter::addProperty(SNamedPropertyElement&& _property, system::logger_opt_ptr logger) + +auto CElementRFilter::compAddPropertyMap() -> AddPropertyMap { - if (_property.type == SNamedPropertyElement::Type::INTEGER) - { - if (core::strcmpi(_property.name,std::string("lobes"))) - { - invalidXMLFileStructure(logger,"\"lobes\" must be an integer property"); - return false; - } - lanczos.lobes = _property.ivalue; - return true; - } - else if (_property.type == 
SNamedPropertyElement::Type::FLOAT) - { - if (core::strcmpi(_property.name,std::string("b"))==0) - { - mitchell.B = _property.fvalue; - return true; - } - else if (core::strcmpi(_property.name,std::string("c"))==0) - { - mitchell.C = _property.fvalue; - return true; - } - else if (core::strcmpi(_property.name,std::string("kappa"))==0) - { - kappa = _property.fvalue; - return true; - } - else if (core::strcmpi(_property.name,std::string("Emin"))==0) - { - Emin = _property.fvalue; - return true; - } - else - invalidXMLFileStructure(logger,"unsupported rfilter property called: "+_property.name); - } - else - invalidXMLFileStructure(logger,"this reconstruction filter type does not take this parameter type for parameter: " + _property.name); + using this_t = CElementRFilter; + AddPropertyMap retval; + + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(sigma,FLOAT,std::is_same,Gaussian); + + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(b,FLOAT,std::is_same,MitchellNetravali); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(c,FLOAT,std::is_same,MitchellNetravali); + + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(b,FLOAT,std::is_same,CatmullRom); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(c,FLOAT,std::is_same,CatmullRom); + + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(lobes,INTEGER,std::is_same,LanczosSinc); + + // common + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_PROPERTY(kappa,FLOAT); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_PROPERTY(Emin,FLOAT); - return false; + return retval; } bool CElementRFilter::onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) From 3d8358ad44e939979b81ae2aed032d43c42c3ad8 Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 22 Dec 2025 00:44:01 +0100 Subject: [PATCH 299/472] didn't notice that can't register two callbacks for same property name and type --- 
.../nbl/ext/MitsubaLoader/PropertyElement.h | 3 +- .../ext/MitsubaLoader/CElementIntegrator.cpp | 64 ++++++++++++++----- src/nbl/ext/MitsubaLoader/CElementRFilter.cpp | 7 +- src/nbl/ext/MitsubaLoader/ElementMacros.h | 6 +- 4 files changed, 53 insertions(+), 27 deletions(-) diff --git a/include/nbl/ext/MitsubaLoader/PropertyElement.h b/include/nbl/ext/MitsubaLoader/PropertyElement.h index 320e12712e..0d8450ff62 100644 --- a/include/nbl/ext/MitsubaLoader/PropertyElement.h +++ b/include/nbl/ext/MitsubaLoader/PropertyElement.h @@ -15,8 +15,7 @@ namespace nbl::ext::MitsubaLoader inline void invalidXMLFileStructure(system::logger_opt_ptr logger, const std::string& errorMessage) { // TODO: print the line in the XML or something - std::string message = "Mitsuba loader error - Invalid .xml file structure: \'" + errorMessage + '\''; - logger.log(message,system::ILogger::E_LOG_LEVEL::ELL_ERROR); + logger.log("Mitsuba loader error - Invalid .xml file structure: \'%s\'",system::ILogger::E_LOG_LEVEL::ELL_ERROR,errorMessage.c_str()); _NBL_DEBUG_BREAK_IF(true); } diff --git a/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp b/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp index c487ab3933..fa7e89759d 100644 --- a/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp @@ -13,30 +13,60 @@ namespace nbl::ext::MitsubaLoader { +namespace impl +{ +template +struct has_strictNormals +{ + constexpr static bool value = std::is_same_v || + std::is_base_of_v; +}; +template +struct has_granularity +{ + constexpr static bool value = std::is_base_of_v || + std::is_same_v; +}; +template +struct has_directSamples +{ + constexpr static bool value = std::is_same_v || + std::is_base_of_v || + std::is_same_v; +}; +} + auto CElementIntegrator::compAddPropertyMap() -> AddPropertyMap { using this_t = CElementIntegrator; AddPropertyMap retval; + // common + // this one has really funny legacy behaviour which Mitsuba allowed contrary to its PDF docs + 
NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED("shadingSamples",INTEGER,is_any_of,AmbientOcclusion,DirectIllumination) + { + if (_this->type == Type::AO) + _this->ao.shadingSamples = _property.ivalue; + else + _this->direct.emitterSamples = _this->direct.bsdfSamples = _property.ivalue; + return true; + } + ); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(strictNormals,BOOLEAN,impl::has_strictNormals); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(granularity,INTEGER,impl::has_granularity); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(directSamples,INTEGER,impl::has_directSamples); + // ambient - NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(shadingSamples,INTEGER,std::is_same,AmbientOcclusion); NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(rayLength,FLOAT,std::is_same,AmbientOcclusion); // emitter hideables NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(hideEmitters,BOOLEAN,derived_from,DirectIllumination); NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(hideEnvironment,BOOLEAN,derived_from,DirectIllumination); - - // this one has really funny legacy behaviour which Mitsuba allowed contrary to its PDF docs - NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED("shadingSamples",INTEGER,std::is_same,DirectIllumination) - { - _this->direct.emitterSamples = _this->direct.bsdfSamples = _property.ivalue; - return true; - } - ); + // direct NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(emitterSamples,INTEGER,std::is_same,DirectIllumination); NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(bsdfSamples,INTEGER,std::is_same,DirectIllumination); - NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(strictNormals,BOOLEAN,std::is_same,DirectIllumination); + // COMMON: strictNormals // monte carlo base // Not 
using `NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED` because members have different names than XML names @@ -54,23 +84,23 @@ auto CElementIntegrator::compAddPropertyMap() -> AddPropertyMap AddPropertyMap AddPropertyMap AddPropertyMap NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(sigma,FLOAT,std::is_same,Gaussian); - NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(b,FLOAT,std::is_same,MitchellNetravali); - NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(c,FLOAT,std::is_same,MitchellNetravali); - - NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(b,FLOAT,std::is_same,CatmullRom); - NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(c,FLOAT,std::is_same,CatmullRom); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(b,FLOAT,is_any_of,MitchellNetravali,CatmullRom); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(c,FLOAT,is_any_of,MitchellNetravali,CatmullRom); NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(lobes,INTEGER,std::is_same,LanczosSinc); diff --git a/src/nbl/ext/MitsubaLoader/ElementMacros.h b/src/nbl/ext/MitsubaLoader/ElementMacros.h index b214882ff1..eea6e97004 100644 --- a/src/nbl/ext/MitsubaLoader/ElementMacros.h +++ b/src/nbl/ext/MitsubaLoader/ElementMacros.h @@ -18,14 +18,14 @@ // Similar to `NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY` but for `this_t` which declare `variant_list_t` (list of union types) // this adds a compile-time filter against the constraint, such that only variant types matching the constraint are visited. // Useful when multiple variants derive from the same base struct, or have the same member. -#define NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED(NAME,PROP_TYPE,CONSTRAINT,...) 
retval.template registerCallback( \ +#define NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED(NAME,PROP_TYPE,CONSTRAINT,...) retval.template registerCallback( \ SNamedPropertyElement::Type::PROP_TYPE,NAME,[](this_t* _this, SNamedPropertyElement&& _property, const system::logger_opt_ptr logger)->bool // This it to `NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED` what `NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_PROPERTY` is to `NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY` // So basically you know the member is the same across the constraint filtered types -#define NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(NAME,PROP_TYPE,CONSTRAINT,...) NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED(#NAME,PROP_TYPE,CONSTRAINT,__VA_ARGS__) {\ +#define NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(NAME,PROP_TYPE,CONSTRAINT,...) NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED(#NAME,PROP_TYPE,CONSTRAINT __VA_OPT__(,) __VA_ARGS__) {\ _this->visit([&_property](auto& state)->void{ \ - if constexpr (CONSTRAINT,__VA_ARGS__>::value) \ + if constexpr (CONSTRAINT __VA_OPT__(,) __VA_ARGS__>::value) \ state. 
## NAME = _property.getProperty(); \ }); return true;}) From cdd362b820e34f769706ef8c8479dd4d1e25b50b Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 22 Dec 2025 11:06:52 +0700 Subject: [PATCH 300/472] some fixes to using/filling streaming buffer --- include/nbl/ext/DebugDraw/CDrawAABB.h | 89 ++++++++++++++++++++------- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 14 ----- 2 files changed, 67 insertions(+), 36 deletions(-) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 99f0fa9223..9c1f24f2fa 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -45,8 +45,8 @@ namespace nbl::ext::debug_draw video::IQueue* transfer = nullptr; // only used to make the 24 element index buffer and instanced pipeline on create core::smart_refctd_ptr assetManager = nullptr; - core::smart_refctd_ptr singlePipelineLayout; - core::smart_refctd_ptr batchPipelineLayout; + core::smart_refctd_ptr singlePipelineLayout = nullptr; + core::smart_refctd_ptr batchPipelineLayout = nullptr; core::smart_refctd_ptr renderpass = nullptr; inline bool validate() const @@ -77,7 +77,7 @@ namespace nbl::ext::debug_draw struct DrawParameters { video::IGPUCommandBuffer* commandBuffer = nullptr; - hlsl::float32_t4x4 cameraMat = hlsl::float32_t4x4(1); + hlsl::float32_t4x4 cameraMat; float lineWidth = 1.f; }; @@ -102,20 +102,23 @@ namespace nbl::ext::debug_draw // user has to set span of filled-in InstanceData; camera matrix used in push constant inline bool render(const DrawParameters& params, video::ISemaphore::SWaitInfo waitInfo, std::span aabbInstances) { + system::logger_opt_ptr logger = m_cachedCreationParams.utilities->getLogger(); if (!(m_cachedCreationParams.drawMode & ADM_DRAW_BATCH)) { - m_cachedCreationParams.utilities->getLogger()->log("DrawAABB has not been enabled for draw batches!", system::ILogger::ELL_ERROR); + logger.log("DrawAABB has not been enabled for draw batches!", system::ILogger::ELL_ERROR); return 
false; } using offset_t = SCachedCreationParameters::streaming_buffer_t::size_type; - constexpr auto MdiSizes = std::to_array({ sizeof(hlsl::float32_t3), sizeof(InstanceData) }); - // shared nPoT alignment needs to be divisible by all smaller ones to satisfy an allocation from all - constexpr offset_t MaxAlignment = std::reduce(MdiSizes.begin(), MdiSizes.end(), 1, [](const offset_t a, const offset_t b)->offset_t {return std::lcm(a, b); }); + constexpr offset_t MaxAlignment = sizeof(InstanceData); // allocator initialization needs us to round up to PoT const auto MaxPOTAlignment = hlsl::roundUpToPoT(MaxAlignment); - auto* streaming = m_cachedCreationParams.streamingBuffer.get(); + if (streaming->getAddressAllocator().max_alignment() < MaxPOTAlignment) + { + logger.log("Draw AABB Streaming Buffer cannot guarantee the alignments we require!"); + return false; + } auto* const streamingPtr = reinterpret_cast(streaming->getBufferPointer()); assert(streamingPtr); @@ -126,37 +129,68 @@ namespace nbl::ext::debug_draw asset::SBufferBinding indexBinding = { .offset = 0, .buffer = m_indicesBuffer }; commandBuffer->bindIndexBuffer(indexBinding, asset::EIT_32BIT); + auto srcIt = aabbInstances.begin(); auto setInstancesRange = [&](InstanceData* data, uint32_t count) -> void { for (uint32_t i = 0; i < count; i++) { auto inst = data + i; - *inst = aabbInstances[i]; + *inst = *srcIt; inst->transform = hlsl::mul(params.cameraMat, inst->transform); + srcIt++; + + if (srcIt == aabbInstances.end()) + break; } - }; + }; const uint32_t numInstances = aabbInstances.size(); - const uint32_t instancesPerIter = streaming->getBuffer()->getSize() / sizeof(InstanceData); + const uint32_t instancesPerIter = streaming->max_size() / sizeof(InstanceData); if (numInstances > instancesPerIter) return false; using suballocator_t = core::LinearAddressAllocatorST; - uint32_t beginOffset = 0; - while (beginOffset < numInstances) + uint32_t blockOffset = 0u; + while (srcIt != aabbInstances.end()) { - const 
uint32_t instanceCount = hlsl::min(instancesPerIter, numInstances); - offset_t inputOffset = 0u; + uint32_t instanceCount = hlsl::min(instancesPerIter, numInstances); + offset_t inputOffset = blockOffset; offset_t ImaginarySizeUpperBound = 0x1 << 30; - suballocator_t imaginaryChunk(nullptr, inputOffset, 0, hlsl::roundUpToPoT(MaxAlignment), ImaginarySizeUpperBound); + suballocator_t imaginaryChunk(nullptr, inputOffset, 0, MaxPOTAlignment, ImaginarySizeUpperBound); uint32_t instancesByteOffset = imaginaryChunk.alloc_addr(sizeof(InstanceData) * instanceCount, sizeof(InstanceData)); const uint32_t totalSize = imaginaryChunk.get_allocated_size(); + + uint32_t blockSize; + bool allocated = false; + for (uint32_t t = 0; t < 2; t++) + { + blockSize = hlsl::max(streaming->max_size(), totalSize); + while (blockSize >= totalSize) + { + inputOffset = SCachedCreationParameters::streaming_buffer_t::invalid_value; + std::chrono::steady_clock::time_point waitTill = std::chrono::steady_clock::now() + std::chrono::milliseconds(1u); + if (streaming->multi_allocate(waitTill, 1, &inputOffset, &blockSize, &MaxAlignment) == 0u) + { + allocated = true; + break; + } + + streaming->cull_frees(); + blockSize >>= 1; + } - inputOffset = SCachedCreationParameters::streaming_buffer_t::invalid_value; - std::chrono::steady_clock::time_point waitTill = std::chrono::steady_clock::now() + std::chrono::milliseconds(1u); - streaming->multi_allocate(waitTill, 1, &inputOffset, &totalSize, &MaxAlignment); + if (allocated) + break; + } + if (!allocated) + { + logger.log("Failed to allocate even the smallest chunk from streaming buffer for the next drawcall batch.", system::ILogger::ELL_ERROR); + return false; + } + + instanceCount = blockSize / sizeof(InstanceData); + blockOffset += blockSize; auto* const streamingInstancesPtr = reinterpret_cast(streamingPtr + instancesByteOffset); setInstancesRange(streamingInstancesPtr, instanceCount); - beginOffset += instanceCount; 
assert(!streaming->needsManualFlushOrInvalidate()); @@ -166,13 +200,24 @@ namespace nbl::ext::debug_draw commandBuffer->pushConstants(m_batchPipeline->getLayout(), asset::IShader::E_SHADER_STAGE::ESS_VERTEX, 0, sizeof(SPushConstants), &pc); commandBuffer->drawIndexed(IndicesCount, instanceCount, 0, 0, 0); - streaming->multi_deallocate(1, &inputOffset, &totalSize, waitInfo); + streaming->multi_deallocate(1, &inputOffset, &blockSize, waitInfo); } return true; } - static hlsl::float32_t3x4 getTransformFromAABB(const hlsl::shapes::AABB<3, float>& aabb); + static inline hlsl::float32_t3x4 getTransformFromAABB(const hlsl::shapes::AABB<3, float>& aabb) + { + const auto diagonal = aabb.getExtent(); + hlsl::float32_t3x4 transform; + transform[0][3] = aabb.minVx.x; + transform[1][3] = aabb.minVx.y; + transform[2][3] = aabb.minVx.z; + transform[0][0] = diagonal.x; + transform[1][1] = diagonal.y; + transform[2][2] = diagonal.z; + return transform; + } protected: struct ConstructorParams diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index c32c591410..641811fe0f 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -226,7 +226,6 @@ bool DrawAABB::createStreamingBuffer(SCreationParameters& params) const auto validation = std::to_array ({ std::make_pair(buffer->getCreationParams().usage.hasFlags(SCachedCreationParameters::RequiredUsageFlags), "Streaming buffer must be created with IBuffer::EUF_STORAGE_BUFFER_BIT | IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT enabled!"), - std::make_pair(bool(buffer->getMemoryReqs().memoryTypeBits & params.utilities->getLogicalDevice()->getPhysicalDevice()->getUpStreamingMemoryTypeBits()), "Streaming buffer must have up-streaming memory type bits enabled!"), std::make_pair(binding.memory->getAllocateFlags().hasFlags(SCachedCreationParameters::RequiredAllocateFlags), "Streaming buffer's memory must be allocated with IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT enabled!"), 
std::make_pair(binding.memory->isCurrentlyMapped(), "Streaming buffer's memory must be mapped!"), // streaming buffer contructor already validates it, but cannot assume user won't unmap its own buffer for some reason (sorry if you have just hit it) std::make_pair(binding.memory->getCurrentMappingAccess().hasFlags(getRequiredAccessFlags(binding.memory->getMemoryPropertyFlags())), "Streaming buffer's memory current mapping access flags don't meet requirements!") @@ -386,17 +385,4 @@ bool DrawAABB::renderSingle(const DrawParameters& params, const hlsl::shapes::AA return true; } -hlsl::float32_t3x4 DrawAABB::getTransformFromAABB(const hlsl::shapes::AABB<3, float>& aabb) -{ - const auto diagonal = aabb.getExtent(); - hlsl::float32_t3x4 transform; - transform[0][3] = aabb.minVx.x; - transform[1][3] = aabb.minVx.y; - transform[2][3] = aabb.minVx.z; - transform[0][0] = diagonal.x; - transform[1][1] = diagonal.y; - transform[2][2] = diagonal.z; - return transform; -} - } From 4ae7d8940b22e5e4057dc474c50d72d79d74b386 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 22 Dec 2025 15:10:27 +0700 Subject: [PATCH 301/472] combined draw aabb shaders into unified, added precompile shaders to spv for extension --- include/nbl/ext/DebugDraw/CDrawAABB.h | 6 +- .../builtin/hlsl/aabb_instances.fragment.hlsl | 13 --- .../builtin/hlsl/aabb_instances.vertex.hlsl | 21 ---- .../ext/DebugDraw/builtin/hlsl/common.hlsl | 10 +- .../builtin/hlsl/draw_aabb.unified.hlsl | 39 +++++++ .../DebugDraw/builtin/hlsl/single.vertex.hlsl | 20 ---- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 100 +++++++----------- src/nbl/ext/DebugDraw/CMakeLists.txt | 63 ++++++++--- 8 files changed, 139 insertions(+), 133 deletions(-) delete mode 100644 include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.fragment.hlsl delete mode 100644 include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl create mode 100644 include/nbl/ext/DebugDraw/builtin/hlsl/draw_aabb.unified.hlsl delete mode 100644 
include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 9c1f24f2fa..5b42ac25ba 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -194,10 +194,10 @@ namespace nbl::ext::debug_draw assert(!streaming->needsManualFlushOrInvalidate()); - SPushConstants pc; + SInstancedPC pc; pc.pInstanceBuffer = m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + instancesByteOffset; - commandBuffer->pushConstants(m_batchPipeline->getLayout(), asset::IShader::E_SHADER_STAGE::ESS_VERTEX, 0, sizeof(SPushConstants), &pc); + commandBuffer->pushConstants(m_batchPipeline->getLayout(), asset::IShader::E_SHADER_STAGE::ESS_VERTEX, offsetof(ext::debug_draw::PushConstants, ipc), sizeof(SInstancedPC), &pc); commandBuffer->drawIndexed(IndicesCount, instanceCount, 0, 0, 0); streaming->multi_deallocate(1, &inputOffset, &blockSize, waitInfo); @@ -237,7 +237,7 @@ namespace nbl::ext::debug_draw ~DrawAABB() override {} private: - static core::smart_refctd_ptr createPipeline(SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout, const std::string& vsPath, const std::string& fsPath); + static core::smart_refctd_ptr createPipeline(SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout, const DrawMode mode); static bool createStreamingBuffer(SCreationParameters& params); static core::smart_refctd_ptr createIndicesBuffer(SCreationParameters& params); diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.fragment.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.fragment.hlsl deleted file mode 100644 index 09a12f3d07..0000000000 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.fragment.hlsl +++ /dev/null @@ -1,13 +0,0 @@ -#pragma shader_stage(fragment) - -#include "nbl/ext/DebugDraw/builtin/hlsl/common.hlsl" - -using namespace nbl::ext::debug_draw; - 
-[shader("pixel")] -float32_t4 main(PSInput input) : SV_TARGET -{ - float32_t4 outColor = input.color; - - return outColor; -} \ No newline at end of file diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl deleted file mode 100644 index 451243bbcc..0000000000 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl +++ /dev/null @@ -1,21 +0,0 @@ -#pragma shader_stage(vertex) - -#include "nbl/ext/DebugDraw/builtin/hlsl/common.hlsl" - -using namespace nbl::hlsl; -using namespace nbl::ext::debug_draw; - -[[vk::push_constant]] SPushConstants pc; - -[shader("vertex")] -PSInput main() -{ - PSInput output; - const float32_t3 vertex = getUnitAABBVertex(); - InstanceData instance = vk::BufferPointer(pc.pInstanceBuffer + sizeof(InstanceData) * glsl::gl_InstanceIndex()).Get(); - - output.position = math::linalg::promoted_mul(instance.transform, vertex); - output.color = instance.color; - - return output; -} \ No newline at end of file diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl index ac5deaef5f..b665c9d43a 100644 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl @@ -21,16 +21,22 @@ struct InstanceData hlsl::float32_t4 color; }; -struct SSinglePushConstants +struct SSinglePC { InstanceData instance; }; -struct SPushConstants +struct SInstancedPC { uint64_t pInstanceBuffer; }; +struct PushConstants +{ + SSinglePC spc; + SInstancedPC ipc; +}; + #ifdef __HLSL_VERSION struct PSInput { diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/draw_aabb.unified.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/draw_aabb.unified.hlsl new file mode 100644 index 0000000000..0b51f7de53 --- /dev/null +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/draw_aabb.unified.hlsl @@ -0,0 +1,39 @@ +#include "nbl/ext/DebugDraw/builtin/hlsl/common.hlsl" + +using 
namespace nbl::hlsl; +using namespace nbl::ext::debug_draw; + +[[vk::push_constant]] PushConstants pc; + +[shader("vertex")] +PSInput aabb_vertex_single() +{ + PSInput output; + float32_t3 vertex = getUnitAABBVertex(); + + output.position = math::linalg::promoted_mul(pc.spc.instance.transform, vertex); + output.color = pc.spc.instance.color; + + return output; +} + +[shader("vertex")] +PSInput aabb_vertex_instances() +{ + PSInput output; + const float32_t3 vertex = getUnitAABBVertex(); + InstanceData instance = vk::BufferPointer(pc.ipc.pInstanceBuffer + sizeof(InstanceData) * glsl::gl_InstanceIndex()).Get(); + + output.position = math::linalg::promoted_mul(instance.transform, vertex); + output.color = instance.color; + + return output; +} + +[shader("pixel")] +float32_t4 aabb_fragment(PSInput input) : SV_TARGET +{ + float32_t4 outColor = input.color; + + return outColor; +} diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl deleted file mode 100644 index 5b4f2a39a7..0000000000 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl +++ /dev/null @@ -1,20 +0,0 @@ -#pragma shader_stage(vertex) - -#include "nbl/ext/DebugDraw/builtin/hlsl/common.hlsl" - -using namespace nbl::hlsl; -using namespace nbl::ext::debug_draw; - -[[vk::push_constant]] SSinglePushConstants pc; - -[shader("vertex")] -PSInput main() -{ - PSInput output; - float32_t3 vertex = getUnitAABBVertex(); - - output.position = math::linalg::promoted_mul(pc.instance.transform, vertex); - output.color = pc.instance.color; - - return output; -} \ No newline at end of file diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index 641811fe0f..49a41d2aa7 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -8,6 +8,8 @@ #include "nbl/ext/debug_draw/builtin/CArchive.h" #endif +#include "nbl/ext/DebugDraw/builtin/build/spirv/keys.hpp" + using namespace 
nbl; using namespace core; using namespace video; @@ -35,7 +37,7 @@ core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) auto pipelineLayout = params.singlePipelineLayout; if (!pipelineLayout) pipelineLayout = createDefaultPipelineLayout(params.utilities->getLogicalDevice(), ADM_DRAW_SINGLE); - constructorParams.singlePipeline = createPipeline(params, pipelineLayout.get(), "single.vertex.hlsl", "aabb_instances.fragment.hlsl"); + constructorParams.singlePipeline = createPipeline(params, pipelineLayout.get(), ADM_DRAW_SINGLE); if (!constructorParams.singlePipeline) { logger->log("Failed to create pipeline!", ILogger::ELL_ERROR); @@ -48,7 +50,7 @@ core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) auto pipelineLayout = params.batchPipelineLayout; if (!pipelineLayout) pipelineLayout = createDefaultPipelineLayout(params.utilities->getLogicalDevice(), ADM_DRAW_BATCH); - constructorParams.batchPipeline = createPipeline(params, pipelineLayout.get(), "aabb_instances.vertex.hlsl", "aabb_instances.fragment.hlsl"); + constructorParams.batchPipeline = createPipeline(params, pipelineLayout.get(), ADM_DRAW_BATCH); if (!constructorParams.batchPipeline) { logger->log("Failed to create pipeline!", ILogger::ELL_ERROR); @@ -74,7 +76,7 @@ core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) } // note we use archive entry explicitly for temporary compiler include search path & asset cwd to use keys directly -constexpr std::string_view NBL_ARCHIVE_ENTRY = _ARCHIVE_ENTRY_KEY_; +constexpr std::string_view NBL_ARCHIVE_ENTRY = _ARCHIVE_ABSOLUTE_SPV_PATH_; const smart_refctd_ptr DrawAABB::mount(smart_refctd_ptr logger, ISystem* system, const std::string_view archiveAlias) { @@ -85,75 +87,54 @@ const smart_refctd_ptr DrawAABB::mount(smart_refctd_ptr l if (system->isDirectory(path(NBL_ARCHIVE_ENTRY.data()))) { - logger->log("CDrawAABB directory is already mounted!", ILogger::ELL_WARNING); + logger->log("CDrawAABB .spv directory is already 
mounted!", ILogger::ELL_WARNING); return nullptr; } // extension should mount everything for you, regardless if content goes from virtual filesystem // or disk directly - and you should never rely on application framework to expose extension data -#ifdef NBL_EMBED_BUILTIN_RESOURCES - auto archive = make_smart_refctd_ptr(smart_refctd_ptr(logger)); - system->mount(smart_refctd_ptr(archive), archiveAlias.data()); -#else - auto NBL_EXTENSION_MOUNT_DIRECTORY_ENTRY = (path(_ARCHIVE_ABSOLUTE_ENTRY_PATH_) / NBL_ARCHIVE_ENTRY).make_preferred(); - auto archive = make_smart_refctd_ptr(std::move(NBL_EXTENSION_MOUNT_DIRECTORY_ENTRY), smart_refctd_ptr(logger), system); + auto archive = make_smart_refctd_ptr(std::move(NBL_ARCHIVE_ENTRY), smart_refctd_ptr(logger), system); system->mount(smart_refctd_ptr(archive), archiveAlias.data()); -#endif return smart_refctd_ptr(archive); } -smart_refctd_ptr DrawAABB::createPipeline(SCreationParameters& params, const IGPUPipelineLayout* pipelineLayout, const std::string& vsPath, const std::string& fsPath) +smart_refctd_ptr DrawAABB::createPipeline(SCreationParameters& params, const IGPUPipelineLayout* pipelineLayout, DrawMode mode) { + system::logger_opt_ptr logger = params.utilities->getLogger(); auto system = smart_refctd_ptr(params.assetManager->getSystem()); - auto* set = params.assetManager->getCompilerSet(); - auto compiler = set->getShaderCompiler(IShader::E_CONTENT_TYPE::ECT_HLSL); - auto includeFinder = make_smart_refctd_ptr(smart_refctd_ptr(system)); - auto includeLoader = includeFinder->getDefaultFileSystemLoader(); - includeFinder->addSearchPath(NBL_ARCHIVE_ENTRY.data(), includeLoader); - auto compileShader = [&](const std::string& filePath, IShader::E_SHADER_STAGE stage) -> smart_refctd_ptr + if (!system->isDirectory(path(NBL_ARCHIVE_ENTRY.data()))) + mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), NBL_ARCHIVE_ENTRY); + + auto getShader = [&](const core::string& key)->smart_refctd_ptr { + 
IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = params.utilities->getLogger(); + lp.workingDirectory = _ARCHIVE_ABSOLUTE_SPV_PATH_; + auto bundle = params.assetManager->getAsset(key.c_str(), lp); + + const auto contents = bundle.getContents(); + + if (contents.empty()) { - IAssetLoader::SAssetLoadParams lparams = {}; - lparams.logger = params.utilities->getLogger(); - lparams.workingDirectory = NBL_ARCHIVE_ENTRY.data(); - auto bundle = params.assetManager->getAsset(filePath, lparams); - if (bundle.getContents().empty() || bundle.getAssetType() != IAsset::ET_SHADER) - { - params.utilities->getLogger()->log("Shader %s not found!", ILogger::ELL_ERROR, filePath.c_str()); - exit(-1); - } - - const auto assets = bundle.getContents(); - assert(assets.size() == 1); - smart_refctd_ptr shaderSrc = IAsset::castDown(assets[0]); - if (!shaderSrc) - return nullptr; - - CHLSLCompiler::SOptions options = {}; - options.stage = stage; - options.preprocessorOptions.sourceIdentifier = filePath; - options.preprocessorOptions.logger = params.utilities->getLogger(); - options.preprocessorOptions.includeFinder = includeFinder.get(); - shaderSrc = compiler->compileToSPIRV((const char*)shaderSrc->getContent()->getPointer(), options); - - return params.utilities->getLogicalDevice()->compileShader({ shaderSrc.get() }); - }; + logger.log("Failed to load shader %s from disk", ILogger::ELL_ERROR, key.c_str()); + return nullptr; + } - if (!system->areBuiltinsMounted()) - { - params.utilities->getLogger()->log("Nabla builtins are not mounted!", ILogger::ELL_ERROR); - return nullptr; - } + if (bundle.getAssetType() != IAsset::ET_SHADER) + { + logger.log("Loaded asset has wrong type!", ILogger::ELL_ERROR); + return nullptr; + } - if (!system->exists(path(NBL_ARCHIVE_ENTRY) / "common.hlsl", {})) - mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), NBL_ARCHIVE_ENTRY); + return IAsset::castDown(contents[0]); + }; - auto vertexShader = compileShader(vsPath, 
IShader::E_SHADER_STAGE::ESS_VERTEX); - auto fragmentShader = compileShader(fsPath, IShader::E_SHADER_STAGE::ESS_FRAGMENT); + auto key = nbl::ext::debug_draw::builtin::build::get_spirv_key<"draw_aabb">(params.utilities->getLogicalDevice()); + smart_refctd_ptr unifiedShader = getShader(key); - if (!vertexShader || !fragmentShader) + if (!unifiedShader) { params.utilities->getLogger()->log("Could not compile shaders!", ILogger::ELL_ERROR); return nullptr; @@ -161,8 +142,8 @@ smart_refctd_ptr DrawAABB::createPipeline(SCreationParamet video::IGPUGraphicsPipeline::SCreationParams pipelineParams[1] = {}; pipelineParams[0].layout = pipelineLayout; - pipelineParams[0].vertexShader = { .shader = vertexShader.get(), .entryPoint = "main" }; - pipelineParams[0].fragmentShader = { .shader = fragmentShader.get(), .entryPoint = "main" }; + pipelineParams[0].vertexShader = { .shader = unifiedShader.get(), .entryPoint = (mode & ADM_DRAW_SINGLE) ? "aabb_vertex_single" : "aabb_vertex_instances" }; + pipelineParams[0].fragmentShader = { .shader = unifiedShader.get(), .entryPoint = "aabb_fragment" }; pipelineParams[0].cached = { .primitiveAssembly = { .primitiveType = asset::E_PRIMITIVE_TOPOLOGY::EPT_LINE_LIST, @@ -351,10 +332,11 @@ core::smart_refctd_ptr DrawAABB::createPipelineLayout core::smart_refctd_ptr DrawAABB::createDefaultPipelineLayout(video::ILogicalDevice* device, DrawMode mode) { - const uint32_t pcSize = (mode & ADM_DRAW_BATCH) ? sizeof(SPushConstants) : sizeof(SSinglePushConstants); + const uint32_t offset = (mode & ADM_DRAW_BATCH) ? offsetof(ext::debug_draw::PushConstants, ipc) : offsetof(ext::debug_draw::PushConstants, spc); + const uint32_t pcSize = (mode & ADM_DRAW_BATCH) ? 
sizeof(SInstancedPC) : sizeof(SSinglePC); SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX, - .offset = 0, + .offset = offset, .size = pcSize }; return createPipelineLayoutFromPCRange(device, pcRange); @@ -374,12 +356,12 @@ bool DrawAABB::renderSingle(const DrawParameters& params, const hlsl::shapes::AA asset::SBufferBinding indexBinding = { .offset = 0, .buffer = m_indicesBuffer }; commandBuffer->bindIndexBuffer(indexBinding, asset::EIT_32BIT); - SSinglePushConstants pc; + SSinglePC pc; hlsl::float32_t3x4 instanceTransform = getTransformFromAABB(aabb); pc.instance.transform = math::linalg::promoted_mul(params.cameraMat, instanceTransform); pc.instance.color = color; - commandBuffer->pushConstants(m_singlePipeline->getLayout(), ESS_VERTEX, 0, sizeof(SSinglePushConstants), &pc); + commandBuffer->pushConstants(m_singlePipeline->getLayout(), ESS_VERTEX, offsetof(ext::debug_draw::PushConstants, spc), sizeof(SSinglePC), &pc); commandBuffer->drawIndexed(IndicesCount, 1, 0, 0, 0); return true; diff --git a/src/nbl/ext/DebugDraw/CMakeLists.txt b/src/nbl/ext/DebugDraw/CMakeLists.txt index 7a89caca0d..2eb05b739b 100644 --- a/src/nbl/ext/DebugDraw/CMakeLists.txt +++ b/src/nbl/ext/DebugDraw/CMakeLists.txt @@ -19,25 +19,58 @@ nbl_create_ext_library_project( "" ) -# this should be standard for all extensions -set(_ARCHIVE_ENTRY_KEY_ "nbl/ext/DebugDraw/builtin/hlsl") # then each one has unique archive key get_filename_component(_ARCHIVE_ABSOLUTE_ENTRY_PATH_ "${NBL_EXT_INTERNAL_INCLUDE_DIR}" ABSOLUTE) -get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) -get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) -target_compile_definitions(${LIB_NAME} PRIVATE _ARCHIVE_ABSOLUTE_ENTRY_PATH_="${_ARCHIVE_ABSOLUTE_ENTRY_PATH_}") -target_compile_definitions(${LIB_NAME} PRIVATE _ARCHIVE_ENTRY_KEY_="${_ARCHIVE_ENTRY_KEY_}") +set(NBL_DEBUG_DRAW_HLSL_MOUNT_POINT 
"${_ARCHIVE_ABSOLUTE_ENTRY_PATH_}/nbl/ext/DebugDraw/builtin/hlsl") +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + ${NBL_DEBUG_DRAW_HLSL_MOUNT_POINT}/common.hlsl + ${NBL_DEBUG_DRAW_HLSL_MOUNT_POINT}/single.vertex.hlsl + ${NBL_DEBUG_DRAW_HLSL_MOUNT_POINT}/aabb_instances.vertex.hlsl + ${NBL_DEBUG_DRAW_HLSL_MOUNT_POINT}/aabb_instances.fragment.hlsl +) +target_sources(${LIB_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +target_compile_definitions(${LIB_NAME} PRIVATE _ARCHIVE_ABSOLUTE_SPV_PATH_="${OUTPUT_DIRECTORY}") -if(NBL_EMBED_BUILTIN_RESOURCES) - set(_BR_TARGET_ extDebugDrawbuiltinResourceData) +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "${NBL_DEBUG_DRAW_HLSL_MOUNT_POINT}/draw_aabb.unified.hlsl", + "KEY": "draw_aabb", + } + +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} +) - LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "common.hlsl") - LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "single.vertex.hlsl") # (*) - LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "aabb_instances.vertex.hlsl") # (*) - LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "aabb_instances.fragment.hlsl") # (*) +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${LIB_NAME}SPIRV + LINK_TO ${LIB_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_DEBUG_DRAW_HLSL_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/ext/DebugDraw/builtin/build/spirv/keys.hpp + NAMESPACE nbl::ext::debug_draw::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::ext::debug_draw::builtin::build + TARGET ${LIB_NAME}_builtinsBuild + LINK_TO ${LIB_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) - ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_ARCHIVE_ABSOLUTE_ENTRY_PATH_}" "${_ARCHIVE_ENTRY_KEY_}" "nbl::ext::debug_draw::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") 
- LINK_BUILTIN_RESOURCES_TO_TARGET(${LIB_NAME} ${_BR_TARGET_}) -endif() add_library(Nabla::ext::DebugDraw ALIAS ${LIB_NAME}) From 9d8c6e444575d57bbda53a3bfacb11f168b94309 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 22 Dec 2025 11:34:59 +0100 Subject: [PATCH 302/472] Updated examples --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 5acd059641..2d59279740 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 5acd05964180897127d63d68b3db504ea8e46cc2 +Subproject commit 2d5927974073dd3ada6a0a52134355d8022876a3 From 7c0325544ea4f417b32e5ade092a7de3ab9ac659 Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 22 Dec 2025 12:26:07 +0100 Subject: [PATCH 303/472] fix transform handling and realize unit test cases can be wrong too --- examples_tests | 2 +- include/nbl/ext/MitsubaLoader/CElementTransform.h | 4 ++-- include/nbl/ext/MitsubaLoader/ParserUtil.h | 1 + src/nbl/ext/MitsubaLoader/CElementTransform.cpp | 2 +- src/nbl/ext/MitsubaLoader/ParserUtil.cpp | 8 +++++--- 5 files changed, 10 insertions(+), 7 deletions(-) diff --git a/examples_tests b/examples_tests index bfcff8a686..d20b9c67bf 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit bfcff8a686409dd7c0d55607bb8cb6bcc0e0b80a +Subproject commit d20b9c67bf5ef5c4d782a13709a78ce59b24e1e4 diff --git a/include/nbl/ext/MitsubaLoader/CElementTransform.h b/include/nbl/ext/MitsubaLoader/CElementTransform.h index 45612174ad..de7bcd51e7 100644 --- a/include/nbl/ext/MitsubaLoader/CElementTransform.h +++ b/include/nbl/ext/MitsubaLoader/CElementTransform.h @@ -16,7 +16,7 @@ class CElementTransform final : public IElement public: static AddPropertyMap compAddPropertyMap(); - inline CElementTransform() : IElement(""), matrix() {} + inline CElementTransform() : IElement(""), matrix(1.f) {} inline ~CElementTransform() {} inline bool onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) 
override {return true;} @@ -33,7 +33,7 @@ class CElementTransform final : public IElement } */ - hlsl::float32_t4x4 matrix; + hlsl::float32_t4x4 matrix; // TODO: HLSL diagonal(1.f) }; } diff --git a/include/nbl/ext/MitsubaLoader/ParserUtil.h b/include/nbl/ext/MitsubaLoader/ParserUtil.h index 5c2b3efbef..1d6bd55715 100644 --- a/include/nbl/ext/MitsubaLoader/ParserUtil.h +++ b/include/nbl/ext/MitsubaLoader/ParserUtil.h @@ -87,6 +87,7 @@ class ParserManager final CElementRFilter, CElementSampler, /// CElementShape, + CElementTransform, /// CElementBSDF, /// CElementTexture, /// CElementEmitter, diff --git a/src/nbl/ext/MitsubaLoader/CElementTransform.cpp b/src/nbl/ext/MitsubaLoader/CElementTransform.cpp index cb583c211c..3097022a1a 100644 --- a/src/nbl/ext/MitsubaLoader/CElementTransform.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementTransform.cpp @@ -15,7 +15,7 @@ auto CElementTransform::compAddPropertyMap() -> AddPropertyMapbool { - _this->matrix = _property.mvalue; + _this->matrix = mul(_property.mvalue,_this->matrix); return true; }; for (const auto& type : { diff --git a/src/nbl/ext/MitsubaLoader/ParserUtil.cpp b/src/nbl/ext/MitsubaLoader/ParserUtil.cpp index 0fe612e2ec..ab3d82901f 100644 --- a/src/nbl/ext/MitsubaLoader/ParserUtil.cpp +++ b/src/nbl/ext/MitsubaLoader/ParserUtil.cpp @@ -225,7 +225,7 @@ void ParserManager::XMLContext::parseElement(const char* _el, const char** _atts nameIt = typeMap.find(""); if (nameIt==typeMap.end()) { - session->invalidXMLFileStructure("There's no Property named (TODO) of Type (TODO) supported by ElementType (TODO)"); + session->invalidXMLFileStructure("There's no Property named \""+property.name+"\" of Type (TODO) supported by ElementType (TODO)"); return; } const auto& callback = nameIt->second; @@ -233,7 +233,7 @@ void ParserManager::XMLContext::parseElement(const char* _el, const char** _atts if constexpr (!std::is_same_v) if 
(std::find(callback.allowedVariantTypes.begin(),callback.allowedVariantTypes.end(),typedElement->type)==callback.allowedVariantTypes.end()) { - session->invalidXMLFileStructure("There's no Property named (TODO) of Type (TODO) not supported on ElementType (TODO) of Variant (TODO)"); + session->invalidXMLFileStructure("There's no Property named \""+property.name+"\" of Type(TODO) not supported on ElementType(TODO) of Variant(TODO)"); return; } callback(typedElement,std::move(property),session->params->logger); @@ -246,7 +246,8 @@ void ParserManager::XMLContext::parseElement(const char* _el, const char** _atts ); if (unsupportedElement) { - session->invalidXMLFileStructure("Current Element Type doesn't have a AddPropertyMap at all (no property adding supported)!"); + const core::string typeName; // TODO = system::to_string(element->getType()); + session->invalidXMLFileStructure("Current Element Type "+typeName+" doesn't have a AddPropertyMap at all (no property adding supported)!"); return; } return; @@ -431,6 +432,7 @@ ParserManager::ParserManager() : propertyElements({ CElementFilm::compAddPropertyMap(), CElementRFilter::compAddPropertyMap(), CElementSampler::compAddPropertyMap(), + CElementTransform::compAddPropertyMap(), CElementEmissionProfile::compAddPropertyMap() }) { } From 70eb689601a6c4a57f4a68ec3354670ef795ac16 Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 22 Dec 2025 14:14:41 +0100 Subject: [PATCH 304/472] can't believe I spent an hour on this --- include/nbl/ext/MitsubaLoader/CElementRFilter.h | 8 ++++---- src/nbl/ext/MitsubaLoader/CElementRFilter.cpp | 4 ++-- src/nbl/ext/MitsubaLoader/ParserUtil.cpp | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/nbl/ext/MitsubaLoader/CElementRFilter.h b/include/nbl/ext/MitsubaLoader/CElementRFilter.h index a84fbbbfec..4d4be06424 100644 --- a/include/nbl/ext/MitsubaLoader/CElementRFilter.h +++ b/include/nbl/ext/MitsubaLoader/CElementRFilter.h @@ -89,16 +89,16 @@ class CElementRFilter final 
: public IElement case Type::TENT: break; case Type::GAUSSIAN: - visit(gaussian); + visitor(gaussian); break; case Type::MITCHELL: - visit(mitchell); + visitor(mitchell); break; case Type::CATMULLROM: - visit(catmullrom); + visitor(catmullrom); break; case Type::LANCZOS: - visit(lanczos); + visitor(lanczos); break; default: break; diff --git a/src/nbl/ext/MitsubaLoader/CElementRFilter.cpp b/src/nbl/ext/MitsubaLoader/CElementRFilter.cpp index a4586c8309..9bd4b864df 100644 --- a/src/nbl/ext/MitsubaLoader/CElementRFilter.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementRFilter.cpp @@ -19,8 +19,8 @@ auto CElementRFilter::compAddPropertyMap() -> AddPropertyMap NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(sigma,FLOAT,std::is_same,Gaussian); - NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(b,FLOAT,is_any_of,MitchellNetravali,CatmullRom); - NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(c,FLOAT,is_any_of,MitchellNetravali,CatmullRom); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(B,FLOAT,is_any_of,MitchellNetravali,CatmullRom); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(C,FLOAT,is_any_of,MitchellNetravali,CatmullRom); NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(lobes,INTEGER,std::is_same,LanczosSinc); diff --git a/src/nbl/ext/MitsubaLoader/ParserUtil.cpp b/src/nbl/ext/MitsubaLoader/ParserUtil.cpp index ab3d82901f..9646fca073 100644 --- a/src/nbl/ext/MitsubaLoader/ParserUtil.cpp +++ b/src/nbl/ext/MitsubaLoader/ParserUtil.cpp @@ -231,7 +231,7 @@ void ParserManager::XMLContext::parseElement(const char* _el, const char** _atts const auto& callback = nameIt->second; auto* typedElement = static_cast(element); if constexpr (!std::is_same_v) - if (std::find(callback.allowedVariantTypes.begin(),callback.allowedVariantTypes.end(),typedElement->type)==callback.allowedVariantTypes.end()) + if 
(!callback.allowedVariantTypes.empty() && std::find(callback.allowedVariantTypes.begin(),callback.allowedVariantTypes.end(),typedElement->type)==callback.allowedVariantTypes.end()) { session->invalidXMLFileStructure("There's no Property named \""+property.name+"\" of Type(TODO) not supported on ElementType(TODO) of Variant(TODO)"); return; From 0e38e803546699c40eafd744e3bc49345ad7ad3f Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 22 Dec 2025 17:02:11 +0100 Subject: [PATCH 305/472] fix lookat matrices --- src/nbl/ext/MitsubaLoader/PropertyElement.cpp | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/nbl/ext/MitsubaLoader/PropertyElement.cpp b/src/nbl/ext/MitsubaLoader/PropertyElement.cpp index d6144b6919..1beff22eb7 100644 --- a/src/nbl/ext/MitsubaLoader/PropertyElement.cpp +++ b/src/nbl/ext/MitsubaLoader/PropertyElement.cpp @@ -202,19 +202,20 @@ std::optional CPropertyElementManager::createPropertyData invalidXMLFileStructure(logger,"Invalid element, name:\'"+result.name+"\' Axis can't be (0,0,0)"); return {}; } + // TODO: quaternion after the rework using namespace nbl::hlsl::math;//::linalg; result.mvalue = linalg::promote_affine<4,4>(linalg::rotation_mat(hlsl::radians(atof(desiredAttributes[0])),axis)); } break; case SPropertyElementData::Type::SCALE: result.mvalue = hlsl::float32_t4x4(1.f); - if (desiredAttributes[0]) + if (desiredAttributes[0]) // you either get this one attribute { const float uniformScale = atof(desiredAttributes[0]); for (auto i=0u; i<3u; i++) result.mvalue[i][i] = uniformScale; } - else + else // or x,y,z { for (auto i=0u; i<3u; i++) if (desiredAttributes[i+1u]) @@ -255,9 +256,16 @@ std::optional CPropertyElementManager::createPropertyData } up[index] = 1.f; } + // TODO: after the rm-core matrix PR we need to get rid of the tranpose (I transpose only because of GLM and HLSL mixup) + const auto lookAtGLM = reinterpret_cast(glm::lookAtLH(origin,target,up)); + const auto lookAt = 
hlsl::transpose(lookAtGLM); // mitsuba understands look-at and right-handed camera little bit differently than I do - const auto actualLookAt = reinterpret_cast(glm::lookAtLH(origin,target,up)); - result.mvalue = hlsl::inverse(actualLookAt); + const auto rotation = hlsl::inverse(hlsl::float32_t3x3(lookAt)); + // set the origin to avoid numerical issues + for (auto r=0; r<3; r++) + { + result.mvalue[r][3] = origin[r]; + } } break; default: @@ -291,8 +299,8 @@ hlsl::float32_t4x4 CPropertyElementManager::retrieveMatrix(const std::string_vie std::stringstream ss; ss << str; - for (auto r=0u; r<16u; r++) - for (auto c=0u; c<16u; c++) + for (auto r=0u; r<4u; r++) + for (auto c=0u; c<4u; c++) { float f = std::numeric_limits::quiet_NaN(); ss >> f; From 1a17718cb4656d04a16d59d2e7bf1fa75ca2b2d5 Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 22 Dec 2025 19:40:12 +0100 Subject: [PATCH 306/472] MAke CElementShape parse, also add logger to processChildData --- include/nbl/ext/MitsubaLoader/CElementBSDF.h | 2 +- .../MitsubaLoader/CElementEmissionProfile.h | 2 +- .../nbl/ext/MitsubaLoader/CElementEmitter.h | 2 +- include/nbl/ext/MitsubaLoader/CElementFilm.h | 8 +- .../ext/MitsubaLoader/CElementIntegrator.h | 12 +- .../nbl/ext/MitsubaLoader/CElementSensor.h | 10 +- include/nbl/ext/MitsubaLoader/CElementShape.h | 196 +++++++------- .../nbl/ext/MitsubaLoader/CElementTexture.h | 2 +- include/nbl/ext/MitsubaLoader/IElement.h | 21 +- include/nbl/ext/MitsubaLoader/ParserUtil.h | 2 +- .../MitsubaLoader/CElementEmissionProfile.cpp | 2 +- src/nbl/ext/MitsubaLoader/CElementFilm.cpp | 23 +- src/nbl/ext/MitsubaLoader/CElementShape.cpp | 253 ++++-------------- src/nbl/ext/MitsubaLoader/CMakeLists.txt | 2 +- src/nbl/ext/MitsubaLoader/ElementMacros.h | 108 -------- src/nbl/ext/MitsubaLoader/ParserUtil.cpp | 7 +- 16 files changed, 215 insertions(+), 437 deletions(-) diff --git a/include/nbl/ext/MitsubaLoader/CElementBSDF.h b/include/nbl/ext/MitsubaLoader/CElementBSDF.h index 
7f5e73c13f..0873c6e41d 100644 --- a/include/nbl/ext/MitsubaLoader/CElementBSDF.h +++ b/include/nbl/ext/MitsubaLoader/CElementBSDF.h @@ -379,7 +379,7 @@ class CElementBSDF : public IElement inline IElement::Type getType() const override { return ElementType; } std::string getLogName() const override { return "bsdf"; } - bool processChildData(IElement* _child, const std::string& name) override; + bool processChildData(IElement* _child, const std::string& name, system::logger_opt_ptr logger) override; inline bool isMeta() const { diff --git a/include/nbl/ext/MitsubaLoader/CElementEmissionProfile.h b/include/nbl/ext/MitsubaLoader/CElementEmissionProfile.h index 86368a5499..ecf24c9f63 100644 --- a/include/nbl/ext/MitsubaLoader/CElementEmissionProfile.h +++ b/include/nbl/ext/MitsubaLoader/CElementEmissionProfile.h @@ -44,7 +44,7 @@ struct CElementEmissionProfile final : public IElement inline bool onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) override {return true;} - bool processChildData(IElement* _child, const std::string& name) override; + bool processChildData(IElement* _child, const std::string& name, system::logger_opt_ptr logger) override; constexpr static inline auto ElementType = IElement::Type::EMISSION_PROFILE; inline IElement::Type getType() const override { return ElementType; } diff --git a/include/nbl/ext/MitsubaLoader/CElementEmitter.h b/include/nbl/ext/MitsubaLoader/CElementEmitter.h index 5de6861d7f..04833986d1 100644 --- a/include/nbl/ext/MitsubaLoader/CElementEmitter.h +++ b/include/nbl/ext/MitsubaLoader/CElementEmitter.h @@ -209,7 +209,7 @@ class CElementEmitter : public IElement inline IElement::Type getType() const override { return ElementType; } std::string getLogName() const override { return "emitter"; } - bool processChildData(IElement* _child, const std::string& name) override + bool processChildData(IElement* _child, const std::string& name, system::logger_opt_ptr logger) override { if (!_child) return true; diff 
--git a/include/nbl/ext/MitsubaLoader/CElementFilm.h b/include/nbl/ext/MitsubaLoader/CElementFilm.h index 8cdd607d79..3da02a21b8 100644 --- a/include/nbl/ext/MitsubaLoader/CElementFilm.h +++ b/include/nbl/ext/MitsubaLoader/CElementFilm.h @@ -177,15 +177,21 @@ class CElementFilm final : public IElement inline IElement::Type getType() const override { return ElementType; } inline std::string getLogName() const override { return "film"; } - inline bool processChildData(IElement* _child, const std::string& name) override + inline bool processChildData(IElement* _child, const std::string& name, system::logger_opt_ptr logger) override { if (!_child) return true; if (_child->getType() != IElement::Type::RFILTER) + { + logger.log("CElementFilm only expects type %d children, is %d instead",system::ILogger::ELL_ERROR,IElement::Type::RFILTER,_child->getType()); return false; + } auto _rfilter = static_cast(_child); if (_rfilter->type == CElementRFilter::Type::INVALID) + { + logger.log("CElementRFilter::Type::INVALID used as child in CElementFilm",system::ILogger::ELL_ERROR); return false; + } rfilter = *_rfilter; return true; } diff --git a/include/nbl/ext/MitsubaLoader/CElementIntegrator.h b/include/nbl/ext/MitsubaLoader/CElementIntegrator.h index 760f87fe5b..b3061ba380 100644 --- a/include/nbl/ext/MitsubaLoader/CElementIntegrator.h +++ b/include/nbl/ext/MitsubaLoader/CElementIntegrator.h @@ -366,7 +366,7 @@ class CElementIntegrator final : public IElement inline IElement::Type getType() const override { return ElementType; } inline std::string getLogName() const override { return "integrator"; } - inline bool processChildData(IElement* _child, const std::string& name) override + inline bool processChildData(IElement* _child, const std::string& name, system::logger_opt_ptr logger) override { if (!_child) return true; @@ -377,10 +377,11 @@ class CElementIntegrator final : public IElement [[fallthrough]]; case Type::MULTI_CHANNEL: if (_child->getType() != 
IElement::Type::INTEGRATOR) - return false; - break; + break; + [[fallthrough]]; default: - break; + logger.log("Only IrradianceCache or MultiChannel can only have another nested inside", system::ILogger::ELL_ERROR); + return false; } switch (type) { @@ -396,7 +397,8 @@ class CElementIntegrator final : public IElement return true; } break; - default: + default: // to make compiler shut up + assert(false); break; } return false; diff --git a/include/nbl/ext/MitsubaLoader/CElementSensor.h b/include/nbl/ext/MitsubaLoader/CElementSensor.h index 4ef41e42d5..fecd248ad0 100644 --- a/include/nbl/ext/MitsubaLoader/CElementSensor.h +++ b/include/nbl/ext/MitsubaLoader/CElementSensor.h @@ -207,7 +207,7 @@ class CElementSensor final : public IElement inline IElement::Type getType() const override { return ElementType; } inline std::string getLogName() const override { return "sensor"; } - inline bool processChildData(IElement* _child, const std::string& name) override + inline bool processChildData(IElement* _child, const std::string& name, system::logger_opt_ptr logger) override { if (!_child) return true; @@ -216,8 +216,11 @@ class CElementSensor final : public IElement case IElement::Type::TRANSFORM: { auto tform = static_cast(_child); - if (name!="toWorld") + if (name != "toWorld") + { + logger.log("The nested inside needs to be named \"toWorld\"",system::ILogger::ELL_ERROR); return false; + } //toWorldType = IElement::Type::TRANSFORM; transform = *tform; return true; @@ -241,7 +244,10 @@ class CElementSensor final : public IElement if (sampler.type!=CElementSampler::Type::INVALID) return true; break; + default: + break; } + logger.log("Only valid nested children inside are: VALID , , and . 
The is not supported yet.",system::ILogger::ELL_ERROR); return false; } diff --git a/include/nbl/ext/MitsubaLoader/CElementShape.h b/include/nbl/ext/MitsubaLoader/CElementShape.h index 23018079a1..1ce78a399b 100644 --- a/include/nbl/ext/MitsubaLoader/CElementShape.h +++ b/include/nbl/ext/MitsubaLoader/CElementShape.h @@ -18,7 +18,7 @@ namespace nbl::ext::MitsubaLoader class CElementShape final : public IElement { public: - enum Type + enum Type : uint8_t { INVALID, CUBE, @@ -34,51 +34,88 @@ class CElementShape final : public IElement //HAIR, //HEIGHTFIELD }; + static inline core::unordered_map compStringToTypeMap() + { + return { + {"cube", CElementShape::Type::CUBE}, + {"sphere", CElementShape::Type::SPHERE}, + {"cylinder", CElementShape::Type::CYLINDER}, + {"rectangle", CElementShape::Type::RECTANGLE}, + {"disk", CElementShape::Type::DISK}, + {"obj", CElementShape::Type::OBJ}, + {"ply", CElementShape::Type::PLY}, + {"serialized", CElementShape::Type::SERIALIZED}, + {"shapegroup", CElementShape::Type::SHAPEGROUP}, + {"instance", CElementShape::Type::INSTANCE}/*, + {"hair", CElementShape::Type::HAIR}, + {"heightfield", CElementShape::Type::HEIGHTFIELD}*/ + }; + } + struct Base { bool flipNormals = false; }; + struct Cube : Base + { + constexpr static inline Type VariantType = Type::CUBE; + }; struct Sphere : Base { - core::vectorSIMDf center = core::vectorSIMDf(0,0,0); + constexpr static inline Type VariantType = Type::SPHERE; + + hlsl::float32_t3 center = {0,0,0}; float radius = 1.f; }; struct Cylinder : Base { - core::vectorSIMDf p0 = core::vectorSIMDf(0,0,0); - core::vectorSIMDf p1 = core::vectorSIMDf(0,0,1); + constexpr static inline Type VariantType = Type::CYLINDER; + + hlsl::float32_t3 p0 = {0,0,0}; + hlsl::float32_t3 p1 = {0,0,1}; float radius = 1.f; }; struct LoadedFromFileBase : Base { - SPropertyElementData filename; + constexpr static inline uint16_t MaxPathLen = 1024u; + + char filename[MaxPathLen]; //! 
Use face normals (any per-vertex normals will be discarded) - bool faceNormals = false; - float maxSmoothAngle = NAN; + bool faceNormals = false; + float maxSmoothAngle = NAN; }; struct Obj : LoadedFromFileBase { + constexpr static inline Type VariantType = Type::OBJ; + bool flipTexCoords = true; bool collapse = false; }; struct Ply : LoadedFromFileBase { - bool flipNormals = false; + constexpr static inline Type VariantType = Type::PLY; + bool srgb = true; }; struct Serialized : LoadedFromFileBase { + constexpr static inline Type VariantType = Type::SERIALIZED; + int32_t shapeIndex; - bool flipNormals; }; + // geometries basically struct ShapeGroup { - _NBL_STATIC_INLINE_CONSTEXPR size_t MaxChildCount = 128u; + constexpr static inline Type VariantType = Type::SHAPEGROUP; + constexpr static inline size_t MaxChildCount = 128u; + size_t childCount = 0u; CElementShape* children[MaxChildCount] = { nullptr }; }; struct Instance { + constexpr static inline Type VariantType = Type::INSTANCE; + CElementShape* parent = nullptr; };/* struct Hair : Base @@ -88,17 +125,27 @@ class CElementShape final : public IElement float angleThreshold = 1.f; float reduction = 0.f; }; - struct HeightField + struct HeightField : Base { SPropertyElementData filename; boolean shadingNormals; - boolean flipNormals; int32_t width; int32_t height; float scale; CElementTexture* texture; };*/ + // + using variant_list_t = core::type_list< + Sphere, + Cylinder, + Obj, + Ply, + Serialized, + ShapeGroup, + Instance + >; + // static AddPropertyMap compAddPropertyMap(); inline CElementShape(const char* id) : IElement(id), type(Type::INVALID), /*toWorldType(IElement::Type::TRANSFORM),*/ transform(), bsdf(nullptr), emitter(nullptr) @@ -116,104 +163,69 @@ class CElementShape final : public IElement { } - inline CElementShape& operator=(const CElementShape& other) + template + inline void visit(Visitor&& visitor) { - IElement::operator=(other); - transform = other.transform; - type = other.type; switch 
(type) { - case Type::CUBE: - cube = other.cube; + case CElementShape::Type::CUBE: + visitor(cube); break; - case Type::SPHERE: - sphere = other.sphere; + case CElementShape::Type::SPHERE: + visitor(sphere); break; - case Type::CYLINDER: - cylinder = other.cylinder; + case CElementShape::Type::CYLINDER: + visitor(cylinder); break; - case Type::RECTANGLE: - rectangle = other.rectangle; + case CElementShape::Type::RECTANGLE: + visitor(rectangle); break; - case Type::DISK: - disk = other.disk; + case CElementShape::Type::DISK: + visitor(disk); break; - case Type::OBJ: - obj = other.obj; + case CElementShape::Type::OBJ: + visitor(obj); break; - case Type::PLY: - ply = other.ply; + case CElementShape::Type::PLY: + visitor(ply); break; - case Type::SERIALIZED: - serialized = other.serialized; + case CElementShape::Type::SERIALIZED: + visitor(serialized); break; - case Type::SHAPEGROUP: - shapegroup = other.shapegroup; + case CElementShape::Type::SHAPEGROUP: + visitor(shapegroup); break; - case Type::INSTANCE: - instance = other.instance; + case CElementShape::Type::INSTANCE: + visitor(instance); break;/* - case Type::HAIR: - hair = other.hair; + case CElementShape::Type::HAIR: + visitor(hair); break; - case Type::HEIGHTFIELD: - heightfield = other.heightfield; + case CElementShape::Type::HEIGHTFIELD: + visitor(heightfield); break;*/ default: break; } - bsdf = other.bsdf; - emitter = other.emitter; - return *this; } - inline CElementShape& operator=(CElementShape&& other) + template + inline void visit(Visitor&& visitor) const { - IElement::operator=(std::move(other)); - std::swap(transform,other.transform); - std::swap(type,other.type); - switch (type) - { - case Type::CUBE: - std::swap(cube,other.cube); - break; - case Type::SPHERE: - std::swap(sphere,other.sphere); - break; - case Type::CYLINDER: - std::swap(cylinder,other.cylinder); - break; - case Type::RECTANGLE: - std::swap(rectangle,other.rectangle); - break; - case Type::DISK: - std::swap(disk,other.disk); - 
break; - case Type::OBJ: - std::swap(obj,other.obj); - break; - case Type::PLY: - std::swap(ply,other.ply); - break; - case Type::SERIALIZED: - std::swap(serialized,other.serialized); - break; - case Type::SHAPEGROUP: - std::swap(shapegroup,other.shapegroup); - break; - case Type::INSTANCE: - std::swap(instance,other.instance); - break;/* - case Type::HAIR: - std::swap(hair,other.hair); - break; - case Type::HEIGHTFIELD: - std::swap(heightfield,other.heightfield); - break;*/ - default: - break; - } - std::swap(bsdf,other.bsdf); - std::swap(emitter,other.emitter); + const_cast(this)->visit([&](T& var)->void + { + visitor(const_cast(var)); + } + ); + } + + inline CElementShape& operator=(const CElementShape& other) + { + IElement::operator=(other); + transform = other.transform; + type = other.type; + IElement::copyVariant(this,&other); + bsdf = other.bsdf; + emitter = other.emitter; return *this; } @@ -247,7 +259,7 @@ class CElementShape final : public IElement } - bool processChildData(IElement* _child, const std::string& name) override; + bool processChildData(IElement* _child, const std::string& name, system::logger_opt_ptr logger) override; // Type type; @@ -261,7 +273,7 @@ class CElementShape final : public IElement };*/ union { - Base cube; + Cube cube; Sphere sphere; Cylinder cylinder; Base rectangle; diff --git a/include/nbl/ext/MitsubaLoader/CElementTexture.h b/include/nbl/ext/MitsubaLoader/CElementTexture.h index ece070785e..8cad6f7ce3 100644 --- a/include/nbl/ext/MitsubaLoader/CElementTexture.h +++ b/include/nbl/ext/MitsubaLoader/CElementTexture.h @@ -245,7 +245,7 @@ class CElementTexture : public IElement inline IElement::Type getType() const override { return ElementType; } inline std::string getLogName() const override { return "texture"; } - bool processChildData(IElement* _child, const std::string& name) override; + bool processChildData(IElement* _child, const std::string& name, system::logger_opt_ptr logger) override; // Type type; diff --git 
a/include/nbl/ext/MitsubaLoader/IElement.h b/include/nbl/ext/MitsubaLoader/IElement.h index 4da3dbb848..f4b9b09ebb 100644 --- a/include/nbl/ext/MitsubaLoader/IElement.h +++ b/include/nbl/ext/MitsubaLoader/IElement.h @@ -55,8 +55,6 @@ class IElement TRANSFORM, ANIMATION }; - public: - std::string id; IElement(const char* _id) : id(_id ? _id:"") {} virtual ~IElement() = default; @@ -66,7 +64,7 @@ class IElement virtual bool onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) = 0; //! default implementation for elements that doesnt have any children - virtual bool processChildData(IElement* _child, const std::string& name) + virtual bool processChildData(IElement* _child, const std::string& name, system::logger_opt_ptr logger) { return !_child; } @@ -198,6 +196,23 @@ class IElement std::array,SNamedPropertyElement::Type::INVALID> byPropertyType = {}; }; + + // members + std::string id; + + protected: + static inline void setLimitedString(const std::string_view memberName, std::span out, SNamedPropertyElement&& _property, const system::logger_opt_ptr logger) + { + auto len = strlen(_property.svalue); + if (len>=out.size()) + logger.log( + "String property assigned to %s is too long, max allowed length %d, is %d, property value: \"%s\"", + system::ILogger::ELL_ERROR,memberName.data(),out.size(),len,_property.svalue + ); + len = std::min(out.size()-1,len); + memcpy(out.data(),_property.svalue,len); + out[len] = 0; + } }; namespace impl diff --git a/include/nbl/ext/MitsubaLoader/ParserUtil.h b/include/nbl/ext/MitsubaLoader/ParserUtil.h index 1d6bd55715..3ac2c6fe4d 100644 --- a/include/nbl/ext/MitsubaLoader/ParserUtil.h +++ b/include/nbl/ext/MitsubaLoader/ParserUtil.h @@ -86,7 +86,7 @@ class ParserManager final CElementFilm, CElementRFilter, CElementSampler, -/// CElementShape, + CElementShape, CElementTransform, /// CElementBSDF, /// CElementTexture, diff --git a/src/nbl/ext/MitsubaLoader/CElementEmissionProfile.cpp 
b/src/nbl/ext/MitsubaLoader/CElementEmissionProfile.cpp index 4fbe7b3f4d..8b0b34e3e4 100644 --- a/src/nbl/ext/MitsubaLoader/CElementEmissionProfile.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementEmissionProfile.cpp @@ -40,7 +40,7 @@ auto CElementEmissionProfile::compAddPropertyMap() -> AddPropertyMap out, SNamedPropertyElement&& _property, const system::logger_opt_ptr logger) -{ - auto len = strlen(_property.svalue); - if (len>=out.size()) - logger.log( - "String property assigned to %s is too long, max allowed length %d, is %d, property value: \"%s\"", - system::ILogger::ELL_ERROR,memberName.data(),out.size(),len,_property.svalue - ); - len = std::min(out.size()-1,len); - memcpy(out.data(),_property.svalue,len); - out[len] = 0; - return true; -} - auto CElementFilm::compAddPropertyMap() -> AddPropertyMap { using this_t = CElementFilm; @@ -118,22 +103,22 @@ auto CElementFilm::compAddPropertyMap() -> AddPropertyMap NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED("variable",STRING,std::is_same,M) { - return setLimitedString("variable",_this->outputFilePath,std::move(_property),logger); + setLimitedString("variable",_this->outputFilePath,std::move(_property),logger); return true; } ); NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY("outputFilePath",STRING) { - return setLimitedString("outputFilePath",_this->outputFilePath,std::move(_property),logger); + setLimitedString("outputFilePath",_this->outputFilePath,std::move(_property),logger); return true; } }); NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY("bloomFilePath",STRING) { - return setLimitedString("bloomFilePath",_this->denoiserTonemapperArgs,std::move(_property),logger); + setLimitedString("bloomFilePath",_this->denoiserTonemapperArgs,std::move(_property),logger); return true; } }); NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY("tonemapper",STRING) { - return setLimitedString("tonemapper",_this->denoiserTonemapperArgs,std::move(_property),logger); + 
setLimitedString("tonemapper",_this->denoiserTonemapperArgs,std::move(_property),logger); return true; } }); diff --git a/src/nbl/ext/MitsubaLoader/CElementShape.cpp b/src/nbl/ext/MitsubaLoader/CElementShape.cpp index 0af5b97172..c6db773afa 100644 --- a/src/nbl/ext/MitsubaLoader/CElementShape.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementShape.cpp @@ -1,213 +1,59 @@ // Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h - #include "nbl/ext/MitsubaLoader/ParserUtil.h" -#include "nbl/ext/MitsubaLoader/CElementFactory.h" +#include "nbl/ext/MitsubaLoader/CElementShape.h" + +#include "nbl/ext/MitsubaLoader/ElementMacros.h" +#include "nbl/type_traits.h" // legacy stuff for `is_any_of` #include -namespace nbl -{ -namespace ext -{ -namespace MitsubaLoader +namespace nbl::ext::MitsubaLoader { -template<> -CElementFactory::return_type CElementFactory::createElement(const char** _atts, ParserManager* _util) +auto CElementShape::compAddPropertyMap() -> AddPropertyMap { - const char* type; - const char* id; - std::string name; - if (!IElement::getTypeIDAndNameStrings(type, id, name, _atts)) - return CElementFactory::return_type(nullptr,""); + using this_t = CElementShape; + AddPropertyMap retval; - static const core::unordered_map StringToType = - { - {"cube", CElementShape::Type::CUBE}, - {"sphere", CElementShape::Type::SPHERE}, - {"cylinder", CElementShape::Type::CYLINDER}, - {"rectangle", CElementShape::Type::RECTANGLE}, - {"disk", CElementShape::Type::DISK}, - {"obj", CElementShape::Type::OBJ}, - {"ply", CElementShape::Type::PLY}, - {"serialized", CElementShape::Type::SERIALIZED}, - {"shapegroup", CElementShape::Type::SHAPEGROUP}, - {"instance", CElementShape::Type::INSTANCE}/*, - {"hair", CElementShape::Type::HAIR}, - {"heightfield", CElementShape::Type::HEIGHTFIELD}*/ - }; - auto found = StringToType.find(type); - if (found==StringToType.end()) 
- { - ParserLog::invalidXMLFileStructure("unknown type"); - _NBL_DEBUG_BREAK_IF(false); - return CElementFactory::return_type(nullptr, ""); - } + // base + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(flipNormals,BOOLEAN,derived_from,Base); + // cube has nothing - CElementShape* obj = _util->objects.construct(id); - if (!obj) - return CElementFactory::return_type(nullptr, ""); + // sphere + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(center,POINT,std::is_same,Sphere); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(radius,FLOAT,is_any_of,Sphere,Cylinder/*,Hair*/); - obj->type = found->second; - // defaults - switch (obj->type) - { - case CElementShape::Type::CUBE: - obj->cube = CElementShape::Base(); - break; - case CElementShape::Type::SPHERE: - obj->sphere = CElementShape::Sphere(); - break; - case CElementShape::Type::CYLINDER: - obj->cylinder = CElementShape::Cylinder(); - break; - case CElementShape::Type::RECTANGLE: - obj->rectangle = CElementShape::Base(); - break; - case CElementShape::Type::DISK: - obj->disk = CElementShape::Base(); - break; - case CElementShape::Type::OBJ: - obj->obj = CElementShape::Obj(); - break; - case CElementShape::Type::PLY: - obj->ply = CElementShape::Ply(); - break; - case CElementShape::Type::SERIALIZED: - obj->serialized = CElementShape::Serialized(); - break; - case CElementShape::Type::SHAPEGROUP: - obj->shapegroup = CElementShape::ShapeGroup(); - break; - case CElementShape::Type::INSTANCE: - obj->instance = CElementShape::Instance(); - break; - default: - break; - } - return CElementFactory::return_type(obj, std::move(name)); -} + // cylinder + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(p0,POINT,std::is_same,Cylinder); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(p1,POINT,std::is_same,Cylinder); + // COMMON: radius -bool CElementShape::addProperty(SNamedPropertyElement&& _property) -{ - 
bool error = false; - auto dispatch = [&](auto func) -> void - { - switch (type) + // LoadedFromFileBase + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED("filename",STRING,derived_from,LoadedFromFileBase) { - case CElementShape::Type::CUBE: - func(cube); - break; - case CElementShape::Type::SPHERE: - func(sphere); - break; - case CElementShape::Type::CYLINDER: - func(cylinder); - break; - case CElementShape::Type::RECTANGLE: - func(rectangle); - break; - case CElementShape::Type::DISK: - func(disk); - break; - case CElementShape::Type::OBJ: - func(obj); - break; - case CElementShape::Type::PLY: - func(ply); - break; - case CElementShape::Type::SERIALIZED: - func(serialized); - break; - case CElementShape::Type::SHAPEGROUP: - func(shapegroup); - break; - case CElementShape::Type::INSTANCE: - func(instance); - break;/* - case CElementShape::Type::HAIR: - func(hair); - break; - case CElementShape::Type::HEIGHTFIELD: - func(heightfield); - break;*/ - default: - error = true; - break; + setLimitedString("filename",_this->serialized.filename,std::move(_property),logger); return true; } - }; + ); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(faceNormals,BOOLEAN,derived_from,LoadedFromFileBase); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(maxSmoothAngle,FLOAT,derived_from,LoadedFromFileBase); -#define SET_PROPERTY_TEMPLATE(MEMBER,PROPERTY_TYPE, ... ) [&]() -> void { \ - dispatch([&](auto& state) -> void { \ - if constexpr (is_any_of::type,__VA_ARGS__>::value) \ - { \ - if (_property.type!=PROPERTY_TYPE) { \ - error = true; \ - return; \ - } \ - state. 
## MEMBER = _property.getProperty(); \ - } \ - }); \ - } + // Obj + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(flipTexCoords,BOOLEAN,std::is_same,Obj); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(collapse,BOOLEAN,std::is_same,Obj); - auto setFlipNormals = SET_PROPERTY_TEMPLATE(flipNormals,SNamedPropertyElement::Type::BOOLEAN, Base,Sphere,Cylinder,Obj,Ply,Serialized/*,Heightfield*/); - auto setCenter = SET_PROPERTY_TEMPLATE(center,SNamedPropertyElement::Type::POINT, Sphere); - auto setRadius = SET_PROPERTY_TEMPLATE(radius,SNamedPropertyElement::Type::FLOAT, Sphere,Cylinder/*,Hair*/); - auto setP0 = SET_PROPERTY_TEMPLATE(p0,SNamedPropertyElement::Type::POINT, Cylinder); - auto setP1 = SET_PROPERTY_TEMPLATE(p1,SNamedPropertyElement::Type::POINT, Cylinder); - auto setFilename = [&]() -> void - { - dispatch([&](auto& state) -> void { - using state_type = std::remove_reference::type; + // Ply + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(srgb,BOOLEAN,std::is_same,Ply); - if constexpr (is_any_of::value) - { - state.filename = std::move(_property); - } - }); - }; - auto setFaceNormals = SET_PROPERTY_TEMPLATE(faceNormals,SNamedPropertyElement::Type::BOOLEAN, Obj,Ply,Serialized); - auto setMaxSmoothAngle = SET_PROPERTY_TEMPLATE(maxSmoothAngle,SNamedPropertyElement::Type::FLOAT, Obj,Ply,Serialized); - auto setFlipTexCoords = SET_PROPERTY_TEMPLATE(flipTexCoords,SNamedPropertyElement::Type::BOOLEAN, Obj); - auto setCollapse = SET_PROPERTY_TEMPLATE(collapse,SNamedPropertyElement::Type::BOOLEAN, Obj); - auto setSRGB = SET_PROPERTY_TEMPLATE(srgb,SNamedPropertyElement::Type::BOOLEAN, Ply); - auto setShapeIndex = SET_PROPERTY_TEMPLATE(shapeIndex,SNamedPropertyElement::Type::INTEGER, Serialized); - //auto setToWorld = SET_PROPERTY_TEMPLATE(toWorld, SNamedPropertyElement::Type::MATRIX, Instance); + // Serialized + 
NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(shapeIndex,INTEGER,std::is_same,Serialized); - const core::unordered_map, core::CaseInsensitiveHash, core::CaseInsensitiveEquals> SetPropertyMap = - { - {"flipNormals", setFlipNormals}, - {"center", setCenter}, - {"radius", setRadius}, - {"p0", setP0}, - {"p1", setP1}, - {"filename", setFilename}, - {"faceNormals", setFaceNormals}, - {"maxSmoothAngle", setMaxSmoothAngle}, - {"flipTexCoords", setFlipTexCoords}, - {"collapse", setCollapse}, - {"srgb", setSRGB}, - {"shapeIndex", setShapeIndex}/*, - {"", set}*/ - }; - - - auto found = SetPropertyMap.find(_property.name); - if (found==SetPropertyMap.end()) - { - _NBL_DEBUG_BREAK_IF(true); - ParserLog::invalidXMLFileStructure("No Integrator can have such property set with name: "+_property.name); - return false; - } - - found->second(); - return !error; + return retval; } -bool CElementShape::processChildData(IElement* _child, const std::string& name) +bool CElementShape::processChildData(IElement* _child, const std::string& name, system::logger_opt_ptr logger) { if (!_child) return true; @@ -217,7 +63,10 @@ bool CElementShape::processChildData(IElement* _child, const std::string& name) { auto tform = static_cast(_child); if (name!="toWorld") + { + logger.log("The nested inside needs to be named \"toWorld\"",system::ILogger::ELL_ERROR); return false; + } //toWorldType = IElement::Type::TRANSFORM; transform = *tform; return true; @@ -237,33 +86,44 @@ bool CElementShape::processChildData(IElement* _child, const std::string& name) switch (type) { case Type::SHAPEGROUP: - if (child->type==Type::INVALID || child->type==Type::SHAPEGROUP) + if (child->type == Type::INVALID || child->type == Type::SHAPEGROUP) + { + logger.log(" cannot be nested inside each other or have INVALID shapes nested inside.",system::ILogger::ELL_ERROR); return false; - if (shapegroup.childCount == ShapeGroup::MaxChildCount) + } + if (shapegroup.childCount==ShapeGroup::MaxChildCount) 
{ - ParserLog::invalidXMLFileStructure("Maximum shape-group children exceeded."); + logger.log("The 's MaxChildCount of %d exceeded!",system::ILogger::ELL_ERROR,ShapeGroup::MaxChildCount); return false; } shapegroup.children[shapegroup.childCount++] = child; return true; - break; case Type::INSTANCE: - if (child->type != Type::SHAPEGROUP) + if (child->type!=Type::SHAPEGROUP) + { + logger.log("Only can be nested inside ",system::ILogger::ELL_ERROR); return false; - instance.parent = child; // yeah I kknow its fucked up, but its the XML child, but Abstract Syntax Tree (or Scene Tree) parent + } + if (instance.parent) + logger.log(" 's parent already set to %s, overriding",system::ILogger::ELL_WARNING,instance.parent->id.c_str()); + instance.parent = child; // yeah I know its messed up, but its the XML child, not the Abstract Syntax Tree (or Scene Tree) parent return true; - break; default: - break; + logger.log("Only and support nesting other s inside",system::ILogger::ELL_ERROR); + return false; } } break; case IElement::Type::BSDF: + if (bsdf) + logger.log("'s BSDF already set to %s, overriding",system::ILogger::ELL_WARNING,bsdf->id.c_str()); bsdf = static_cast(_child); if (bsdf->type != CElementBSDF::Type::INVALID) return true; break; case IElement::Type::EMITTER: + if (emitter) + logger.log("'s Emitter already set to %s, overriding",system::ILogger::ELL_WARNING,emitter->id.c_str()); emitter = static_cast(_child); if (emitter->type != CElementEmitter::Type::INVALID) return true; @@ -271,6 +131,7 @@ bool CElementShape::processChildData(IElement* _child, const std::string& name) default: break; } + logger.log("Invalid or unsupported child with ID %s and Name %s nested inside of ",system::ILogger::ELL_ERROR,_child->id.c_str(),name.c_str()); return false; } @@ -283,6 +144,4 @@ bool CElementShape::onEndTag(CMitsubaMetadata* globalMetadata, system::logger_op return true; } -} -} } \ No newline at end of file diff --git a/src/nbl/ext/MitsubaLoader/CMakeLists.txt 
b/src/nbl/ext/MitsubaLoader/CMakeLists.txt index 7bedf035b8..ef3f326ad5 100644 --- a/src/nbl/ext/MitsubaLoader/CMakeLists.txt +++ b/src/nbl/ext/MitsubaLoader/CMakeLists.txt @@ -33,7 +33,7 @@ set(NBL_EXT_MITSUBA_LOADER_SRC CElementRFilter.cpp CElementSampler.cpp CElementTransform.cpp -# CElementShape.cpp + CElementShape.cpp # CElementBSDF.cpp # CElementTexture.cpp # CElementEmitter.cpp diff --git a/src/nbl/ext/MitsubaLoader/ElementMacros.h b/src/nbl/ext/MitsubaLoader/ElementMacros.h index eea6e97004..6db96f452f 100644 --- a/src/nbl/ext/MitsubaLoader/ElementMacros.h +++ b/src/nbl/ext/MitsubaLoader/ElementMacros.h @@ -37,114 +37,6 @@ template struct derived_from : std::is_base_of {}; } -/* -template<> -CElementFactory::return_type CElementFactory::createElement(const char** _atts, ParserManager* _util) -{ - const char* type; - const char* id; - std::string name; - if (!IElement::getTypeIDAndNameStrings(type, id, name, _atts)) - return CElementFactory::return_type(nullptr, ""); - - static const core::unordered_map StringToType = - { - {"independent", CElementSampler::Type::INDEPENDENT}, - {"stratified", CElementSampler::Type::STRATIFIED}, - {"ldsampler", CElementSampler::Type::LDSAMPLER}, - {"halton", CElementSampler::Type::HALTON}, - {"hammersley", CElementSampler::Type::HAMMERSLEY}, - {"sobol", CElementSampler::Type::SOBOL} - }; - - auto found = StringToType.find(type); - if (found==StringToType.end()) - { - ParserLog::invalidXMLFileStructure("unknown type"); - _NBL_DEBUG_BREAK_IF(false); - return CElementFactory::return_type(nullptr, ""); - } - - CElementSampler* obj = _util->objects.construct(id); - if (!obj) - return CElementFactory::return_type(nullptr, ""); - - obj->type = found->second; - obj->sampleCount = 4; - //validation - switch (obj->type) - { - case CElementSampler::Type::STRATIFIED: - [[fallthrough]]; - case CElementSampler::Type::LDSAMPLER: - obj->dimension = 4; - break; - case CElementSampler::Type::HALTON: - [[fallthrough]]; - case 
CElementSampler::Type::HAMMERSLEY: - obj->scramble = -1; - break; - case CElementSampler::Type::SOBOL: - obj->scramble = 0; - break; - default: - break; - } - return CElementFactory::return_type(obj, std::move(name)); -} - -bool CElementSampler::addProperty(SNamedPropertyElement&& _property) -{ - if (_property.type == SNamedPropertyElement::Type::INTEGER && - _property.name == "sampleCount") - { - sampleCount = _property.ivalue; - switch (type) - { - case Type::STRATIFIED: - sampleCount = ceilf(sqrtf(sampleCount)); - break; - case Type::LDSAMPLER: - //sampleCount = core::roundUpToPoT(sampleCount); - break; - default: - break; - } - } - else - if (_property.type == SNamedPropertyElement::Type::INTEGER && - _property.name == "dimension") - { - dimension = _property.ivalue; - if (type == Type::INDEPENDENT || type == Type::HALTON || type == Type::HAMMERSLEY) - { - ParserLog::invalidXMLFileStructure("this sampler type does not take these parameters"); - _NBL_DEBUG_BREAK_IF(true); - return false; - } - } - else - if (_property.type == SNamedPropertyElement::Type::INTEGER && - _property.name == "scramble") - { - scramble = _property.ivalue; - if (type==Type::INDEPENDENT || type==Type::STRATIFIED || type == Type::LDSAMPLER) - { - ParserLog::invalidXMLFileStructure("this sampler type does not take these parameters"); - _NBL_DEBUG_BREAK_IF(true); - return false; - } - } - else - { - _NBL_DEBUG_BREAK_IF(true); - return false; - } - - return true; -} -*/ - #define NBL_EXT_MITSUBA_LOADER_ELEMENT_INVALID_TYPE_CHECK(NON_FATAL) if (type==Type::INVALID) \ { \ invalidXMLFileStructure(logger,getLogName()+": type not specified"); \ diff --git a/src/nbl/ext/MitsubaLoader/ParserUtil.cpp b/src/nbl/ext/MitsubaLoader/ParserUtil.cpp index 9646fca073..6f1af1e301 100644 --- a/src/nbl/ext/MitsubaLoader/ParserUtil.cpp +++ b/src/nbl/ext/MitsubaLoader/ParserUtil.cpp @@ -9,7 +9,7 @@ #include "nbl/ext/MitsubaLoader/CElementFilm.h" #include "nbl/ext/MitsubaLoader/CElementRFilter.h" #include 
"nbl/ext/MitsubaLoader/CElementSampler.h" -//#include "nbl/ext/MitsubaLoader/CElementShape.h" +#include "nbl/ext/MitsubaLoader/CElementShape.h" #include "nbl/ext/MitsubaLoader/CElementTransform.h" //#include "nbl/ext/MitsubaLoader/CElementAnimation.h" //#include "nbl/ext/MitsubaLoader/CElementBSDF.h" @@ -308,7 +308,7 @@ void ParserManager::XMLContext::onEnd(const char* _el) if (!elements.empty()) { IElement* parent = elements.top().element; - if (parent && !parent->processChildData(element.element,element.name)) + if (parent && !parent->processChildData(element.element,element.name,session->params->logger)) { if (element.element) killParseWithError(element.element->getLogName()+" could not processChildData with name: "+element.name); @@ -417,7 +417,7 @@ ParserManager::ParserManager() : propertyElements({ {"film", {.create=ParserManager::CreateElement::__call,.retvalGoesOnStack=true}}, {"rfilter", {.create=ParserManager::CreateElement::__call,.retvalGoesOnStack=true}}, {"sampler", {.create=ParserManager::CreateElement::__call,.retvalGoesOnStack=true}}, -// {"shape", {.create=ParserManager::CreateElement::__call,.retvalGoesOnStack=true}}, + {"shape", {.create=ParserManager::CreateElement::__call,.retvalGoesOnStack=true}}, {"transform", {.create=ParserManager::CreateElement::__call,.retvalGoesOnStack=true}}, // {"animation", {.create=ParserManager::CreateElement::__call,.retvalGoesOnStack=true}}, // {"bsdf", {.create=ParserManager::CreateElement::__call,.retvalGoesOnStack=true}}, @@ -432,6 +432,7 @@ ParserManager::ParserManager() : propertyElements({ CElementFilm::compAddPropertyMap(), CElementRFilter::compAddPropertyMap(), CElementSampler::compAddPropertyMap(), + CElementShape::compAddPropertyMap(), CElementTransform::compAddPropertyMap(), CElementEmissionProfile::compAddPropertyMap() }) { } From e29bb56762c6c9c9ef5719352e0c8352edd270d7 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 22 Dec 2025 19:10:53 -0300 Subject: [PATCH 307/472] Adds a flag to NSC to 
support preprocessing shaders and storing the result --- tools/nsc/main.cpp | 52 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 45 insertions(+), 7 deletions(-) diff --git a/tools/nsc/main.cpp b/tools/nsc/main.cpp index c4ce43b326..edc56de84c 100644 --- a/tools/nsc/main.cpp +++ b/tools/nsc/main.cpp @@ -153,6 +153,7 @@ class ShaderCompiler final : public system::IApplicationFramework }); }; + auto preprocessOnly = findOutputFlag("-P") != m_arguments.end(); auto output_flag_pos_fc = findOutputFlag("-Fc"); auto output_flag_pos_fo = findOutputFlag("-Fo"); if (output_flag_pos_fc != m_arguments.end() && output_flag_pos_fo != m_arguments.end()) { @@ -195,7 +196,8 @@ class ShaderCompiler final : public system::IApplicationFramework return false; } - m_logger->log("Compiled shader code will be saved to " + output_filepath, ILogger::ELL_INFO); + std::string outputType = preprocessOnly ? "Preprocessed" : "Compiled"; + m_logger->log(outputType + " shader code will be saved to " + output_filepath, ILogger::ELL_INFO); } #ifndef NBL_EMBED_BUILTIN_RESOURCES @@ -227,13 +229,27 @@ class ShaderCompiler final : public system::IApplicationFramework } auto start = std::chrono::high_resolution_clock::now(); - auto compilation_result = compile_shader(shader.get(), shaderStage, file_to_compile); + smart_refctd_ptr compilation_result; + std::string preprocessing_result; + std::string_view result_view; + if (preprocessOnly) + { + preprocessing_result = preprocess_shader(shader.get(), shaderStage, file_to_compile); + result_view = preprocessing_result; + } + else + { + compilation_result = compile_shader(shader.get(), shaderStage, file_to_compile); + result_view = { (const char*)compilation_result->getContent()->getPointer(), compilation_result->getContent()->getSize() }; + } auto end = std::chrono::high_resolution_clock::now(); - // writie compiled shader to file as bytes - if (compilation_result) + // write compiled/preprocessed shader to file as bytes + std::string 
operationType = preprocessOnly ? "preprocessing" : "compilation"; + const bool success = preprocessOnly ? preprocessing_result != std::string{} : bool(compilation_result); + if (success) { - m_logger->log("Shader compilation successful.", ILogger::ELL_INFO); + m_logger->log("Shader " + operationType + " successful.", ILogger::ELL_INFO); const auto took = std::to_string(std::chrono::duration_cast(end - start).count()); m_logger->log("Took %s ms.", ILogger::ELL_PERFORMANCE, took.c_str()); { @@ -258,7 +274,7 @@ class ShaderCompiler final : public system::IApplicationFramework return false; } - output_file.write((const char*)compilation_result->getContent()->getPointer(), compilation_result->getContent()->getSize()); + output_file.write(result_view.data(), result_view.size()); if (output_file.fail()) { @@ -279,7 +295,7 @@ class ShaderCompiler final : public system::IApplicationFramework } else { - m_logger->log("Shader compilation failed.", ILogger::ELL_ERROR); + m_logger->log("Shader " + operationType + " failed.", ILogger::ELL_ERROR); return false; } } @@ -291,6 +307,28 @@ class ShaderCompiler final : public system::IApplicationFramework private: + std::string preprocess_shader(const IShader* shader, hlsl::ShaderStage shaderStage, std::string_view sourceIdentifier) { + smart_refctd_ptr hlslcompiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + + CHLSLCompiler::SPreprocessorOptions options = {}; + options.sourceIdentifier = sourceIdentifier; + options.logger = m_logger.get(); + + auto includeFinder = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + auto includeLoader = includeFinder->getDefaultFileSystemLoader(); + + // because before real compilation we do preprocess the input it doesn't really matter we proxy include search direcotries further with dxcOptions since at the end all includes are resolved to single file + for (const auto& it : m_include_search_paths) + includeFinder->addSearchPath(it, includeLoader); + + options.includeFinder = 
includeFinder.get(); + + const char* code_ptr = (const char*)shader->getContent()->getPointer(); + std::string_view code({ code_ptr, strlen(code_ptr)}); + + return hlslcompiler->preprocessShader(std::string(code), shaderStage, options, nullptr); + } + core::smart_refctd_ptr compile_shader(const IShader* shader, hlsl::ShaderStage shaderStage, std::string_view sourceIdentifier) { smart_refctd_ptr hlslcompiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); From 49ffbc62a70bc27d09d69e20a63aec5cf3c5e402 Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 23 Dec 2025 00:57:36 +0100 Subject: [PATCH 308/472] implement `CElementTexture` and `CElementEmitter`, also refactor the ElementMacros.h --- .../nbl/ext/MitsubaLoader/CElementEmitter.h | 240 ++++------- .../nbl/ext/MitsubaLoader/CElementTexture.h | 108 ++--- src/nbl/ext/MitsubaLoader/CElementEmitter.cpp | 407 +++++++----------- src/nbl/ext/MitsubaLoader/CElementTexture.cpp | 324 ++++---------- src/nbl/ext/MitsubaLoader/ElementMacros.h | 36 +- 5 files changed, 417 insertions(+), 698 deletions(-) diff --git a/include/nbl/ext/MitsubaLoader/CElementEmitter.h b/include/nbl/ext/MitsubaLoader/CElementEmitter.h index 04833986d1..8eaebcae74 100644 --- a/include/nbl/ext/MitsubaLoader/CElementEmitter.h +++ b/include/nbl/ext/MitsubaLoader/CElementEmitter.h @@ -22,7 +22,7 @@ class CElementEmitter : public IElement INVALID, POINT, AREA, - SPOT, + //SPOT, // deprecated, use POINT with an IES profile instead! 
DIRECTIONAL, COLLIMATED, //SKY, @@ -31,43 +31,67 @@ class CElementEmitter : public IElement ENVMAP, CONSTANT }; + static inline core::unordered_map compStringToTypeMap() + { + return { + {"point", CElementEmitter::Type::POINT}, + {"area", CElementEmitter::Type::AREA}, + {"directional", CElementEmitter::Type::DIRECTIONAL}, + {"collimated", CElementEmitter::Type::COLLIMATED},/* + {"sky", CElementEmitter::Type::SKY}, + {"sun", CElementEmitter::Type::SUN}, + {"sunsky", CElementEmitter::Type::SUNSKY},*/ + {"envmap", CElementEmitter::Type::ENVMAP}, + {"constant", CElementEmitter::Type::CONSTANT} + }; + } struct SampledEmitter { - SampledEmitter() : samplingWeight(1.f) {} + inline SampledEmitter() : samplingWeight(1.f) {} float samplingWeight; }; - struct Point : SampledEmitter - { - core::vectorSIMDf intensity = core::vectorSIMDf(1.f); // Watts Steradian^-1 - }; - struct Area : SampledEmitter + struct DeltaDistributionEmitter : SampledEmitter + { + // Watts + hlsl::float32_t3 intensity = {1.f,1.f,1.f}; + }; + struct SolidAngleEmitter : SampledEmitter + { + // Watts Steradian^-1 + hlsl::float32_t3 radiance = {1.f,1.f,1.f}; + }; + struct EmissionProfileEmitter + { + CElementEmissionProfile* emissionProfile = nullptr; + }; + struct Point : DeltaDistributionEmitter, EmissionProfileEmitter { - core::vectorSIMDf radiance = core::vectorSIMDf(1.f); // Watts Meter^-2 Steradian^-1 - CElementEmissionProfile* emissionProfile = nullptr; + constexpr static inline Type VariantType = Type::POINT; }; - struct Spot : SampledEmitter + struct Area : SolidAngleEmitter, EmissionProfileEmitter { - core::vectorSIMDf intensity = core::vectorSIMDf(1.f); // Watts Steradian^-1 - float cutoffAngle = 20.f; // degrees, its the cone half-angle - float beamWidth = NAN; - CElementTexture* texture = nullptr; + constexpr static inline Type VariantType = Type::AREA; }; struct Directional : SampledEmitter { - core::vectorSIMDf irradiance = core::vectorSIMDf(1.f); // Watts Meter^-2 + constexpr static inline 
Type VariantType = Type::DIRECTIONAL; + + hlsl::float32_t3 irradiance = {1.f,1.f,1.f}; // Watts Meter^-2 }; struct Collimated : SampledEmitter { - core::vectorSIMDf power = core::vectorSIMDf(1.f); // Watts + constexpr static inline Type VariantType = Type::COLLIMATED; + + hlsl::float32_t3 power = {1.f,1.f,1.f}; // Watts };/* struct Sky : SampledEmitter { float turbidity = 3.f; - core::vectorSIMDf albedo = core::vectorSIMDf(0.15f); - core::vectorSIMDf sunDirection = calculate default from tokyo japan at 15:00 on 10.07.2010; + hlsl::float32_t3 albedo = {0.15f,0.15f,0.15f}; + hlsl::float32_t3 sunDirection = calculate default from tokyo japan at 15:00 on 10.07.2010; float stretch = 1.f; // must be [1,2] int32_t resolution = 512; float scale = 1.f; @@ -75,7 +99,7 @@ class CElementEmitter : public IElement struct Sun : SampledEmitter { float turbidity = 3.f; - core::vectorSIMDf sunDirection = calculate default from tokyo japan at 15:00 on 10.07.2010; + hlsl::float32_t3 sunDirection = calculate default from tokyo japan at 15:00 on 10.07.2010; int32_t resolution = 512; float scale = 1.f; float sunRadiusScale = 1.f; @@ -87,16 +111,31 @@ class CElementEmitter : public IElement };*/ struct EnvMap : SampledEmitter { - SPropertyElementData filename; // TODO: make sure destructor runs - float scale = 1.f; - float gamma = NAN; + constexpr static inline Type VariantType = Type::ENVMAP; + constexpr static inline uint16_t MaxPathLen = 1024u; + + char filename[MaxPathLen]; + float scale = 1.f; + float gamma = NAN; //bool cache = false; }; - struct Constant : SampledEmitter + struct Constant : SolidAngleEmitter { - core::vectorSIMDf radiance = core::vectorSIMDf(1.f); // Watts Meter^-2 Steradian^-1 + constexpr static inline Type VariantType = Type::CONSTANT; }; + // + using variant_list_t = core::type_list< + Point, + Area, + Directional, + Collimated, +// Sky, +// Sun, +// SunSky, + EnvMap, + Constant + >; // static AddPropertyMap compAddPropertyMap(); @@ -109,97 +148,62 @@ class 
CElementEmitter : public IElement { operator=(other); } - inline CElementEmitter(CElementEmitter&& other) : IElement(""), transform() - { - operator=(std::move(other)); - } virtual ~CElementEmitter() { } - inline CElementEmitter& operator=(const CElementEmitter& other) + template + inline void visit(Visitor&& func) { - IElement::operator=(other); - transform = other.transform; - type = other.type; switch (type) { case Type::POINT: - point = other.point; + func(point); break; case Type::AREA: - area = other.area; - break; - case Type::SPOT: - spot = other.spot; + func(area); break; case Type::DIRECTIONAL: - directional = other.directional; + func(directional); break; case Type::COLLIMATED: - collimated = other.collimated; + func(collimated); break;/* case Type::SKY: - sky = other.sky; + func(sky); break; case Type::SUN: - sun = other.sun; + func(sun); break; case Type::SUNSKY: - sunsky = other.sunsky; + func(sunsky); break;*/ case Type::ENVMAP: - envmap = other.envmap; + func(envmap); break; case Type::CONSTANT: - constant = other.constant; + func(constant); break; default: break; } - return *this; + } + template + inline void visit(Visitor&& visitor) const + { + const_cast(this)->visit([&](T& var)->void + { + visitor(const_cast(var)); + } + ); } - inline CElementEmitter& operator=(CElementEmitter&& other) + inline CElementEmitter& operator=(const CElementEmitter& other) { - IElement::operator=(std::move(other)); - std::swap(transform,other.transform); - std::swap(type,other.type); - switch (type) - { - case Type::POINT: - std::swap(point,other.point); - break; - case Type::AREA: - std::swap(area,other.area); - break; - case Type::SPOT: - std::swap(spot,other.spot); - break; - case Type::DIRECTIONAL: - std::swap(directional,other.directional); - break; - case Type::COLLIMATED: - std::swap(collimated,other.collimated); - break;/* - case Type::SKY: - sky,other.sky; - break; - case Type::SUN: - sun,other.sun; - break; - case Type::SUNSKY: - sunsky,other.sunsky; - 
break;*/ - case Type::ENVMAP: - std::swap(envmap,other.envmap); - break; - case Type::CONSTANT: - std::swap(constant,other.constant); - break; - default: - break; - } + IElement::operator=(other); + transform = other.transform; + type = other.type; + IElement::copyVariant(this,&other); return *this; } @@ -209,72 +213,7 @@ class CElementEmitter : public IElement inline IElement::Type getType() const override { return ElementType; } std::string getLogName() const override { return "emitter"; } - bool processChildData(IElement* _child, const std::string& name, system::logger_opt_ptr logger) override - { - if (!_child) - return true; - switch (_child->getType()) - { - case IElement::Type::TRANSFORM: - { - auto tform = static_cast(_child); - if (name!="toWorld") - return false; - //toWorldType = IElement::Type::TRANSFORM; - switch (type) - { - case Type::POINT: - [[fallthrough]]; - case Type::SPOT: - [[fallthrough]]; - case Type::DIRECTIONAL: - [[fallthrough]]; - case Type::COLLIMATED: - [[fallthrough]]; - case Type::AREA: - [[fallthrough]]; - /* - case Type::SKY: - [[fallthrough]]; - case Type::SUN: - [[fallthrough]]; - case Type::SUNSKY: - [[fallthrough]];*/ - case Type::ENVMAP: - transform = *tform; - return true; - default: - break; - } - return false; - } - break;/* - case IElement::Type::ANIMATION: - auto anim = static_cast(_child); - if (anim->name!="toWorld") - return false; - toWorlType = IElement::Type::ANIMATION; - animation = anim; - return true; - break;*/ - case IElement::Type::EMISSION_PROFILE: { - if (type == Type::AREA) { - area.emissionProfile = static_cast(_child); - return true; - } - return false; - } - case IElement::Type::TEXTURE: - if (type!=SPOT || name!="texture") - return false; - spot.texture = static_cast(_child); - return true; - break; - default: - break; - } - return false; - } + bool processChildData(IElement* _child, const std::string& name, system::logger_opt_ptr logger) override; // Type type; @@ -290,7 +229,6 @@ class CElementEmitter 
: public IElement { Point point; Area area; - Spot spot; Directional directional; Collimated collimated;/* Sky sky; diff --git a/include/nbl/ext/MitsubaLoader/CElementTexture.h b/include/nbl/ext/MitsubaLoader/CElementTexture.h index 8cad6f7ce3..5381920a38 100644 --- a/include/nbl/ext/MitsubaLoader/CElementTexture.h +++ b/include/nbl/ext/MitsubaLoader/CElementTexture.h @@ -105,8 +105,19 @@ class CElementTexture : public IElement //WIREFRAME, //CURVATURE }; + static inline core::unordered_map compStringToTypeMap() + { + return { + {"bitmap", CElementTexture::Type::BITMAP}, + {"scale", CElementTexture::Type::SCALE} + }; + } + struct Bitmap { + constexpr static inline Type VariantType = Type::BITMAP; + constexpr static inline uint16_t MaxPathLen = 1024u; + enum WRAP_MODE { REPEAT, @@ -133,18 +144,18 @@ class CElementTexture : public IElement Z*/ }; - SPropertyElementData filename; // TODO: make sure destructor runs - WRAP_MODE wrapModeU = REPEAT; - WRAP_MODE wrapModeV = REPEAT; - float gamma = NAN; + char filename[MaxPathLen]; + WRAP_MODE wrapModeU = REPEAT; + WRAP_MODE wrapModeV = REPEAT; + float gamma = NAN; FILTER_TYPE filterType = EWA; - float maxAnisotropy = 20.f; + float maxAnisotropy = 20.f; //bool cache = false; - float uoffset = 0.f; - float voffset = 0.f; - float uscale = 1.f; - float vscale = 1.f; - CHANNEL channel = INVALID; + float uoffset = 0.f; + float voffset = 0.f; + float uscale = 1.f; + float vscale = 1.f; + CHANNEL channel = INVALID; }; struct MetaTexture { @@ -152,9 +163,14 @@ class CElementTexture : public IElement }; struct Scale : MetaTexture { - float scale; + constexpr static inline Type VariantType = Type::SCALE; + + // only monochrome scaling for now! 
+ float scale = 1.f; }; + // + using variant_list_t = core::type_list; // static AddPropertyMap compAddPropertyMap(); @@ -166,76 +182,40 @@ class CElementTexture : public IElement { operator=(other); } - inline CElementTexture(CElementTexture&& other) : CElementTexture("") - { - operator=(std::move(other)); - } inline virtual ~CElementTexture() { } - - inline CElementTexture& operator=(const CElementTexture& other) + + template + inline void visit(Visitor&& func) { - IElement::operator=(other); - type = other.type; switch (type) { case CElementTexture::Type::BITMAP: - bitmap = other.bitmap; + func(bitmap); break; - //case CElementTexture::Type::CHECKERBOARD: - //checkerboard = CheckerBoard(); - //break; - //case CElementTexture::Type::GRID: - //grid = Grid(); - //break; case CElementTexture::Type::SCALE: - scale = other.scale; + func(scale); break; - //case CElementTexture::Type::VERTEXCOLOR: - //vertexcolor = VertexColor(); - //break; - //case CElementTexture::Type::WIREFRAME: - //wireframe = Wireframe(); - //break; - //case CElementTexture::Type::CURVATURE: - //curvature = Curvature(); - //break; default: break; } - return *this; } - inline CElementTexture& operator=(CElementTexture&& other) + template + inline void visit(Visitor&& visitor) const + { + const_cast(this)->visit([&](T& var)->void + { + visitor(const_cast(var)); + } + ); + } + + inline CElementTexture& operator=(const CElementTexture& other) { IElement::operator=(other); type = other.type; - switch (type) - { - case CElementTexture::Type::BITMAP: - std::swap(bitmap,other.bitmap); - break; - //case CElementTexture::Type::CHECKERBOARD: - //checkerboard = CheckerBoard(); - //break; - //case CElementTexture::Type::GRID: - //grid = Grid(); - //break; - case CElementTexture::Type::SCALE: - std::swap(scale,other.scale); - break; - //case CElementTexture::Type::VERTEXCOLOR: - //vertexcolor = VertexColor(); - //break; - //case CElementTexture::Type::WIREFRAME: - //wireframe = Wireframe(); - //break; - //case 
CElementTexture::Type::CURVATURE: - //curvature = Curvature(); - //break; - default: - break; - } + IElement::copyVariant(this,&other); return *this; } diff --git a/src/nbl/ext/MitsubaLoader/CElementEmitter.cpp b/src/nbl/ext/MitsubaLoader/CElementEmitter.cpp index 1a0389c684..c7115dfcef 100644 --- a/src/nbl/ext/MitsubaLoader/CElementEmitter.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementEmitter.cpp @@ -1,287 +1,206 @@ // Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h - #include "nbl/ext/MitsubaLoader/ParserUtil.h" -#include "nbl/ext/MitsubaLoader/CElementFactory.h" +#include "nbl/ext/MitsubaLoader/CElementEmitter.h" + +#include "nbl/ext/MitsubaLoader/ElementMacros.h" +#include "nbl/type_traits.h" // legacy stuff for `is_any_of` #include -namespace nbl -{ -namespace ext -{ -namespace MitsubaLoader -{ +#include "nbl/builtin/hlsl/math/linalg/transform.hlsl" +#include "glm/gtc/matrix_transform.hpp" -template<> -CElementFactory::return_type CElementFactory::createElement(const char** _atts, ParserManager* _util) + +namespace nbl::ext::MitsubaLoader { - const char* type; - const char* id; - std::string name; - if (!IElement::getTypeIDAndNameStrings(type, id, name, _atts)) - return CElementFactory::return_type(nullptr,""); - static const core::unordered_map StringToType = - { - {"point", CElementEmitter::Type::POINT}, - {"area", CElementEmitter::Type::AREA}, - {"spot", CElementEmitter::Type::SPOT}, - {"directional", CElementEmitter::Type::DIRECTIONAL}, - {"collimated", CElementEmitter::Type::COLLIMATED},/* - {"sky", CElementEmitter::Type::SKY}, - {"sun", CElementEmitter::Type::SUN}, - {"sunsky", CElementEmitter::Type::SUNSKY},*/ - {"envmap", CElementEmitter::Type::ENVMAP}, - {"constant", CElementEmitter::Type::CONSTANT} - }; +auto CElementEmitter::compAddPropertyMap() -> AddPropertyMap +{ + using this_t = CElementEmitter; + AddPropertyMap 
retval; - auto found = StringToType.find(type); - if (found==StringToType.end()) - { - ParserLog::invalidXMLFileStructure("unknown type"); - _NBL_DEBUG_BREAK_IF(false); - return CElementFactory::return_type(nullptr, ""); + // funky transform setting + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY("position",POINT) + { + if (_this->type!=Type::POINT && _this->type!=Type::COLLIMATED) + return false; + for (auto r=0; r<3; r++) + _this->transform.matrix[r][3] = _property.vvalue[r]; + return true; + } + }); + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY("direction",VECTOR) + { + // for point lights direction gets concatenated with IES rotation + if (_this->type!=Type::POINT && _this->type!=Type::DIRECTIONAL && _this->type!=Type::COLLIMATED) + return false; + hlsl::float32_t3 up = {0,0,0}; + { + uint32_t index = 0u; + { + float maxDot = std::abs(_property.vvalue[0]); + for (auto i=1u; i<3u; i++) + { + float thisAbs = std::abs(_property.vvalue[i]); + if (thisAbs < maxDot) + { + maxDot = thisAbs; + index = i; + } + } + } + up[index] = hlsl::sign(_property.vvalue[index]); + } + // TODO: check if correct + const hlsl::float32_t3 target = (-_property.vvalue).xyz; + // TODO: after the rm-core matrix PR we need to get rid of the tranpose (I transpose only because of GLM and HLSL mixup) + const auto lookAtGLM = reinterpret_cast(glm::lookAtRH({},target,up)); + const auto lookAt = hlsl::transpose(lookAtGLM); + // turn lookat into a rotation matrix + const auto rotation = hlsl::inverse(hlsl::float32_t3x3(lookAt)); + _NBL_DEBUG_BREAK_IF(true); // no idea if matrix is correct + for (auto r=0; r<3; r++) + _this->transform.matrix[r].xyz = rotation[r]; + return true; + } + }); + + // spectrum setting +#define ADD_SPECTRUM(MEMBER,CONSTRAINT,...) { \ + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED(MEMBER,FLOAT,CONSTRAINT __VA_OPT__(,) __VA_ARGS__) \ + state. ## MEMBER.x = state. ## MEMBER.y = state. 
## MEMBER.z = _property.getProperty(); \ + success = true; \ + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED_END; \ + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED(MEMBER,RGB,CONSTRAINT __VA_OPT__(,) __VA_ARGS__) \ + state. ## MEMBER = _property.getProperty(); \ + success = true; \ + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED_END; \ + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED(MEMBER,SRGB,CONSTRAINT __VA_OPT__(,) __VA_ARGS__) \ + state. ## MEMBER = _property.getProperty(); \ + success = true; \ + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED_END; \ + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED(MEMBER,SPECTRUM,CONSTRAINT __VA_OPT__(,) __VA_ARGS__) \ + state. ## MEMBER = _property.getProperty(); \ + success = true; \ + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED_END; \ } - CElementEmitter* obj = _util->objects.construct(id); - if (!obj) - return CElementFactory::return_type(nullptr, ""); + // base + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(samplingWeight,FLOAT,derived_from,SampledEmitter); - obj->type = found->second; - // defaults - switch (obj->type) - { - case CElementEmitter::Type::POINT: - obj->point = CElementEmitter::Point(); - break; - case CElementEmitter::Type::AREA: - obj->area = CElementEmitter::Area(); - break; - case CElementEmitter::Type::SPOT: - obj->spot = CElementEmitter::Spot(); - break; - case CElementEmitter::Type::DIRECTIONAL: - obj->directional = CElementEmitter::Directional(); - break; - case CElementEmitter::Type::COLLIMATED: - obj->collimated = CElementEmitter::Collimated(); - break;/* - case CElementEmitter::Type::SKY: - obj->sky = CElementEmitter::Sky(); - break; - case CElementEmitter::Type::SUN: - obj->ply = CElementEmitter::Sun(); - break; - case CElementEmitter::Type::SUNSKY: - obj->serialized = CElementEmitter::SunSky(); - break;*/ - case 
CElementEmitter::Type::ENVMAP: - obj->envmap = CElementEmitter::EnvMap(); - break; - case CElementEmitter::Type::CONSTANT: - obj->constant = CElementEmitter::Constant(); - break; - default: - break; - } - return CElementFactory::return_type(obj, std::move(name)); -} + // delta + ADD_SPECTRUM(intensity,derived_from,DeltaDistributionEmitter); + // point covered by delta -bool CElementEmitter::addProperty(SNamedPropertyElement&& _property) -{ - bool error = false; - auto dispatch = [&](auto func) -> void - { - switch (type) - { - case Type::POINT: - func(point); - break; - case Type::AREA: - func(area); - break; - case Type::SPOT: - func(spot); - break; - case Type::DIRECTIONAL: - func(directional); - break; - case Type::COLLIMATED: - func(collimated); - break;/* - case Type::SKY: - func(sky); - break; - case Type::SUN: - func(sun); - break; - case Type::SUNSKY: - func(sunsky); - break;*/ - case Type::ENVMAP: - func(envmap); - break; - case Type::CONSTANT: - func(constant); - break; - default: - error = true; - break; - } - }; + // non zero solid angle + ADD_SPECTRUM(radiance,derived_from,SolidAngleEmitter); + // area covered by solid angle -#define SET_PROPERTY_TEMPLATE(MEMBER,PROPERTY_TYPE, ... ) [&]() -> void { \ - dispatch([&](auto& state) -> void { \ - if constexpr (is_any_of::type,__VA_ARGS__>::value) \ - { \ - if (_property.type!=PROPERTY_TYPE) { \ - error = true; \ - return; \ - } \ - state. ## MEMBER = _property.getProperty(); \ - } \ - }); \ - } -#define SET_SPECTRUM(MEMBER, ... ) [&]() -> void { \ - dispatch([&](auto& state) -> void { \ - if constexpr (is_any_of::type,__VA_ARGS__>::value) \ - { \ - switch (_property.type) { \ - case SPropertyElementData::Type::FLOAT: \ - state. ## MEMBER.x = state. ## MEMBER.y = state. ## MEMBER.z = _property.getProperty(); \ - break; \ - case SPropertyElementData::Type::RGB: \ - state. ## MEMBER = _property.getProperty(); \ - break; \ - case SPropertyElementData::Type::SRGB: \ - state. 
## MEMBER = _property.getProperty(); \ - break; \ - case SPropertyElementData::Type::SPECTRUM: \ - state. ## MEMBER = _property.getProperty(); \ - break; \ - default: \ - error = true; \ - break; \ - } \ - } \ - }); \ - } + // directional + ADD_SPECTRUM(irradiance,std::is_same,Directional); - auto setSamplingWeight = SET_PROPERTY_TEMPLATE(samplingWeight, SNamedPropertyElement::Type::FLOAT, Point,Area,Spot,Directional,Collimated,/*Sky,Sun,SunSky,*/EnvMap,Constant); - auto setIntensity = SET_SPECTRUM(intensity, Point,Spot); - auto setPosition = [&]() -> void { - if (_property.type!=SNamedPropertyElement::Type::POINT || type!=Type::POINT) - { - error = true; - return; - } - transform.matrix.setTranslation(_property.vvalue); - }; - auto setRadiance = SET_SPECTRUM(radiance, Area,Constant); - auto setCutoffAngle = SET_PROPERTY_TEMPLATE(cutoffAngle, SNamedPropertyElement::Type::FLOAT, Spot); - auto setBeamWidth = SET_PROPERTY_TEMPLATE(beamWidth, SNamedPropertyElement::Type::FLOAT, Spot); - auto setDirection = [&]() -> void { - if (_property.type != SNamedPropertyElement::Type::VECTOR || type != Type::DIRECTIONAL) - { - error = true; - return; - } - core::vectorSIMDf up(0.f); - float maxDot = _property.vvalue[0]; - uint32_t index = 0u; - for (auto i=1u; i<3u; i++) - if (_property.vvalue[i] < maxDot) + // collimated + ADD_SPECTRUM(power,std::is_same,Collimated); + + // environment map + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED("filename",STRING,std::is_same,EnvMap) { - maxDot = _property.vvalue[i]; - index = i; + setLimitedString("filename",_this->envmap.filename,std::move(_property),logger); return true; } - up[index] = 1.f; - // hope it works - core::matrix3x4SIMD tmp; - core::matrix3x4SIMD::buildCameraLookAtMatrixRH(core::vectorSIMDf(),-_property.vvalue,up).getInverse(tmp); - transform.matrix = core::matrix4SIMD(tmp); - _NBL_DEBUG_BREAK_IF(true); // no idea if matrix is correct - }; - auto setPower = SET_SPECTRUM(power, Collimated); - auto setFilename = 
[&]() -> void - { - dispatch([&](auto& state) -> void { - using state_type = std::remove_reference::type; + ); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(scale,FLOAT,std::is_same,EnvMap); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(gamma,FLOAT,std::is_same,EnvMap); - if constexpr (std::is_same::value) - { - envmap.filename = std::move(_property); - } - }); - }; - auto setScale = SET_PROPERTY_TEMPLATE(scale, SNamedPropertyElement::Type::FLOAT, EnvMap); - auto setGamma = SET_PROPERTY_TEMPLATE(gamma, SNamedPropertyElement::Type::FLOAT, EnvMap); - //auto setCache = SET_PROPERTY_TEMPLATE(cache, SNamedPropertyElement::Type::BOOLEAN, EnvMap); -#undef SET_SPECTRUM -#undef SET_PROPERTY_TEMPLATE - const core::unordered_map, core::CaseInsensitiveHash, core::CaseInsensitiveEquals> SetPropertyMap = - { - {"samplingWeight", setSamplingWeight}, - {"intensity", setIntensity}, - {"position", setPosition}, - {"radiance", setRadiance}, - {"cutoffAngle", setCutoffAngle}, - {"beamWidth", setBeamWidth}, - {"direction", setDirection}, - {"power", setPower},/* - {"turbidity", setTurbidity}, - {"", set}, - {"sunRadiusScale", setSunRadiusScale},*/ - {"filename", setFilename}, - {"scale", setScale}, - {"gamma", setGamma}//, - //{"cache", setCache}, - }; +#undef ADD_SPECTRUM + return retval; +} + +bool CElementEmitter::processChildData(IElement* _child, const std::string& name, system::logger_opt_ptr logger) +{ + if (!_child) + return true; - auto found = SetPropertyMap.find(_property.name); - if (found==SetPropertyMap.end()) + switch (_child->getType()) { - _NBL_DEBUG_BREAK_IF(true); - ParserLog::invalidXMLFileStructure("No Emitter can have such property set with name: " + _property.name); - return false; + case IElement::Type::TRANSFORM: + { + auto tform = static_cast(_child); + if (name!="toWorld") + { + logger.log("The nested inside needs to be named \"toWorld\"",system::ILogger::ELL_ERROR); + return false; + } + //toWorldType = 
IElement::Type::TRANSFORM; + switch (type) + { + case Type::POINT: + [[fallthrough]]; + case Type::DIRECTIONAL: + [[fallthrough]]; + case Type::COLLIMATED: + [[fallthrough]]; + case Type::AREA: + [[fallthrough]]; + /* + case Type::SKY: + [[fallthrough]]; + case Type::SUN: + [[fallthrough]]; + case Type::SUNSKY: + [[fallthrough]];*/ + case Type::ENVMAP: + transform = *tform; + return true; + default: + logger.log(" does not support ",system::ILogger::ELL_ERROR,type); + return false; + } + } + break;/* + case IElement::Type::ANIMATION: + auto anim = static_cast(_child); + if (anim->name!="toWorld") + return false; + toWorlType = IElement::Type::ANIMATION; + animation = anim; + return true; + break;*/ + case IElement::Type::EMISSION_PROFILE: + if (type!=Type::AREA && type!=Type::POINT) + { + logger.log(" does not support nested emission profiles, only Point and Area lights do",system::ILogger::ELL_ERROR,type); + return false; + } + area.emissionProfile = static_cast(_child); + return true; + default: + break; } - - found->second(); - return !error; + logger.log(" does not support nested <%s> elements",system::ILogger::ELL_ERROR,type,_child->getLogName()); + return false; } -bool CElementEmitter::onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* metadata) +bool CElementEmitter::onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) { // TODO: some more validation switch (type) { case Type::INVALID: - ParserLog::invalidXMLFileStructure(getLogName() + ": type not specified"); + logger.log("'s type not specified!",system::ILogger::ELL_ERROR); _NBL_DEBUG_BREAK_IF(true); return true; break; - case Type::SPOT: - if (std::isnan(spot.beamWidth)) - spot.beamWidth = spot.cutoffAngle * 0.75f; - default: - break; - } - - switch (type) - { case Type::AREA: break; default: - metadata->m_global.m_emitters.push_back(*this); + // TODO: slap into the scene instead! 
+// globalMetadata->m_global.m_emitters.push_back(*this); break; } - return true; } -} -} } \ No newline at end of file diff --git a/src/nbl/ext/MitsubaLoader/CElementTexture.cpp b/src/nbl/ext/MitsubaLoader/CElementTexture.cpp index 410ab8508f..0add6ac3ca 100644 --- a/src/nbl/ext/MitsubaLoader/CElementTexture.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementTexture.cpp @@ -2,255 +2,118 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h #include "nbl/ext/MitsubaLoader/ParserUtil.h" -#include "nbl/ext/MitsubaLoader/CElementFactory.h" +#include "nbl/ext/MitsubaLoader/CElementTexture.h" + +#include "nbl/ext/MitsubaLoader/ElementMacros.h" #include namespace nbl::ext::MitsubaLoader { - - -template<> -CElementFactory::return_type CElementFactory::createElement(const char** _atts, ParserManager* _util) +inline CElementTexture::Bitmap::WRAP_MODE getWrapMode(const SPropertyElementData& _property) { - const char* type; - const char* id; - std::string name; - if (!IElement::getTypeIDAndNameStrings(type, id, name, _atts)) - return CElementFactory::return_type(nullptr,""); - - static const core::unordered_map StringToType = + using mode_e = CElementTexture::Bitmap::WRAP_MODE; + static const core::unordered_map StringToWrap = { - {"bitmap", CElementTexture::Type::BITMAP}, - {"scale", CElementTexture::Type::SCALE} + {"repeat", mode_e::REPEAT}, + {"mirror", mode_e::MIRROR}, + {"clamp", mode_e::CLAMP}, + {"zero", mode_e::ZERO}, + {"one", mode_e::ONE} }; - - auto found = StringToType.find(type); - if (found==StringToType.end()) - { - ParserLog::invalidXMLFileStructure("unknown type"); - _NBL_DEBUG_BREAK_IF(false); - return CElementFactory::return_type(nullptr,""); - } - - CElementTexture* obj = _util->objects.construct(id); - if (!obj) - return CElementFactory::return_type(nullptr,""); - - obj->type = found->second; - // defaults - switch (obj->type) - { - case CElementTexture::Type::BITMAP: - obj->bitmap = 
CElementTexture::Bitmap(); - break; - case CElementTexture::Type::SCALE: - obj->scale = CElementTexture::Scale(); - break; - default: - break; - } - return CElementFactory::return_type(obj, std::move(name)); + assert(_property.type==SPropertyElementData::Type::STRING); + auto found = StringToWrap.find(_property.getProperty()); + if (found != StringToWrap.end()) + return found->second; + return mode_e::REPEAT; } -bool CElementTexture::addProperty(SNamedPropertyElement&& _property) +auto CElementTexture::compAddPropertyMap() -> AddPropertyMap { - if (type==CElementTexture::Type::SCALE) - { - if (_property.type!=SPropertyElementData::Type::FLOAT) - return false; - scale.scale = _property.fvalue; - return true; - } - + using this_t = CElementTexture; + AddPropertyMap retval; - bool error = false; - auto dispatch = [&](auto func) -> void - { - switch (type) + // bitmap + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED("filename",STRING,std::is_same,Bitmap) { - case CElementTexture::Type::BITMAP: - func(bitmap); - break; - case CElementTexture::Type::SCALE: - func(scale); - break; - default: - error = true; - break; + setLimitedString("filename",_this->bitmap.filename,std::move(_property),logger); return true; } - }; - -#define SET_PROPERTY_TEMPLATE(MEMBER,PROPERTY_TYPE, ... ) [&]() -> void { \ - dispatch([&](auto& state) -> void { \ - if constexpr (is_any_of::type,__VA_ARGS__>::value) \ - { \ - if (_property.type!=PROPERTY_TYPE) { \ - error = true; \ - return; \ - } \ - state. 
## MEMBER = _property.getProperty(); \ - } \ - }); \ - } - - auto processFilename = [&]() -> void - { - dispatch([&](auto& state) -> void { - using state_type = std::remove_reference::type; - - if constexpr (std::is_same::value) - { - bitmap.filename = std::move(_property); - } - }); - }; - auto getWrapMode = [&]() -> Bitmap::WRAP_MODE { - static const core::unordered_map StringToWrap = + ); + // special + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED("wrapMode",STRING,std::is_same,Bitmap) { - {"repeat", Bitmap::WRAP_MODE::REPEAT}, - {"mirror", Bitmap::WRAP_MODE::MIRROR}, - {"clamp", Bitmap::WRAP_MODE::CLAMP}, - {"zero", Bitmap::WRAP_MODE::ZERO}, - {"one", Bitmap::WRAP_MODE::ONE} - }; - auto found = StringToWrap.end(); - if (_property.type == SPropertyElementData::Type::STRING) - found = StringToWrap.find(_property.getProperty()); - if (found != StringToWrap.end()) - return found->second; - return Bitmap::WRAP_MODE::REPEAT; - }; - auto processWrapMode = [&]() -> void - { - dispatch([&](auto& state) -> void { - using state_type = std::remove_reference::type; - - if constexpr (std::is_same::value) - { - auto value = getWrapMode(); - state.wrapModeU = value; - state.wrapModeV = value; - } - }); - }; - auto processWrapModeU = [&]() -> void - { - dispatch([&](auto& state) -> void { - using state_type = std::remove_reference::type; - - if constexpr (std::is_same::value) - { - state.wrapModeU = getWrapMode(); - } - }); - }; - auto processWrapModeV = [&]() -> void - { - dispatch([&](auto& state) -> void { - using state_type = std::remove_reference::type; - - if constexpr (std::is_same::value) - { - state.wrapModeV = getWrapMode(); - } - }); - }; - auto processGamma = SET_PROPERTY_TEMPLATE(gamma,SPropertyElementData::Type::FLOAT,Bitmap); - auto processFilterType = [&]() -> void - { - dispatch([&](auto& state) -> void { - using state_type = std::remove_reference::type; - - if constexpr (std::is_same::value) + _this->bitmap.wrapModeV = _this->bitmap.wrapModeU = 
getWrapMode(_property); + return true; + } + ); + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED("wrapModeU",STRING,std::is_same,Bitmap) + { + _this->bitmap.wrapModeU = getWrapMode(_property); + return true; + } + ); + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED("wrapModeV",STRING,std::is_same,Bitmap) + { + _this->bitmap.wrapModeV = getWrapMode(_property); + return true; + } + ); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(gamma,FLOAT,std::is_same,Bitmap); + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED("filterType",STRING,std::is_same,Bitmap) + { + static const core::unordered_map StringToType = { - static const core::unordered_map StringToType = - { - {"ewa", Bitmap::FILTER_TYPE::EWA}, - {"trilinear", Bitmap::FILTER_TYPE::TRILINEAR}, - {"nearest", Bitmap::FILTER_TYPE::NEAREST} - }; - auto found = StringToType.end(); - if (_property.type==SPropertyElementData::Type::STRING) - found = StringToType.find(_property.getProperty()); - if (found==StringToType.end()) - { - error = true; - return; - } - state.filterType = found->second; - } - }); - }; - auto processMaxAnisotropy = SET_PROPERTY_TEMPLATE(maxAnisotropy,SPropertyElementData::Type::FLOAT,Bitmap); - auto processCache = []() -> void {}; // silently drop - auto processUoffset = SET_PROPERTY_TEMPLATE(uoffset,SPropertyElementData::Type::FLOAT,Bitmap); - auto processVoffset = SET_PROPERTY_TEMPLATE(voffset,SPropertyElementData::Type::FLOAT,Bitmap); - auto processUscale = SET_PROPERTY_TEMPLATE(uscale,SPropertyElementData::Type::FLOAT,Bitmap); - auto processVscale = SET_PROPERTY_TEMPLATE(vscale,SPropertyElementData::Type::FLOAT,Bitmap); - auto processChannel = [&]() -> void - { - dispatch([&](auto& state) -> void { - using state_type = std::remove_reference::type; - - if constexpr (std::is_same::value) + {"ewa", Bitmap::FILTER_TYPE::EWA}, + {"trilinear", Bitmap::FILTER_TYPE::TRILINEAR}, + {"nearest", Bitmap::FILTER_TYPE::NEAREST} + }; + auto found = 
StringToType.find(_property.getProperty()); + if (found==StringToType.end()) + return false; + _this->bitmap.filterType = found->second; + return true; + } + ); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(maxAnisotropy,FLOAT,std::is_same,Bitmap); + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED("cache",BOOLEAN,std::is_same,Bitmap) + { + return true; // silently drop + } + ); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(uoffset,FLOAT,std::is_same,Bitmap); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(voffset,FLOAT,std::is_same,Bitmap); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(uscale,FLOAT,std::is_same,Bitmap); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(vscale,FLOAT,std::is_same,Bitmap); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(vscale,FLOAT,std::is_same,Bitmap); + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED("filename",STRING,std::is_same,Bitmap) + { + static const core::unordered_map StringToType = { - static const core::unordered_map StringToType = - { - {"r", Bitmap::CHANNEL::R}, - {"g", Bitmap::CHANNEL::G}, - {"b", Bitmap::CHANNEL::B}, - {"a", Bitmap::CHANNEL::A}/*, - {"x", Bitmap::CHANNEL::X}, - {"y", Bitmap::CHANNEL::Y}, - {"z", Bitmap::CHANNEL::Z}*/ - }; - auto found = StringToType.end(); - if (_property.type == SPropertyElementData::Type::STRING) - found = StringToType.find(_property.getProperty()); - if (found == StringToType.end()) - { - error = true; - return; - } - state.channel = found->second; - } - }); - }; - + {"r", Bitmap::CHANNEL::R}, + {"g", Bitmap::CHANNEL::G}, + {"b", Bitmap::CHANNEL::B}, + {"a", Bitmap::CHANNEL::A}/*, + {"x", Bitmap::CHANNEL::X}, + {"y", Bitmap::CHANNEL::Y}, + {"z", Bitmap::CHANNEL::Z}*/ + }; + auto found = StringToType.find(_property.getProperty()); + if (found==StringToType.end()) + return false; + _this->bitmap.channel = 
found->second; + return true; + } + ); - const core::unordered_map, core::CaseInsensitiveHash, core::CaseInsensitiveEquals> SetPropertyMap = - { - {"filename", processFilename}, - {"wrapMode", processWrapMode}, - {"wrapModeU", processWrapModeU}, - {"wrapModeV", processWrapModeV}, - {"gamma", processGamma}, - {"filterType", processFilterType}, - {"maxAnisotropy", processMaxAnisotropy}, - {"cache", processCache}, - {"uoffset", processUoffset}, - {"voffset", processVoffset}, - {"uscale", processUscale}, - {"vscale", processVscale}, - {"channel", processChannel} - }; - - auto found = SetPropertyMap.find(_property.name); - if (found==SetPropertyMap.end()) - { - _NBL_DEBUG_BREAK_IF(true); - ParserLog::invalidXMLFileStructure("No BSDF can have such property set with name: "+_property.name); - return false; - } + // scale + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(scale,FLOAT,std::is_same,Scale); - found->second(); - return !error; + return retval; } -bool CElementTexture::processChildData(IElement* _child, const std::string& name) +bool CElementTexture::processChildData(IElement* _child, const std::string& name, system::logger_opt_ptr logger) { if (!_child) return true; @@ -267,17 +130,16 @@ bool CElementTexture::processChildData(IElement* _child, const std::string& name break; default: _NBL_DEBUG_BREAK_IF(true); - ParserLog::invalidXMLFileStructure("No supported texture can have a texture as child element, except for \"scale\""); + logger.log("Only can have nested elements",system::ILogger::ELL_ERROR); return false; - break; } } - break; + return true; default: - return false; break; } - return true; + logger.log(" does not support nested <%s> elements",system::ILogger::ELL_ERROR,type,_child->getLogName()); + return false; } bool CElementTexture::onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) diff --git a/src/nbl/ext/MitsubaLoader/ElementMacros.h b/src/nbl/ext/MitsubaLoader/ElementMacros.h index 
6db96f452f..ab88a11cb4 100644 --- a/src/nbl/ext/MitsubaLoader/ElementMacros.h +++ b/src/nbl/ext/MitsubaLoader/ElementMacros.h @@ -11,9 +11,13 @@ .func=[](this_t* _this, SNamedPropertyElement&& _property, const system::logger_opt_ptr logger)->bool // when you know that there's a member of `this_t` with identifier equal to NAME -#define NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_PROPERTY(NAME,PROP_TYPE) NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY(#NAME,PROP_TYPE) {\ - _this->NAME = _property.getProperty(); \ - return true;}}) +#define NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_PROPERTY(NAME,PROP_TYPE) NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY(#NAME,PROP_TYPE) \ + {\ + _this->NAME = _property.getProperty(); \ + return true; \ + } \ + } \ +) // Similar to `NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY` but for `this_t` which declare `variant_list_t` (list of union types) // this adds a compile-time filter against the constraint, such that only variant types matching the constraint are visited. @@ -21,13 +25,29 @@ #define NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED(NAME,PROP_TYPE,CONSTRAINT,...) retval.template registerCallback( \ SNamedPropertyElement::Type::PROP_TYPE,NAME,[](this_t* _this, SNamedPropertyElement&& _property, const system::logger_opt_ptr logger)->bool +// TODO: docs +#define NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED(NAME,PROP_TYPE,CONSTRAINT,...) 
NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED(#NAME,PROP_TYPE,CONSTRAINT __VA_OPT__(,) __VA_ARGS__) \ + {\ + bool success = false; \ + _this->visit([&_property,&success](auto& state)->void \ + { \ + if constexpr (CONSTRAINT __VA_OPT__(,) __VA_ARGS__>::value) \ + { + +#define NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED_END \ + } \ + } \ + ); \ + return success; \ + } \ +) + // This it to `NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED` what `NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_PROPERTY` is to `NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY` // So basically you know the member is the same across the constraint filtered types -#define NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(NAME,PROP_TYPE,CONSTRAINT,...) NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED(#NAME,PROP_TYPE,CONSTRAINT __VA_OPT__(,) __VA_ARGS__) {\ - _this->visit([&_property](auto& state)->void{ \ - if constexpr (CONSTRAINT __VA_OPT__(,) __VA_ARGS__>::value) \ - state. ## NAME = _property.getProperty(); \ - }); return true;}) +#define NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(NAME,PROP_TYPE,CONSTRAINT,...) NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED(NAME,PROP_TYPE,CONSTRAINT __VA_OPT__(,) __VA_ARGS__) \ + state. 
## NAME = _property.getProperty(); \ + success = true; \ +NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED_END // just to reverse `is_base_of` From 9513614dec8e1dd58dc7f120d7c5756586e4fb2d Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 23 Dec 2025 14:17:53 +0700 Subject: [PATCH 309/472] restore ifdef for mounting builtin resources, minor fixes to mounting --- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 16 ++++++++++------ src/nbl/ext/DebugDraw/CMakeLists.txt | 6 +----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index 49a41d2aa7..3873ad9041 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -76,7 +76,7 @@ core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) } // note we use archive entry explicitly for temporary compiler include search path & asset cwd to use keys directly -constexpr std::string_view NBL_ARCHIVE_ENTRY = _ARCHIVE_ABSOLUTE_SPV_PATH_; +constexpr std::string_view NBL_ARCHIVE_ENTRY = NBL_DEBUG_DRAW_HLSL_MOUNT_POINT; const smart_refctd_ptr DrawAABB::mount(smart_refctd_ptr logger, ISystem* system, const std::string_view archiveAlias) { @@ -85,7 +85,7 @@ const smart_refctd_ptr DrawAABB::mount(smart_refctd_ptr l if (!system) return nullptr; - if (system->isDirectory(path(NBL_ARCHIVE_ENTRY.data()))) + if (system->exists(path(NBL_ARCHIVE_ENTRY), {})) { logger->log("CDrawAABB .spv directory is already mounted!", ILogger::ELL_WARNING); return nullptr; @@ -93,9 +93,13 @@ const smart_refctd_ptr DrawAABB::mount(smart_refctd_ptr l // extension should mount everything for you, regardless if content goes from virtual filesystem // or disk directly - and you should never rely on application framework to expose extension data - +#ifdef NBL_EMBED_BUILTIN_RESOURCES + auto archive = make_smart_refctd_ptr(smart_refctd_ptr(logger)); + system->mount(smart_refctd_ptr(archive), archiveAlias.data()); +#else 
auto archive = make_smart_refctd_ptr(std::move(NBL_ARCHIVE_ENTRY), smart_refctd_ptr(logger), system); system->mount(smart_refctd_ptr(archive), archiveAlias.data()); +#endif return smart_refctd_ptr(archive); } @@ -105,13 +109,13 @@ smart_refctd_ptr DrawAABB::createPipeline(SCreationParamet system::logger_opt_ptr logger = params.utilities->getLogger(); auto system = smart_refctd_ptr(params.assetManager->getSystem()); - if (!system->isDirectory(path(NBL_ARCHIVE_ENTRY.data()))) - mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), NBL_ARCHIVE_ENTRY); + if (!system->exists(path(NBL_ARCHIVE_ENTRY), {})) + mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), NBL_ARCHIVE_ENTRY); auto getShader = [&](const core::string& key)->smart_refctd_ptr { IAssetLoader::SAssetLoadParams lp = {}; lp.logger = params.utilities->getLogger(); - lp.workingDirectory = _ARCHIVE_ABSOLUTE_SPV_PATH_; + lp.workingDirectory = NBL_DEBUG_DRAW_HLSL_MOUNT_POINT; auto bundle = params.assetManager->getAsset(key.c_str(), lp); const auto contents = bundle.getContents(); diff --git a/src/nbl/ext/DebugDraw/CMakeLists.txt b/src/nbl/ext/DebugDraw/CMakeLists.txt index 2eb05b739b..25e4be718a 100644 --- a/src/nbl/ext/DebugDraw/CMakeLists.txt +++ b/src/nbl/ext/DebugDraw/CMakeLists.txt @@ -25,15 +25,11 @@ set(NBL_DEBUG_DRAW_HLSL_MOUNT_POINT "${_ARCHIVE_ABSOLUTE_ENTRY_PATH_}/nbl/ext/De set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") set(DEPENDS ${NBL_DEBUG_DRAW_HLSL_MOUNT_POINT}/common.hlsl - ${NBL_DEBUG_DRAW_HLSL_MOUNT_POINT}/single.vertex.hlsl - ${NBL_DEBUG_DRAW_HLSL_MOUNT_POINT}/aabb_instances.vertex.hlsl - ${NBL_DEBUG_DRAW_HLSL_MOUNT_POINT}/aabb_instances.fragment.hlsl + ${NBL_DEBUG_DRAW_HLSL_MOUNT_POINT}/draw_aabb.unified.hlsl ) target_sources(${LIB_NAME} PRIVATE ${DEPENDS}) set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) -target_compile_definitions(${LIB_NAME} PRIVATE _ARCHIVE_ABSOLUTE_SPV_PATH_="${OUTPUT_DIRECTORY}") - set(SM 6_8) set(JSON 
[=[ [ From ac518879c28107ed01273bf78f3cded06e57add1 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 23 Dec 2025 16:30:46 +0700 Subject: [PATCH 310/472] simplified usage of streaming buffer alignments, flush unused memory range --- include/nbl/ext/DebugDraw/CDrawAABB.h | 58 ++++++++++----------------- 1 file changed, 22 insertions(+), 36 deletions(-) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 5b42ac25ba..dd6e4e6c78 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -144,63 +144,49 @@ namespace nbl::ext::debug_draw }; const uint32_t numInstances = aabbInstances.size(); - const uint32_t instancesPerIter = streaming->max_size() / sizeof(InstanceData); - if (numInstances > instancesPerIter) - return false; - using suballocator_t = core::LinearAddressAllocatorST; - uint32_t blockOffset = 0u; - while (srcIt != aabbInstances.end()) + uint32_t remainingInstancesBytes = numInstances * sizeof(InstanceData); + while (srcIt != aabbInstances.end() && remainingInstancesBytes > 0u) { - uint32_t instanceCount = hlsl::min(instancesPerIter, numInstances); - offset_t inputOffset = blockOffset; - offset_t ImaginarySizeUpperBound = 0x1 << 30; - suballocator_t imaginaryChunk(nullptr, inputOffset, 0, MaxPOTAlignment, ImaginarySizeUpperBound); - uint32_t instancesByteOffset = imaginaryChunk.alloc_addr(sizeof(InstanceData) * instanceCount, sizeof(InstanceData)); - const uint32_t totalSize = imaginaryChunk.get_allocated_size(); - - uint32_t blockSize; + uint32_t blockByteSize = hlsl::min(streaming->max_size(), core::alignUp(remainingInstancesBytes, MaxAlignment)); bool allocated = false; + + offset_t blockOffset = SCachedCreationParameters::streaming_buffer_t::invalid_value; for (uint32_t t = 0; t < 2; t++) { - blockSize = hlsl::max(streaming->max_size(), totalSize); - while (blockSize >= totalSize) + std::chrono::steady_clock::time_point waitTill = std::chrono::steady_clock::now() + 
std::chrono::milliseconds(1u); + if (streaming->multi_allocate(waitTill, 1, &blockOffset, &blockByteSize, &MaxAlignment) == 0u) { - inputOffset = SCachedCreationParameters::streaming_buffer_t::invalid_value; - std::chrono::steady_clock::time_point waitTill = std::chrono::steady_clock::now() + std::chrono::milliseconds(1u); - if (streaming->multi_allocate(waitTill, 1, &inputOffset, &blockSize, &MaxAlignment) == 0u) - { - allocated = true; - break; - } - - streaming->cull_frees(); - blockSize >>= 1; - } - - if (allocated) + allocated = true; break; + } + streaming->cull_frees(); } if (!allocated) { - logger.log("Failed to allocate even the smallest chunk from streaming buffer for the next drawcall batch.", system::ILogger::ELL_ERROR); + logger.log("Failed to allocate a chunk from streaming buffer for the next drawcall batch.", system::ILogger::ELL_ERROR); return false; } - instanceCount = blockSize / sizeof(InstanceData); - blockOffset += blockSize; - auto* const streamingInstancesPtr = reinterpret_cast(streamingPtr + instancesByteOffset); + const uint32_t instanceCount = blockByteSize / sizeof(InstanceData); + auto* const streamingInstancesPtr = reinterpret_cast(streamingPtr + blockOffset); setInstancesRange(streamingInstancesPtr, instanceCount); - assert(!streaming->needsManualFlushOrInvalidate()); + if (streaming->needsManualFlushOrInvalidate()) + { + const video::ILogicalDevice::MappedMemoryRange flushRange(streaming->getBuffer()->getBoundMemory().memory, blockOffset, blockByteSize); + m_cachedCreationParams.utilities->getLogicalDevice()->flushMappedMemoryRanges(1, &flushRange); + } + + remainingInstancesBytes -= blockByteSize; SInstancedPC pc; - pc.pInstanceBuffer = m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + instancesByteOffset; + pc.pInstanceBuffer = m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + blockOffset; commandBuffer->pushConstants(m_batchPipeline->getLayout(), 
asset::IShader::E_SHADER_STAGE::ESS_VERTEX, offsetof(ext::debug_draw::PushConstants, ipc), sizeof(SInstancedPC), &pc); commandBuffer->drawIndexed(IndicesCount, instanceCount, 0, 0, 0); - streaming->multi_deallocate(1, &inputOffset, &blockSize, waitInfo); + streaming->multi_deallocate(1, &blockOffset, &blockByteSize, waitInfo); } return true; From 5ebfc6526f62e6822416c986ace761a8fe5f4a1c Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 23 Dec 2025 16:38:21 +0700 Subject: [PATCH 311/472] fix calculating remaining instances bytes --- include/nbl/ext/DebugDraw/CDrawAABB.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index dd6e4e6c78..1a73f85942 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -145,7 +145,7 @@ namespace nbl::ext::debug_draw const uint32_t numInstances = aabbInstances.size(); uint32_t remainingInstancesBytes = numInstances * sizeof(InstanceData); - while (srcIt != aabbInstances.end() && remainingInstancesBytes > 0u) + while (srcIt != aabbInstances.end()) { uint32_t blockByteSize = hlsl::min(streaming->max_size(), core::alignUp(remainingInstancesBytes, MaxAlignment)); bool allocated = false; @@ -178,7 +178,7 @@ namespace nbl::ext::debug_draw m_cachedCreationParams.utilities->getLogicalDevice()->flushMappedMemoryRanges(1, &flushRange); } - remainingInstancesBytes -= blockByteSize; + remainingInstancesBytes -= instanceCount * sizeof(InstanceData); SInstancedPC pc; pc.pInstanceBuffer = m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + blockOffset; From d5c49f50d7752adbf72422f2e99048c0700ec957 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Tue, 23 Dec 2025 15:15:31 +0300 Subject: [PATCH 312/472] include `thmath.hlsl` in `functions.hlsl` --- include/nbl/builtin/hlsl/math/functions.hlsl | 1 + 1 file changed, 1 insertion(+) diff --git a/include/nbl/builtin/hlsl/math/functions.hlsl 
b/include/nbl/builtin/hlsl/math/functions.hlsl index a52eb21c23..a1c51d4e51 100644 --- a/include/nbl/builtin/hlsl/math/functions.hlsl +++ b/include/nbl/builtin/hlsl/math/functions.hlsl @@ -5,6 +5,7 @@ #define _NBL_BUILTIN_HLSL_MATH_FUNCTIONS_INCLUDED_ #include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/tgmath.hlsl" #include "nbl/builtin/hlsl/numbers.hlsl" #include "nbl/builtin/hlsl/vector_utils/vector_traits.hlsl" #include "nbl/builtin/hlsl/concepts/vector.hlsl" From 6f4ef5b4211219737b555883f3bf4373de793107 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 23 Dec 2025 19:41:50 +0700 Subject: [PATCH 313/472] check whether spirv exists --- include/nbl/ext/DebugDraw/CDrawAABB.h | 2 +- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 1a73f85942..48c8b50400 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -91,7 +91,7 @@ namespace nbl::ext::debug_draw static core::smart_refctd_ptr createDefaultPipelineLayout(video::ILogicalDevice* device, DrawMode mode = ADM_DRAW_BATCH); //! 
mounts the extension's archive to given system - useful if you want to create your own shaders with common header included - static const core::smart_refctd_ptr mount(core::smart_refctd_ptr logger, system::ISystem* system, const std::string_view archiveAlias = ""); + static const core::smart_refctd_ptr mount(core::smart_refctd_ptr logger, system::ISystem* system, const core::string& spvPath, const std::string_view archiveAlias = ""); inline const SCachedCreationParameters& getCreationParameters() const { return m_cachedCreationParams; } diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index 3873ad9041..f7706a15c8 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -78,14 +78,14 @@ core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) // note we use archive entry explicitly for temporary compiler include search path & asset cwd to use keys directly constexpr std::string_view NBL_ARCHIVE_ENTRY = NBL_DEBUG_DRAW_HLSL_MOUNT_POINT; -const smart_refctd_ptr DrawAABB::mount(smart_refctd_ptr logger, ISystem* system, const std::string_view archiveAlias) +const smart_refctd_ptr DrawAABB::mount(smart_refctd_ptr logger, ISystem* system, const core::string& spvPath, const std::string_view archiveAlias) { assert(system); if (!system) return nullptr; - if (system->exists(path(NBL_ARCHIVE_ENTRY), {})) + if (system->exists(path(NBL_ARCHIVE_ENTRY) / spvPath.c_str(), {})) { logger->log("CDrawAABB .spv directory is already mounted!", ILogger::ELL_WARNING); return nullptr; @@ -109,8 +109,8 @@ smart_refctd_ptr DrawAABB::createPipeline(SCreationParamet system::logger_opt_ptr logger = params.utilities->getLogger(); auto system = smart_refctd_ptr(params.assetManager->getSystem()); - if (!system->exists(path(NBL_ARCHIVE_ENTRY), {})) - mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), NBL_ARCHIVE_ENTRY); + const auto key = 
nbl::ext::debug_draw::builtin::build::get_spirv_key<"draw_aabb">(params.utilities->getLogicalDevice()); + mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), key, NBL_ARCHIVE_ENTRY); auto getShader = [&](const core::string& key)->smart_refctd_ptr { IAssetLoader::SAssetLoadParams lp = {}; @@ -135,9 +135,7 @@ smart_refctd_ptr DrawAABB::createPipeline(SCreationParamet return IAsset::castDown(contents[0]); }; - auto key = nbl::ext::debug_draw::builtin::build::get_spirv_key<"draw_aabb">(params.utilities->getLogicalDevice()); smart_refctd_ptr unifiedShader = getShader(key); - if (!unifiedShader) { params.utilities->getLogger()->log("Could not compile shaders!", ILogger::ELL_ERROR); From bea9962166e0f753dff96d098c9d7486b75bc799 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Tue, 23 Dec 2025 14:19:49 +0100 Subject: [PATCH 314/472] Update examples --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 2d59279740..f84ef3315c 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 2d5927974073dd3ada6a0a52134355d8022876a3 +Subproject commit f84ef3315c2cd25aa2167f6d94abf658a19a2f9e From 1e8171c9a06d90c451e9279cc49ca048ea971ff0 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 23 Dec 2025 20:24:28 +0700 Subject: [PATCH 315/472] try to fit as much as possible even when fail to allocate, go down by half each time --- include/nbl/ext/DebugDraw/CDrawAABB.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 48c8b50400..9442e24c28 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -147,11 +147,12 @@ namespace nbl::ext::debug_draw uint32_t remainingInstancesBytes = numInstances * sizeof(InstanceData); while (srcIt != aabbInstances.end()) { - uint32_t blockByteSize = hlsl::min(streaming->max_size(), 
core::alignUp(remainingInstancesBytes, MaxAlignment)); + uint32_t blockByteSize = core::alignUp(remainingInstancesBytes, MaxAlignment); bool allocated = false; offset_t blockOffset = SCachedCreationParameters::streaming_buffer_t::invalid_value; - for (uint32_t t = 0; t < 2; t++) + const uint32_t smallestAlloc = hlsl::max(core::alignUp(sizeof(InstanceData), MaxAlignment), streaming->getAddressAllocator().min_size()); + while (blockByteSize >= smallestAlloc) { std::chrono::steady_clock::time_point waitTill = std::chrono::steady_clock::now() + std::chrono::milliseconds(1u); if (streaming->multi_allocate(waitTill, 1, &blockOffset, &blockByteSize, &MaxAlignment) == 0u) @@ -159,7 +160,9 @@ namespace nbl::ext::debug_draw allocated = true; break; } + streaming->cull_frees(); + blockByteSize >>= 1; } if (!allocated) From 9aee495baa361aad10de76a13a436ab27d5e8b13 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 23 Dec 2025 14:50:43 +0100 Subject: [PATCH 316/472] update docker/compiler-explorer submodule --- docker/compiler-explorer | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/compiler-explorer b/docker/compiler-explorer index 45866dfa87..04c693f866 160000 --- a/docker/compiler-explorer +++ b/docker/compiler-explorer @@ -1 +1 @@ -Subproject commit 45866dfa8782404fc121f25ce15ad0626b474db0 +Subproject commit 04c693f8668d7f09f999434745afbb58fc9c7025 From c5c813b14001fd8c9b71f3ecd3728e36858353c6 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 23 Dec 2025 16:49:55 +0100 Subject: [PATCH 317/472] update docker/compiler-explorer submodule --- docker/compiler-explorer | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/compiler-explorer b/docker/compiler-explorer index 04c693f866..265166a574 160000 --- a/docker/compiler-explorer +++ b/docker/compiler-explorer @@ -1 +1 @@ -Subproject commit 04c693f8668d7f09f999434745afbb58fc9c7025 +Subproject commit 265166a574c3b0dae59e57d6d8605f0fa37c31e1 From 
4fc80d8c148833a662dac22eb8284309a5c2fc24 Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 23 Dec 2025 17:54:00 +0100 Subject: [PATCH 318/472] CElementBSDF is done! also fix typos in CElementTexture --- include/nbl/core/string/stringutil.h | 4 +- include/nbl/ext/MitsubaLoader/CElementBSDF.h | 274 ++++-- .../ext/MitsubaLoader/CElementIntegrator.h | 3 +- .../nbl/ext/MitsubaLoader/CElementTexture.h | 61 +- include/nbl/ext/MitsubaLoader/IElement.h | 17 +- include/nbl/ext/MitsubaLoader/ParserUtil.h | 6 +- src/nbl/ext/MitsubaLoader/CElementBSDF.cpp | 919 +++++------------- .../MitsubaLoader/CElementEmissionProfile.cpp | 7 +- src/nbl/ext/MitsubaLoader/CElementEmitter.cpp | 52 +- src/nbl/ext/MitsubaLoader/CElementFilm.cpp | 8 +- .../ext/MitsubaLoader/CElementIntegrator.cpp | 5 +- src/nbl/ext/MitsubaLoader/CElementShape.cpp | 2 +- src/nbl/ext/MitsubaLoader/CElementTexture.cpp | 5 +- src/nbl/ext/MitsubaLoader/CMakeLists.txt | 6 +- src/nbl/ext/MitsubaLoader/ElementMacros.h | 4 +- src/nbl/ext/MitsubaLoader/ParserUtil.cpp | 13 +- 16 files changed, 571 insertions(+), 815 deletions(-) diff --git a/include/nbl/core/string/stringutil.h b/include/nbl/core/string/stringutil.h index 3183ef149b..0eb9628250 100644 --- a/include/nbl/core/string/stringutil.h +++ b/include/nbl/core/string/stringutil.h @@ -180,7 +180,7 @@ namespace core //! 
DOCUMENTATION TODO struct CaseInsensitiveHash { - inline std::size_t operator()(const std::string& val) const + inline std::size_t operator()(const std::string_view val) const { std::size_t seed = 0; for (auto it = val.begin(); it != val.end(); it++) @@ -192,7 +192,7 @@ namespace core }; struct CaseInsensitiveEquals { - inline bool operator()(const std::string& A, const std::string& B) const + inline bool operator()(const std::string_view A, const std::string_view B) const { return core::strcmpi(A, B)==0; } diff --git a/include/nbl/ext/MitsubaLoader/CElementBSDF.h b/include/nbl/ext/MitsubaLoader/CElementBSDF.h index 0873c6e41d..432064b847 100644 --- a/include/nbl/ext/MitsubaLoader/CElementBSDF.h +++ b/include/nbl/ext/MitsubaLoader/CElementBSDF.h @@ -29,6 +29,7 @@ class CElementBSDF : public IElement COATING, ROUGHCOATING, BUMPMAP, + NORMALMAP, PHONG, WARD, MIXTURE_BSDF, @@ -39,9 +40,39 @@ class CElementBSDF : public IElement //HANRAHAN_KRUEGER, //IRAWAN_MARSCHNER }; + static inline core::unordered_map compStringToTypeMap() + { + return { + {"diffuse", CElementBSDF::Type::DIFFUSE}, + {"roughdiffuse", CElementBSDF::Type::ROUGHDIFFUSE}, + {"dielectric", CElementBSDF::Type::DIELECTRIC}, + {"thindielectric", CElementBSDF::Type::THINDIELECTRIC}, + {"roughdielectric", CElementBSDF::Type::ROUGHDIELECTRIC}, + {"conductor", CElementBSDF::Type::CONDUCTOR}, + {"roughconductor", CElementBSDF::Type::ROUGHCONDUCTOR}, + {"plastic", CElementBSDF::Type::PLASTIC}, + {"roughplastic", CElementBSDF::Type::ROUGHPLASTIC}, + {"coating", CElementBSDF::Type::COATING}, + {"roughcoating", CElementBSDF::Type::ROUGHCOATING}, + {"bumpmap", CElementBSDF::Type::BUMPMAP}, + {"normalmap", CElementBSDF::Type::NORMALMAP}, + {"phong", CElementBSDF::Type::PHONG}, + {"ward", CElementBSDF::Type::WARD}, + {"mixturebsdf", CElementBSDF::Type::MIXTURE_BSDF}, + {"blendbsdf", CElementBSDF::Type::BLEND_BSDF}, + {"mask", CElementBSDF::Type::MASK}, + {"twosided", CElementBSDF::Type::TWO_SIDED}, + {"difftrans", 
CElementBSDF::Type::DIFFUSE_TRANSMITTER}//, + //{"hk", CElementBSDF::Type::HANRAHAN_KRUEGER}, + //{"irawan", CElementBSDF::Type::IRAWAN_MARSCHNER} + }; + } + struct DiffuseTransmitter { - DiffuseTransmitter() : transmittance(0.5f) {} + constexpr static inline Type VariantType = Type::DIFFUSE_TRANSMITTER; + + inline DiffuseTransmitter() : transmittance(0.5f) {} inline DiffuseTransmitter& operator=(const DiffuseTransmitter& other) { @@ -53,8 +84,8 @@ class CElementBSDF : public IElement }; struct AllDiffuse { - AllDiffuse() : reflectance(0.5f), alpha(0.2f), useFastApprox(false) {} - ~AllDiffuse() {} + inline AllDiffuse() : reflectance(0.5f), alpha(0.2f), useFastApprox(false) {} + inline ~AllDiffuse() {} inline AllDiffuse& operator=(const AllDiffuse& other) { @@ -72,6 +103,14 @@ class CElementBSDF : public IElement CElementTexture::FloatOrTexture alpha; // not the parameter from Oren-Nayar bool useFastApprox; }; + struct Diffuse : AllDiffuse + { + constexpr static inline Type VariantType = Type::DIFFUSE; + }; + struct RoughDiffuse : AllDiffuse + { + constexpr static inline Type VariantType = Type::ROUGHDIFFUSE; + }; struct RoughSpecularBase { enum NormalDistributionFunction : uint32_t @@ -82,13 +121,13 @@ class CElementBSDF : public IElement ASHIKHMIN_SHIRLEY }; - RoughSpecularBase(float defaultAlpha) : distribution(GGX), specularReflectance(1.f), + inline RoughSpecularBase(float defaultAlpha) : distribution(GGX), specularReflectance(1.f), // union ignores ctors, and ctors are important to not try to free garbage strings alphaU(core::nan()), alphaV(core::nan()) { alpha = defaultAlpha; } - virtual ~RoughSpecularBase() {} + virtual inline ~RoughSpecularBase() {} inline RoughSpecularBase& operator=(const RoughSpecularBase& other) { @@ -119,12 +158,40 @@ class CElementBSDF : public IElement }; CElementTexture::SpectrumOrTexture specularReflectance; }; + struct AllConductor : RoughSpecularBase + { + inline AllConductor() : AllConductor("cu",nullptr) {} + inline 
AllConductor(const std::string_view material, system::logger_opt_ptr logger); + inline AllConductor(SPropertyElementData&& _eta, SPropertyElementData&& _k, system::logger_opt_ptr logger) : + RoughSpecularBase(0.1f), eta(_eta), k(_k), extEta(TransmissiveBase::findIOR("air",logger)) {} + + inline AllConductor& operator=(const AllConductor& other) + { + RoughSpecularBase::operator=(other); + eta = other.eta; + k = other.k; + extEta = other.extEta; + return *this; + } + + SPropertyElementData eta,k; + float extEta; + }; + struct Conductor : AllConductor + { + constexpr static inline Type VariantType = Type::CONDUCTOR; + }; + struct RoughConductor : AllConductor + { + constexpr static inline Type VariantType = Type::ROUGHCONDUCTOR; + }; struct TransmissiveBase { - static float findIOR(const std::string& name); + static float findIOR(const std::string_view name, system::logger_opt_ptr logger); - TransmissiveBase(float _intIOR, float _extIOR) : intIOR(_intIOR), extIOR(_extIOR), specularTransmittance(1.f) {} - TransmissiveBase(const std::string& _intIOR, const std::string& _extIOR) : TransmissiveBase(findIOR(_intIOR), findIOR(_extIOR)) {} + inline TransmissiveBase(float _intIOR, float _extIOR) : intIOR(_intIOR), extIOR(_extIOR), specularTransmittance(1.f) {} + inline TransmissiveBase(const std::string_view _intIOR, const std::string_view _extIOR, system::logger_opt_ptr logger) : + TransmissiveBase(findIOR(_intIOR,logger), findIOR(_extIOR,logger)) {} inline TransmissiveBase& operator=(const TransmissiveBase& other) { @@ -140,9 +207,10 @@ class CElementBSDF : public IElement }; struct AllDielectric : RoughSpecularBase, TransmissiveBase { - AllDielectric() : RoughSpecularBase(0.1f), TransmissiveBase("bk7","air") {} - AllDielectric(float intIOR, float extIOR) : RoughSpecularBase(0.1f), TransmissiveBase(intIOR,extIOR) {} - AllDielectric(const std::string& intIOR, const std::string& extIOR) : RoughSpecularBase(0.1f), TransmissiveBase(intIOR,extIOR) {} + inline AllDielectric() : 
RoughSpecularBase(0.1f), TransmissiveBase("bk7","air",nullptr) {} + inline AllDielectric(float intIOR, float extIOR) : RoughSpecularBase(0.1f), TransmissiveBase(intIOR,extIOR) {} + inline AllDielectric(const std::string_view intIOR, const std::string_view extIOR, system::logger_opt_ptr logger) : + RoughSpecularBase(0.1f), TransmissiveBase(intIOR,extIOR,logger) {} inline AllDielectric& operator=(const AllDielectric& other) { @@ -151,29 +219,25 @@ class CElementBSDF : public IElement return *this; } }; - struct AllConductor : RoughSpecularBase + struct Dielectric : AllDielectric { - AllConductor() : AllConductor("cu") {} - AllConductor(const std::string& material); - AllConductor(SPropertyElementData&& _eta, SPropertyElementData&& _k) : RoughSpecularBase(0.1f), eta(_eta), k(_k), extEta(TransmissiveBase::findIOR("air")) {} - - inline AllConductor& operator=(const AllConductor& other) - { - RoughSpecularBase::operator=(other); - eta = other.eta; - k = other.k; - extEta = other.extEta; - return *this; - } - - SPropertyElementData eta,k; - float extEta; + constexpr static inline Type VariantType = Type::DIELECTRIC; + }; + struct ThinDielectric : AllDielectric + { + constexpr static inline Type VariantType = Type::THINDIELECTRIC; + }; + struct RoughDielectric : AllDielectric + { + constexpr static inline Type VariantType = Type::ROUGHDIELECTRIC; }; struct AllPlastic : RoughSpecularBase, TransmissiveBase { - AllPlastic() : RoughSpecularBase(0.1f), TransmissiveBase("polypropylene", "air"), nonlinear(false) {} - AllPlastic(float intIOR, float extIOR) : RoughSpecularBase(0.1f), TransmissiveBase(intIOR, extIOR), nonlinear(false) {} - AllPlastic(const std::string& intIOR, const std::string& extIOR) : RoughSpecularBase(0.1f), TransmissiveBase(intIOR, extIOR), nonlinear(false) {} + inline AllPlastic() : RoughSpecularBase(0.1f), TransmissiveBase("polypropylene","air",nullptr), nonlinear(false) {} + inline AllPlastic(float intIOR, float extIOR) : + RoughSpecularBase(0.1f), 
TransmissiveBase(intIOR,extIOR), nonlinear(false) {} + inline AllPlastic(const std::string_view intIOR, const std::string_view extIOR, system::logger_opt_ptr logger) : + RoughSpecularBase(0.1f), TransmissiveBase(intIOR,extIOR,logger), nonlinear(false) {} inline AllPlastic& operator=(const AllPlastic& other) { @@ -186,13 +250,21 @@ class CElementBSDF : public IElement bool nonlinear; CElementTexture::SpectrumOrTexture diffuseReflectance = 0.5f; + }; + struct Plastic : AllPlastic + { + constexpr static inline Type VariantType = Type::PLASTIC; + }; + struct RoughPlastic : AllPlastic + { + constexpr static inline Type VariantType = Type::ROUGHPLASTIC; };/* struct HanrahanKrueger { class CPhaseElement { }; - HanrahanKrueger(const std::string& material); + HanrahanKrueger(const std::string_view material); HanrahanKrueger() : HanrahanKrueger("skin1") {} bool tNOTs = false; @@ -214,15 +286,16 @@ class CElementBSDF : public IElement };*/ struct MetaBSDF { - _NBL_STATIC_INLINE_CONSTEXPR size_t MaxChildCount = 32u; + constexpr static inline size_t MaxChildCount = 32u; size_t childCount = 0u; CElementBSDF* bsdf[MaxChildCount] = { nullptr }; }; struct AllCoating : MetaBSDF, RoughSpecularBase, TransmissiveBase { - _NBL_STATIC_INLINE_CONSTEXPR size_t MaxChildCount = 1u; + constexpr static inline size_t MaxChildCount = 1u; - AllCoating() : MetaBSDF(), RoughSpecularBase(0.1f), TransmissiveBase("bk7","air"), thickness(1.f), sigmaA(0.f) {} + inline AllCoating() final : MetaBSDF(), RoughSpecularBase(0.1f), + TransmissiveBase("bk7","air",nullptr), thickness(1.f), sigmaA(0.f) {} inline AllCoating& operator=(const AllCoating& other) { @@ -237,45 +310,69 @@ class CElementBSDF : public IElement float thickness; CElementTexture::SpectrumOrTexture sigmaA; }; - struct BumpMap : MetaBSDF + struct Coating final : AllCoating + { + constexpr static inline Type VariantType = Type::COATING; + }; + struct RoughCoating final : AllCoating + { + constexpr static inline Type VariantType = 
Type::ROUGHCOATING; + }; + struct BumpMap final : MetaBSDF { - CElementTexture* texture; - bool wasNormal; + constexpr static inline Type VariantType = Type::BUMPMAP; + + CElementTexture* texture = nullptr; + }; + struct NormalMap final : MetaBSDF + { + constexpr static inline Type VariantType = Type::NORMALMAP; + + CElementTexture* texture = nullptr; }; - struct MixtureBSDF : MetaBSDF + struct MixtureBSDF final : MetaBSDF { + constexpr static inline Type VariantType = Type::MIXTURE_BSDF; + uint32_t weightCount = 0u; float weights[MetaBSDF::MaxChildCount] = { 1.f }; }; - struct BlendBSDF : MetaBSDF + struct BlendBSDF final : MetaBSDF { - _NBL_STATIC_INLINE_CONSTEXPR size_t MaxChildCount = 2u; + constexpr static inline Type VariantType = Type::BLEND_BSDF; + constexpr static inline size_t MaxChildCount = 2u; - BlendBSDF() : weight(0.5f) {} + inline BlendBSDF() : weight(0.5f) {} CElementTexture::SpectrumOrTexture weight; }; - struct Mask : MetaBSDF + struct Mask final : MetaBSDF { - _NBL_STATIC_INLINE_CONSTEXPR size_t MaxChildCount = 1u; + constexpr static inline Type VariantType = Type::MASK; + constexpr static inline size_t MaxChildCount = 1u; - Mask() : opacity(0.5f) {} + inline Mask() : opacity(0.5f) {} CElementTexture::SpectrumOrTexture opacity; }; - struct TwoSided : MetaBSDF + struct TwoSided final : MetaBSDF { - _NBL_STATIC_INLINE_CONSTEXPR size_t MaxChildCount = 1u; + constexpr static inline Type VariantType = Type::TWO_SIDED; + constexpr static inline size_t MaxChildCount = 1u; }; // legacy and evil struct Phong { + constexpr static inline Type VariantType = Type::PHONG; + CElementTexture::FloatOrTexture exponent = 30.f; CElementTexture::SpectrumOrTexture specularReflectance = 0.2f; CElementTexture::SpectrumOrTexture diffuseReflectance = 0.5f; }; struct Ward { + constexpr static inline Type VariantType = Type::WARD; + enum Type : uint32_t { WARD, @@ -288,7 +385,32 @@ class CElementBSDF : public IElement CElementTexture::SpectrumOrTexture specularReflectance 
= 0.2f; CElementTexture::SpectrumOrTexture diffuseReflectance = 0.5f; }; - + + // + using variant_list_t = core::type_list< + Diffuse, + RoughDiffuse, + Dielectric, + ThinDielectric, + RoughDielectric, + Conductor, + RoughConductor, + Plastic, + RoughPlastic, + Coating, + RoughCoating, + BumpMap, + NormalMap, + Phong, + Ward, + MixtureBSDF, + BlendBSDF, + Mask, + TwoSided, + DiffuseTransmitter/*, + HanrahanKrueger, + IrawanMarschner*/ + >; // static AddPropertyMap compAddPropertyMap(); @@ -304,72 +426,90 @@ class CElementBSDF : public IElement { } - inline CElementBSDF& operator=(const CElementBSDF& other) + template + inline void visit(Visitor&& func) { - IElement::operator=(other); - type = other.type; switch (type) { case CElementBSDF::Type::DIFFUSE: [[fallthrough]]; case CElementBSDF::Type::ROUGHDIFFUSE: - diffuse = other.diffuse; + func(diffuse); break; case CElementBSDF::Type::DIELECTRIC: [[fallthrough]]; case CElementBSDF::Type::THINDIELECTRIC: [[fallthrough]]; case CElementBSDF::Type::ROUGHDIELECTRIC: - dielectric = other.dielectric; + func(dielectric); break; case CElementBSDF::Type::CONDUCTOR: [[fallthrough]]; case CElementBSDF::Type::ROUGHCONDUCTOR: - conductor = other.conductor; + func(conductor); break; case CElementBSDF::Type::PLASTIC: [[fallthrough]]; case CElementBSDF::Type::ROUGHPLASTIC: - plastic = other.plastic; + func(plastic); break; case CElementBSDF::Type::COATING: [[fallthrough]]; case CElementBSDF::Type::ROUGHCOATING: - coating = other.coating; + func(coating); break; case CElementBSDF::Type::BUMPMAP: - bumpmap = other.bumpmap; + func(bumpmap); + break; + case CElementBSDF::Type::NORMALMAP: + func(normalmap); break; case CElementBSDF::Type::PHONG: - phong = other.phong; + func(phong); break; case CElementBSDF::Type::WARD: - ward = other.ward; + func(ward); break; case CElementBSDF::Type::MIXTURE_BSDF: - mixturebsdf = other.mixturebsdf; + func(mixturebsdf); break; case CElementBSDF::Type::BLEND_BSDF: - blendbsdf = other.blendbsdf; + 
func(blendbsdf); break; case CElementBSDF::Type::MASK: - mask = other.mask; + func(mask); break; case CElementBSDF::Type::TWO_SIDED: - twosided = other.twosided; + func(twosided); break; case CElementBSDF::Type::DIFFUSE_TRANSMITTER: - difftrans = other.difftrans; + func(difftrans); break; //case CElementBSDF::Type::HANRAHAN_KRUEGER: - //hk = HanrahanKrueger(); + //func(hk); //break; //case CElementBSDF::Type::IRAWAN_MARSCHNER: - //irawan = IrawanMarschner(); + //func(irwan); //break; default: break; } + } + template + inline void visit(Visitor&& visitor) const + { + const_cast(this)->visit([&](T& var)->void + { + visitor(const_cast(var)); + } + ); + } + + inline CElementBSDF& operator=(const CElementBSDF& other) + { + IElement::operator=(other); + type = other.type; + IElement::copyVariant(this,&other); return *this; } @@ -391,7 +531,8 @@ class CElementBSDF : public IElement case MASK: [[fallthrough]]; case BLEND_BSDF: [[fallthrough]]; case MIXTURE_BSDF: [[fallthrough]]; - case BUMPMAP: + case BUMPMAP: [[fallthrough]]; + case NORMALMAP: return true; default: return false; @@ -409,6 +550,7 @@ class CElementBSDF : public IElement AllPlastic plastic; AllCoating coating; BumpMap bumpmap; + NormalMap normalmap; Phong phong; Ward ward; MixtureBSDF mixturebsdf; diff --git a/include/nbl/ext/MitsubaLoader/CElementIntegrator.h b/include/nbl/ext/MitsubaLoader/CElementIntegrator.h index b3061ba380..875bb08378 100644 --- a/include/nbl/ext/MitsubaLoader/CElementIntegrator.h +++ b/include/nbl/ext/MitsubaLoader/CElementIntegrator.h @@ -212,7 +212,8 @@ class CElementIntegrator final : public IElement } Type field; - SPropertyElementData undefined; // TODO: test destructor runs + // TODO: Specral properties really need a redo + SPropertyElementData undefined; }; struct MetaIntegrator { diff --git a/include/nbl/ext/MitsubaLoader/CElementTexture.h b/include/nbl/ext/MitsubaLoader/CElementTexture.h index 5381920a38..bf986f4ddb 100644 --- a/include/nbl/ext/MitsubaLoader/CElementTexture.h 
+++ b/include/nbl/ext/MitsubaLoader/CElementTexture.h @@ -17,59 +17,40 @@ class CElementTexture : public IElement public: struct FloatOrTexture { - FloatOrTexture(CElementTexture* _tex) + inline FloatOrTexture(CElementTexture* _tex) { - value.type = SPropertyElementData::Type::INVALID; + value = std::numeric_limits::quiet_NaN(); texture = _tex; } - FloatOrTexture(float _value) + inline FloatOrTexture(const float _value) { - value.type = SPropertyElementData::Type::FLOAT; - value.fvalue = _value; + value = _value; texture = nullptr; } - FloatOrTexture(const SPropertyElementData& _other) : FloatOrTexture(nullptr) - { - operator=(_other); - } - FloatOrTexture(SPropertyElementData&& _other) : FloatOrTexture(nullptr) - { - operator=(std::move(_other)); - } - inline FloatOrTexture& operator=(const SPropertyElementData& _other) - { - return operator=(SPropertyElementData(_other)); - } - inline FloatOrTexture& operator=(SPropertyElementData&& _other) - { - switch (_other.type) - { - case SPropertyElementData::Type::INVALID: - case SPropertyElementData::Type::FLOAT: - value = std::move(_other); - break; - default: - _NBL_DEBUG_BREAK_IF(true); - break; - } - return *this; - } + inline FloatOrTexture(const FloatOrTexture&) = default; - SPropertyElementData value = {}; - CElementTexture* texture = nullptr; // only used if value.type==INVALID + inline FloatOrTexture& operator=(const FloatOrTexture&) = default; + + float value = 0.f; + CElementTexture* texture = nullptr; }; - struct SpectrumOrTexture : FloatOrTexture + struct SpectrumOrTexture { - SpectrumOrTexture(CElementTexture* _tex) : FloatOrTexture(_tex) {} - SpectrumOrTexture(float _value) : FloatOrTexture(_value) {} - SpectrumOrTexture(const SPropertyElementData& _other) : SpectrumOrTexture(nullptr) + inline SpectrumOrTexture(CElementTexture* _tex) + { + value.type = SPropertyElementData::Type::INVALID; + texture = _tex; + } + inline SpectrumOrTexture(const SPropertyElementData& _other) : SpectrumOrTexture(nullptr) { 
operator=(_other); } - SpectrumOrTexture(SPropertyElementData&& _other) : SpectrumOrTexture(nullptr) + inline SpectrumOrTexture(SPropertyElementData&& _other) : SpectrumOrTexture(nullptr) { operator=(std::move(_other)); } + inline SpectrumOrTexture(const float _value) : SpectrumOrTexture(SPropertyElementData{_value}) {} + inline SpectrumOrTexture& operator=(const SPropertyElementData& _other) { return operator=(SPropertyElementData(_other)); @@ -90,8 +71,12 @@ class CElementTexture : public IElement _NBL_DEBUG_BREAK_IF(true); break; } + texture = nullptr; return *this; } + + SPropertyElementData value = {}; + CElementTexture* texture = nullptr; }; enum Type diff --git a/include/nbl/ext/MitsubaLoader/IElement.h b/include/nbl/ext/MitsubaLoader/IElement.h index f4b9b09ebb..a09761e87f 100644 --- a/include/nbl/ext/MitsubaLoader/IElement.h +++ b/include/nbl/ext/MitsubaLoader/IElement.h @@ -196,12 +196,27 @@ class IElement std::array,SNamedPropertyElement::Type::INVALID> byPropertyType = {}; }; + // + template + struct ProcessChildCallback + { + using element_t = Derived; + // TODO: list or map of supported variants (if `visit` is present) + using func_t = bool(*)(Derived*,IElement* _child,const system::logger_opt_ptr); + + inline bool operator()(Derived* d, IElement* _child, const system::logger_opt_ptr l) const {return func(d,_child,l);} + + func_t func; + // TODO: allowed IElement types + }; + template + using ProcessChildCallbackMap = core::unordered_map,core::CaseInsensitiveHash,core::CaseInsensitiveEquals>; // members std::string id; protected: - static inline void setLimitedString(const std::string_view memberName, std::span out, SNamedPropertyElement&& _property, const system::logger_opt_ptr logger) + static inline void setLimitedString(const std::string_view memberName, std::span out, const SNamedPropertyElement& _property, const system::logger_opt_ptr logger) { auto len = strlen(_property.svalue); if (len>=out.size()) diff --git 
a/include/nbl/ext/MitsubaLoader/ParserUtil.h b/include/nbl/ext/MitsubaLoader/ParserUtil.h index 3ac2c6fe4d..787c1a534a 100644 --- a/include/nbl/ext/MitsubaLoader/ParserUtil.h +++ b/include/nbl/ext/MitsubaLoader/ParserUtil.h @@ -88,9 +88,9 @@ class ParserManager final CElementSampler, CElementShape, CElementTransform, -/// CElementBSDF, -/// CElementTexture, -/// CElementEmitter, + CElementBSDF, + CElementTexture, + CElementEmitter, CElementEmissionProfile >; diff --git a/src/nbl/ext/MitsubaLoader/CElementBSDF.cpp b/src/nbl/ext/MitsubaLoader/CElementBSDF.cpp index 93187d723f..3790e89748 100644 --- a/src/nbl/ext/MitsubaLoader/CElementBSDF.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementBSDF.cpp @@ -1,140 +1,197 @@ // Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h - #include "nbl/ext/MitsubaLoader/ParserUtil.h" -#include "nbl/ext/MitsubaLoader/CElementFactory.h" +#include "nbl/ext/MitsubaLoader/CElementBSDF.h" + +#include "nbl/ext/MitsubaLoader/ElementMacros.h" -#include "nbl/ext/MitsubaLoader/CMitsubaMetadata.h" +#include "nbl/builtin/hlsl/complex.hlsl" +#include "nbl/type_traits.h" // legacy stuff for `is_any_of` #include -namespace nbl + +namespace nbl::ext::MitsubaLoader { -namespace ext +namespace impl { -namespace MitsubaLoader +template +struct has_alpha { + constexpr static bool value = std::is_base_of_v || std::is_base_of_v; +}; +template +struct has_diffuseReflectance +{ + constexpr static bool value = std::is_base_of_v || std::is_base_of_v || + std::is_same_v || std::is_same_v; +}; +template +struct can_have_isotropicNDF +{ + constexpr static bool value = std::is_base_of_v || std::is_same_v; +}; +template +struct has_specularReflectance +{ + constexpr static bool value = std::is_base_of_v || std::is_same_v || + std::is_same_v; +}; +} - -template<> -CElementFactory::return_type CElementFactory::createElement(const char** _atts, 
ParserManager* _util) +auto CElementBSDF::compAddPropertyMap() -> AddPropertyMap { - const char* type; - const char* id; - std::string name; - if (!IElement::getTypeIDAndNameStrings(type, id, name, _atts)) - return CElementFactory::return_type(nullptr, ""); + using this_t = CElementBSDF; + AddPropertyMap retval; + +// spectrum setting +#define ADD_VARIANT_SPECTRUM_PROPERTY_CONSTRAINED(MEMBER,CONSTRAINT,...) { \ + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED(MEMBER,FLOAT,CONSTRAINT __VA_OPT__(,) __VA_ARGS__) \ + state. ## MEMBER = std::move(_property); \ + success = true; \ + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED_END; \ + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED(MEMBER,RGB,CONSTRAINT __VA_OPT__(,) __VA_ARGS__) \ + state. ## MEMBER = std::move(_property); \ + success = true; \ + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED_END; \ + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED(MEMBER,SRGB,CONSTRAINT __VA_OPT__(,) __VA_ARGS__) \ + state. ## MEMBER = std::move(_property); \ + success = true; \ + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED_END; \ + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED(MEMBER,SPECTRUM,CONSTRAINT __VA_OPT__(,) __VA_ARGS__) \ + state. 
## MEMBER = std::move(_property); \ + success = true; \ + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED_END; \ +} - static const core::unordered_map StringToType = - { - {"diffuse", CElementBSDF::Type::DIFFUSE}, - {"roughdiffuse", CElementBSDF::Type::ROUGHDIFFUSE}, - {"dielectric", CElementBSDF::Type::DIELECTRIC}, - {"thindielectric", CElementBSDF::Type::THINDIELECTRIC}, - {"roughdielectric", CElementBSDF::Type::ROUGHDIELECTRIC}, - {"conductor", CElementBSDF::Type::CONDUCTOR}, - {"roughconductor", CElementBSDF::Type::ROUGHCONDUCTOR}, - {"plastic", CElementBSDF::Type::PLASTIC}, - {"roughplastic", CElementBSDF::Type::ROUGHPLASTIC}, - {"coating", CElementBSDF::Type::COATING}, - {"roughcoating", CElementBSDF::Type::ROUGHCOATING}, - {"bumpmap", CElementBSDF::Type::BUMPMAP}, - {"normalmap", CElementBSDF::Type::BUMPMAP}, - {"phong", CElementBSDF::Type::PHONG}, - {"ward", CElementBSDF::Type::WARD}, - {"mixturebsdf", CElementBSDF::Type::MIXTURE_BSDF}, - {"blendbsdf", CElementBSDF::Type::BLEND_BSDF}, - {"mask", CElementBSDF::Type::MASK}, - {"twosided", CElementBSDF::Type::TWO_SIDED}, - {"difftrans", CElementBSDF::Type::DIFFUSE_TRANSMITTER}//, - //{"hk", CElementBSDF::Type::HANRAHAN_KRUEGER}, - //{"irawan", CElementBSDF::Type::IRAWAN_MARSCHNER} - }; + // diff trans + ADD_VARIANT_SPECTRUM_PROPERTY_CONSTRAINED(transmittance,std::is_same,DiffuseTransmitter); - auto found = StringToType.find(type); - if (found==StringToType.end()) - { - ParserLog::invalidXMLFileStructure("unknown type"); - _NBL_DEBUG_BREAK_IF(false); - return CElementFactory::return_type(nullptr, ""); - } + // diffuse + ADD_VARIANT_SPECTRUM_PROPERTY_CONSTRAINED(reflectance,derived_from,AllDiffuse); + ADD_VARIANT_SPECTRUM_PROPERTY_CONSTRAINED(diffuseReflectance,impl::has_diffuseReflectance); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(alpha,FLOAT,impl::has_alpha); + 
NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(useFastApprox,BOOLEAN,derived_from,AllDiffuse); - CElementBSDF* obj = _util->objects.construct(id); - if (!obj) - return CElementFactory::return_type(nullptr, ""); + // specular base + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED(distribution,STRING,derived_from,RoughSpecularBase) + using ndf_e = RoughSpecularBase::NormalDistributionFunction; + static const core::unordered_map StringToType = + { + {"beckmann",ndf_e::BECKMANN}, + {"ggx", ndf_e::GGX}, + {"phong", ndf_e::PHONG}, + {"as", ndf_e::ASHIKHMIN_SHIRLEY} + }; + auto found = StringToType.find(_property.getProperty()); + if (found==StringToType.end()) + return; + state.distribution = found->second; + success = true; + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED_END; + // COMMON: alpha + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(alphaU,FLOAT,impl::can_have_isotropicNDF); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(alphaV,FLOAT,impl::can_have_isotropicNDF); + ADD_VARIANT_SPECTRUM_PROPERTY_CONSTRAINED(specularReflectance,impl::has_specularReflectance); + + // conductor + ADD_VARIANT_SPECTRUM_PROPERTY_CONSTRAINED(eta,derived_from,AllConductor); + ADD_VARIANT_SPECTRUM_PROPERTY_CONSTRAINED(k,derived_from,AllConductor); + // adding twice cause two property types are allowed + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED(extEta,FLOAT,derived_from,AllConductor) + state.extEta = TransmissiveBase::findIOR(_property.getProperty(),logger); + success = true; + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED_END; + // special + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED(material,STRING,derived_from,AllConductor) + state = AllConductor(_property.getProperty(),logger); + success = true; + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED_END; + + // transmissive base + 
NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(intIOR,FLOAT,derived_from,TransmissiveBase); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(extIOR,FLOAT,derived_from,TransmissiveBase); + // adding twice cause two property types are allowed + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED(intIOR,STRING,derived_from,TransmissiveBase) + state.intIOR = TransmissiveBase::findIOR(_property.getProperty(),logger); + success = true; + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED_END; + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED(extIOR,STRING,derived_from,TransmissiveBase) + state.extIOR = TransmissiveBase::findIOR(_property.getProperty(),logger); + success = true; + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED_END; + ADD_VARIANT_SPECTRUM_PROPERTY_CONSTRAINED(specularTransmittance,derived_from,TransmissiveBase); + + // plastic + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(nonlinear,BOOLEAN,derived_from,AllPlastic); + // COMMON: diffuseReflectance + + // coating + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(thickness,FLOAT,derived_from,AllCoating); + ADD_VARIANT_SPECTRUM_PROPERTY_CONSTRAINED(sigmaA,derived_from,AllCoating); + + // bumpmap + // normalmap + + // mixture + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED(weights,STRING,std::is_same,MixtureBSDF) + std::istringstream sstr(_property.svalue); + std::string token; + while (std::getline(sstr,token,',')) + { + if (state.weightCount) + { + logger.log(" MaxChildCount of %d exceeded!",system::ILogger::ELL_ERROR,MetaBSDF::MaxChildCount); + break; + } + state.weights[state.weightCount++] = std::stof(token); + } + success = true; + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED_END; - obj->type = found->second; - // defaults - switch (obj->type) - { - case CElementBSDF::Type::DIFFUSE: - [[fallthrough]]; - 
case CElementBSDF::Type::ROUGHDIFFUSE: - obj->diffuse = CElementBSDF::AllDiffuse(); - break; - case CElementBSDF::Type::DIELECTRIC: - [[fallthrough]]; - case CElementBSDF::Type::THINDIELECTRIC: - [[fallthrough]]; - case CElementBSDF::Type::ROUGHDIELECTRIC: - obj->dielectric = CElementBSDF::AllDielectric(); - break; - case CElementBSDF::Type::CONDUCTOR: - [[fallthrough]]; - case CElementBSDF::Type::ROUGHCONDUCTOR: - obj->conductor = CElementBSDF::AllConductor(); - break; - case CElementBSDF::Type::PLASTIC: - [[fallthrough]]; - case CElementBSDF::Type::ROUGHPLASTIC: - obj->plastic = CElementBSDF::AllPlastic(); - break; - case CElementBSDF::Type::COATING: - [[fallthrough]]; - case CElementBSDF::Type::ROUGHCOATING: - obj->coating = CElementBSDF::AllCoating(); - break; - case CElementBSDF::Type::BUMPMAP: - obj->bumpmap = CElementBSDF::BumpMap(); - obj->bumpmap.wasNormal = strcmp(type,"bumpmap")!=0; - break; - case CElementBSDF::Type::PHONG: - obj->phong = CElementBSDF::Phong(); - break; - case CElementBSDF::Type::WARD: - obj->ward = CElementBSDF::Ward(); - break; - case CElementBSDF::Type::MIXTURE_BSDF: - obj->mixturebsdf = CElementBSDF::MixtureBSDF(); - break; - case CElementBSDF::Type::BLEND_BSDF: - obj->blendbsdf = CElementBSDF::BlendBSDF(); - break; - case CElementBSDF::Type::MASK: - obj->mask = CElementBSDF::Mask(); - break; - case CElementBSDF::Type::TWO_SIDED: - obj->twosided = CElementBSDF::TwoSided(); - break; - case CElementBSDF::Type::DIFFUSE_TRANSMITTER: - obj->difftrans = CElementBSDF::DiffuseTransmitter(); - break; - //case CElementBSDF::Type::HANRAHAN_KRUEGER: - //hk = CElementBSDF::HanrahanKrueger(); - //break; - //case CElementBSDF::Type::IRAWAN_MARSCHNER: - //irawan = CElementBSDF::IrawanMarschner(); - //break; - default: - break; - } - return CElementFactory::return_type(obj, std::move(name)); + // blend + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(weight,FLOAT,std::is_same,BlendBSDF); + + // mask + 
ADD_VARIANT_SPECTRUM_PROPERTY_CONSTRAINED(opacity,std::is_same,Mask); + + // twosided + + // phong + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(exponent,FLOAT,std::is_same,Phong); + // COMMON: specularReflectance + // COMMON: diffuseReflectance + + // ward + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED(variant,STRING,std::is_same,Ward) + static const core::unordered_map StringToType = + { + {"ward", Ward::Type::WARD}, + {"ward-duer", Ward::Type::WARD_DUER}, + {"balanced", Ward::Type::BALANCED} + }; + auto found = StringToType.find(_property.getProperty()); + if (found==StringToType.end()) + return; + state.variant = found->second; + success = true; + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED_END; + // COMMON: alphaU + // COMMON: alphaV + // COMMON: specularReflectance + // COMMON: diffuseReflectance + + // TODO: set HK and IRAWAN parameters, sigmaS, sigmaT, albedo, filename, repeatU, repeatV + +#undef ADD_VARIANT_SPECTRUM_PROPERTY_CONSTRAINED + + return retval; } -float CElementBSDF::TransmissiveBase::findIOR(const std::string& name) +float CElementBSDF::TransmissiveBase::findIOR(const std::string_view name, system::logger_opt_ptr logger) { - static const core::unordered_map NamedIndicesOfRefraction = + static const core::unordered_map NamedIndicesOfRefraction = { {"vacuum", 1.f}, {"helium", 1.00004f}, @@ -167,13 +224,13 @@ float CElementBSDF::TransmissiveBase::findIOR(const std::string& name) } -CElementBSDF::AllConductor::AllConductor(const std::string& material) : RoughSpecularBase(0.1) +CElementBSDF::AllConductor::AllConductor(const std::string_view material, system::logger_opt_ptr logger) : RoughSpecularBase(0.1) { // TODO fill this out with values from http://www.luxpop.com/HU_v173.cgi?OpCode=73 and https://github.com/mmp/pbrt-v3/blob/master/src/materials/metal.cpp or https://refractiveindex.info/?shelf=main&book=Cu&page=Johnson // we use Rec 709 for the Color primaries of this table, so 
Red ~= 615nm, Green ~= 535nm, Blue ~= 465nm - static const core::unordered_map, core::CaseInsensitiveHash, core::CaseInsensitiveEquals> NamedConductors = + static const core::unordered_map,3>,core::CaseInsensitiveHash,core::CaseInsensitiveEquals> NamedConductors = { -#define SPECTRUM_MACRO(R,G,B,X,Y,Z) {SPropertyElementData(SPropertyElementData::Type::RGB,core::vectorSIMDf(R,G,B)),SPropertyElementData(SPropertyElementData::Type::RGB,core::vectorSIMDf(X,Y,Z))} +#define SPECTRUM_MACRO(R,G,B,X,Y,Z) {{R,X},{G,Y},{B,Z}} {"a-C", SPECTRUM_MACRO(1.6855f, 1.065f, 1.727f, 0.0f, 0.009f, 0.0263f)}, // there is no "a-C", but "a-C:H; data from palik" {"Ag", SPECTRUM_MACRO(0.059481f, 0.055090f, 0.046878f, 4.1367f, 3.4574f, 2.8028f)}, {"Al", SPECTRUM_MACRO(1.3404f, 0.95151f, 0.68603f, 7.3509f, 6.4542f, 5.6351f)}, @@ -238,574 +295,122 @@ CElementBSDF::AllConductor::AllConductor(const std::string& material) : RoughSpe }; auto found = NamedConductors.find(material); - if (found == NamedConductors.end()) + if (found==NamedConductors.end()) { _NBL_DEBUG_BREAK_IF(true); - ParserLog::invalidXMLFileStructure("IoR Table lookup not implemented for material preset: " + material); + logger.log("Named material %s in failed to be found, defaulting to \"none\"",system::ILogger::ELL_ERROR,material.data()); found = NamedConductors.find("none"); assert(found != NamedConductors.end()); } - eta = found->second.first; - k = found->second.second; - extEta = TransmissiveBase::findIOR("air"); + const auto etaK = found->second; + eta = SPropertyElementData(SPropertyElementData::Type::RGB,float32_t4{etaK.r.real(),etaK.g.real(),etaK.b.real(),0.f}); + k = SPropertyElementData(SPropertyElementData::Type::RGB,float32_t4{etaK.r.real(),etaK.g.real(),etaK.b.real(),0.f}); + extEta = TransmissiveBase::findIOR("air",logger); } -bool CElementBSDF::addProperty(SNamedPropertyElement&& _property) + +bool CElementBSDF::processChildData(IElement* _child, const std::string& name, system::logger_opt_ptr logger) { - bool 
error = false; - auto dispatch = [&](auto func) -> void - { - switch (type) - { - case CElementBSDF::Type::DIFFUSE: - [[fallthrough]]; - case CElementBSDF::Type::ROUGHDIFFUSE: - func(diffuse); - break; - case CElementBSDF::Type::DIELECTRIC: - [[fallthrough]]; - case CElementBSDF::Type::THINDIELECTRIC: - [[fallthrough]]; - case CElementBSDF::Type::ROUGHDIELECTRIC: - func(dielectric); - break; - case CElementBSDF::Type::CONDUCTOR: - [[fallthrough]]; - case CElementBSDF::Type::ROUGHCONDUCTOR: - func(conductor); - break; - case CElementBSDF::Type::PLASTIC: - [[fallthrough]]; - case CElementBSDF::Type::ROUGHPLASTIC: - func(plastic); - break; - case CElementBSDF::Type::COATING: - [[fallthrough]]; - case CElementBSDF::Type::ROUGHCOATING: - func(coating); - break; - case CElementBSDF::Type::BUMPMAP: - func(bumpmap); - break; - case CElementBSDF::Type::PHONG: - func(phong); - break; - case CElementBSDF::Type::WARD: - func(ward); - break; - case CElementBSDF::Type::MIXTURE_BSDF: - func(mixturebsdf); - break; - case CElementBSDF::Type::BLEND_BSDF: - func(blendbsdf); - break; - case CElementBSDF::Type::MASK: - func(mask); - break; - case CElementBSDF::Type::TWO_SIDED: - func(twosided); - break; - case CElementBSDF::Type::DIFFUSE_TRANSMITTER: - func(difftrans); - break; - //case CElementBSDF::Type::HANRAHAN_KRUEGER: - //func(hk); - //break; - //case CElementBSDF::Type::IRAWAN_MARSCHNER: - //func(irwan); - //break; - default: - error = true; - break; - } - }; + if (!_child) + return true; -#define SET_FLOAT(MEMBER, ... ) [&]() -> void { \ - dispatch([&](auto& state) -> void { \ - if constexpr (is_any_of::type,__VA_ARGS__>::value) \ - { \ - switch (_property.type) { \ - case SPropertyElementData::Type::FLOAT: \ - state. ## MEMBER = SPropertyElementData(_property); \ - break; \ - default: \ - error = true; \ - break; \ - } \ - } \ - }); \ - } -#define SET_SPECTRUM(MEMBER, ... 
) [&]() -> void { \ - dispatch([&](auto& state) -> void { \ - if constexpr (is_any_of::type,__VA_ARGS__>::value) \ - { \ - switch (_property.type) { \ - case SPropertyElementData::Type::FLOAT: \ - case SPropertyElementData::Type::RGB: \ - case SPropertyElementData::Type::SRGB: \ - case SPropertyElementData::Type::SPECTRUM: \ - state. ## MEMBER = SPropertyElementData(_property); \ - break; \ - default: \ - error = true; \ - break; \ - } \ - } \ - }); \ - } -#define SET_PROPERTY_TEMPLATE(MEMBER,PROPERTY_TYPE, ... ) [&]() -> void { \ - dispatch([&](auto& state) -> void { \ - if constexpr (is_any_of::type,__VA_ARGS__>::value) \ + using this_t = CElementBSDF; + +#define SET_TEXTURE_CONSTRAINED(MEMBER,CONSTRAINT,... ) {#MEMBER,{.func = [](this_t* _this, IElement* _child, const system::logger_opt_ptr logger)->bool \ + { \ + bool success = false; \ + auto _texture = static_cast(_child); \ + _this->visit([&_texture,logger,&success](auto& state)->void \ { \ - if (_property.type!=PROPERTY_TYPE) { \ - error = true; \ - return; \ - } \ - state. 
## MEMBER = _property.getProperty(); \ + if constexpr (CONSTRAINT __VA_OPT__(,) __VA_ARGS__>::value) \ + { +#define SET_TEXTURE_CONSTRAINED_END } \ } \ - }); \ - } - - auto processReflectance = SET_SPECTRUM(reflectance,AllDiffuse); - auto processDistribution = [&]() -> void - { - dispatch([&](auto& state) -> void { - using state_type = std::remove_reference::type; - - if constexpr (std::is_base_of::value) - { - static const core::unordered_map StringToType = - { - {"beckmann",RoughSpecularBase::NormalDistributionFunction::BECKMANN}, - {"ggx", RoughSpecularBase::NormalDistributionFunction::GGX}, - {"phong", RoughSpecularBase::NormalDistributionFunction::PHONG}, - {"as", RoughSpecularBase::NormalDistributionFunction::ASHIKHMIN_SHIRLEY} - }; - - auto found = StringToType.end(); - if (_property.type==SPropertyElementData::Type::STRING) - found = StringToType.find(_property.getProperty()); - if (found==StringToType.end()) - { - error = true; - return; - } - state.distribution = found->second; - } - }); - }; -#define TRANSMISSIVE_TYPES AllDielectric,AllPlastic,AllCoating -#define SPECULAR_TYPES TRANSMISSIVE_TYPES,AllConductor - auto processAlpha = SET_FLOAT(alpha, AllDiffuse,SPECULAR_TYPES); - auto processAlphaU = SET_FLOAT(alphaU, SPECULAR_TYPES); - auto processAlphaV = SET_FLOAT(alphaV, SPECULAR_TYPES); - auto processUseFastApprox = SET_PROPERTY_TEMPLATE(useFastApprox,SPropertyElementData::Type::BOOLEAN,AllDiffuse); - auto processIntIOR = [&]() -> void - { - dispatch([&](auto& state) -> void { - using state_type = std::remove_reference::type; - - if constexpr (is_any_of::value) - { - if (_property.type==SPropertyElementData::Type::FLOAT) - state.intIOR = _property.getProperty(); - else if (_property.type==SPropertyElementData::Type::STRING) - state.intIOR = TransmissiveBase::findIOR(_property.getProperty()); - else - error = true; - } - }); + ); \ + return success; \ + } \ +}} +#define SET_TEXTURE_CONSTRAINED_SIMPLE(MEMBER,CONSTRAINT,... 
) SET_TEXTURE_CONSTRAINED(MEMBER,CONSTRAINT __VA_OPT__(,) __VA_ARGS__) \ + state. ## MEMBER = _texture; \ + success = true; \ +SET_TEXTURE_CONSTRAINED_END + + // TODO: store this somewhere outside a global + static const ProcessChildCallbackMap TextureCallbacks = + { + SET_TEXTURE_CONSTRAINED_SIMPLE(transmittance,std::is_same,DiffuseTransmitter), + SET_TEXTURE_CONSTRAINED_SIMPLE(reflectance,derived_from,AllDiffuse), + SET_TEXTURE_CONSTRAINED_SIMPLE(diffuseReflectance,impl::has_diffuseReflectance), + SET_TEXTURE_CONSTRAINED_SIMPLE(alpha,impl::has_alpha), + SET_TEXTURE_CONSTRAINED_SIMPLE(alphaU,impl::can_have_isotropicNDF), + SET_TEXTURE_CONSTRAINED_SIMPLE(alphaV,impl::can_have_isotropicNDF), + SET_TEXTURE_CONSTRAINED_SIMPLE(specularReflectance,impl::has_specularReflectance), + SET_TEXTURE_CONSTRAINED_SIMPLE(specularTransmittance,derived_from,TransmissiveBase), + SET_TEXTURE_CONSTRAINED_SIMPLE(sigmaA,derived_from,AllCoating), + SET_TEXTURE_CONSTRAINED("",is_any_of,BumpMap,NormalMap) + state.texture = _texture; + success = true; + SET_TEXTURE_CONSTRAINED_END, + SET_TEXTURE_CONSTRAINED_SIMPLE(weight,std::is_same,BlendBSDF), + SET_TEXTURE_CONSTRAINED_SIMPLE(opacity,std::is_same,Mask), + SET_TEXTURE_CONSTRAINED_SIMPLE(exponent,std::is_same,Phong) }; - auto processExtIOR = [&]() -> void - { - dispatch([&](auto& state) -> void { - using state_type = std::remove_reference::type; +#undef SET_TEXTURE_CONSTRAINED +#undef SET_TEXTURE_CONSTRAINED_SIMPLE - if constexpr (is_any_of::value) - { - if (_property.type==SPropertyElementData::Type::FLOAT) - state.extIOR = _property.getProperty(); - else if (_property.type==SPropertyElementData::Type::STRING) - state.extIOR = TransmissiveBase::findIOR(_property.getProperty()); - else - error = true; - } - }); - }; - auto processSpecularReflectance = SET_SPECTRUM(specularReflectance, SPECULAR_TYPES,Phong,Ward); - auto processDiffuseReflectance = SET_SPECTRUM(diffuseReflectance, AllDiffuse,AllPlastic,Phong,Ward); - auto 
processSpecularTransmittance = SET_SPECTRUM(specularTransmittance, TRANSMISSIVE_TYPES); -#undef SPECULAR_TYPES -#undef TRANSMISSIVE_TYPES - auto processMaterial = [&]() -> void - { - dispatch([&](auto& state) -> void { - using state_type = std::remove_reference::type; - - if constexpr (std::is_same::value) - { - if (_property.type == SPropertyElementData::Type::STRING) - conductor = AllConductor(_property.getProperty()); - else - error = true; - }/* - else - { - if constexpr (std::is_same::value) - { - } - }*/ - }); - }; - auto processEta = SET_SPECTRUM(eta, AllConductor); - auto processK = SET_SPECTRUM(k, AllConductor); - auto processExtEta = [&]() -> void - { - dispatch([&](auto& state) -> void { - using state_type = std::remove_reference::type; - - if constexpr (std::is_same::value) - { - if (_property.type==SPropertyElementData::Type::FLOAT) - state.extEta = _property.getProperty(); - else if (_property.type==SPropertyElementData::Type::STRING) - state.extEta = TransmissiveBase::findIOR(_property.getProperty()); - else - error = true; - } - }); - }; - auto processNonlinear = SET_PROPERTY_TEMPLATE(nonlinear, SPropertyElementData::Type::BOOLEAN, AllPlastic); - auto processThickness = SET_PROPERTY_TEMPLATE(thickness, SPropertyElementData::Type::FLOAT, AllCoating); - auto processSigmaA = SET_SPECTRUM(sigmaA, AllCoating); - auto processExponent = SET_FLOAT(exponent, Phong); - auto processVariant = [&]() -> void - { - dispatch([&](auto& state) -> void { - using state_type = std::remove_reference::type; - - if constexpr (std::is_same::value) + switch (_child->getType()) + { + case IElement::Type::TEXTURE: + { + auto found = TextureCallbacks.find(name); + if (found==TextureCallbacks.end()) + found = TextureCallbacks.find(""); + if (found==TextureCallbacks.end()) { - static const core::unordered_map StringToType = - { - {"ward", Ward::Type::WARD}, - {"ward-duer", Ward::Type::WARD_DUER}, - {"balanced", Ward::Type::BALANCED} - }; - auto found = StringToType.end(); - if 
(_property.type==SPropertyElementData::Type::STRING) - found = StringToType.find(_property.getProperty()); - if (found==StringToType.end()) - { - error = true; - return; - } - state.variant = found->second; + logger.log("No can have nested inside it with name \"%s\"!",system::ILogger::ELL_ERROR,name.c_str()); + return false; } - }); - }; - auto processWeights = [&]() -> void - { - dispatch([&](auto& state) -> void { - using state_type = std::remove_reference::type; - - if constexpr (std::is_same::value) + if (found->second(this,_child,logger)) { - if (_property.type!=SPropertyElementData::Type::STRING) - { - error = true; - return; - } - - std::istringstream sstr(_property.getProperty()); - std::string token; - while (std::getline(sstr, token, ',')) - state.weights[state.weightCount++] = std::stof(token); + logger.log( + "Failed to parse with name \"%s\" nested inside of type %d!", + system::ILogger::ELL_ERROR,name.c_str(),type + ); + return true; } - }); - }; - auto processWeight = SET_SPECTRUM(weight, BlendBSDF); - auto processOpacity = SET_SPECTRUM(opacity, Mask); - auto processTransmittance = SET_SPECTRUM(transmittance, DiffuseTransmitter); - // TODO: set HK and IRAWAN parameters - /* - auto processField = [&]() -> void - { - dispatch([&](auto& state) -> void + return true; + } + case IElement::Type::BSDF: { - using state_type = std::remove_reference::type; - if constexpr (std::is_same::value) + size_t maxChildCount = 0; { - if (_property.type != SPropertyElementData::Type::STRING) - { - error = true; - return; - } - auto found = StringToType.find(_property.svalue); - if (found!=StringToType.end()) - state.field = found->second; - else - state.field = FieldExtraction::Type::INVALID; - } - }); - }; - */ -#undef SET_FLOAT -#undef SET_SPECTRUM -#undef SET_PROPERTY_TEMPLATE - const core::unordered_map, core::CaseInsensitiveHash, core::CaseInsensitiveEquals> SetPropertyMap = - { - {"reflectance", processReflectance}, - {"distribution", processDistribution}, - 
{"alpha", processAlpha}, - {"alphaU", processAlphaU}, - {"alphaV", processAlphaV}, - {"useFastApprox", processUseFastApprox}, - {"intIOR", processIntIOR}, - {"extIOR", processExtIOR}, - {"specularReflectance", processSpecularReflectance}, - {"diffuseReflectance", processDiffuseReflectance}, - {"specularTransmittance", processSpecularTransmittance}, - {"material", processMaterial}, - {"eta", processEta}, - {"k", processK}, - {"extEta", processExtEta}, - {"nonlinear", processNonlinear}, - {"thickness", processThickness}, - {"sigmaA", processSigmaA}, - {"exponent", processExponent}, - {"variant", processVariant}, - {"weights", processWeights}, - {"weight", processWeight}, - {"opacity", processOpacity}, - {"transmittance", processTransmittance}//, - //{"sigmaS", processSigmaS}, - //{"sigmaT", processSigmaT}, - //{"albedo", processAlbedo}, - //{"filename", processFilename}, - //{"repeatU", processRepeatU}, - //{"repeatV", processRepeatV} - }; - - auto found = SetPropertyMap.find(_property.name); - if (found==SetPropertyMap.end()) - { - ParserLog::invalidXMLFileStructure("No BSDF can have such property set with name: "+_property.name); - return false; - } - - found->second(); - return !error; -} - - -bool CElementBSDF::processChildData(IElement* _child, const std::string& name) -{ - if (!_child) - return true; - - switch (_child->getType()) - { - case IElement::Type::TEXTURE: - { - auto _texture = static_cast(_child); - - bool error = false; - auto dispatch = [&](auto func) -> void - { - switch (type) + const auto* _this = this; + visit([&maxChildCount,_this](const auto& state)->void { - case CElementBSDF::Type::DIFFUSE: - [[fallthrough]]; - case CElementBSDF::Type::ROUGHDIFFUSE: - func(diffuse); - break; - case CElementBSDF::Type::DIELECTRIC: - [[fallthrough]]; - case CElementBSDF::Type::THINDIELECTRIC: - [[fallthrough]]; - case CElementBSDF::Type::ROUGHDIELECTRIC: - func(dielectric); - break; - case CElementBSDF::Type::CONDUCTOR: - [[fallthrough]]; - case 
CElementBSDF::Type::ROUGHCONDUCTOR: - func(conductor); - break; - case CElementBSDF::Type::PLASTIC: - [[fallthrough]]; - case CElementBSDF::Type::ROUGHPLASTIC: - func(plastic); - break; - case CElementBSDF::Type::COATING: - [[fallthrough]]; - case CElementBSDF::Type::ROUGHCOATING: - func(coating); - break; - case CElementBSDF::Type::BUMPMAP: - func(bumpmap); - break; - case CElementBSDF::Type::PHONG: - func(phong); - break; - case CElementBSDF::Type::WARD: - func(ward); - break; - case CElementBSDF::Type::MIXTURE_BSDF: - func(mixturebsdf); - break; - case CElementBSDF::Type::BLEND_BSDF: - func(blendbsdf); - break; - case CElementBSDF::Type::MASK: - func(mask); - break; - case CElementBSDF::Type::TWO_SIDED: - func(twosided); - break; - case CElementBSDF::Type::DIFFUSE_TRANSMITTER: - func(difftrans); - break; - //case CElementBSDF::Type::HANRAHAN_KRUEGER: - //func(hk); - //break; - //case CElementBSDF::Type::IRAWAN_MARSCHNER: - //func(irwan); - //break; - default: - error = true; - break; + using state_t = std::remove_reference_t; + if constexpr (std::is_base_of_v) + maxChildCount = state_t::MaxChildCount; } - }; -#define SET_TEXTURE(MEMBER, ... ) [&]() -> void { \ - dispatch([&](auto& state) -> void { \ - if constexpr (is_any_of::type,__VA_ARGS__>::value) \ - { \ - state. ## MEMBER.value.type = SPropertyElementData::Type::INVALID; \ - state. 
## MEMBER.texture = _texture; \ - } \ - }); \ - } - - auto processReflectance = SET_TEXTURE(reflectance, AllDiffuse); -#define TRANSMISSIVE_TYPES AllDielectric,AllPlastic,AllCoating -#define SPECULAR_TYPES TRANSMISSIVE_TYPES,AllConductor - auto processAlpha = SET_TEXTURE(alpha, AllDiffuse,SPECULAR_TYPES); - auto processAlphaU = SET_TEXTURE(alphaU, SPECULAR_TYPES); - auto processAlphaV = SET_TEXTURE(alphaV, SPECULAR_TYPES); - auto processSpecularReflectance = SET_TEXTURE(specularReflectance, SPECULAR_TYPES,Phong,Ward); - auto processDiffuseReflectance = SET_TEXTURE(diffuseReflectance, AllDiffuse,AllPlastic,Phong,Ward); - auto processSpecularTransmittance = SET_TEXTURE(specularTransmittance, TRANSMISSIVE_TYPES); - auto processSigmaA = SET_TEXTURE(sigmaA, AllCoating); - auto processExponent = SET_TEXTURE(exponent, Phong); - auto processWeight = SET_TEXTURE(weight, BlendBSDF); - auto processOpacity = SET_TEXTURE(opacity, Mask); - auto processTransmittance = SET_TEXTURE(transmittance, DiffuseTransmitter); -#undef TRANSMISSIVE_TYPES -#undef SPECULAR_TYPES -#undef SET_TEXTURE - - const core::unordered_map, core::CaseInsensitiveHash, core::CaseInsensitiveEquals> SetChildMap = - { - {"reflectance", processReflectance}, - {"alpha", processAlpha}, - {"alphaU", processAlphaU}, - {"alphaV", processAlphaV}, - {"specularReflectance", processSpecularReflectance}, - {"diffuseReflectance", processDiffuseReflectance}, - {"specularTransmittance", processSpecularTransmittance}, - {"sigmaA", processSigmaA}, - {"exponent", processExponent}, - {"weight", processWeight}, - {"opacity", processOpacity}, - {"transmittance", processTransmittance}//, - //{"sigmaS", processSigmaS}, - //{"sigmaT", processSigmaT}, - //{"albedo", processAlbedo} - }; - - switch (type) - { - case Type::BUMPMAP: - bumpmap.texture = _texture; - break; - default: - { - auto found = SetChildMap.find(name); - if (found!=SetChildMap.end()) - found->second(); - else - { - _NBL_DEBUG_BREAK_IF(true); - 
ParserLog::invalidXMLFileStructure("No BSDF can have such property set with name: " + name); - return false; - } - } - break; - } - - if (error) - return false; + ); } - break; - case IElement::Type::BSDF: + if (meta_common.childCount(_child); - switch (type) - { - case Type::COATING: - [[fallthrough]]; - case Type::ROUGHCOATING: - if (coating.childCount < AllCoating::MaxChildCount) - coating.bsdf[coating.childCount++] = _bsdf; - else - return false; - break; - case Type::BUMPMAP: - if (bumpmap.childCount < BumpMap::MaxChildCount) - bumpmap.bsdf[bumpmap.childCount++] = _bsdf; - else - return false; - break; - case Type::MIXTURE_BSDF: - if (mixturebsdf.childCount < MixtureBSDF::MaxChildCount) - mixturebsdf.bsdf[mixturebsdf.childCount++] = _bsdf; - else - return false; - break; - case Type::BLEND_BSDF: - if (blendbsdf.childCount < BlendBSDF::MaxChildCount) - blendbsdf.bsdf[blendbsdf.childCount++] = _bsdf; - else - return false; - break; - case Type::MASK: - if (mask.childCount < Mask::MaxChildCount) - mask.bsdf[mask.childCount++] = _bsdf; - else - return false; - break; - case Type::TWO_SIDED: - if (twosided.childCount < TwoSided::MaxChildCount) - twosided.bsdf[twosided.childCount++] = _bsdf; - else - return false; - break; - default: - return false; - break; - } + twosided.bsdf[twosided.childCount++] = _bsdf; + return true; } - break; + logger.log(" cannot have more than %d other s nested inside it!",system::ILogger::ELL_ERROR,type,maxChildCount); + return false; + } default: + logger.log("Unsupported <%s> nested inside only and are allowed!",system::ILogger::ELL_ERROR,_child->getLogName()); return false; - break; } return true; } -bool CElementBSDF::onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* metadata) +bool CElementBSDF::onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt_ptr logger) { NBL_EXT_MITSUBA_LOADER_ELEMENT_INVALID_TYPE_CHECK(true); @@ -816,6 +421,4 @@ bool 
CElementBSDF::onEndTag(asset::IAssetLoader::IAssetLoaderOverride* _override return true; } -} -} } \ No newline at end of file diff --git a/src/nbl/ext/MitsubaLoader/CElementEmissionProfile.cpp b/src/nbl/ext/MitsubaLoader/CElementEmissionProfile.cpp index 8b0b34e3e4..89ca3c00e2 100644 --- a/src/nbl/ext/MitsubaLoader/CElementEmissionProfile.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementEmissionProfile.cpp @@ -17,7 +17,12 @@ auto CElementEmissionProfile::compAddPropertyMap() -> AddPropertyMap retval; - NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_PROPERTY(filename,STRING); + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY("filename",STRING) + { + setLimitedString("filename",_this->filename,_property,logger); + return true; + } + }); NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY("normalization",STRING) { const auto normalizeS = std::string(_property.svalue); diff --git a/src/nbl/ext/MitsubaLoader/CElementEmitter.cpp b/src/nbl/ext/MitsubaLoader/CElementEmitter.cpp index c7115dfcef..eb9aa154dd 100644 --- a/src/nbl/ext/MitsubaLoader/CElementEmitter.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementEmitter.cpp @@ -67,47 +67,49 @@ auto CElementEmitter::compAddPropertyMap() -> AddPropertyMap } }); - // spectrum setting -#define ADD_SPECTRUM(MEMBER,CONSTRAINT,...) { \ - NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED(MEMBER,FLOAT,CONSTRAINT __VA_OPT__(,) __VA_ARGS__) \ - state. ## MEMBER.x = state. ## MEMBER.y = state. ## MEMBER.z = _property.getProperty(); \ - success = true; \ - NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED_END; \ - NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED(MEMBER,RGB,CONSTRAINT __VA_OPT__(,) __VA_ARGS__) \ - state. ## MEMBER = _property.getProperty(); \ - success = true; \ - NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED_END; \ - NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED(MEMBER,SRGB,CONSTRAINT __VA_OPT__(,) __VA_ARGS__) \ - state. 
## MEMBER = _property.getProperty(); \ - success = true; \ - NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED_END; \ - NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED(MEMBER,SPECTRUM,CONSTRAINT __VA_OPT__(,) __VA_ARGS__) \ - state. ## MEMBER = _property.getProperty(); \ - success = true; \ - NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED_END; \ - } - // base NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(samplingWeight,FLOAT,derived_from,SampledEmitter); +// spectrum setting +#define ADD_VARIANT_SPECTRUM_PROPERTY_CONSTRAINED(MEMBER,CONSTRAINT,...) { \ + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED(MEMBER,FLOAT,CONSTRAINT __VA_OPT__(,) __VA_ARGS__) \ + state. ## MEMBER.x = state. ## MEMBER.y = state. ## MEMBER.z = _property.getProperty(); \ + success = true; \ + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED_END; \ + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED(MEMBER,RGB,CONSTRAINT __VA_OPT__(,) __VA_ARGS__) \ + state. ## MEMBER = _property.getProperty(); \ + success = true; \ + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED_END; \ + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED(MEMBER,SRGB,CONSTRAINT __VA_OPT__(,) __VA_ARGS__) \ + state. ## MEMBER = _property.getProperty(); \ + success = true; \ + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED_END; \ + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED(MEMBER,SPECTRUM,CONSTRAINT __VA_OPT__(,) __VA_ARGS__) \ + state. 
## MEMBER = _property.getProperty(); \ + success = true; \ + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED_END; \ +} + // delta - ADD_SPECTRUM(intensity,derived_from,DeltaDistributionEmitter); + ADD_VARIANT_SPECTRUM_PROPERTY_CONSTRAINED(intensity,derived_from,DeltaDistributionEmitter); // point covered by delta // non zero solid angle - ADD_SPECTRUM(radiance,derived_from,SolidAngleEmitter); + ADD_VARIANT_SPECTRUM_PROPERTY_CONSTRAINED(radiance,derived_from,SolidAngleEmitter); // area covered by solid angle // directional - ADD_SPECTRUM(irradiance,std::is_same,Directional); + ADD_VARIANT_SPECTRUM_PROPERTY_CONSTRAINED(irradiance,std::is_same,Directional); // collimated - ADD_SPECTRUM(power,std::is_same,Collimated); + ADD_VARIANT_SPECTRUM_PROPERTY_CONSTRAINED(power,std::is_same,Collimated); + +#undef ADD_VARIANT_SPECTRUM_PROPERTY_CONSTRAINED // environment map NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED("filename",STRING,std::is_same,EnvMap) { - setLimitedString("filename",_this->envmap.filename,std::move(_property),logger); return true; + setLimitedString("filename",_this->envmap.filename,_property,logger); return true; } ); NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(scale,FLOAT,std::is_same,EnvMap); diff --git a/src/nbl/ext/MitsubaLoader/CElementFilm.cpp b/src/nbl/ext/MitsubaLoader/CElementFilm.cpp index d558975998..fec400829f 100644 --- a/src/nbl/ext/MitsubaLoader/CElementFilm.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementFilm.cpp @@ -103,22 +103,22 @@ auto CElementFilm::compAddPropertyMap() -> AddPropertyMap NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED("variable",STRING,std::is_same,M) { - setLimitedString("variable",_this->outputFilePath,std::move(_property),logger); return true; + setLimitedString("variable",_this->outputFilePath,_property,logger); return true; } ); NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY("outputFilePath",STRING) { - 
setLimitedString("outputFilePath",_this->outputFilePath,std::move(_property),logger); return true; + setLimitedString("outputFilePath",_this->outputFilePath,_property,logger); return true; } }); NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY("bloomFilePath",STRING) { - setLimitedString("bloomFilePath",_this->denoiserTonemapperArgs,std::move(_property),logger); return true; + setLimitedString("bloomFilePath",_this->denoiserTonemapperArgs,_property,logger); return true; } }); NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY("tonemapper",STRING) { - setLimitedString("tonemapper",_this->denoiserTonemapperArgs,std::move(_property),logger); return true; + setLimitedString("tonemapper",_this->denoiserTonemapperArgs,_property,logger); return true; } }); diff --git a/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp b/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp index fa7e89759d..9854912fc7 100644 --- a/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp @@ -166,16 +166,15 @@ auto CElementIntegrator::compAddPropertyMap() -> AddPropertyMapfield.undefined = _property; // TODO: redo + _this->field.undefined = _property; return true; } ); NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED("undefined",SPECTRUM,std::is_same,FieldExtraction) { - _this->field.undefined = _property; // TODO: redo + _this->field.undefined = _property; return true; } ); diff --git a/src/nbl/ext/MitsubaLoader/CElementShape.cpp b/src/nbl/ext/MitsubaLoader/CElementShape.cpp index c6db773afa..03b50f3f7c 100644 --- a/src/nbl/ext/MitsubaLoader/CElementShape.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementShape.cpp @@ -34,7 +34,7 @@ auto CElementShape::compAddPropertyMap() -> AddPropertyMap // LoadedFromFileBase NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED("filename",STRING,derived_from,LoadedFromFileBase) { - setLimitedString("filename",_this->serialized.filename,std::move(_property),logger); return true; + 
setLimitedString("filename",_this->serialized.filename,_property,logger); return true; } ); NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(faceNormals,BOOLEAN,derived_from,LoadedFromFileBase); diff --git a/src/nbl/ext/MitsubaLoader/CElementTexture.cpp b/src/nbl/ext/MitsubaLoader/CElementTexture.cpp index 0add6ac3ca..213a52cba3 100644 --- a/src/nbl/ext/MitsubaLoader/CElementTexture.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementTexture.cpp @@ -37,7 +37,7 @@ auto CElementTexture::compAddPropertyMap() -> AddPropertyMap // bitmap NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED("filename",STRING,std::is_same,Bitmap) { - setLimitedString("filename",_this->bitmap.filename,std::move(_property),logger); return true; + setLimitedString("filename",_this->bitmap.filename,_property,logger); return true; } ); // special @@ -85,8 +85,7 @@ auto CElementTexture::compAddPropertyMap() -> AddPropertyMap NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(voffset,FLOAT,std::is_same,Bitmap); NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(uscale,FLOAT,std::is_same,Bitmap); NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(vscale,FLOAT,std::is_same,Bitmap); - NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(vscale,FLOAT,std::is_same,Bitmap); - NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED("filename",STRING,std::is_same,Bitmap) + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED("channel",STRING,std::is_same,Bitmap) { static const core::unordered_map StringToType = { diff --git a/src/nbl/ext/MitsubaLoader/CMakeLists.txt b/src/nbl/ext/MitsubaLoader/CMakeLists.txt index ef3f326ad5..0ec8ba0685 100644 --- a/src/nbl/ext/MitsubaLoader/CMakeLists.txt +++ b/src/nbl/ext/MitsubaLoader/CMakeLists.txt @@ -34,9 +34,9 @@ set(NBL_EXT_MITSUBA_LOADER_SRC CElementSampler.cpp CElementTransform.cpp CElementShape.cpp -# CElementBSDF.cpp -# CElementTexture.cpp -# 
CElementEmitter.cpp + CElementBSDF.cpp + CElementTexture.cpp + CElementEmitter.cpp CElementEmissionProfile.cpp ParserUtil.cpp CSerializedLoader.cpp diff --git a/src/nbl/ext/MitsubaLoader/ElementMacros.h b/src/nbl/ext/MitsubaLoader/ElementMacros.h index ab88a11cb4..3bbb997e6e 100644 --- a/src/nbl/ext/MitsubaLoader/ElementMacros.h +++ b/src/nbl/ext/MitsubaLoader/ElementMacros.h @@ -13,6 +13,7 @@ // when you know that there's a member of `this_t` with identifier equal to NAME #define NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_PROPERTY(NAME,PROP_TYPE) NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY(#NAME,PROP_TYPE) \ {\ + static_assert(SNamedPropertyElement::Type::PROP_TYPE!=SNamedPropertyElement::Type::STRING); \ _this->NAME = _property.getProperty(); \ return true; \ } \ @@ -29,7 +30,7 @@ #define NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED(NAME,PROP_TYPE,CONSTRAINT,...) NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED(#NAME,PROP_TYPE,CONSTRAINT __VA_OPT__(,) __VA_ARGS__) \ {\ bool success = false; \ - _this->visit([&_property,&success](auto& state)->void \ + _this->visit([&_property,logger,&success](auto& state)->void \ { \ if constexpr (CONSTRAINT __VA_OPT__(,) __VA_ARGS__>::value) \ { @@ -45,6 +46,7 @@ // This it to `NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED` what `NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_PROPERTY` is to `NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY` // So basically you know the member is the same across the constraint filtered types #define NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(NAME,PROP_TYPE,CONSTRAINT,...) NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED(NAME,PROP_TYPE,CONSTRAINT __VA_OPT__(,) __VA_ARGS__) \ + static_assert(SNamedPropertyElement::Type::PROP_TYPE!=SNamedPropertyElement::Type::STRING); \ state. 
## NAME = _property.getProperty(); \ success = true; \ NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED_END diff --git a/src/nbl/ext/MitsubaLoader/ParserUtil.cpp b/src/nbl/ext/MitsubaLoader/ParserUtil.cpp index 6f1af1e301..6b7a3f5f66 100644 --- a/src/nbl/ext/MitsubaLoader/ParserUtil.cpp +++ b/src/nbl/ext/MitsubaLoader/ParserUtil.cpp @@ -12,9 +12,9 @@ #include "nbl/ext/MitsubaLoader/CElementShape.h" #include "nbl/ext/MitsubaLoader/CElementTransform.h" //#include "nbl/ext/MitsubaLoader/CElementAnimation.h" -//#include "nbl/ext/MitsubaLoader/CElementBSDF.h" +#include "nbl/ext/MitsubaLoader/CElementBSDF.h" //#include "nbl/ext/MitsubaLoader/CElementTexture.h" -//#include "nbl/ext/MitsubaLoader/CElementEmitter.h" +#include "nbl/ext/MitsubaLoader/CElementEmitter.h" #include "nbl/ext/MitsubaLoader/CElementEmissionProfile.h" #include "expat/lib/expat.h" @@ -420,9 +420,9 @@ ParserManager::ParserManager() : propertyElements({ {"shape", {.create=ParserManager::CreateElement::__call,.retvalGoesOnStack=true}}, {"transform", {.create=ParserManager::CreateElement::__call,.retvalGoesOnStack=true}}, // {"animation", {.create=ParserManager::CreateElement::__call,.retvalGoesOnStack=true}}, -// {"bsdf", {.create=ParserManager::CreateElement::__call,.retvalGoesOnStack=true}}, -// {"texture", {.create=ParserManager::CreateElement::__call,.retvalGoesOnStack=true}}, -// {"emitter", {.create=ParserManager::CreateElement::__call,.retvalGoesOnStack=true}}, + {"bsdf", {.create=ParserManager::CreateElement::__call,.retvalGoesOnStack=true}}, + {"texture", {.create=ParserManager::CreateElement::__call,.retvalGoesOnStack=true}}, + {"emitter", {.create=ParserManager::CreateElement::__call,.retvalGoesOnStack=true}}, {"emissionprofile", {.create=ParserManager::CreateElement::__call,.retvalGoesOnStack=true}}, {"alias", {.create=processAlias,.retvalGoesOnStack=true}}, {"ref", {.create=processRef,.retvalGoesOnStack=true}} @@ -434,6 +434,9 @@ ParserManager::ParserManager() : 
propertyElements({ CElementSampler::compAddPropertyMap(), CElementShape::compAddPropertyMap(), CElementTransform::compAddPropertyMap(), + CElementBSDF::compAddPropertyMap(), + CElementTexture::compAddPropertyMap(), + CElementEmitter::compAddPropertyMap(), CElementEmissionProfile::compAddPropertyMap() }) { } From a70a863790c830284ca669c487937ffd1d3a26b1 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz <34793522+AnastaZIuk@users.noreply.github.com> Date: Tue, 23 Dec 2025 18:57:17 +0100 Subject: [PATCH 319/472] update include paths for debug draw ext --- src/nbl/ext/DebugDraw/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/nbl/ext/DebugDraw/CMakeLists.txt b/src/nbl/ext/DebugDraw/CMakeLists.txt index 25e4be718a..dfa4a7624f 100644 --- a/src/nbl/ext/DebugDraw/CMakeLists.txt +++ b/src/nbl/ext/DebugDraw/CMakeLists.txt @@ -43,6 +43,7 @@ set(JSON [=[ string(CONFIGURE "${JSON}" JSON) set(COMPILE_OPTIONS + -I "${NBL_ROOT_PATH}/include" # a workaround due to debug draw ext common header which is not part of Nabla builtin archive -I "${CMAKE_CURRENT_SOURCE_DIR}" -T lib_${SM} ) From 4a14db706273cc85f8481b1a3bce481302fc6159 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 23 Dec 2025 19:22:11 +0100 Subject: [PATCH 320/472] update docker/compiler-explorer submodule --- docker/compiler-explorer | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/compiler-explorer b/docker/compiler-explorer index 265166a574..27318d12f8 160000 --- a/docker/compiler-explorer +++ b/docker/compiler-explorer @@ -1 +1 @@ -Subproject commit 265166a574c3b0dae59e57d6d8605f0fa37c31e1 +Subproject commit 27318d12f88cf34bd0444101e6e260b12f5063a0 From 1badc7a3a911c88e48227c017af65a1c46e6db39 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 23 Dec 2025 22:09:10 +0100 Subject: [PATCH 321/472] fix mount logic for debug draw ext, perform tests on builtins on/off --- include/nbl/ext/DebugDraw/CDrawAABB.h | 2 +- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 
34 +++++++++++++-------------- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 9442e24c28..126731f425 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -91,7 +91,7 @@ namespace nbl::ext::debug_draw static core::smart_refctd_ptr createDefaultPipelineLayout(video::ILogicalDevice* device, DrawMode mode = ADM_DRAW_BATCH); //! mounts the extension's archive to given system - useful if you want to create your own shaders with common header included - static const core::smart_refctd_ptr mount(core::smart_refctd_ptr logger, system::ISystem* system, const core::string& spvPath, const std::string_view archiveAlias = ""); + static const core::smart_refctd_ptr mount(core::smart_refctd_ptr logger, system::ISystem* system, video::ILogicalDevice* device, const std::string_view archiveAlias = ""); inline const SCachedCreationParameters& getCreationParameters() const { return m_cachedCreationParams; } diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index f7706a15c8..ca82da688a 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -5,7 +5,7 @@ #include "nbl/ext/DebugDraw/CDrawAABB.h" #ifdef NBL_EMBED_BUILTIN_RESOURCES -#include "nbl/ext/debug_draw/builtin/CArchive.h" +#include "nbl/ext/debug_draw/builtin/build/CArchive.h" #endif #include "nbl/ext/DebugDraw/builtin/build/spirv/keys.hpp" @@ -75,32 +75,30 @@ core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) return core::smart_refctd_ptr(new DrawAABB(std::move(constructorParams))); } -// note we use archive entry explicitly for temporary compiler include search path & asset cwd to use keys directly -constexpr std::string_view NBL_ARCHIVE_ENTRY = NBL_DEBUG_DRAW_HLSL_MOUNT_POINT; +// extension data mount alias +constexpr std::string_view NBL_EXT_MOUNT_ENTRY = "nbl/ext/DebugDraw"; -const 
smart_refctd_ptr DrawAABB::mount(smart_refctd_ptr logger, ISystem* system, const core::string& spvPath, const std::string_view archiveAlias) +const smart_refctd_ptr DrawAABB::mount(smart_refctd_ptr logger, ISystem* system, video::ILogicalDevice* device, const std::string_view archiveAlias) { assert(system); if (!system) return nullptr; - if (system->exists(path(NBL_ARCHIVE_ENTRY) / spvPath.c_str(), {})) - { - logger->log("CDrawAABB .spv directory is already mounted!", ILogger::ELL_WARNING); + // the key is deterministic, we are validating presence of required .spv + const auto composed = path(archiveAlias.data()) / nbl::ext::debug_draw::builtin::build::get_spirv_key<"draw_aabb">(device); + if (system->exists(composed, {})) return nullptr; - } // extension should mount everything for you, regardless if content goes from virtual filesystem // or disk directly - and you should never rely on application framework to expose extension data -#ifdef NBL_EMBED_BUILTIN_RESOURCES + #ifdef NBL_EMBED_BUILTIN_RESOURCES auto archive = make_smart_refctd_ptr(smart_refctd_ptr(logger)); - system->mount(smart_refctd_ptr(archive), archiveAlias.data()); -#else - auto archive = make_smart_refctd_ptr(std::move(NBL_ARCHIVE_ENTRY), smart_refctd_ptr(logger), system); - system->mount(smart_refctd_ptr(archive), archiveAlias.data()); -#endif + #else + auto archive = make_smart_refctd_ptr(std::string_view(NBL_DEBUG_DRAW_HLSL_MOUNT_POINT), smart_refctd_ptr(logger), system); + #endif + system->mount(smart_refctd_ptr(archive), archiveAlias.data()); return smart_refctd_ptr(archive); } @@ -108,14 +106,13 @@ smart_refctd_ptr DrawAABB::createPipeline(SCreationParamet { system::logger_opt_ptr logger = params.utilities->getLogger(); auto system = smart_refctd_ptr(params.assetManager->getSystem()); - - const auto key = nbl::ext::debug_draw::builtin::build::get_spirv_key<"draw_aabb">(params.utilities->getLogicalDevice()); - mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), key, 
NBL_ARCHIVE_ENTRY); + auto* device = params.utilities->getLogicalDevice(); + mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), params.utilities->getLogicalDevice(), NBL_EXT_MOUNT_ENTRY); auto getShader = [&](const core::string& key)->smart_refctd_ptr { IAssetLoader::SAssetLoadParams lp = {}; lp.logger = params.utilities->getLogger(); - lp.workingDirectory = NBL_DEBUG_DRAW_HLSL_MOUNT_POINT; + lp.workingDirectory = NBL_EXT_MOUNT_ENTRY; auto bundle = params.assetManager->getAsset(key.c_str(), lp); const auto contents = bundle.getContents(); @@ -135,6 +132,7 @@ smart_refctd_ptr DrawAABB::createPipeline(SCreationParamet return IAsset::castDown(contents[0]); }; + const auto key = nbl::ext::debug_draw::builtin::build::get_spirv_key<"draw_aabb">(device); smart_refctd_ptr unifiedShader = getShader(key); if (!unifiedShader) { From ab505fe13b194f808127da444877431adf5bf58d Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 23 Dec 2025 23:01:09 +0100 Subject: [PATCH 322/472] fix bugs (most importantly handling) --- src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp | 4 ++-- src/nbl/ext/MitsubaLoader/ParserUtil.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp b/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp index 9854912fc7..c26b07889d 100644 --- a/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp @@ -60,8 +60,8 @@ auto CElementIntegrator::compAddPropertyMap() -> AddPropertyMapsecond.retvalGoesOnStack) return; - if (created.element && created.name.size()) - session->handles[created.name] = created.element; + if (created.element && created.element->id.size()) + session->handles[created.element->id] = created.element; session->elements.push(std::move(created)); } From 078899fcdb32e1e2de16ba6c98649740f6d25482 Mon Sep 17 00:00:00 2001 From: devsh Date: Wed, 24 Dec 2025 01:24:06 +0100 Subject: [PATCH 323/472] fix all MitsubaLoader bugs which 
came up during parsing tests --- examples_tests | 2 +- include/nbl/ext/MitsubaLoader/CElementBSDF.h | 2 +- .../MitsubaLoader/CElementEmissionProfile.h | 17 ++++------------- .../nbl/ext/MitsubaLoader/CElementIntegrator.h | 14 ++++++++------ src/nbl/ext/MitsubaLoader/CElementBSDF.cpp | 11 ++++++----- src/nbl/ext/MitsubaLoader/CElementEmitter.cpp | 2 +- src/nbl/ext/MitsubaLoader/CElementFilm.cpp | 3 +++ .../ext/MitsubaLoader/CElementIntegrator.cpp | 5 +++-- src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp | 6 +++--- src/nbl/ext/MitsubaLoader/ParserUtil.cpp | 18 ++++++++++++------ 10 files changed, 42 insertions(+), 38 deletions(-) diff --git a/examples_tests b/examples_tests index d20b9c67bf..8333763f44 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit d20b9c67bf5ef5c4d782a13709a78ce59b24e1e4 +Subproject commit 8333763f44407f45dfc47d5ae4b1bed3f6d8fb9e diff --git a/include/nbl/ext/MitsubaLoader/CElementBSDF.h b/include/nbl/ext/MitsubaLoader/CElementBSDF.h index 432064b847..4729c36dc5 100644 --- a/include/nbl/ext/MitsubaLoader/CElementBSDF.h +++ b/include/nbl/ext/MitsubaLoader/CElementBSDF.h @@ -127,7 +127,7 @@ class CElementBSDF : public IElement { alpha = defaultAlpha; } - virtual inline ~RoughSpecularBase() {} + inline ~RoughSpecularBase() {} inline RoughSpecularBase& operator=(const RoughSpecularBase& other) { diff --git a/include/nbl/ext/MitsubaLoader/CElementEmissionProfile.h b/include/nbl/ext/MitsubaLoader/CElementEmissionProfile.h index ecf24c9f63..84927497df 100644 --- a/include/nbl/ext/MitsubaLoader/CElementEmissionProfile.h +++ b/include/nbl/ext/MitsubaLoader/CElementEmissionProfile.h @@ -14,6 +14,8 @@ namespace nbl::ext::MitsubaLoader struct CElementEmissionProfile final : public IElement { + constexpr static inline uint16_t MaxPathLen = 1024u; + static AddPropertyMap compAddPropertyMap(); inline CElementEmissionProfile(const char* id) : IElement(id), normalization(EN_NONE), flatten(0.0) /*no blending by default*/ {} @@ -22,23 
+24,12 @@ struct CElementEmissionProfile final : public IElement { operator=(other); } - inline CElementEmissionProfile(CElementEmissionProfile&& other) : IElement("") - { - operator=(std::move(other)); - } inline ~CElementEmissionProfile() {} inline CElementEmissionProfile& operator=(const CElementEmissionProfile& other) { IElement::operator=(other); - filename = other.filename; - return *this; - } - - inline CElementEmissionProfile& operator=(CElementEmissionProfile&& other) - { - IElement::operator=(std::move(other)); - std::swap(filename, other.filename); + memcpy(filename,other.filename,MaxPathLen); return *this; } @@ -59,7 +50,7 @@ struct CElementEmissionProfile final : public IElement }; - std::string filename; // TODO: test destructor runs + char filename[MaxPathLen]; E_NORMALIZE normalization; // how much we flatten the profile towards a uniform distribution float flatten; diff --git a/include/nbl/ext/MitsubaLoader/CElementIntegrator.h b/include/nbl/ext/MitsubaLoader/CElementIntegrator.h index 875bb08378..7ed5f0cbb4 100644 --- a/include/nbl/ext/MitsubaLoader/CElementIntegrator.h +++ b/include/nbl/ext/MitsubaLoader/CElementIntegrator.h @@ -88,10 +88,12 @@ class CElementIntegrator final : public IElement }; struct PathTracing : MonteCarloTracingBase,EmitterHideableBase { - constexpr static inline Type VariantType = Type::PATH; - bool strictNormals = false; }; + struct UniDirectionalPathTracing final : PathTracing + { + constexpr static inline Type VariantType = Type::PATH; + }; struct SimpleVolumetricPathTracing : PathTracing { constexpr static inline Type VariantType = Type::VOL_PATH_SIMPLE; @@ -100,7 +102,7 @@ class CElementIntegrator final : public IElement { constexpr static inline Type VariantType = Type::VOL_PATH; }; - struct BiDirectionalPathTracing final : MonteCarloTracingBase + struct BiDirectionalPathTracing final : PathTracing { constexpr static inline Type VariantType = Type::BDPT; @@ -252,9 +254,9 @@ class CElementIntegrator final : public 
IElement using variant_list_t = core::type_list< AmbientOcclusion, DirectIllumination, + UniDirectionalPathTracing, SimpleVolumetricPathTracing, ExtendedVolumetricPathTracing, - PathTracing, BiDirectionalPathTracing, PhotonMapping, ProgressivePhotonMapping, @@ -377,7 +379,7 @@ class CElementIntegrator final : public IElement case Type::IRR_CACHE: [[fallthrough]]; case Type::MULTI_CHANNEL: - if (_child->getType() != IElement::Type::INTEGRATOR) + if (_child->getType() == IElement::Type::INTEGRATOR) break; [[fallthrough]]; default: @@ -411,9 +413,9 @@ class CElementIntegrator final : public IElement { AmbientOcclusion ao; DirectIllumination direct; + PathTracing path; SimpleVolumetricPathTracing volpath_simple; ExtendedVolumetricPathTracing volpath; - PathTracing path; BiDirectionalPathTracing bdpt; PhotonMapping photonmapper; ProgressivePhotonMapping ppm; diff --git a/src/nbl/ext/MitsubaLoader/CElementBSDF.cpp b/src/nbl/ext/MitsubaLoader/CElementBSDF.cpp index 3790e89748..6c91e4dded 100644 --- a/src/nbl/ext/MitsubaLoader/CElementBSDF.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementBSDF.cpp @@ -98,8 +98,9 @@ auto CElementBSDF::compAddPropertyMap() -> AddPropertyMap // conductor ADD_VARIANT_SPECTRUM_PROPERTY_CONSTRAINED(eta,derived_from,AllConductor); ADD_VARIANT_SPECTRUM_PROPERTY_CONSTRAINED(k,derived_from,AllConductor); + NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(extEta,FLOAT,derived_from,AllConductor); // adding twice cause two property types are allowed - NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED(extEta,FLOAT,derived_from,AllConductor) + NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED(extEta,STRING,derived_from,AllConductor) state.extEta = TransmissiveBase::findIOR(_property.getProperty(),logger); success = true; NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_VARIANT_PROPERTY_CONSTRAINED_END; @@ -348,7 +349,7 @@ SET_TEXTURE_CONSTRAINED_END 
SET_TEXTURE_CONSTRAINED_SIMPLE(specularReflectance,impl::has_specularReflectance), SET_TEXTURE_CONSTRAINED_SIMPLE(specularTransmittance,derived_from,TransmissiveBase), SET_TEXTURE_CONSTRAINED_SIMPLE(sigmaA,derived_from,AllCoating), - SET_TEXTURE_CONSTRAINED("",is_any_of,BumpMap,NormalMap) + SET_TEXTURE_CONSTRAINED(,is_any_of,BumpMap,NormalMap) state.texture = _texture; success = true; SET_TEXTURE_CONSTRAINED_END, @@ -371,13 +372,13 @@ SET_TEXTURE_CONSTRAINED_END logger.log("No can have nested inside it with name \"%s\"!",system::ILogger::ELL_ERROR,name.c_str()); return false; } - if (found->second(this,_child,logger)) + if (!found->second(this,_child,logger)) { logger.log( "Failed to parse with name \"%s\" nested inside of type %d!", system::ILogger::ELL_ERROR,name.c_str(),type ); - return true; + return false; } return true; } @@ -397,7 +398,7 @@ SET_TEXTURE_CONSTRAINED_END if (meta_common.childCount(_child); - twosided.bsdf[twosided.childCount++] = _bsdf; + meta_common.bsdf[meta_common.childCount++] = _bsdf; return true; } logger.log(" cannot have more than %d other s nested inside it!",system::ILogger::ELL_ERROR,type,maxChildCount); diff --git a/src/nbl/ext/MitsubaLoader/CElementEmitter.cpp b/src/nbl/ext/MitsubaLoader/CElementEmitter.cpp index eb9aa154dd..831de4506e 100644 --- a/src/nbl/ext/MitsubaLoader/CElementEmitter.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementEmitter.cpp @@ -60,7 +60,7 @@ auto CElementEmitter::compAddPropertyMap() -> AddPropertyMap const auto lookAt = hlsl::transpose(lookAtGLM); // turn lookat into a rotation matrix const auto rotation = hlsl::inverse(hlsl::float32_t3x3(lookAt)); - _NBL_DEBUG_BREAK_IF(true); // no idea if matrix is correct + //_NBL_DEBUG_BREAK_IF(true); // no idea if matrix is correct, looks okay for (auto r=0; r<3; r++) _this->transform.matrix[r].xyz = rotation[r]; return true; diff --git a/src/nbl/ext/MitsubaLoader/CElementFilm.cpp b/src/nbl/ext/MitsubaLoader/CElementFilm.cpp index fec400829f..c85aa2082d 100644 --- 
a/src/nbl/ext/MitsubaLoader/CElementFilm.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementFilm.cpp @@ -126,8 +126,11 @@ auto CElementFilm::compAddPropertyMap() -> AddPropertyMap NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_PROPERTY(cascadeLuminanceBase,FLOAT); NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_PROPERTY(cascadeLuminanceStart,FLOAT); + const auto floatPropType = SNamedPropertyElement::Type::FLOAT; NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_PROPERTY(denoiserBloomScale,FLOAT); + retval.byPropertyType[floatPropType]["bloomScale"] = retval.byPropertyType[floatPropType]["denoiserBloomScale"]; // alias NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_PROPERTY(denoiserBloomIntensity,FLOAT); + retval.byPropertyType[floatPropType]["bloomIntensity"] = retval.byPropertyType[floatPropType]["denoiserBloomIntensity"]; // alias NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_PROPERTY(envmapRegularizationFactor,FLOAT); diff --git a/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp b/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp index c26b07889d..f99b4487f9 100644 --- a/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp @@ -212,11 +212,12 @@ bool CElementIntegrator::onEndTag(CMitsubaMetadata* metadata, system::logger_opt { } - if (metadata->m_global.m_integrator.type!=Type::INVALID) + /* Nested integrators rely on closing branch to post-order visit here to set the root one as the main one in the metadata + if (metadata->m_global.m_integrator.type!=Type::INVALID) // D { invalidXMLFileStructure(logger,"already specified an integrator, NOT overwriting."); return true; - } + }*/ metadata->m_global.m_integrator = *this; return true; diff --git a/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp b/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp index df9d8c776e..a852978d75 100644 --- a/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp +++ b/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp @@ -261,11 +261,11 @@ bool 
CMitsubaLoader::isALoadableFileFormat(system::IFile* _file, const system::l if (fileSizeread(success,tempBuff,0,pos); + _file->read(success,tempBuff,pos,3); if (!success) return false; if (tempBuff[0] == 0xEFu && tempBuff[1] == 0xBBu && tempBuff[2] == 0xBFu) diff --git a/src/nbl/ext/MitsubaLoader/ParserUtil.cpp b/src/nbl/ext/MitsubaLoader/ParserUtil.cpp index 934fd82506..fb54fab0ef 100644 --- a/src/nbl/ext/MitsubaLoader/ParserUtil.cpp +++ b/src/nbl/ext/MitsubaLoader/ParserUtil.cpp @@ -66,7 +66,8 @@ bool ParserManager::SessionContext::parse(IFile* _file) XML_SetUserData(parser,&ctx); const size_t size = _file->getSize(); - const char* buff = reinterpret_cast(const_cast(_file)->getMappedPointer()); + const void* const origPtr = const_cast(_file)->getMappedPointer(); + const char* buff = reinterpret_cast(origPtr); if (!buff) { buff = reinterpret_cast(_NBL_ALIGNED_MALLOC(size,4096u)); @@ -79,7 +80,7 @@ bool ParserManager::SessionContext::parse(IFile* _file) } } XML_Status parseStatus = XML_Parse(parser,buff,size,0); - if (_file->getMappedPointer()!=buff) + if (origPtr!=buff) _NBL_ALIGNED_FREE(const_cast(buff)); XML_ParserFree(parser); @@ -225,7 +226,9 @@ void ParserManager::XMLContext::parseElement(const char* _el, const char** _atts nameIt = typeMap.find(""); if (nameIt==typeMap.end()) { - session->invalidXMLFileStructure("There's no Property named \""+property.name+"\" of Type (TODO) supported by ElementType (TODO)"); + core::string msg = "There's no Property named \""+property.name+"\" of Type "; + msg += property.type+" supported by <"+element->getLogName()+">"; + session->invalidXMLFileStructure(msg); return; } const auto& callback = nameIt->second; @@ -233,7 +236,10 @@ void ParserManager::XMLContext::parseElement(const char* _el, const char** _atts if constexpr (!std::is_same_v) if (!callback.allowedVariantTypes.empty() && std::find(callback.allowedVariantTypes.begin(),callback.allowedVariantTypes.end(),typedElement->type)==callback.allowedVariantTypes.end()) { - 
session->invalidXMLFileStructure("There's no Property named \""+property.name+"\" of Type(TODO) not supported on ElementType(TODO) of Variant(TODO)"); + core::string msg = "There's no Property named \""+property.name+"\" of Type "; + msg += property.type+" supported by <"+element->getLogName()+"> of Variant "; + msg += std::to_string(typedElement->type); + session->invalidXMLFileStructure(msg); return; } callback(typedElement,std::move(property),session->params->logger); @@ -311,7 +317,7 @@ void ParserManager::XMLContext::onEnd(const char* _el) if (parent && !parent->processChildData(element.element,element.name,session->params->logger)) { if (element.element) - killParseWithError(element.element->getLogName()+" could not processChildData with name: "+element.name); + killParseWithError("<"+parent->getLogName()+ "> could not processChildData of <"+element.element->getLogName()+"> with name: "+element.name); else killParseWithError("Failed to add a nullptr child with name: "+element.name); } @@ -384,7 +390,7 @@ struct ParserManager::CreateElement auto found = StringToTypeMap.find(type); if (found==StringToTypeMap.end()) { - ctx->invalidXMLFileStructure("unknown type"); + ctx->invalidXMLFileStructure(core::string("unknown type in <")+std::to_string((uint8_t)Element::ElementType)+" type=\""+type+"\">"); return {}; } From 846a38311936409fa73b12b55de9e6cc1870dd3b Mon Sep 17 00:00:00 2001 From: devsh Date: Wed, 24 Dec 2025 02:01:58 +0100 Subject: [PATCH 324/472] oopsie pushed a dangling submodule pointer last commit --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 8333763f44..513c5a7365 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 8333763f44407f45dfc47d5ae4b1bed3f6d8fb9e +Subproject commit 513c5a736539086c97227643c62c4fbcf2eafa1a From aad78bba1aa58fb403ce57a9546dc5b2060ec8e6 Mon Sep 17 00:00:00 2001 From: devsh Date: Wed, 24 Dec 2025 02:13:43 +0100 Subject: [PATCH 
325/472] fix a small typo in CElementBSDF::MixtureBSDF --- examples_tests | 2 +- src/nbl/ext/MitsubaLoader/CElementBSDF.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples_tests b/examples_tests index 513c5a7365..3db6e3cef4 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 513c5a736539086c97227643c62c4fbcf2eafa1a +Subproject commit 3db6e3cef42467eb3fda53f59b78264e43c31ba8 diff --git a/src/nbl/ext/MitsubaLoader/CElementBSDF.cpp b/src/nbl/ext/MitsubaLoader/CElementBSDF.cpp index 6c91e4dded..4117ca5f3a 100644 --- a/src/nbl/ext/MitsubaLoader/CElementBSDF.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementBSDF.cpp @@ -141,7 +141,7 @@ auto CElementBSDF::compAddPropertyMap() -> AddPropertyMap std::string token; while (std::getline(sstr,token,',')) { - if (state.weightCount) + if (state.weightCount>=MixtureBSDF::MaxChildCount) { logger.log(" MaxChildCount of %d exceeded!",system::ILogger::ELL_ERROR,MetaBSDF::MaxChildCount); break; From 3e203b12d41ded4e03fe0c98bd6798e69993bc74 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Tue, 23 Dec 2025 22:23:06 -0300 Subject: [PATCH 326/472] Fixed emulated vector template resolution ambiguity --- .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 198 ++++++++---------- 1 file changed, 85 insertions(+), 113 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index 25b033c30e..d0c728a8c7 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -134,14 +134,6 @@ NBL_CONSTEXPR_FUNC this_t operator OP() NBL_CONST_MEMBER_FUNC \ } #define NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(OP)\ -NBL_CONSTEXPR_FUNC this_t operator OP (component_t val) NBL_CONST_MEMBER_FUNC \ -{\ - this_t output;\ - [[unroll]]\ - for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, this_t::getComponent(i) OP val);\ - return output;\ -}\ NBL_CONSTEXPR_FUNC this_t operator OP 
(this_t other) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ @@ -183,6 +175,14 @@ NBL_CONSTEXPR_FUNC vector operator OP (vector;\ using component_t = ComponentType;\ +template\ +NBL_CONSTEXPR_STATIC this_t create(vector other)\ +{\ + this_t output;\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, component_t::create(other[i]));\ + return output;\ +}\ NBL_CONSTEXPR_STATIC this_t create(this_t other)\ {\ CRTP output;\ @@ -209,6 +209,43 @@ NBL_CONSTEXPR_STATIC this_t create(vector other)\ return output;\ } +#define DEFINE_OPERATORS_FOR_TYPE(...)\ +NBL_CONSTEXPR_FUNC this_t operator+(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, CRTP::getComponent(i) + _static_cast(val));\ +\ + return output;\ +}\ +\ +NBL_CONSTEXPR_FUNC this_t operator-(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, CRTP::getComponent(i) - _static_cast(val));\ +\ + return output;\ +}\ +\ +NBL_CONSTEXPR_FUNC this_t operator*(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, CRTP::getComponent(i) * _static_cast(val));\ +\ + return output;\ +}\ +\ +NBL_CONSTEXPR_FUNC this_t operator/(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, CRTP::getComponent(i) / _static_cast(val));\ +\ + return output;\ +}\ + // Fundamental, integral template NBL_PARTIAL_REQ_TOP(is_fundamental_v && concepts::IntegralLikeScalar) struct emulated_vector&& concepts::IntegralLikeScalar) > : CRTP @@ -232,6 +269,15 @@ struct emulated_vector) NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>=) + + DEFINE_OPERATORS_FOR_TYPE(emulated_uint64_t) + DEFINE_OPERATORS_FOR_TYPE(emulated_int64_t) + DEFINE_OPERATORS_FOR_TYPE(uint16_t) + 
DEFINE_OPERATORS_FOR_TYPE(uint32_t) + DEFINE_OPERATORS_FOR_TYPE(uint64_t) + DEFINE_OPERATORS_FOR_TYPE(int16_t) + DEFINE_OPERATORS_FOR_TYPE(int32_t) + DEFINE_OPERATORS_FOR_TYPE(int64_t) }; // Fundamental, not integral @@ -253,6 +299,15 @@ struct emulated_vector) NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>=) + + DEFINE_OPERATORS_FOR_TYPE(emulated_uint64_t) + DEFINE_OPERATORS_FOR_TYPE(emulated_int64_t) + DEFINE_OPERATORS_FOR_TYPE(uint16_t) + DEFINE_OPERATORS_FOR_TYPE(uint32_t) + DEFINE_OPERATORS_FOR_TYPE(uint64_t) + DEFINE_OPERATORS_FOR_TYPE(int16_t) + DEFINE_OPERATORS_FOR_TYPE(int32_t) + DEFINE_OPERATORS_FOR_TYPE(int64_t) }; // Not fundamental, integral @@ -278,6 +333,20 @@ struct emulated_vector) NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>=) + + DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) + DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) + DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) + DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) + DEFINE_OPERATORS_FOR_TYPE(float16_t) + DEFINE_OPERATORS_FOR_TYPE(float32_t) + DEFINE_OPERATORS_FOR_TYPE(float64_t) + DEFINE_OPERATORS_FOR_TYPE(uint16_t) + DEFINE_OPERATORS_FOR_TYPE(uint32_t) + DEFINE_OPERATORS_FOR_TYPE(uint64_t) + DEFINE_OPERATORS_FOR_TYPE(int16_t) + DEFINE_OPERATORS_FOR_TYPE(int32_t) + DEFINE_OPERATORS_FOR_TYPE(int64_t) }; // Not fundamental, not integral @@ -299,107 +368,12 @@ struct emulated_vector) NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>=) -}; - -#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM -#undef NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM -#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR -#undef NBL_EMULATED_VECTOR_COMPARISON_OPERATOR -#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR -#undef NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR -#undef NBL_EMULATED_VECTOR_UNARY_OPERATOR - -// ----------------------------------------------------- EMULATED FLOAT SPECIALIZATION -------------------------------------------------------------------- - -#define 
DEFINE_OPERATORS_FOR_TYPE(...)\ -NBL_CONSTEXPR_FUNC this_t operator+(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ -{\ - this_t output;\ - for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, CRTP::getComponent(i) + component_t::create(val));\ -\ - return output;\ -}\ -\ -NBL_CONSTEXPR_FUNC this_t operator-(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ -{\ - this_t output;\ - for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, CRTP::getComponent(i) - component_t::create(val));\ -\ - return output;\ -}\ -\ -NBL_CONSTEXPR_FUNC this_t operator*(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ -{\ - this_t output;\ - for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, CRTP::getComponent(i) * component_t::create(val));\ -\ - return output;\ -}\ -\ - - -template -struct emulated_vector, CRTP> : CRTP -{ - using component_t = emulated_float64_t; - using this_t = emulated_vector; - - NBL_CONSTEXPR_STATIC this_t create(this_t other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, other.getComponent(i)); - - return output; - } - - template - NBL_CONSTEXPR_STATIC this_t create(vector other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, component_t::create(other[i])); - - return output; - } - - NBL_CONSTEXPR_FUNC this_t operator+(this_t other) NBL_CONST_MEMBER_FUNC - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) + other.getComponent(i)); - - return output; - } - NBL_CONSTEXPR_FUNC this_t operator-(this_t other) NBL_CONST_MEMBER_FUNC - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) - other.getComponent(i)); - - return output; - } - NBL_CONSTEXPR_FUNC this_t operator*(this_t other) NBL_CONST_MEMBER_FUNC - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - 
output.setComponent(i, CRTP::getComponent(i) * other.getComponent(i)); - - return output; - } DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) + DEFINE_OPERATORS_FOR_TYPE(float16_t) DEFINE_OPERATORS_FOR_TYPE(float32_t) DEFINE_OPERATORS_FOR_TYPE(float64_t) DEFINE_OPERATORS_FOR_TYPE(uint16_t) @@ -408,17 +382,15 @@ struct emulated_vector, CRTP> : DEFINE_OPERATORS_FOR_TYPE(int16_t) DEFINE_OPERATORS_FOR_TYPE(int32_t) DEFINE_OPERATORS_FOR_TYPE(int64_t) - - NBL_CONSTEXPR_FUNC component_t calcComponentSum() NBL_CONST_MEMBER_FUNC - { - component_t sum = component_t::create(0); - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - sum = sum + CRTP::getComponent(i); - - return sum; - } }; +#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM +#undef NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM +#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR +#undef NBL_EMULATED_VECTOR_COMPARISON_OPERATOR +#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR +#undef NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR +#undef NBL_EMULATED_VECTOR_UNARY_OPERATOR #undef DEFINE_OPERATORS_FOR_TYPE template From e7a0ef4b3bf2f982ee922164bfddc5a263dbd0ae Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 28 Dec 2025 18:51:13 +0100 Subject: [PATCH 327/472] add IES texture eval overload, remove some old code, update examples_tests submodule --- examples_tests | 2 +- .../builtin/glsl/material_compiler/common.glsl | 3 ++- include/nbl/builtin/hlsl/ies/texture.hlsl | 10 +++++++--- src/nbl/CMakeLists.txt | 17 ----------------- 4 files changed, 10 insertions(+), 22 deletions(-) diff --git a/examples_tests b/examples_tests index b7b796189a..59a434cc64 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit b7b796189a70270a681c35e3e5c1de7517533ed6 +Subproject commit 59a434cc64179b223663a6ca6543a4e85a9f58e1 diff 
--git a/include/nbl/builtin/glsl/material_compiler/common.glsl b/include/nbl/builtin/glsl/material_compiler/common.glsl index f6b7d97c46..c0a52a4f0d 100644 --- a/include/nbl/builtin/glsl/material_compiler/common.glsl +++ b/include/nbl/builtin/glsl/material_compiler/common.glsl @@ -215,6 +215,7 @@ bool nbl_glsl_MC_op_isDelta(in uint op) #ifdef TEX_PREFETCH_STREAM #include #endif +// TODO: once rewritten to HLSL, shall use new API #include // OptiX likes this one better @@ -1499,4 +1500,4 @@ nbl_glsl_MC_quot_pdf_aov_t nbl_glsl_MC_runGenerateAndRemainderStream( } #endif //GEN_CHOICE_STREAM -#endif \ No newline at end of file +#endif diff --git a/include/nbl/builtin/hlsl/ies/texture.hlsl b/include/nbl/builtin/hlsl/ies/texture.hlsl index 7f02290506..4ea04755df 100644 --- a/include/nbl/builtin/hlsl/ies/texture.hlsl +++ b/include/nbl/builtin/hlsl/ies/texture.hlsl @@ -64,12 +64,10 @@ struct Texture return retval; } - static inline float32_t eval(NBL_CONST_REF_ARG(accessor_t) accessor, NBL_CONST_REF_ARG(SInfo) info, NBL_CONST_REF_ARG(uint32_t2) position) + static inline float32_t eval(NBL_CONST_REF_ARG(accessor_t) accessor, NBL_CONST_REF_ARG(SInfo) info, NBL_CONST_REF_ARG(float32_t2) uv) { // We don't currently support generating IES images that exploit symmetries or reduced domains, all are full octahederal mappings of a sphere. // If we did, we'd rely on MIRROR and CLAMP samplers to do some of the work for us while handling the discontinuity due to corner sampling. 
- - const float32_t2 uv = float32_t2(position) * info.inv; const float32_t3 dir = octahedral_t::uvToDir(uv); const polar_t polar = polar_t::createFromCartesian(dir); @@ -88,6 +86,12 @@ struct Texture return blendV; } + + static inline float32_t eval(NBL_CONST_REF_ARG(accessor_t) accessor, NBL_CONST_REF_ARG(SInfo) info, NBL_CONST_REF_ARG(uint32_t2) position) + { + const float32_t2 uv = float32_t2(position) * info.inv; + return eval(accessor, info, uv); + } }; } diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 53cd256205..8603d8935f 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -881,20 +881,3 @@ source_group(TREE "${NBL_ROOT_PATH}" PREFIX "Source Files" FILES ${NABLA_SOURCE_FILES} ) - -# we want HLSL intelisense with all fancy features -# NOTE: don't use HLSL Tools component but map .hlsl -# file ext to C++ intellisense in VS settings - -# https://marketplace.visualstudio.com/items?itemName=TimGJones.HLSLToolsforVisualStudio -# intellisense doesn't work in VS2026 with the ext even tho I seem to have correct config, -# there is syntax highlighting however it cannot resolve any #include file and -# in VS2022 this ext literally breaks the IDE making it unresponsive. 
-# One could not use it at all and rely on C++ intellisense for .hlsl files, -# but then I found I must right click in a .hlsl file -> Rescan -> Rescan File -# to resolve include files, "Rescan solution" doesn't work -target_sources(Nabla PRIVATE "${NBL_ROOT_PATH}/include/shadertoolsconfig.json") -source_group(TREE "${NBL_ROOT_PATH}" - PREFIX "HLSL Tools" - FILES "${NBL_ROOT_PATH}/include/shadertoolsconfig.json" -) \ No newline at end of file From 3e5f8d039d46e302f51029a291c69d1a21e2395d Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 28 Dec 2025 21:12:08 +0100 Subject: [PATCH 328/472] awful nsc tooling bug I didn't notice while merging https://github.com/Devsh-Graphics-Programming/Nabla/pull/944 - upgrade builtins API to support config-genex, fix .spv custom command it was building all configs per single config (and embedding all 3 into single archive) --- cmake/common.cmake | 71 ++++++++++++++++++------------------- src/nbl/builtin/utils.cmake | 6 ++-- 2 files changed, 38 insertions(+), 39 deletions(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index d1b9c5eb5d..10741c9a94 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1485,42 +1485,41 @@ namespace @IMPL_NAMESPACE@ { file(WRITE "${CONFIG_FILE}" "${CONFIG_CONTENT}") # generate keys and commands for compiling shaders - foreach(BUILD_CONFIGURATION ${CMAKE_CONFIGURATION_TYPES}) - set(FINAL_KEY_REL_PATH "${BUILD_CONFIGURATION}/${FINAL_KEY}") - set(TARGET_OUTPUT "${IMPL_BINARY_DIR}/${FINAL_KEY_REL_PATH}") - - set(NBL_NSC_COMPILE_COMMAND - "$" - -Fc "${TARGET_OUTPUT}" - ${COMPILE_OPTIONS} ${REQUIRED_OPTIONS} ${IMPL_COMMON_OPTIONS} - "${CONFIG_FILE}" - ) - - add_custom_command(OUTPUT "${TARGET_OUTPUT}" - COMMAND ${NBL_NSC_COMPILE_COMMAND} - DEPENDS ${DEPENDS_ON} - COMMENT "Creating \"${TARGET_OUTPUT}\"" - VERBATIM - COMMAND_EXPAND_LISTS - ) - - set(HEADER_ONLY_LIKE "${CONFIG_FILE}" "${TARGET_INPUT}" "${TARGET_OUTPUT}") - target_sources(${IMPL_TARGET} PRIVATE ${HEADER_ONLY_LIKE}) - - 
set_source_files_properties(${HEADER_ONLY_LIKE} PROPERTIES - HEADER_FILE_ONLY ON - VS_TOOL_OVERRIDE None - ) - - set_source_files_properties("${TARGET_OUTPUT}" PROPERTIES - NBL_SPIRV_REGISTERED_INPUT "${TARGET_INPUT}" - NBL_SPIRV_PERMUTATION_CONFIG "${CONFIG_FILE}" - NBL_SPIRV_BINARY_DIR "${IMPL_BINARY_DIR}" - NBL_SPIRV_ACCESS_KEY "${FINAL_KEY_REL_PATH}" - ) - - set_property(TARGET ${IMPL_TARGET} APPEND PROPERTY NBL_SPIRV_OUTPUTS "${TARGET_OUTPUT}") - endforeach() + set(FINAL_KEY_REL_PATH "$/${FINAL_KEY}") + set(TARGET_OUTPUT "${IMPL_BINARY_DIR}/${FINAL_KEY_REL_PATH}") + + set(NBL_NSC_COMPILE_COMMAND + "$" + -Fc "${TARGET_OUTPUT}" + ${COMPILE_OPTIONS} ${REQUIRED_OPTIONS} ${IMPL_COMMON_OPTIONS} + "${CONFIG_FILE}" + ) + + add_custom_command(OUTPUT "${TARGET_OUTPUT}" + COMMAND ${NBL_NSC_COMPILE_COMMAND} + DEPENDS ${DEPENDS_ON} + COMMENT "Creating \"${TARGET_OUTPUT}\"" + VERBATIM + COMMAND_EXPAND_LISTS + ) + set_source_files_properties("${TARGET_OUTPUT}" PROPERTIES GENERATED TRUE) + + set(HEADER_ONLY_LIKE "${CONFIG_FILE}" "${TARGET_INPUT}" "${TARGET_OUTPUT}") + target_sources(${IMPL_TARGET} PRIVATE ${HEADER_ONLY_LIKE}) + + set_source_files_properties(${HEADER_ONLY_LIKE} PROPERTIES + HEADER_FILE_ONLY ON + VS_TOOL_OVERRIDE None + ) + + set_source_files_properties("${TARGET_OUTPUT}" PROPERTIES + NBL_SPIRV_REGISTERED_INPUT "${TARGET_INPUT}" + NBL_SPIRV_PERMUTATION_CONFIG "${CONFIG_FILE}" + NBL_SPIRV_BINARY_DIR "${IMPL_BINARY_DIR}" + NBL_SPIRV_ACCESS_KEY "${FINAL_KEY_REL_PATH}" + ) + + set_property(TARGET ${IMPL_TARGET} APPEND PROPERTY NBL_SPIRV_OUTPUTS "${TARGET_OUTPUT}") return() endif() diff --git a/src/nbl/builtin/utils.cmake b/src/nbl/builtin/utils.cmake index d791cd3aa4..26499dfc76 100644 --- a/src/nbl/builtin/utils.cmake +++ b/src/nbl/builtin/utils.cmake @@ -119,10 +119,10 @@ function(ADD_CUSTOM_BUILTIN_RESOURCES _TARGET_NAME_ _BUNDLE_NAME_ _BUNDLE_SEARCH list(APPEND NBL_DEPENDENCY_FILES "${NBL_BUILTIN_HEADER_GEN_PY}") list(APPEND NBL_DEPENDENCY_FILES 
"${NBL_BUILTIN_DATA_GEN_PY}") - set(NBL_RESOURCES_LIST_FILE "${_OUTPUT_SOURCE_DIRECTORY_}/resources.txt") + set(NBL_RESOURCES_LIST_FILE "${_OUTPUT_SOURCE_DIRECTORY_}/resources-$.txt") string(REPLACE ";" "\n" RESOURCES_ARGS "${_LBR_${_BUNDLE_NAME_}_}") - file(WRITE "${NBL_RESOURCES_LIST_FILE}" "${RESOURCES_ARGS}") + file(GENERATE OUTPUT "${NBL_RESOURCES_LIST_FILE}" CONTENT "${RESOURCES_ARGS}") set(NBL_BUILTIN_RESOURCES_H "${_OUTPUT_HEADER_DIRECTORY_}/${NBL_BS_HEADER_FILENAME}") set(NBL_BUILTIN_RESOURCE_DATA_CPP "${_OUTPUT_SOURCE_DIRECTORY_}/${NBL_BS_DATA_SOURCE_FILENAME}") @@ -247,4 +247,4 @@ function(ADD_CUSTOM_BUILTIN_RESOURCES _TARGET_NAME_ _BUNDLE_NAME_ _BUNDLE_SEARCH if(MSVC AND NBL_SANITIZE_ADDRESS) set_property(TARGET ${_TARGET_NAME_} PROPERTY COMPILE_OPTIONS /fsanitize=address) endif() -endfunction() \ No newline at end of file +endfunction() From f45caecd5fae5e9a22bb196fc81674c4901f9f4d Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 28 Dec 2025 21:12:08 +0100 Subject: [PATCH 329/472] awful nsc tooling bug I didn't notice while merging https://github.com/Devsh-Graphics-Programming/Nabla/pull/944 - upgrade builtins API to support config-genex, fix .spv custom command it was building all configs per single config (and embedding all 3 into single archive) --- cmake/common.cmake | 71 ++++++++++++++++++------------------- src/nbl/builtin/utils.cmake | 6 ++-- 2 files changed, 38 insertions(+), 39 deletions(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index 16ea1aee06..5e33855df9 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1484,42 +1484,41 @@ namespace @IMPL_NAMESPACE@ { file(WRITE "${CONFIG_FILE}" "${CONFIG_CONTENT}") # generate keys and commands for compiling shaders - foreach(BUILD_CONFIGURATION ${CMAKE_CONFIGURATION_TYPES}) - set(FINAL_KEY_REL_PATH "${BUILD_CONFIGURATION}/${FINAL_KEY}") - set(TARGET_OUTPUT "${IMPL_BINARY_DIR}/${FINAL_KEY_REL_PATH}") - - set(NBL_NSC_COMPILE_COMMAND - "$" - -Fc "${TARGET_OUTPUT}" - 
${COMPILE_OPTIONS} ${REQUIRED_OPTIONS} ${IMPL_COMMON_OPTIONS} - "${CONFIG_FILE}" - ) - - add_custom_command(OUTPUT "${TARGET_OUTPUT}" - COMMAND ${NBL_NSC_COMPILE_COMMAND} - DEPENDS ${DEPENDS_ON} - COMMENT "Creating \"${TARGET_OUTPUT}\"" - VERBATIM - COMMAND_EXPAND_LISTS - ) - - set(HEADER_ONLY_LIKE "${CONFIG_FILE}" "${TARGET_INPUT}" "${TARGET_OUTPUT}") - target_sources(${IMPL_TARGET} PRIVATE ${HEADER_ONLY_LIKE}) - - set_source_files_properties(${HEADER_ONLY_LIKE} PROPERTIES - HEADER_FILE_ONLY ON - VS_TOOL_OVERRIDE None - ) - - set_source_files_properties("${TARGET_OUTPUT}" PROPERTIES - NBL_SPIRV_REGISTERED_INPUT "${TARGET_INPUT}" - NBL_SPIRV_PERMUTATION_CONFIG "${CONFIG_FILE}" - NBL_SPIRV_BINARY_DIR "${IMPL_BINARY_DIR}" - NBL_SPIRV_ACCESS_KEY "${FINAL_KEY_REL_PATH}" - ) - - set_property(TARGET ${IMPL_TARGET} APPEND PROPERTY NBL_SPIRV_OUTPUTS "${TARGET_OUTPUT}") - endforeach() + set(FINAL_KEY_REL_PATH "$/${FINAL_KEY}") + set(TARGET_OUTPUT "${IMPL_BINARY_DIR}/${FINAL_KEY_REL_PATH}") + + set(NBL_NSC_COMPILE_COMMAND + "$" + -Fc "${TARGET_OUTPUT}" + ${COMPILE_OPTIONS} ${REQUIRED_OPTIONS} ${IMPL_COMMON_OPTIONS} + "${CONFIG_FILE}" + ) + + add_custom_command(OUTPUT "${TARGET_OUTPUT}" + COMMAND ${NBL_NSC_COMPILE_COMMAND} + DEPENDS ${DEPENDS_ON} + COMMENT "Creating \"${TARGET_OUTPUT}\"" + VERBATIM + COMMAND_EXPAND_LISTS + ) + set_source_files_properties("${TARGET_OUTPUT}" PROPERTIES GENERATED TRUE) + + set(HEADER_ONLY_LIKE "${CONFIG_FILE}" "${TARGET_INPUT}" "${TARGET_OUTPUT}") + target_sources(${IMPL_TARGET} PRIVATE ${HEADER_ONLY_LIKE}) + + set_source_files_properties(${HEADER_ONLY_LIKE} PROPERTIES + HEADER_FILE_ONLY ON + VS_TOOL_OVERRIDE None + ) + + set_source_files_properties("${TARGET_OUTPUT}" PROPERTIES + NBL_SPIRV_REGISTERED_INPUT "${TARGET_INPUT}" + NBL_SPIRV_PERMUTATION_CONFIG "${CONFIG_FILE}" + NBL_SPIRV_BINARY_DIR "${IMPL_BINARY_DIR}" + NBL_SPIRV_ACCESS_KEY "${FINAL_KEY_REL_PATH}" + ) + + set_property(TARGET ${IMPL_TARGET} APPEND PROPERTY NBL_SPIRV_OUTPUTS 
"${TARGET_OUTPUT}") return() endif() diff --git a/src/nbl/builtin/utils.cmake b/src/nbl/builtin/utils.cmake index d791cd3aa4..26499dfc76 100644 --- a/src/nbl/builtin/utils.cmake +++ b/src/nbl/builtin/utils.cmake @@ -119,10 +119,10 @@ function(ADD_CUSTOM_BUILTIN_RESOURCES _TARGET_NAME_ _BUNDLE_NAME_ _BUNDLE_SEARCH list(APPEND NBL_DEPENDENCY_FILES "${NBL_BUILTIN_HEADER_GEN_PY}") list(APPEND NBL_DEPENDENCY_FILES "${NBL_BUILTIN_DATA_GEN_PY}") - set(NBL_RESOURCES_LIST_FILE "${_OUTPUT_SOURCE_DIRECTORY_}/resources.txt") + set(NBL_RESOURCES_LIST_FILE "${_OUTPUT_SOURCE_DIRECTORY_}/resources-$.txt") string(REPLACE ";" "\n" RESOURCES_ARGS "${_LBR_${_BUNDLE_NAME_}_}") - file(WRITE "${NBL_RESOURCES_LIST_FILE}" "${RESOURCES_ARGS}") + file(GENERATE OUTPUT "${NBL_RESOURCES_LIST_FILE}" CONTENT "${RESOURCES_ARGS}") set(NBL_BUILTIN_RESOURCES_H "${_OUTPUT_HEADER_DIRECTORY_}/${NBL_BS_HEADER_FILENAME}") set(NBL_BUILTIN_RESOURCE_DATA_CPP "${_OUTPUT_SOURCE_DIRECTORY_}/${NBL_BS_DATA_SOURCE_FILENAME}") @@ -247,4 +247,4 @@ function(ADD_CUSTOM_BUILTIN_RESOURCES _TARGET_NAME_ _BUNDLE_NAME_ _BUNDLE_SEARCH if(MSVC AND NBL_SANITIZE_ADDRESS) set_property(TARGET ${_TARGET_NAME_} PROPERTY COMPILE_OPTIONS /fsanitize=address) endif() -endfunction() \ No newline at end of file +endfunction() From f95f163d37f26fe8a07b06a33e901bf7d9b9fb83 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 28 Dec 2025 22:37:07 +0100 Subject: [PATCH 330/472] update examples_tests submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 59a434cc64..81449834f4 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 59a434cc64179b223663a6ca6543a4e85a9f58e1 +Subproject commit 81449834f4efd4fe649f7dc706ab202ca92c45f3 From 575136ee1546ea5849a97af286ff3b7f419c98e8 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 28 Dec 2025 22:40:06 +0100 Subject: [PATCH 331/472] builtin sources & headers per configuration 
--- src/nbl/builtin/utils.cmake | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/src/nbl/builtin/utils.cmake b/src/nbl/builtin/utils.cmake index 26499dfc76..17fce1e8a6 100644 --- a/src/nbl/builtin/utils.cmake +++ b/src/nbl/builtin/utils.cmake @@ -82,10 +82,18 @@ function(ADD_CUSTOM_BUILTIN_RESOURCES _TARGET_NAME_ _BUNDLE_NAME_ _BUNDLE_SEARCH string(MAKE_C_IDENTIFIER ${_GUARD_SUFFIX_} _GUARD_SUFFIX_) set(_OUTPUT_INCLUDE_SEARCH_DIRECTORY_ "${_OUTPUT_INCLUDE_SEARCH_DIRECTORY_}") - set(_OUTPUT_HEADER_DIRECTORY_ "${_OUTPUT_INCLUDE_SEARCH_DIRECTORY_}/${_NAMESPACE_PREFIX_}") + set(_OUTPUT_HEADER_DIRECTORY_BASE "${_OUTPUT_INCLUDE_SEARCH_DIRECTORY_}/${_NAMESPACE_PREFIX_}") + set(_OUTPUT_SOURCE_DIRECTORY_BASE "${_OUTPUT_SOURCE_DIRECTORY_}") + set(_OUTPUT_INCLUDE_SEARCH_DIRECTORY_CONFIG "${_OUTPUT_INCLUDE_SEARCH_DIRECTORY_}/$") + set(_OUTPUT_HEADER_DIRECTORY_ "${_OUTPUT_INCLUDE_SEARCH_DIRECTORY_CONFIG}/${_NAMESPACE_PREFIX_}") + set(_OUTPUT_SOURCE_DIRECTORY_ "${_OUTPUT_SOURCE_DIRECTORY_BASE}/$") + set(_OUTPUT_INCLUDE_DIRECTORIES_ + "${_OUTPUT_HEADER_DIRECTORY_}" + "${_OUTPUT_INCLUDE_SEARCH_DIRECTORY_CONFIG}" + ) - file(MAKE_DIRECTORY "${_OUTPUT_HEADER_DIRECTORY_}") - file(MAKE_DIRECTORY "${_OUTPUT_SOURCE_DIRECTORY_}") + file(MAKE_DIRECTORY "${_OUTPUT_HEADER_DIRECTORY_BASE}") + file(MAKE_DIRECTORY "${_OUTPUT_SOURCE_DIRECTORY_BASE}") set(_ITR_ 0) foreach(X IN LISTS _LBR_${_BUNDLE_NAME_}_) # iterate over builtin resources bundle list given bundle name @@ -119,7 +127,7 @@ function(ADD_CUSTOM_BUILTIN_RESOURCES _TARGET_NAME_ _BUNDLE_NAME_ _BUNDLE_SEARCH list(APPEND NBL_DEPENDENCY_FILES "${NBL_BUILTIN_HEADER_GEN_PY}") list(APPEND NBL_DEPENDENCY_FILES "${NBL_BUILTIN_DATA_GEN_PY}") - set(NBL_RESOURCES_LIST_FILE "${_OUTPUT_SOURCE_DIRECTORY_}/resources-$.txt") + set(NBL_RESOURCES_LIST_FILE "${_OUTPUT_SOURCE_DIRECTORY_BASE}/resources-$.txt") string(REPLACE ";" "\n" RESOURCES_ARGS "${_LBR_${_BUNDLE_NAME_}_}") file(GENERATE OUTPUT 
"${NBL_RESOURCES_LIST_FILE}" CONTENT "${RESOURCES_ARGS}") @@ -139,6 +147,7 @@ function(ADD_CUSTOM_BUILTIN_RESOURCES _TARGET_NAME_ _BUNDLE_NAME_ _BUNDLE_SEARCH ) add_custom_command(OUTPUT "${NBL_BUILTIN_RESOURCES_H}" "${NBL_BUILTIN_RESOURCE_DATA_CPP}" "${NBL_BUILTIN_DATA_ARCHIVE_H}" "${NBL_BUILTIN_DATA_ARCHIVE_CPP}" + COMMAND ${CMAKE_COMMAND} -E make_directory "${_OUTPUT_HEADER_DIRECTORY_}" "${_OUTPUT_SOURCE_DIRECTORY_}" COMMAND "${_Python3_EXECUTABLE}" "${NBL_BUILTIN_HEADER_GEN_PY}" ${NBL_BUILTIN_RESOURCES_COMMON_ARGS} --outputBuiltinPath "${NBL_BUILTIN_RESOURCES_H}" --outputArchivePath "${NBL_BUILTIN_DATA_ARCHIVE_H}" --archiveBundlePath "${_BUNDLE_ARCHIVE_ABSOLUTE_PATH_}" --guardSuffix "${_GUARD_SUFFIX_}" --isSharedLibrary "${_SHARED_}" COMMAND "${_Python3_EXECUTABLE}" "${NBL_BUILTIN_DATA_GEN_PY}" ${NBL_BUILTIN_RESOURCES_COMMON_ARGS} --outputBuiltinPath "${NBL_BUILTIN_RESOURCE_DATA_CPP}" --outputArchivePath "${NBL_BUILTIN_DATA_ARCHIVE_CPP}" --bundleAbsoluteEntryPath "${_BUNDLE_SEARCH_DIRECTORY_}/${_BUNDLE_ARCHIVE_ABSOLUTE_PATH_}" --correspondingHeaderFile "${NBL_BS_HEADER_FILENAME}" --xxHash256Exe "$<${_NBL_BR_RUNTIME_HASH_}:$>" COMMENT "Generating \"${_TARGET_NAME_}\"'s sources & headers" @@ -204,8 +213,8 @@ function(ADD_CUSTOM_BUILTIN_RESOURCES _TARGET_NAME_ _BUNDLE_NAME_ _BUNDLE_SEARCH endif() target_include_directories(${_TARGET_NAME_} PUBLIC + ${_OUTPUT_INCLUDE_DIRECTORIES_} "${_NABLA_INCLUDE_DIRECTORIES_}" - "${_OUTPUT_HEADER_DIRECTORY_}" ) set_target_properties(${_TARGET_NAME_} PROPERTIES CXX_STANDARD 20) @@ -242,7 +251,7 @@ function(ADD_CUSTOM_BUILTIN_RESOURCES _TARGET_NAME_ _BUNDLE_NAME_ _BUNDLE_SEARCH _ADD_PROPERTY_(BUILTIN_RESOURCES_HEADER_DIRECTORY _OUTPUT_HEADER_DIRECTORY_) _ADD_PROPERTY_(BUILTIN_RESOURCES_SOURCE_DIRECTORY _OUTPUT_SOURCE_DIRECTORY_) _ADD_PROPERTY_(BUILTIN_RESOURCES_HEADERS NBL_BUILTIN_RESOURCES_HEADERS) - _ADD_PROPERTY_(BUILTIN_RESOURCES_INCLUDE_SEARCH_DIRECTORY _OUTPUT_INCLUDE_SEARCH_DIRECTORY_) + 
_ADD_PROPERTY_(BUILTIN_RESOURCES_INCLUDE_SEARCH_DIRECTORY _OUTPUT_HEADER_DIRECTORY_) if(MSVC AND NBL_SANITIZE_ADDRESS) set_property(TARGET ${_TARGET_NAME_} PROPERTY COMPILE_OPTIONS /fsanitize=address) From 9d993304cdee546d1b3fc021ba2a19821c103b50 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 28 Dec 2025 22:40:06 +0100 Subject: [PATCH 332/472] builtin sources & headers per configuration --- src/nbl/builtin/utils.cmake | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/src/nbl/builtin/utils.cmake b/src/nbl/builtin/utils.cmake index 26499dfc76..17fce1e8a6 100644 --- a/src/nbl/builtin/utils.cmake +++ b/src/nbl/builtin/utils.cmake @@ -82,10 +82,18 @@ function(ADD_CUSTOM_BUILTIN_RESOURCES _TARGET_NAME_ _BUNDLE_NAME_ _BUNDLE_SEARCH string(MAKE_C_IDENTIFIER ${_GUARD_SUFFIX_} _GUARD_SUFFIX_) set(_OUTPUT_INCLUDE_SEARCH_DIRECTORY_ "${_OUTPUT_INCLUDE_SEARCH_DIRECTORY_}") - set(_OUTPUT_HEADER_DIRECTORY_ "${_OUTPUT_INCLUDE_SEARCH_DIRECTORY_}/${_NAMESPACE_PREFIX_}") + set(_OUTPUT_HEADER_DIRECTORY_BASE "${_OUTPUT_INCLUDE_SEARCH_DIRECTORY_}/${_NAMESPACE_PREFIX_}") + set(_OUTPUT_SOURCE_DIRECTORY_BASE "${_OUTPUT_SOURCE_DIRECTORY_}") + set(_OUTPUT_INCLUDE_SEARCH_DIRECTORY_CONFIG "${_OUTPUT_INCLUDE_SEARCH_DIRECTORY_}/$") + set(_OUTPUT_HEADER_DIRECTORY_ "${_OUTPUT_INCLUDE_SEARCH_DIRECTORY_CONFIG}/${_NAMESPACE_PREFIX_}") + set(_OUTPUT_SOURCE_DIRECTORY_ "${_OUTPUT_SOURCE_DIRECTORY_BASE}/$") + set(_OUTPUT_INCLUDE_DIRECTORIES_ + "${_OUTPUT_HEADER_DIRECTORY_}" + "${_OUTPUT_INCLUDE_SEARCH_DIRECTORY_CONFIG}" + ) - file(MAKE_DIRECTORY "${_OUTPUT_HEADER_DIRECTORY_}") - file(MAKE_DIRECTORY "${_OUTPUT_SOURCE_DIRECTORY_}") + file(MAKE_DIRECTORY "${_OUTPUT_HEADER_DIRECTORY_BASE}") + file(MAKE_DIRECTORY "${_OUTPUT_SOURCE_DIRECTORY_BASE}") set(_ITR_ 0) foreach(X IN LISTS _LBR_${_BUNDLE_NAME_}_) # iterate over builtin resources bundle list given bundle name @@ -119,7 +127,7 @@ function(ADD_CUSTOM_BUILTIN_RESOURCES _TARGET_NAME_ _BUNDLE_NAME_ 
_BUNDLE_SEARCH list(APPEND NBL_DEPENDENCY_FILES "${NBL_BUILTIN_HEADER_GEN_PY}") list(APPEND NBL_DEPENDENCY_FILES "${NBL_BUILTIN_DATA_GEN_PY}") - set(NBL_RESOURCES_LIST_FILE "${_OUTPUT_SOURCE_DIRECTORY_}/resources-$.txt") + set(NBL_RESOURCES_LIST_FILE "${_OUTPUT_SOURCE_DIRECTORY_BASE}/resources-$.txt") string(REPLACE ";" "\n" RESOURCES_ARGS "${_LBR_${_BUNDLE_NAME_}_}") file(GENERATE OUTPUT "${NBL_RESOURCES_LIST_FILE}" CONTENT "${RESOURCES_ARGS}") @@ -139,6 +147,7 @@ function(ADD_CUSTOM_BUILTIN_RESOURCES _TARGET_NAME_ _BUNDLE_NAME_ _BUNDLE_SEARCH ) add_custom_command(OUTPUT "${NBL_BUILTIN_RESOURCES_H}" "${NBL_BUILTIN_RESOURCE_DATA_CPP}" "${NBL_BUILTIN_DATA_ARCHIVE_H}" "${NBL_BUILTIN_DATA_ARCHIVE_CPP}" + COMMAND ${CMAKE_COMMAND} -E make_directory "${_OUTPUT_HEADER_DIRECTORY_}" "${_OUTPUT_SOURCE_DIRECTORY_}" COMMAND "${_Python3_EXECUTABLE}" "${NBL_BUILTIN_HEADER_GEN_PY}" ${NBL_BUILTIN_RESOURCES_COMMON_ARGS} --outputBuiltinPath "${NBL_BUILTIN_RESOURCES_H}" --outputArchivePath "${NBL_BUILTIN_DATA_ARCHIVE_H}" --archiveBundlePath "${_BUNDLE_ARCHIVE_ABSOLUTE_PATH_}" --guardSuffix "${_GUARD_SUFFIX_}" --isSharedLibrary "${_SHARED_}" COMMAND "${_Python3_EXECUTABLE}" "${NBL_BUILTIN_DATA_GEN_PY}" ${NBL_BUILTIN_RESOURCES_COMMON_ARGS} --outputBuiltinPath "${NBL_BUILTIN_RESOURCE_DATA_CPP}" --outputArchivePath "${NBL_BUILTIN_DATA_ARCHIVE_CPP}" --bundleAbsoluteEntryPath "${_BUNDLE_SEARCH_DIRECTORY_}/${_BUNDLE_ARCHIVE_ABSOLUTE_PATH_}" --correspondingHeaderFile "${NBL_BS_HEADER_FILENAME}" --xxHash256Exe "$<${_NBL_BR_RUNTIME_HASH_}:$>" COMMENT "Generating \"${_TARGET_NAME_}\"'s sources & headers" @@ -204,8 +213,8 @@ function(ADD_CUSTOM_BUILTIN_RESOURCES _TARGET_NAME_ _BUNDLE_NAME_ _BUNDLE_SEARCH endif() target_include_directories(${_TARGET_NAME_} PUBLIC + ${_OUTPUT_INCLUDE_DIRECTORIES_} "${_NABLA_INCLUDE_DIRECTORIES_}" - "${_OUTPUT_HEADER_DIRECTORY_}" ) set_target_properties(${_TARGET_NAME_} PROPERTIES CXX_STANDARD 20) @@ -242,7 +251,7 @@ function(ADD_CUSTOM_BUILTIN_RESOURCES 
_TARGET_NAME_ _BUNDLE_NAME_ _BUNDLE_SEARCH _ADD_PROPERTY_(BUILTIN_RESOURCES_HEADER_DIRECTORY _OUTPUT_HEADER_DIRECTORY_) _ADD_PROPERTY_(BUILTIN_RESOURCES_SOURCE_DIRECTORY _OUTPUT_SOURCE_DIRECTORY_) _ADD_PROPERTY_(BUILTIN_RESOURCES_HEADERS NBL_BUILTIN_RESOURCES_HEADERS) - _ADD_PROPERTY_(BUILTIN_RESOURCES_INCLUDE_SEARCH_DIRECTORY _OUTPUT_INCLUDE_SEARCH_DIRECTORY_) + _ADD_PROPERTY_(BUILTIN_RESOURCES_INCLUDE_SEARCH_DIRECTORY _OUTPUT_HEADER_DIRECTORY_) if(MSVC AND NBL_SANITIZE_ADDRESS) set_property(TARGET ${_TARGET_NAME_} PROPERTY COMPILE_OPTIONS /fsanitize=address) From 2f5de5d91185825a1dc1438c6197ec8128289041 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 29 Dec 2025 10:33:08 +0100 Subject: [PATCH 333/472] wipe installation rules of builtin resource headers, they are not included in headers, not exported and its private interface actually they might be installed but then shall be exported and it requires polishing what content they include as they should hide all info about keys to keep our install single prefix --- cmake/common.cmake | 7 ------- src/nbl/CMakeLists.txt | 8 -------- 2 files changed, 15 deletions(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index 10741c9a94..4ee0e705ca 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -350,13 +350,6 @@ function(nbl_install_media _FILE) nbl_install_lib_spec("${_FILE}" "") endfunction() -function(nbl_install_builtin_resources _TARGET_) - get_target_property(_BUILTIN_RESOURCES_INCLUDE_SEARCH_DIRECTORY_ ${_TARGET_} BUILTIN_RESOURCES_INCLUDE_SEARCH_DIRECTORY) - get_target_property(_BUILTIN_RESOURCES_HEADERS_ ${_TARGET_} BUILTIN_RESOURCES_HEADERS) - - nbl_install_headers_spec("${_BUILTIN_RESOURCES_HEADERS_}" "${_BUILTIN_RESOURCES_INCLUDE_SEARCH_DIRECTORY_}") -endfunction() - function(NBL_TEST_MODULE_INSTALL_FILE _NBL_FILEPATH_) file(RELATIVE_PATH _NBL_REL_INSTALL_DEST_ "${NBL_ROOT_PATH}" "${_NBL_FILEPATH_}") cmake_path(GET _NBL_REL_INSTALL_DEST_ PARENT_PATH _NBL_REL_INSTALL_DEST_) diff --git 
a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 8603d8935f..6a7e144f36 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -785,14 +785,6 @@ add_subdirectory(ext EXCLUDE_FROM_ALL) propagate_changed_variables_to_parent_scope() nbl_install_headers("${NABLA_HEADERS_PUBLIC}") -nbl_install_file_spec("${NBL_ROOT_PATH_BINARY}/include/nbl/builtin/builtinResources.h" "nbl/builtin") - -if(NBL_EMBED_BUILTIN_RESOURCES) - nbl_install_builtin_resources(nblBuiltinResourceData) - nbl_install_builtin_resources(spirvBuiltinResourceData) - nbl_install_builtin_resources(boostBuiltinResourceData) -endif() - set_target_properties(Nabla PROPERTIES DEBUG_POSTFIX _debug) set_target_properties(Nabla PROPERTIES RELWITHDEBINFO_POSTFIX _relwithdebinfo) From 374257c26811cb3adbd8528fe9268ee2a7896c30 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 29 Dec 2025 10:33:08 +0100 Subject: [PATCH 334/472] wipe installation rules of builtin resource headers, they are not included in headers, not exported and its private interface actually they might be installed but then shall be exported and it requires polishing what content they include as they should hide all info about keys to keep our install single prefix --- cmake/common.cmake | 7 ------- src/nbl/CMakeLists.txt | 8 -------- 2 files changed, 15 deletions(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index 5e33855df9..9836fa7666 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -350,13 +350,6 @@ function(nbl_install_media _FILE) nbl_install_lib_spec("${_FILE}" "") endfunction() -function(nbl_install_builtin_resources _TARGET_) - get_target_property(_BUILTIN_RESOURCES_INCLUDE_SEARCH_DIRECTORY_ ${_TARGET_} BUILTIN_RESOURCES_INCLUDE_SEARCH_DIRECTORY) - get_target_property(_BUILTIN_RESOURCES_HEADERS_ ${_TARGET_} BUILTIN_RESOURCES_HEADERS) - - nbl_install_headers_spec("${_BUILTIN_RESOURCES_HEADERS_}" "${_BUILTIN_RESOURCES_INCLUDE_SEARCH_DIRECTORY_}") -endfunction() - 
function(NBL_TEST_MODULE_INSTALL_FILE _NBL_FILEPATH_) file(RELATIVE_PATH _NBL_REL_INSTALL_DEST_ "${NBL_ROOT_PATH}" "${_NBL_FILEPATH_}") cmake_path(GET _NBL_REL_INSTALL_DEST_ PARENT_PATH _NBL_REL_INSTALL_DEST_) diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 76e046848c..cebe1696ad 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -780,14 +780,6 @@ add_subdirectory(ext EXCLUDE_FROM_ALL) propagate_changed_variables_to_parent_scope() nbl_install_headers("${NABLA_HEADERS_PUBLIC}") -nbl_install_file_spec("${NBL_ROOT_PATH_BINARY}/include/nbl/builtin/builtinResources.h" "nbl/builtin") - -if(NBL_EMBED_BUILTIN_RESOURCES) - nbl_install_builtin_resources(nblBuiltinResourceData) - nbl_install_builtin_resources(spirvBuiltinResourceData) - nbl_install_builtin_resources(boostBuiltinResourceData) -endif() - set_target_properties(Nabla PROPERTIES DEBUG_POSTFIX _debug) set_target_properties(Nabla PROPERTIES RELWITHDEBINFO_POSTFIX _relwithdebinfo) From b2e73a47c73e3c6e9c0e07085e6e6fc5cac9af0c Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 29 Dec 2025 17:52:42 +0700 Subject: [PATCH 335/472] Move obb into its own file --- .../asset/utils/CPolygonGeometryManipulator.h | 1 + include/nbl/builtin/hlsl/shapes/aabb.hlsl | 23 ---------- include/nbl/builtin/hlsl/shapes/obb.hlsl | 42 +++++++++++++++++++ src/nbl/builtin/CMakeLists.txt | 1 + 4 files changed, 44 insertions(+), 23 deletions(-) create mode 100644 include/nbl/builtin/hlsl/shapes/obb.hlsl diff --git a/include/nbl/asset/utils/CPolygonGeometryManipulator.h b/include/nbl/asset/utils/CPolygonGeometryManipulator.h index 4f00c9fea1..7b953b4fbd 100644 --- a/include/nbl/asset/utils/CPolygonGeometryManipulator.h +++ b/include/nbl/asset/utils/CPolygonGeometryManipulator.h @@ -10,6 +10,7 @@ #include "nbl/asset/ICPUPolygonGeometry.h" #include "nbl/asset/utils/CGeometryManipulator.h" #include "nbl/asset/utils/CSmoothNormalGenerator.h" +#include "nbl/builtin/hlsl/shapes/obb.hlsl" namespace nbl::asset { diff 
--git a/include/nbl/builtin/hlsl/shapes/aabb.hlsl b/include/nbl/builtin/hlsl/shapes/aabb.hlsl index 1d5b772f63..07219c6687 100644 --- a/include/nbl/builtin/hlsl/shapes/aabb.hlsl +++ b/include/nbl/builtin/hlsl/shapes/aabb.hlsl @@ -60,29 +60,6 @@ struct AABB point_t maxVx; }; -template -struct OBB -{ - using scalar_t = Scalar; - using point_t = vector; - - static OBB createAxisAligned(point_t mid, point_t len) - { - OBB ret; - ret.mid = mid; - ret.ext = len * 0.5f; - for (auto dim_i = 0; dim_i < D; dim_i++) - { - ret.axes[dim_i] = point_t(0); - ret.axes[dim_i][dim_i] = 1; - } - return ret; - } - - point_t mid; - std::array axes; - point_t ext; -}; namespace util { diff --git a/include/nbl/builtin/hlsl/shapes/obb.hlsl b/include/nbl/builtin/hlsl/shapes/obb.hlsl new file mode 100644 index 0000000000..45873cbc7b --- /dev/null +++ b/include/nbl/builtin/hlsl/shapes/obb.hlsl @@ -0,0 +1,42 @@ +// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_BUILTIN_HLSL_SHAPES_OBB_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SHAPES_OBB_INCLUDED_ + +namespace nbl +{ +namespace hlsl +{ +namespace shapes +{ + +template +struct OBB +{ + using scalar_t = Scalar; + using point_t = vector; + + static OBB createAxisAligned(point_t mid, point_t len) + { + OBB ret; + ret.mid = mid; + ret.ext = len * 0.5f; + for (auto dim_i = 0; dim_i < D; dim_i++) + { + ret.axes[dim_i] = point_t(0); + ret.axes[dim_i][dim_i] = 1; + } + return ret; + } + + point_t mid; + std::array axes; + point_t ext; +}; + +} +} +} + +#endif diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 085ed3c923..d55362ef39 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -257,6 +257,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/triangle.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/spherical_triangle.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/spherical_rectangle.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/aabb.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/obb.hlsl") #sampling LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/basic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/linear.hlsl") From afb45589d183639a37b121c45f4bda541ec2a8e7 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 29 Dec 2025 17:54:40 +0700 Subject: [PATCH 336/472] Recover missing COBBGenerator.h file --- include/nbl/asset/utils/COBBGenerator.h | 25 + src/nbl/asset/utils/COBBGenerator.cpp | 493 ++++++++++++++++++ .../utils/CPolygonGeometryManipulator.cpp | 2 +- 3 files changed, 519 insertions(+), 1 deletion(-) create mode 100644 include/nbl/asset/utils/COBBGenerator.h create mode 100644 src/nbl/asset/utils/COBBGenerator.cpp diff --git a/include/nbl/asset/utils/COBBGenerator.h b/include/nbl/asset/utils/COBBGenerator.h new file 
mode 100644 index 0000000000..2b0d408342 --- /dev/null +++ b/include/nbl/asset/utils/COBBGenerator.h @@ -0,0 +1,25 @@ + +// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_ASSET_C_OBB_GENERATOR_H_INCLUDED_ +#define _NBL_ASSET_C_OBB_GENERATOR_H_INCLUDED_ + +#include "nbl/asset/utils/CPolygonGeometryManipulator.h" +#include "nbl/builtin/hlsl/shapes/obb.hlsl" + +namespace nbl::asset +{ + class COBBGenerator + { + public: + + using VertexCollection = CPolygonGeometryManipulator::VertexCollection; + + static hlsl::shapes::OBB<> compute(const VertexCollection& vertices); + + }; +} + +#endif diff --git a/src/nbl/asset/utils/COBBGenerator.cpp b/src/nbl/asset/utils/COBBGenerator.cpp new file mode 100644 index 0000000000..98b3707ab8 --- /dev/null +++ b/src/nbl/asset/utils/COBBGenerator.cpp @@ -0,0 +1,493 @@ +#include "nbl/asset/utils/COBBGenerator.h" + +namespace nbl::asset +{ + +namespace +{ + +template +struct Extremals +{ + std::array values; + + T* minPtr() + { + return values.data(); + } + + const T* minPtr() const + { + return values.data(); + } + + T* maxPtr() + { + return values.data() + CountV; + } + + const T* maxPtr() const + { + return values.data() + CountV; + } + +}; +} + +hlsl::shapes::OBB<> COBBGenerator::compute(const VertexCollection& vertices) +{ + constexpr size_t SAMPLE_DIR_COUNT = 7; // Number of sample directions + constexpr size_t SAMPLE_COUNT = SAMPLE_DIR_COUNT * 2; + + if (vertices.size <= 0) + { + return hlsl::shapes::OBB<>::createAxisAligned({}, {}); + } + + static auto getQualityValue = [](hlsl::float32_t3 len) -> hlsl::float32_t + { + return len.x * len.y + len.x * len.z + len.y * len.z; //half box area + }; + + using ExtremalVertices = Extremals; + using ExtremalProjections = Extremals; + using Axes = std::array; + using Edges = std::array; + + struct ExtremalSamples + { + ExtremalVertices 
vertices; + ExtremalProjections projections; + }; + + struct LargeBaseTriangle + { + hlsl::float32_t3 normal = {}; + Axes vertices = {}; + Edges edges = {}; + enum Flag + { + NORMAL, + SECOND_POINT_CLOSE, + THIRD_POINT_CLOSE + } flag; + }; + + static auto findExtremals_7FixedDirs = [](const VertexCollection& vertices)-> ExtremalSamples + { + ExtremalSamples result; + hlsl::float32_t proj; + + const auto firstVertex = vertices.fetch(0); + + auto* minProjections = result.projections.minPtr(); + auto* maxProjections = result.projections.maxPtr(); + + auto* minVertices = result.vertices.minPtr(); + auto* maxVertices = result.vertices.maxPtr(); + + // Slab 0: dir {1, 0, 0} + proj = firstVertex.x; + minProjections[0] = maxProjections[0] = proj; // seed BOTH extremes from the first vertex, like every other slab + minVertices[0] = firstVertex; maxVertices[0] = firstVertex; + // Slab 1: dir {0, 1, 0} + proj = firstVertex.y; + minProjections[1] = maxProjections[1] = proj; + minVertices[1] = firstVertex; maxVertices[1] = firstVertex; + // Slab 2: dir {0, 0, 1} + proj = firstVertex.z; + minProjections[2] = maxProjections[2] = proj; + minVertices[2] = firstVertex; maxVertices[2] = firstVertex; + // Slab 3: dir {1, 1, 1} + proj = firstVertex.x + firstVertex.y + firstVertex.z; + minProjections[3] = maxProjections[3] = proj; + minVertices[3] = firstVertex; maxVertices[3] = firstVertex; + // Slab 4: dir {1, 1, -1} + proj = firstVertex.x + firstVertex.y - firstVertex.z; + minProjections[4] = maxProjections[4] = proj; + minVertices[4] = firstVertex; maxVertices[4] = firstVertex; + // Slab 5: dir {1, -1, 1} + proj = firstVertex.x - firstVertex.y + firstVertex.z; + minProjections[5] = maxProjections[5] = proj; + minVertices[5] = firstVertex; maxVertices[5] = firstVertex; + // Slab 6: dir {1, -1, -1} + proj = firstVertex.x - firstVertex.y - firstVertex.z; + minProjections[6] = maxProjections[6] = proj; + minVertices[6] = firstVertex; maxVertices[6] = firstVertex; + + for (size_t vertex_i = 1; vertex_i < vertices.size; vertex_i++) + { + const auto
vertex = vertices.fetch(vertex_i); + // Slab 0: dir {1, 0, 0} + proj = vertices.fetch(vertex_i).x; + if (proj < minProjections[0]) { minProjections[0] = proj; minVertices[0] = vertices.fetch(vertex_i); } + if (proj > maxProjections[0]) { maxProjections[0] = proj; maxVertices[0] = vertices.fetch(vertex_i); } + // Slab 1: dir {0, 1, 0} + proj = vertices.fetch(vertex_i).y; + if (proj < minProjections[1]) { minProjections[1] = proj; minVertices[1] = vertices.fetch(vertex_i); } + if (proj > maxProjections[1]) { maxProjections[1] = proj; maxVertices[1] = vertices.fetch(vertex_i); } + // Slab 2: dir {0, 0, 1} + proj = vertices.fetch(vertex_i).z; + if (proj < minProjections[2]) { minProjections[2] = proj; minVertices[2] = vertices.fetch(vertex_i); } + if (proj > maxProjections[2]) { maxProjections[2] = proj; maxVertices[2] = vertices.fetch(vertex_i); } + // Slab 3: dir {1, 1, 1} + proj = vertices.fetch(vertex_i).x + vertices.fetch(vertex_i).y + vertices.fetch(vertex_i).z; + if (proj < minProjections[3]) { minProjections[3] = proj; minVertices[3] = vertices.fetch(vertex_i); } + if (proj > maxProjections[3]) { maxProjections[3] = proj; maxVertices[3] = vertices.fetch(vertex_i); } + // Slab 4: dir {1, 1, -1} + proj = vertices.fetch(vertex_i).x + vertices.fetch(vertex_i).y - vertices.fetch(vertex_i).z; + if (proj < minProjections[4]) { minProjections[4] = proj; minVertices[4] = vertices.fetch(vertex_i); } + if (proj > maxProjections[4]) { maxProjections[4] = proj; maxVertices[4] = vertices.fetch(vertex_i); } + // Slab 5: dir {1, -1, 1} + proj = vertices.fetch(vertex_i).x - vertices.fetch(vertex_i).y + vertices.fetch(vertex_i).z; + if (proj < minProjections[5]) { minProjections[5] = proj; minVertices[5] = vertices.fetch(vertex_i); } + if (proj > maxProjections[5]) { maxProjections[5] = proj; maxVertices[5] = vertices.fetch(vertex_i); } + // Slab 6: dir {1, -1, -1} + proj = vertices.fetch(vertex_i).x - vertices.fetch(vertex_i).y - vertices.fetch(vertex_i).z; + if (proj < 
minProjections[6]) { minProjections[6] = proj; minVertices[6] = vertices.fetch(vertex_i); } + if (proj > maxProjections[6]) { maxProjections[6] = proj; maxVertices[6] = vertices.fetch(vertex_i); } + } + + return result; + }; + + + static auto findFurthestPointPair = [](const ExtremalVertices& extremalVertices) -> std::pair + { + int indexFurthestPair = 0; + const auto firstDiff = extremalVertices.maxPtr()[0] - extremalVertices.minPtr()[0]; auto maxSqDist = hlsl::dot(firstDiff, firstDiff); // squared length of (max - min), not dot(max, min) + for (int k = 1; k < SAMPLE_DIR_COUNT; k++) + { + const auto diff = extremalVertices.maxPtr()[k] - extremalVertices.minPtr()[k]; const auto sqDist = hlsl::dot(diff, diff); + if (sqDist > maxSqDist) { maxSqDist = sqDist; indexFurthestPair = k; } + } + return { + extremalVertices.minPtr()[indexFurthestPair], + extremalVertices.maxPtr()[indexFurthestPair] + }; + }; + + static auto sqDistPointInfiniteEdge = [](const hlsl::float32_t3& q, const hlsl::float32_t3& p0, const hlsl::float32_t3& v) -> hlsl::float32_t + { + const auto u0 = q - p0; + const auto t = dot(v, u0); + const auto sqLen_v = hlsl::dot(v, v); + return hlsl::dot(u0, u0) - (t * t) / sqLen_v; + }; + + static auto findFurthestPointFromInfiniteEdge = [](const hlsl::float32_t3& p0, const hlsl::float32_t3& e0, const VertexCollection& vertices) + { + auto maxSqDist = sqDistPointInfiniteEdge(vertices[0], p0, e0); + size_t maxIndex = 0; // same type as the loop index, avoids a narrowing assignment + for (size_t i = 1; i < vertices.size; i++) + { + const auto sqDist = sqDistPointInfiniteEdge(vertices[i], p0, e0); + if (sqDist > maxSqDist) + { maxSqDist = sqDist; + maxIndex = i; + } + } + + struct Result + { + hlsl::float32_t3 point; + hlsl::float32_t sqDist; + }; + return Result{ + vertices[maxIndex], + maxSqDist + }; + }; + + static auto findExtremalProjs_OneDir = [](const hlsl::float32_t3& normal, const VertexCollection& vertices) + { + const auto firstProj = hlsl::dot(vertices[0], normal); + auto tMinProj = firstProj, tMaxProj = firstProj; + + for (int i = 1; i < vertices.size; i++) + { + const auto proj = hlsl::dot(vertices[i], normal); + if (proj < tMinProj) {
tMinProj = proj; } + if (proj > tMaxProj) { tMaxProj = proj; } + } + + struct Result + { + hlsl::float32_t minProj; + hlsl::float32_t maxProj; + }; + return Result{ tMinProj, tMaxProj }; + }; + + static auto findExtremalPoints_OneDir = [](const hlsl::float32_t3& normal, const VertexCollection& vertices) + { + const auto firstProj = dot(vertices[0], normal); + + auto tMinProj = firstProj, tMaxProj = firstProj; + auto tMinVert = vertices[0], tMaxVert = vertices[0]; + + for (int i = 1; i < vertices.size; i++) + { + const auto proj = hlsl::dot(vertices[i], normal); + if (proj < tMinProj) { tMinProj = proj; tMinVert = vertices[i]; } + if (proj > tMaxProj) { tMaxProj = proj; tMaxVert = vertices[i]; } + } + + struct Result + { + hlsl::float32_t minProj; + hlsl::float32_t maxProj; + hlsl::float32_t3 minVert; + hlsl::float32_t3 maxVert; + }; + return Result{ tMinProj, tMaxProj, tMinVert, tMaxVert }; + }; + + static auto findUpperLowerTetraPoints = []( + const hlsl::float32_t3& n, + const VertexCollection& vertices, + const hlsl::float32_t3& p0) + { + const auto eps = 0.000001f; + const auto extremalPoints = findExtremalPoints_OneDir(n, vertices); + const auto triProj = hlsl::dot(p0, n); + + const auto maxVert = extremalPoints.maxProj - eps > triProj ? std::optional(extremalPoints.maxVert) : std::nullopt; + const auto minVert = extremalPoints.minProj + eps < triProj ? 
std::optional(extremalPoints.minVert) : std::nullopt; + + struct Result + { + std::optional minVert; + std::optional maxVert; + }; + return Result{ + minVert, + maxVert + }; + }; + + static auto findBestObbAxesFromTriangleNormalAndEdgeVectors = []( + const VertexCollection& vertices, + const hlsl::float32_t3 normal, + const std::array edges, + Axes& bestAxes, + hlsl::float32_t& bestVal) + { + hlsl::float32_t3 dmax, dmin, dlen; + + // The operands are assumed to be orthogonal and unit normals + const auto yExtremeProjs = findExtremalProjs_OneDir(normal, vertices); + dmin.y = yExtremeProjs.minProj; + dmax.y = yExtremeProjs.maxProj; + dlen.y = dmax.y - dmin.y; + + for (const auto& edge : edges) + { + const auto binormal = hlsl::cross(edge, normal); + + const auto xExtremeProjs = findExtremalProjs_OneDir(edge, vertices); + dmin.x = xExtremeProjs.minProj; + dmax.x = xExtremeProjs.maxProj; + dlen.x = dmax.x - dmin.x; + + const auto zExtremeProjs = findExtremalProjs_OneDir(binormal, vertices); + dmin.z = zExtremeProjs.minProj; + dmax.z = zExtremeProjs.maxProj; + dlen.z = dmax.z - dmin.z; + + const auto quality = getQualityValue(dlen); + if (quality < bestVal) + { + bestVal = quality; + bestAxes = { + edge, + normal, + binormal + }; + } + } + + }; + + + static auto findBaseTriangle = [](const ExtremalVertices& extremalVertices, const VertexCollection& vertices)-> LargeBaseTriangle + { + hlsl::float32_t eps = 0.000001f; + + std::array baseTriangleVertices = {}; // value-initialised: the degenerate returns copy the whole array + Edges edges = {}; + + // Find the furthest point pair among the selected min and max point pairs + std::tie(baseTriangleVertices[0], baseTriangleVertices[1]) = findFurthestPointPair(extremalVertices); + + // Degenerate case 1: + // If the found furthest points are located very close, return OBB aligned with the initial AABB + if (const auto pairDiff = baseTriangleVertices[0] - baseTriangleVertices[1]; hlsl::dot(pairDiff, pairDiff) < eps) // squared distance between p0 and p1 + { + return { + .vertices = baseTriangleVertices, + .flag = LargeBaseTriangle::SECOND_POINT_CLOSE + }; + } + + // Compute edge
vector of the line segment p0, p1 + edges[0] = hlsl::normalize(baseTriangleVertices[0] - baseTriangleVertices[1]); + + // Find a third point furthest away from line given by p0, e0 to define the large base triangle + const auto furthestPointRes = findFurthestPointFromInfiniteEdge(baseTriangleVertices[0], edges[0], vertices); baseTriangleVertices[2] = furthestPointRes.point; // p2 completes the base triangle + + // Degenerate case 2: + // If the third point is located very close to the line, return an OBB aligned with the line + if (furthestPointRes.sqDist < eps) + { + return { + .vertices = baseTriangleVertices, + .edges = edges, + .flag = LargeBaseTriangle::THIRD_POINT_CLOSE + }; + } + + // Compute the two remaining edge vectors and the normal vector of the base triangle + edges[1] = hlsl::normalize(baseTriangleVertices[1] - baseTriangleVertices[2]); + edges[2] = hlsl::normalize(baseTriangleVertices[2] - baseTriangleVertices[0]); + const auto normal = hlsl::normalize(hlsl::cross(edges[1], edges[0])); + + return { + .normal = normal, + .vertices = baseTriangleVertices, + .edges = edges, + .flag = LargeBaseTriangle::NORMAL + }; + }; + + auto findImprovedObbAxesFromUpperAndLowerTetrasOfBaseTriangle = [](const VertexCollection& vertices, + const LargeBaseTriangle& baseTriangle, + Axes& bestAxes, hlsl::float32_t& bestVal) + { + hlsl::float32_t3 f0, f1, f2; // Edge vectors towards minVert; + hlsl::float32_t3 g0, g1, g2; // Edge vectors towards maxVert; + hlsl::float32_t3 n0, n1, n2; // Unit normals of top tetra tris + hlsl::float32_t3 m0, m1, m2; // Unit normals of bottom tetra tris + + // Find furthest points above and below the plane of the base triangle for tetra constructions + // For each found valid point, search for the best OBB axes based on the 3 arising triangles + const auto upperLowerTetraVertices = findUpperLowerTetraPoints(baseTriangle.normal, vertices, baseTriangle.vertices[0]); + if (upperLowerTetraVertices.minVert) + { + const auto minVert = *upperLowerTetraVertices.minVert; + f0 = normalize(minVert - baseTriangle.vertices[0]); + f1 =
normalize(minVert - baseTriangle.vertices[1]); + f2 = normalize(minVert - baseTriangle.vertices[2]); + n0 = normalize(cross(f1, baseTriangle.edges[0])); + n1 = normalize(cross(f2, baseTriangle.edges[1])); + n2 = normalize(cross(f0, baseTriangle.edges[2])); + findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, n0, { baseTriangle.edges[0], f1, f0 }, bestAxes, bestVal); + findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, n1, { baseTriangle.edges[1], f2, f1 }, bestAxes, bestVal); + findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, n2, { baseTriangle.edges[2], f0, f2 }, bestAxes, bestVal); + } + if (upperLowerTetraVertices.maxVert) + { + const auto maxVert = *upperLowerTetraVertices.maxVert; // dereference the optional that was just tested, not minVert + g0 = normalize(maxVert - baseTriangle.vertices[0]); + g1 = normalize(maxVert - baseTriangle.vertices[1]); + g2 = normalize(maxVert - baseTriangle.vertices[2]); + m0 = normalize(cross(g1, baseTriangle.edges[0])); + m1 = normalize(cross(g2, baseTriangle.edges[1])); + m2 = normalize(cross(g0, baseTriangle.edges[2])); + findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, m0, { baseTriangle.edges[0], g1, g0 }, bestAxes, bestVal); + findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, m1, { baseTriangle.edges[1], g2, g1 }, bestAxes, bestVal); + findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, m2, { baseTriangle.edges[2], g0, g2 }, bestAxes, bestVal); + } + }; + + static auto buildObbFromAxesAndLocalMinMax = []( + const Axes& axes, + const hlsl::float32_t3& localMin, + const hlsl::float32_t3& localMax) -> hlsl::shapes::OBB<3, hlsl::float32_t> + { + const auto localMid = 0.5f * (localMin + localMax); + auto globalMid = axes[0] * localMid.x; + globalMid += axes[1] * localMid.y; + globalMid += axes[2] * localMid.z; + return { + .mid = globalMid, + .axes = axes, + .ext = 0.5f * (localMax - localMin) + }; + }; + + static auto computeObb = [](const Axes& axes, const VertexCollection& vertices) + { + const auto extremalX =
findExtremalProjs_OneDir(axes[0], vertices); + const auto extremalY = findExtremalProjs_OneDir(axes[1], vertices); + const auto extremalZ = findExtremalProjs_OneDir(axes[2], vertices); + const auto localMin = hlsl::float32_t3{ extremalX.minProj, extremalY.minProj, extremalZ.minProj }; + const auto localMax = hlsl::float32_t3{ extremalX.maxProj, extremalY.maxProj, extremalZ.maxProj }; + return buildObbFromAxesAndLocalMinMax(axes, localMin, localMax); + }; + + static auto computeLineAlignedObb = [](const hlsl::float32_t3& u, const VertexCollection& vertices) + { + // Given u, build any orthonormal base u, v, w + + // Make sure r is not equal to u + auto r = u; + if (fabs(u.x) > fabs(u.y) && fabs(u.x) > fabs(u.z)) { r.x = 0; } + else if (fabs(u.y) > fabs(u.z)) { r.y = 0; } + else { r.z = 0; } + + const auto sqLen = hlsl::dot(r, r); + if (sqLen < FLT_EPSILON) { r.x = r.y = r.z = 1; } + + const auto v = normalize(cross(u, r)); + const auto w = normalize(cross(u, v)); + return computeObb({ u, v, w }, vertices); + }; + + const auto extremals = findExtremals_7FixedDirs(vertices); + + const auto* minProj = extremals.projections.minPtr(); + const auto* maxProj = extremals.projections.maxPtr(); + + // Determine which points to use in the iterations below + const auto selectedVertices = [&] + { + if (vertices.size < SAMPLE_COUNT) { return vertices; } + return VertexCollection::fromSpan(extremals.vertices.values); + }(); + + // Compute size of AABB (max and min projections of vertices are already computed as slabs 0-2) + auto alMid = hlsl::float32_t3((minProj[0] + maxProj[0]) * 0.5f, (minProj[1] + maxProj[1]) * 0.5f, (minProj[2] + maxProj[2]) * 0.5f); + auto alLen = hlsl::float32_t3(maxProj[0] - minProj[0], maxProj[1] - minProj[1], maxProj[2] - minProj[2]); + auto alVal = getQualityValue(alLen); + + + const auto baseTriangle = findBaseTriangle(extremals.vertices, vertices); + + if (baseTriangle.flag == LargeBaseTriangle::SECOND_POINT_CLOSE) + return 
hlsl::shapes::OBB<>::createAxisAligned(alMid, alLen); + if (baseTriangle.flag == LargeBaseTriangle::THIRD_POINT_CLOSE) + return computeLineAlignedObb(baseTriangle.edges[0], vertices); + + + Axes bestAxes = { + hlsl::float32_t3{1.f, 0.f, 0.f}, + {0.f, 1.f, 0.f}, + {0.f, 0.f, 1.f}, + }; + auto bestVal = alVal; + // Find best OBB axes based on the base triangle + findBestObbAxesFromTriangleNormalAndEdgeVectors(selectedVertices, baseTriangle.normal, baseTriangle.edges, bestAxes, bestVal); + + // Find improved OBB axes based on constructed di-tetrahedral shape raised from base triangle + findImprovedObbAxesFromUpperAndLowerTetrasOfBaseTriangle(selectedVertices, baseTriangle, bestAxes, bestVal); + + const auto obb = computeObb(bestAxes, vertices); + + // Check if the OBB extent is still smaller than the intial AABB + if (getQualityValue(2.f * obb.ext) < alVal) + return obb; + return hlsl::shapes::OBB<>::createAxisAligned(alMid, alLen); +} + +} diff --git a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp index 52e23461b2..74169eb0d8 100644 --- a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp +++ b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp @@ -21,7 +21,7 @@ namespace nbl::asset hlsl::shapes::OBB<> CPolygonGeometryManipulator::calculateOBB(const VertexCollection& vertices) { - return CObbGenerator::calculateOBB(vertices); + return COBBGenerator::compute(vertices); } core::smart_refctd_ptr CPolygonGeometryManipulator::createUnweldedList(const ICPUPolygonGeometry* inGeo) From c71762e336026bcdcd9fdf8e936e4b55810ca4e8 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 29 Dec 2025 12:30:25 +0100 Subject: [PATCH 337/472] BUILD_INTERFACE for builtin auto-gen include search directories --- src/nbl/CMakeLists.txt | 1 + src/nbl/builtin/utils.cmake | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 6a7e144f36..895488cf45 100644 --- 
a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -694,6 +694,7 @@ if(NBL_EMBED_BUILTIN_RESOURCES) target_compile_definitions(${NBL_TARGET} PUBLIC "$") # workaround because must use Nabla headers without linking Nabla to itself.. target_include_directories(${NBL_TARGET} PUBLIC "$") target_include_directories(Nabla PRIVATE "${_INTERNAL_BR_OUTPUT_INCLUDE_SEARCH_DIRECTORY_}") + target_include_directories(Nabla INTERFACE "$") endforeach() add_dependencies(Nabla ${NBL_TARGETS}) diff --git a/src/nbl/builtin/utils.cmake b/src/nbl/builtin/utils.cmake index 17fce1e8a6..6465c2ac6d 100644 --- a/src/nbl/builtin/utils.cmake +++ b/src/nbl/builtin/utils.cmake @@ -251,7 +251,7 @@ function(ADD_CUSTOM_BUILTIN_RESOURCES _TARGET_NAME_ _BUNDLE_NAME_ _BUNDLE_SEARCH _ADD_PROPERTY_(BUILTIN_RESOURCES_HEADER_DIRECTORY _OUTPUT_HEADER_DIRECTORY_) _ADD_PROPERTY_(BUILTIN_RESOURCES_SOURCE_DIRECTORY _OUTPUT_SOURCE_DIRECTORY_) _ADD_PROPERTY_(BUILTIN_RESOURCES_HEADERS NBL_BUILTIN_RESOURCES_HEADERS) - _ADD_PROPERTY_(BUILTIN_RESOURCES_INCLUDE_SEARCH_DIRECTORY _OUTPUT_HEADER_DIRECTORY_) + _ADD_PROPERTY_(BUILTIN_RESOURCES_INCLUDE_SEARCH_DIRECTORY _OUTPUT_INCLUDE_SEARCH_DIRECTORY_CONFIG) if(MSVC AND NBL_SANITIZE_ADDRESS) set_property(TARGET ${_TARGET_NAME_} PROPERTY COMPILE_OPTIONS /fsanitize=address) From 6556ad6b6d3d9df961d0563e34a1cdf07f3d1e20 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 29 Dec 2025 12:30:25 +0100 Subject: [PATCH 338/472] BUILD_INTERFACE for builtin auto-gen include search directories --- src/nbl/CMakeLists.txt | 1 + src/nbl/builtin/utils.cmake | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index cebe1696ad..512633536f 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -689,6 +689,7 @@ if(NBL_EMBED_BUILTIN_RESOURCES) target_compile_definitions(${NBL_TARGET} PUBLIC "$") # workaround because must use Nabla headers without linking Nabla to itself.. 
target_include_directories(${NBL_TARGET} PUBLIC "$") target_include_directories(Nabla PRIVATE "${_INTERNAL_BR_OUTPUT_INCLUDE_SEARCH_DIRECTORY_}") + target_include_directories(Nabla INTERFACE "$") endforeach() add_dependencies(Nabla ${NBL_TARGETS}) diff --git a/src/nbl/builtin/utils.cmake b/src/nbl/builtin/utils.cmake index 17fce1e8a6..6465c2ac6d 100644 --- a/src/nbl/builtin/utils.cmake +++ b/src/nbl/builtin/utils.cmake @@ -251,7 +251,7 @@ function(ADD_CUSTOM_BUILTIN_RESOURCES _TARGET_NAME_ _BUNDLE_NAME_ _BUNDLE_SEARCH _ADD_PROPERTY_(BUILTIN_RESOURCES_HEADER_DIRECTORY _OUTPUT_HEADER_DIRECTORY_) _ADD_PROPERTY_(BUILTIN_RESOURCES_SOURCE_DIRECTORY _OUTPUT_SOURCE_DIRECTORY_) _ADD_PROPERTY_(BUILTIN_RESOURCES_HEADERS NBL_BUILTIN_RESOURCES_HEADERS) - _ADD_PROPERTY_(BUILTIN_RESOURCES_INCLUDE_SEARCH_DIRECTORY _OUTPUT_HEADER_DIRECTORY_) + _ADD_PROPERTY_(BUILTIN_RESOURCES_INCLUDE_SEARCH_DIRECTORY _OUTPUT_INCLUDE_SEARCH_DIRECTORY_CONFIG) if(MSVC AND NBL_SANITIZE_ADDRESS) set_property(TARGET ${_TARGET_NAME_} PROPERTY COMPILE_OPTIONS /fsanitize=address) From d5f70116c1a244410542f81a7222e8d51f539c91 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 29 Dec 2025 18:55:38 +0700 Subject: [PATCH 339/472] Refactor obb calculation and some fixes --- src/nbl/asset/utils/COBBGenerator.cpp | 61 ++++++++++----------------- 1 file changed, 23 insertions(+), 38 deletions(-) diff --git a/src/nbl/asset/utils/COBBGenerator.cpp b/src/nbl/asset/utils/COBBGenerator.cpp index 98b3707ab8..d869a89cc6 100644 --- a/src/nbl/asset/utils/COBBGenerator.cpp +++ b/src/nbl/asset/utils/COBBGenerator.cpp @@ -269,33 +269,25 @@ hlsl::shapes::OBB<> COBBGenerator::compute(const VertexCollection& vertices) static auto findBestObbAxesFromTriangleNormalAndEdgeVectors = []( const VertexCollection& vertices, const hlsl::float32_t3 normal, - const std::array edges, + const std::array& edges, Axes& bestAxes, hlsl::float32_t& bestVal) { - hlsl::float32_t3 dmax, dmin, dlen; - // The operands are assumed to be orthogonal and 
unit normals const auto yExtremeProjs = findExtremalProjs_OneDir(normal, vertices); - dmin.y = yExtremeProjs.minProj; - dmax.y = yExtremeProjs.maxProj; - dlen.y = dmax.y - dmin.y; + const auto yLen = yExtremeProjs.maxProj - yExtremeProjs.minProj; for (const auto& edge : edges) { const auto binormal = hlsl::cross(edge, normal); const auto xExtremeProjs = findExtremalProjs_OneDir(edge, vertices); - dmin.x = xExtremeProjs.minProj; - dmax.x = xExtremeProjs.maxProj; - dlen.x = dmax.x - dmin.x; + const auto xLen = xExtremeProjs.maxProj - xExtremeProjs.minProj; const auto zExtremeProjs = findExtremalProjs_OneDir(binormal, vertices); - dmin.z = zExtremeProjs.minProj; - dmax.z = zExtremeProjs.maxProj; - dlen.z = dmax.z - dmin.z; + const auto zLen = zExtremeProjs.maxProj - zExtremeProjs.minProj; - const auto quality = getQualityValue(dlen); + const auto quality = getQualityValue({xLen, yLen, zLen}); if (quality < bestVal) { bestVal = quality; @@ -312,7 +304,7 @@ hlsl::shapes::OBB<> COBBGenerator::compute(const VertexCollection& vertices) static auto findBaseTriangle = [](const ExtremalVertices& extremalVertices, const VertexCollection& vertices)-> LargeBaseTriangle { - hlsl::float32_t eps = 0.000001f; + constexpr hlsl::float32_t eps = 0.000001f; std::array baseTriangleVertices; Edges edges; @@ -364,10 +356,6 @@ hlsl::shapes::OBB<> COBBGenerator::compute(const VertexCollection& vertices) const LargeBaseTriangle& baseTriangle, Axes& bestAxes, hlsl::float32_t& bestVal) { - hlsl::float32_t3 f0, f1, f2; // Edge vectors towards minVert; - hlsl::float32_t3 g0, g1, g2; // Edge vectors towards maxVert; - hlsl::float32_t3 n0, n1, n2; // Unit normals of top tetra tris - hlsl::float32_t3 m0, m1, m2; // Unit normals of bottom tetra tris // Find furthest points above and below the plane of the base triangle for tetra constructions // For each found valid point, search for the best OBB axes based on the 3 arising triangles @@ -375,28 +363,28 @@ hlsl::shapes::OBB<> 
COBBGenerator::compute(const VertexCollection& vertices) if (upperLowerTetraVertices.minVert) { const auto minVert = *upperLowerTetraVertices.minVert; - f0 = normalize(minVert - baseTriangle.vertices[0]); - f1 = normalize(minVert - baseTriangle.vertices[1]); - f2 = normalize(minVert - baseTriangle.vertices[2]); - n0 = normalize(cross(f1, baseTriangle.edges[0])); - n1 = normalize(cross(f2, baseTriangle.edges[1])); - n2 = normalize(cross(f0, baseTriangle.edges[2])); + const auto f0 = normalize(minVert - baseTriangle.vertices[0]); + const auto f1 = normalize(minVert - baseTriangle.vertices[1]); + const auto f2 = normalize(minVert - baseTriangle.vertices[2]); + const auto n0 = normalize(cross(f1, baseTriangle.edges[0])); + const auto n1 = normalize(cross(f2, baseTriangle.edges[1])); + const auto n2 = normalize(cross(f0, baseTriangle.edges[2])); findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, n0, { baseTriangle.edges[0], f1, f0 }, bestAxes, bestVal); findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, n1, { baseTriangle.edges[1], f2, f1 }, bestAxes, bestVal); findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, n2, { baseTriangle.edges[2], f0, f2 }, bestAxes, bestVal); } if (upperLowerTetraVertices.maxVert) { - const auto maxVert = *upperLowerTetraVertices.minVert; - g0 = normalize(maxVert - baseTriangle.vertices[0]); - g1 = normalize(maxVert - baseTriangle.vertices[1]); - g2 = normalize(maxVert - baseTriangle.vertices[2]); - m0 = normalize(cross(g1, baseTriangle.edges[0])); - m1 = normalize(cross(g2, baseTriangle.edges[1])); - m2 = normalize(cross(g0, baseTriangle.edges[2])); - findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, m0, { baseTriangle.edges[0], g1, g0 }, bestAxes, bestVal); - findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, m1, { baseTriangle.edges[1], g2, g1 }, bestAxes, bestVal); - findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, m2, { baseTriangle.edges[2], g0, g2 }, bestAxes, bestVal); + const auto maxVert = 
*upperLowerTetraVertices.maxVert; + const auto f0 = normalize(maxVert - baseTriangle.vertices[0]); + const auto f1 = normalize(maxVert - baseTriangle.vertices[1]); + const auto f2 = normalize(maxVert - baseTriangle.vertices[2]); + const auto n0 = normalize(cross(f1, baseTriangle.edges[0])); + const auto n1 = normalize(cross(f2, baseTriangle.edges[1])); + const auto n2 = normalize(cross(f0, baseTriangle.edges[2])); + findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, n0, { baseTriangle.edges[0], f1, f0 }, bestAxes, bestVal); + findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, n1, { baseTriangle.edges[1], f2, f1 }, bestAxes, bestVal); + findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, n2, { baseTriangle.edges[2], f0, f2 }, bestAxes, bestVal); } }; @@ -406,11 +394,8 @@ hlsl::shapes::OBB<> COBBGenerator::compute(const VertexCollection& vertices) const hlsl::float32_t3& localMax) -> hlsl::shapes::OBB<3, hlsl::float32_t> { const auto localMid = 0.5f * (localMin + localMax); - auto globalMid = axes[0] * localMid.x; - globalMid += axes[1] * localMid.y; - globalMid += axes[2] * localMid.z; return { - .mid = globalMid, + .mid = axes[0] * localMid.x + axes[1] * localMid.y + axes[2] * localMid.z, .axes = axes, .ext = 0.5f * (localMax - localMin) }; From 32cbb366ddcc7cb227b18d5bc268fe0c54076f22 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 29 Dec 2025 19:25:56 +0700 Subject: [PATCH 340/472] Update CDrawAABB::computeOBBTransform to return float32_t3x4 --- include/nbl/ext/DebugDraw/CDrawAABB.h | 2 +- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 24 ++++++++---------------- 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 509608c2f8..6263378024 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -208,7 +208,7 @@ namespace nbl::ext::debug_draw return transform; } - static hlsl::float32_t4x4 getTransformFromOBB(const 
hlsl::shapes::OBB<3, float>& aabb); + static hlsl::float32_t3x4 getTransformFromOBB(const hlsl::shapes::OBB<3, float>& aabb); protected: struct ConstructorParams diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index ad04483ba2..a6a1cb1bce 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -367,25 +367,17 @@ bool DrawAABB::renderSingle(const DrawParameters& params, const hlsl::shapes::AA return true; } -hlsl::float32_t4x4 DrawAABB::getTransformFromOBB(const hlsl::shapes::OBB<3, float>& obb) +hlsl::float32_t3x4 DrawAABB::getTransformFromOBB(const hlsl::shapes::OBB<3, float>& obb) { const auto obbScale = obb.ext * 2.0f; - const auto obbMat = hlsl::transpose(float32_t4x4{ - hlsl::float32_t4(obb.axes[0] * obbScale.x, 0), - hlsl::float32_t4(obb.axes[1] * obbScale.y, 0), - hlsl::float32_t4(obb.axes[2] * obbScale.z, 0), - hlsl::float32_t4(obb.mid, 1) - }); - - const auto translateUnitCube = float32_t4x4{ - hlsl::float32_t4(1, 0, 0, -0.5f), - hlsl::float32_t4(0, 1, 0, -0.5f), - hlsl::float32_t4(0, 0, 1, -0.5f), - hlsl::float32_t4(0, 0, 0, 1), + const auto axesScaleX = obb.axes[0] * obbScale.x; + const auto axesScaleY = obb.axes[1] * obbScale.y; + const auto axesScaleZ = obb.axes[2] * obbScale.z; + return float32_t3x4{ + axesScaleX.x, axesScaleY.x, axesScaleZ.x, obb.mid.x - (0.5 * (axesScaleX.x + axesScaleY.x + axesScaleZ.x)), + axesScaleX.y, axesScaleY.y, axesScaleZ.y, obb.mid.y - (0.5 * (axesScaleX.y + axesScaleY.y + axesScaleZ.y)), + axesScaleX.z, axesScaleY.z, axesScaleZ.z, obb.mid.z - (0.5 * (axesScaleX.z + axesScaleY.z + axesScaleZ.z)), }; - - const auto transform = mul(obbMat, translateUnitCube); - return transform; } } From 3d61b44b72d4c408ceaa50311c601215b21c0525 Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 29 Dec 2025 15:40:11 +0100 Subject: [PATCH 341/472] pre-merge `examples_tests` submodule pointer update --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/examples_tests b/examples_tests index c5e28aaeaa..5df217517f 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit c5e28aaeaae107d8b0baa2b28355f8d445b7cd7a +Subproject commit 5df217517fd5af0964b6d170afb68d5194daf60d From 7137295ca8f643c7cb4ac80c9b56ce2f6b902933 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 29 Dec 2025 19:01:38 +0100 Subject: [PATCH 342/472] fix check-runs creation for examples, take sha depending on context --- .github/workflows/build-nabla.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-nabla.yml b/.github/workflows/build-nabla.yml index 6824449abb..f92ffa7545 100644 --- a/.github/workflows/build-nabla.yml +++ b/.github/workflows/build-nabla.yml @@ -146,11 +146,12 @@ jobs: github-token: ${{ secrets.GITHUB_TOKEN }} result-encoding: string script: | + const headSha = context.payload.pull_request ? context.payload.pull_request.head.sha : context.sha; const response = await github.rest.checks.create({ owner: context.repo.owner, repo: context.repo.repo, name: `Examples (${{ matrix.os }}, ${{ matrix.vendor }}-${{ matrix.tag }}, ${{ matrix.config }})`, - head_sha: context.sha, + head_sha: headSha, status: 'in_progress' }); return response.data.id; From 3344a585e6d0cfd06c6edeec0ba72c714e2a137c Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 29 Dec 2025 23:41:37 +0100 Subject: [PATCH 343/472] tiny adjustments to get a more complex example with Mitsuba Loader to compile --- include/nbl/ext/MitsubaLoader/CElementSampler.h | 2 -- include/nbl/system/declarations.h | 3 +++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/nbl/ext/MitsubaLoader/CElementSampler.h b/include/nbl/ext/MitsubaLoader/CElementSampler.h index 9b9bc2b820..82f586c19e 100644 --- a/include/nbl/ext/MitsubaLoader/CElementSampler.h +++ b/include/nbl/ext/MitsubaLoader/CElementSampler.h @@ -10,8 +10,6 @@ namespace nbl::ext::MitsubaLoader { -class CGlobalMitsubaMetadata; - 
class CElementSampler : public IElement { public: diff --git a/include/nbl/system/declarations.h b/include/nbl/system/declarations.h index ebc5a890ae..fa3dc2c6da 100644 --- a/include/nbl/system/declarations.h +++ b/include/nbl/system/declarations.h @@ -14,6 +14,9 @@ #include "nbl/system/DynamicFunctionCaller.h" #include "nbl/system/SReadWriteSpinLock.h" +// printing and serialization +#include "nbl/system/to_string.h" + // files #include "nbl/system/IFile.h" From 6cb49564e10fc1744c86e9870d8e36a33ab7c22e Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 30 Dec 2025 10:03:38 +0100 Subject: [PATCH 344/472] `system::to_string` for `IAsset::E_TYPE` --- include/nbl/asset/IAsset.h | 68 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/include/nbl/asset/IAsset.h b/include/nbl/asset/IAsset.h index 7c6a33193d..b7142713bf 100644 --- a/include/nbl/asset/IAsset.h +++ b/include/nbl/asset/IAsset.h @@ -194,4 +194,72 @@ concept Asset = std::is_base_of_v; } +namespace nbl::system::impl +{ +template<> +struct to_string_helper +{ + private: + using enum_t = asset::IAsset::E_TYPE; + + public: + static inline std::string __call(const enum_t value) + { + switch (value) + { + case enum_t::ET_BUFFER: + return "ICPUBuffer"; + case enum_t::ET_BUFFER_VIEW: + return "ICPUBufferView"; + case enum_t::ET_SAMPLER: + return "ICPUSampler"; + case enum_t::ET_IMAGE: + return "ICPUImage"; + case enum_t::ET_IMAGE_VIEW: + return "ICPUImageView"; + case enum_t::ET_DESCRIPTOR_SET: + return "ICPUDescriptorSet"; + case enum_t::ET_DESCRIPTOR_SET_LAYOUT: + return "ICPUDescriptorSetLayout"; + case enum_t::ET_SKELETON: + return "ICPUSkeleton"; + case enum_t::ET_ANIMATION_LIBRARY: + return "ICPUAnimationLibrary"; + case enum_t::ET_PIPELINE_LAYOUT: + return "ICPUPipelineLayout"; + case enum_t::ET_SHADER: + return "IShader"; + case enum_t::ET_GEOMETRY: + return "IGeometry"; + case enum_t::ET_RENDERPASS: + return "ICPURenderpass"; + case enum_t::ET_FRAMEBUFFER: + return 
"ICPUFramebuffer"; + case enum_t::ET_GRAPHICS_PIPELINE: + return "ICPUGraphicsPipeline"; + case enum_t::ET_BOTOM_LEVEL_ACCELERATION_STRUCTURE: + return "ICPUBottomLevelAccelerationStructure"; + case enum_t::ET_TOP_LEVEL_ACCELERATION_STRUCTURE: + return "ICPUTopLevelAccelerationStructure"; + case enum_t::ET_GEOMETRY_COLLECTION: + return "ICPUGeometryCollection"; + case enum_t::ET_MORPH_TARGETS: + return "ICPUMorphTargets"; + case enum_t::ET_COMPUTE_PIPELINE: + return "ICPUComputePipeline"; + case enum_t::ET_PIPELINE_CACHE: + return "ICPUPipelineCache"; + case enum_t::ET_SCENE: + return "ICPUScene"; + case enum_t::ET_RAYTRACING_PIPELINE: + return "ICPURayTracingPipeline"; + case enum_t::ET_IMPLEMENTATION_SPECIFIC_METADATA: + return ""; + default: + break; + } + return ""; + } +}; +} #endif From af7574a1154d2c78b9b0814dd37b2a52ba442361 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Wed, 31 Dec 2025 14:06:29 +0300 Subject: [PATCH 345/472] include `tgmath.hlsl` in `functions.hlsl`, update examples_tests --- examples_tests | 2 +- include/nbl/builtin/hlsl/math/functions.hlsl | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index b5d8abc0e5..086af9e659 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit b5d8abc0e5c4761a3714b2c4a074cb10aaa90573 +Subproject commit 086af9e6590119bd394f2622db80ab0054445502 diff --git a/include/nbl/builtin/hlsl/math/functions.hlsl b/include/nbl/builtin/hlsl/math/functions.hlsl index a52eb21c23..a1c51d4e51 100644 --- a/include/nbl/builtin/hlsl/math/functions.hlsl +++ b/include/nbl/builtin/hlsl/math/functions.hlsl @@ -5,6 +5,7 @@ #define _NBL_BUILTIN_HLSL_MATH_FUNCTIONS_INCLUDED_ #include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/tgmath.hlsl" #include "nbl/builtin/hlsl/numbers.hlsl" #include "nbl/builtin/hlsl/vector_utils/vector_traits.hlsl" #include "nbl/builtin/hlsl/concepts/vector.hlsl" From 0e3ed2801949036084943abe8fadb2ed76809453 Mon Sep 17 
00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 2 Jan 2026 12:04:40 +0100 Subject: [PATCH 346/472] add NBL_EMULATED_VECTOR_SCALAR_BITWISE_OPERATOR to allow scalar & emulated vector --- .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index d0c728a8c7..2192d348b9 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -143,6 +143,16 @@ NBL_CONSTEXPR_FUNC this_t operator OP (this_t other) NBL_CONST_MEMBER_FUNC \ return output;\ } +#define NBL_EMULATED_VECTOR_SCALAR_BITWISE_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC this_t operator OP (component_t val) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, CRTP::getComponent(i) OP val);\ + return output;\ +} + #define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(OP) NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(OP)\ NBL_CONSTEXPR_FUNC this_t operator OP(vector other) NBL_CONST_MEMBER_FUNC \ {\ @@ -257,6 +267,9 @@ struct emulated_vector Date: Fri, 2 Jan 2026 21:33:03 +0100 Subject: [PATCH 347/472] add support for depfiles & use with NSC --- cmake/common.cmake | 15 +- include/nbl/asset/utils/IShaderCompiler.h | 17 ++ src/nbl/asset/utils/CHLSLCompiler.cpp | 61 ++++- src/nbl/asset/utils/IShaderCompiler.cpp | 291 +++++++++++++++++++--- tools/nsc/main.cpp | 63 ++++- 5 files changed, 397 insertions(+), 50 deletions(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index 9836fa7666..0d8b583f18 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1142,6 +1142,7 @@ option(NSC_DEBUG_EDIF_SOURCE_BIT "Add \"-fspv-debug=source\" to NSC Debug CLI" O option(NSC_DEBUG_EDIF_LINE_BIT "Add \"-fspv-debug=line\" to NSC Debug CLI" OFF) option(NSC_DEBUG_EDIF_TOOL_BIT "Add \"-fspv-debug=tool\" to NSC Debug CLI" 
ON) option(NSC_DEBUG_EDIF_NON_SEMANTIC_BIT "Add \"-fspv-debug=vulkan-with-source\" to NSC Debug CLI" OFF) +option(NSC_USE_DEPFILE "Generate depfiles for NSC custom commands" ON) function(NBL_CREATE_NSC_COMPILE_RULES) set(COMMENT "this code has been autogenerated with Nabla CMake NBL_CREATE_HLSL_COMPILE_RULES utility") @@ -1479,21 +1480,33 @@ namespace @IMPL_NAMESPACE@ { # generate keys and commands for compiling shaders set(FINAL_KEY_REL_PATH "$/${FINAL_KEY}") set(TARGET_OUTPUT "${IMPL_BINARY_DIR}/${FINAL_KEY_REL_PATH}") + set(DEPFILE_PATH "${TARGET_OUTPUT}.d") + + set(NBL_NSC_DEPFILE_ARGS "") + if(NSC_USE_DEPFILE) + set(NBL_NSC_DEPFILE_ARGS -MD -MF "${DEPFILE_PATH}") + endif() set(NBL_NSC_COMPILE_COMMAND "$" -Fc "${TARGET_OUTPUT}" ${COMPILE_OPTIONS} ${REQUIRED_OPTIONS} ${IMPL_COMMON_OPTIONS} + ${NBL_NSC_DEPFILE_ARGS} "${CONFIG_FILE}" ) - add_custom_command(OUTPUT "${TARGET_OUTPUT}" + set(NBL_NSC_CUSTOM_COMMAND_ARGS + OUTPUT "${TARGET_OUTPUT}" COMMAND ${NBL_NSC_COMPILE_COMMAND} DEPENDS ${DEPENDS_ON} COMMENT "Creating \"${TARGET_OUTPUT}\"" VERBATIM COMMAND_EXPAND_LISTS ) + if(NSC_USE_DEPFILE) + list(APPEND NBL_NSC_CUSTOM_COMMAND_ARGS DEPFILE "${DEPFILE_PATH}") + endif() + add_custom_command(${NBL_NSC_CUSTOM_COMMAND_ARGS}) set_source_files_properties("${TARGET_OUTPUT}" PROPERTIES GENERATED TRUE) set(HEADER_ONLY_LIKE "${CONFIG_FILE}" "${TARGET_INPUT}" "${TARGET_OUTPUT}") diff --git a/include/nbl/asset/utils/IShaderCompiler.h b/include/nbl/asset/utils/IShaderCompiler.h index 30d37f36c7..9fd4eee833 100644 --- a/include/nbl/asset/utils/IShaderCompiler.h +++ b/include/nbl/asset/utils/IShaderCompiler.h @@ -137,6 +137,8 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted const CIncludeFinder* includeFinder = nullptr; std::span extraDefines = {}; E_SPIRV_VERSION targetSpirvVersion = E_SPIRV_VERSION::ESV_1_6; + bool depfile = false; + system::path depfilePath = {}; }; // https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/SPIR-V.rst#debugging @@ 
-215,6 +217,10 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted // Needed for json vector serialization. Making it private and declaring from_json(_, SEntry&) as friend didn't work inline SPreprocessingDependency() {} + inline const system::path& getRequestingSourceDir() const { return requestingSourceDir; } + inline std::string_view getIdentifier() const { return identifier; } + inline bool isStandardInclude() const { return standardInclude; } + private: friend void to_json(nlohmann::json& j, const SEntry::SPreprocessingDependency& dependency); friend void from_json(const nlohmann::json& j, SEntry::SPreprocessingDependency& dependency); @@ -447,6 +453,17 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted NBL_API2 EntrySet::const_iterator find_impl(const SEntry& mainFile, const CIncludeFinder* finder) const; }; + struct DepfileWriteParams + { + system::ISystem* system = nullptr; + std::string_view depfilePath = {}; + std::string_view outputPath = {}; + std::string_view sourceIdentifier = {}; + system::path workingDirectory = {}; + }; + + static bool writeDepfile(const DepfileWriteParams& params, const CCache::SEntry::dependency_container_t& dependencies, const CIncludeFinder* includeFinder = nullptr, system::logger_opt_ptr logger = nullptr); + core::smart_refctd_ptr compileToSPIRV(const std::string_view code, const SCompilerOptions& options) const; inline core::smart_refctd_ptr compileToSPIRV(const char* code, const SCompilerOptions& options) const diff --git a/src/nbl/asset/utils/CHLSLCompiler.cpp b/src/nbl/asset/utils/CHLSLCompiler.cpp index d36ecfa1cb..e551d3c72b 100644 --- a/src/nbl/asset/utils/CHLSLCompiler.cpp +++ b/src/nbl/asset/utils/CHLSLCompiler.cpp @@ -111,12 +111,23 @@ static bool fixup_spirv_target_ver(std::vector& arguments, system: for (auto targetEnvArgumentPos=arguments.begin(); targetEnvArgumentPos!=arguments.end(); targetEnvArgumentPos++) if (targetEnvArgumentPos->find(Prefix)==0) { - const auto suffix = 
targetEnvArgumentPos->substr(Prefix.length()); + auto suffix = targetEnvArgumentPos->substr(Prefix.length()); + auto trim = [](std::wstring& value) { + auto isTrimChar = [](wchar_t c) { + return c == L' ' || c == L'\t' || c == L'\r' || c == L'\n' || c == L'"'; + }; + while (!value.empty() && isTrimChar(value.front())) + value.erase(value.begin()); + while (!value.empty() && isTrimChar(value.back())) + value.pop_back(); + }; + trim(suffix); const auto found = AllowedSuffices.find(suffix); if (found!=AllowedSuffices.end()) return true; - logger.log("Compile flag warning: Required compile flag not found -fspv-target-env=. Force enabling -fspv-target-env= found but with unsupported value `%s`.", system::ILogger::ELL_ERROR, "TODO: write wchar to char convert usage"); - return false; + logger.log("Compile flag warning: Required compile flag not found -fspv-target-env=. Force enabling -fspv-target-env=vulkan1.3, previous value `%ls` is unsupported.", system::ILogger::ELL_WARNING, suffix.c_str()); + *targetEnvArgumentPos = L"-fspv-target-env=vulkan1.3"; + return true; } logger.log("Compile flag warning: Required compile flag not found -fspv-target-env=. 
Force enabling -fspv-target-env=vulkan1.3, as it is required by Nabla.", system::ILogger::ELL_WARNING); @@ -353,6 +364,21 @@ namespace nbl::wave std::string CHLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADER_STAGE& stage, const SPreprocessorOptions& preprocessOptions, std::vector& dxc_compile_flags_override, std::vector* dependencies) const { + const bool depfileEnabled = preprocessOptions.depfile; + if (depfileEnabled) + { + if (preprocessOptions.depfilePath.empty()) + { + preprocessOptions.logger.log("Depfile path is empty.", system::ILogger::ELL_ERROR); + return {}; + } + } + + std::vector localDependencies; + auto* dependenciesOut = dependencies; + if (depfileEnabled && !dependenciesOut) + dependenciesOut = &localDependencies; + // HACK: we do a pre-pre-process here to add \n after every #pragma to neutralize boost::wave's actions // See https://github.com/Devsh-Graphics-Programming/Nabla/issues/746 size_t line_index = 0; @@ -367,8 +393,8 @@ std::string CHLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADE } // preprocess - core::string resolvedString = nbl::wave::preprocess(code, preprocessOptions, bool(dependencies) /* if dependencies were passed, we assume we want caching*/, - [&dxc_compile_flags_override, &stage, &dependencies](nbl::wave::context& context) -> void + core::string resolvedString = nbl::wave::preprocess(code, preprocessOptions, bool(dependenciesOut), + [&dxc_compile_flags_override, &stage, &dependenciesOut](nbl::wave::context& context) -> void { if (context.get_hooks().m_dxc_compile_flags_override.size() != 0) dxc_compile_flags_override = context.get_hooks().m_dxc_compile_flags_override; @@ -377,9 +403,8 @@ std::string CHLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADE if (context.get_hooks().m_pragmaStage != IShader::E_SHADER_STAGE::ESS_UNKNOWN) stage = context.get_hooks().m_pragmaStage; - if (dependencies) { - *dependencies = std::move(context.get_dependencies()); - } + if (dependenciesOut) 
+ *dependenciesOut = std::move(context.get_dependencies()); } ); @@ -396,13 +421,31 @@ std::string CHLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADE } } + if (resolvedString.empty()) + return resolvedString; + + if (depfileEnabled) + { + IShaderCompiler::DepfileWriteParams params = {}; + const std::string depfilePathString = preprocessOptions.depfilePath.generic_string(); + std::filesystem::path targetPath = preprocessOptions.depfilePath; + if (targetPath.extension() == ".d") + targetPath.replace_extension(); + params.depfilePath = depfilePathString; + params.outputPath = targetPath.generic_string(); + params.sourceIdentifier = preprocessOptions.sourceIdentifier; + params.system = m_system.get(); + if (!IShaderCompiler::writeDepfile(params, *dependenciesOut, preprocessOptions.includeFinder, preprocessOptions.logger)) + return {}; + } + return resolvedString; } std::string CHLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADER_STAGE& stage, const SPreprocessorOptions& preprocessOptions, std::vector* dependencies) const { std::vector extra_dxc_compile_flags = {}; - return preprocessShader(std::move(code), stage, preprocessOptions, extra_dxc_compile_flags); + return preprocessShader(std::move(code), stage, preprocessOptions, extra_dxc_compile_flags, dependencies); } core::smart_refctd_ptr CHLSLCompiler::compileToSPIRV_impl(const std::string_view code, const IShaderCompiler::SCompilerOptions& options, std::vector* dependencies) const diff --git a/src/nbl/asset/utils/IShaderCompiler.cpp b/src/nbl/asset/utils/IShaderCompiler.cpp index e60bf31b5c..1d0083f818 100644 --- a/src/nbl/asset/utils/IShaderCompiler.cpp +++ b/src/nbl/asset/utils/IShaderCompiler.cpp @@ -8,6 +8,8 @@ #include #include #include +#include +#include #include #include @@ -21,40 +23,258 @@ IShaderCompiler::IShaderCompiler(core::smart_refctd_ptr&& syste m_defaultIncludeFinder = core::make_smart_refctd_ptr(core::smart_refctd_ptr(m_system)); } -core::smart_refctd_ptr 
nbl::asset::IShaderCompiler::compileToSPIRV(const std::string_view code, const SCompilerOptions& options) const +bool IShaderCompiler::writeDepfile( + const DepfileWriteParams& params, + const CCache::SEntry::dependency_container_t& dependencies, + const CIncludeFinder* includeFinder, + system::logger_opt_ptr logger) { - CCache::SEntry entry; - if (options.readCache || options.writeCache) - entry = CCache::SEntry(code, options); - - if (options.readCache) - { - auto found = options.readCache->find_impl(entry, options.preprocessorOptions.includeFinder); - if (found != options.readCache->m_container.end()) - { - if (options.writeCache) - { - CCache::SEntry writeEntry = *found; - options.writeCache->insert(std::move(writeEntry)); - } - return found->decompressShader(); - } - } - - auto retVal = compileToSPIRV_impl(code, options, options.writeCache ? &entry.dependencies:nullptr); - // compute the SPIR-V shader content hash - if (retVal) - { - auto backingBuffer = retVal->getContent(); - const_cast(backingBuffer)->setContentHash(backingBuffer->computeContentHash()); - } + std::string depfilePathString; + if (!params.depfilePath.empty()) + depfilePathString = std::string(params.depfilePath); + else + depfilePathString = std::string(params.outputPath) + ".d"; + + if (depfilePathString.empty()) + { + logger.log("Depfile path is empty.", system::ILogger::ELL_ERROR); + return false; + } + + const auto parentDirectory = std::filesystem::path(depfilePathString).parent_path(); + if (!parentDirectory.empty() && !std::filesystem::exists(parentDirectory)) + { + if (!std::filesystem::create_directories(parentDirectory)) + { + logger.log("Failed to create parent directory for depfile.", system::ILogger::ELL_ERROR); + return false; + } + } + + std::vector depPaths; + depPaths.reserve(dependencies.size() + 1); + + auto addDepPath = [&depPaths](const std::filesystem::path& path) + { + if (path.empty()) + return; + if (!std::filesystem::exists(path)) + return; + 
depPaths.emplace_back(path.generic_string()); + }; + + if (!params.sourceIdentifier.empty()) + { + std::filesystem::path rootPath{std::string(params.sourceIdentifier)}; + if (rootPath.is_relative()) + { + if (!params.workingDirectory.empty()) + rootPath = std::filesystem::absolute(std::filesystem::path(params.workingDirectory) / rootPath); + else + rootPath = std::filesystem::absolute(rootPath); + } + addDepPath(rootPath); + } + + for (const auto& dep : dependencies) + { + if (includeFinder) + { + IShaderCompiler::IIncludeLoader::found_t header = dep.isStandardInclude() ? + includeFinder->getIncludeStandard(dep.getRequestingSourceDir(), std::string(dep.getIdentifier())) : + includeFinder->getIncludeRelative(dep.getRequestingSourceDir(), std::string(dep.getIdentifier())); + + if (!header) + continue; + addDepPath(header.absolutePath); + } + else + { + std::filesystem::path candidate = dep.isStandardInclude() ? std::filesystem::path(std::string(dep.getIdentifier())) : (dep.getRequestingSourceDir() / std::string(dep.getIdentifier())); + if (candidate.is_relative()) + { + if (!params.workingDirectory.empty()) + candidate = std::filesystem::absolute(std::filesystem::path(params.workingDirectory) / candidate); + else + candidate = std::filesystem::absolute(candidate); + } + addDepPath(candidate); + } + } + + std::sort(depPaths.begin(), depPaths.end()); + depPaths.erase(std::unique(depPaths.begin(), depPaths.end()), depPaths.end()); + + auto escapeDepPath = [](const std::string& path) -> std::string + { + std::string normalized = path; + std::replace(normalized.begin(), normalized.end(), '\\', '/'); + std::string out; + out.reserve(normalized.size()); + for (const char c : normalized) + { + if (c == ' ' || c == '#') + out.emplace_back('\\'); + if (c == '$') + { + out.emplace_back('$'); + out.emplace_back('$'); + continue; + } + out.emplace_back(c); + } + return out; + }; + + if (!params.system) + { + logger.log("Depfile system is null.", system::ILogger::ELL_ERROR); + 
return false; + } + + core::smart_refctd_ptr depfile; + { + system::ISystem::future_t> future; + params.system->createFile(future, system::path(depfilePathString), system::IFileBase::ECF_WRITE); + if (!future.wait()) + { + logger.log("Failed to open depfile: %s", system::ILogger::ELL_ERROR, depfilePathString.c_str()); + return false; + } + future.acquire().move_into(depfile); + } + if (!depfile) + { + logger.log("Failed to open depfile: %s", system::ILogger::ELL_ERROR, depfilePathString.c_str()); + return false; + } + + std::string targetPathString; + if (params.outputPath.empty()) + { + std::filesystem::path targetPath = depfilePathString; + if (targetPath.extension() == ".d") + targetPath.replace_extension(); + targetPathString = targetPath.generic_string(); + } + else + { + targetPathString = std::string(params.outputPath); + } + if (targetPathString.empty()) + { + logger.log("Depfile target path is empty.", system::ILogger::ELL_ERROR); + return false; + } + const std::string target = escapeDepPath(std::filesystem::path(targetPathString).generic_string()); + std::vector escapedDeps; + escapedDeps.reserve(depPaths.size()); + for (const auto& depPath : depPaths) + escapedDeps.emplace_back(escapeDepPath(depPath)); + + std::string depfileContents; + depfileContents.append(target); + depfileContents.append(":"); + if (!escapedDeps.empty()) + { + depfileContents.append(" \\\n"); + for (size_t index = 0; index < escapedDeps.size(); ++index) + { + depfileContents.append(" "); + depfileContents.append(escapedDeps[index]); + if (index + 1 < escapedDeps.size()) + depfileContents.append(" \\\n"); + } + } + depfileContents.append("\n"); + + system::IFile::success_t success; + depfile->write(success, depfileContents.data(), 0, depfileContents.size()); + if (!success) + { + logger.log("Failed to write depfile: %s", system::ILogger::ELL_ERROR, depfilePathString.c_str()); + return false; + } + return true; +} - if (options.writeCache) - { - if 
(entry.setContent(retVal->getContent())) - options.writeCache->insert(std::move(entry)); - } - return retVal; +core::smart_refctd_ptr nbl::asset::IShaderCompiler::compileToSPIRV(const std::string_view code, const SCompilerOptions& options) const +{ + const bool depfileEnabled = options.preprocessorOptions.depfile; + const bool supportsDependencies = options.getCodeContentType() == IShader::E_CONTENT_TYPE::ECT_HLSL; + + auto writeDepfileFromDependencies = [&](const CCache::SEntry::dependency_container_t& dependencies) -> bool + { + if (!depfileEnabled) + return true; + + if (options.preprocessorOptions.depfilePath.empty()) + { + options.preprocessorOptions.logger.log("Depfile path is empty.", system::ILogger::ELL_ERROR); + return false; + } + + IShaderCompiler::DepfileWriteParams params = {}; + const std::string depfilePathString = options.preprocessorOptions.depfilePath.generic_string(); + params.depfilePath = depfilePathString; + auto targetPath = options.preprocessorOptions.depfilePath; + if (targetPath.extension() == ".d") + targetPath.replace_extension(); + params.outputPath = targetPath.generic_string(); + params.sourceIdentifier = options.preprocessorOptions.sourceIdentifier; + params.system = m_system.get(); + return IShaderCompiler::writeDepfile(params, dependencies, options.preprocessorOptions.includeFinder, options.preprocessorOptions.logger); + }; + + CCache::SEntry entry; + if (options.readCache || options.writeCache) + entry = CCache::SEntry(code, options); + + if (options.readCache) + { + auto found = options.readCache->find_impl(entry, options.preprocessorOptions.includeFinder); + if (found != options.readCache->m_container.end()) + { + if (options.writeCache) + { + CCache::SEntry writeEntry = *found; + options.writeCache->insert(std::move(writeEntry)); + } + auto shader = found->decompressShader(); + if (depfileEnabled && !writeDepfileFromDependencies(found->dependencies)) + return nullptr; + return shader; + } + } + + 
CCache::SEntry::dependency_container_t depfileDependencies; + CCache::SEntry::dependency_container_t* dependenciesPtr = nullptr; + if (options.writeCache) + dependenciesPtr = &entry.dependencies; + else if (depfileEnabled && supportsDependencies) + dependenciesPtr = &depfileDependencies; + + auto retVal = compileToSPIRV_impl(code, options, dependenciesPtr); + if (retVal) + { + auto backingBuffer = retVal->getContent(); + const_cast(backingBuffer)->setContentHash(backingBuffer->computeContentHash()); + } + + if (retVal && depfileEnabled && supportsDependencies) + { + const auto* deps = options.writeCache ? &entry.dependencies : &depfileDependencies; + if (!writeDepfileFromDependencies(*deps)) + return nullptr; + } + + if (options.writeCache) + { + if (entry.setContent(retVal->getContent())) + options.writeCache->insert(std::move(entry)); + } + + return retVal; } std::string IShaderCompiler::preprocessShader( @@ -72,7 +292,6 @@ std::string IShaderCompiler::preprocessShader( return preprocessShader(std::move(code), stage, preprocessOptions, dependencies); } - auto IShaderCompiler::IIncludeGenerator::getInclude(const std::string& includeName) const -> IIncludeLoader::found_t { core::vector> builtinNames = getBuiltinNamesToFunctionMapping(); @@ -97,7 +316,7 @@ core::vector IShaderCompiler::IIncludeGenerator::parseArgumentsFrom std::stringstream ss{ _path }; std::string arg; while (std::getline(ss, arg, '/')) - args.push_back(std::move(arg)); + args.emplace_back(std::move(arg)); return args; } @@ -178,7 +397,7 @@ void IShaderCompiler::CIncludeFinder::addSearchPath(const std::string& searchPat { if (!loader) return; - m_loaders.push_back(LoaderSearchPath{ loader, searchPath }); + m_loaders.emplace_back(LoaderSearchPath{ loader, searchPath }); } void IShaderCompiler::CIncludeFinder::addGenerator(const core::smart_refctd_ptr& generatorToAdd) @@ -301,7 +520,7 @@ core::smart_refctd_ptr IShaderCompiler::CCache::serialize() const size_t i = 0u; for (auto& entry : m_container) { 
// Add the entry as a json array - entries.push_back(entry); + entries.emplace_back(entry); // We keep a copy of the offsets and the sizes of each shader. This is so that later on, when we add the shaders to the buffer after json creation // (where the params array has been moved) we don't have to read the json to get the offsets again diff --git a/tools/nsc/main.cpp b/tools/nsc/main.cpp index edc56de84c..3332b8377b 100644 --- a/tools/nsc/main.cpp +++ b/tools/nsc/main.cpp @@ -200,6 +200,12 @@ class ShaderCompiler final : public system::IApplicationFramework m_logger->log(outputType + " shader code will be saved to " + output_filepath, ILogger::ELL_INFO); } + DepfileConfig depfileConfig = parseDepfileArgs(m_arguments); + if (depfileConfig.enabled && depfileConfig.path.empty()) + depfileConfig.path = output_filepath + ".d"; + if (depfileConfig.enabled) + m_logger->log("Dependency file will be saved to %s", ILogger::ELL_INFO, depfileConfig.path.c_str()); + #ifndef NBL_EMBED_BUILTIN_RESOURCES if (!no_nbl_builtins) { m_system->unmountBuiltins(); @@ -234,12 +240,12 @@ class ShaderCompiler final : public system::IApplicationFramework std::string_view result_view; if (preprocessOnly) { - preprocessing_result = preprocess_shader(shader.get(), shaderStage, file_to_compile); + preprocessing_result = preprocess_shader(shader.get(), shaderStage, file_to_compile, depfileConfig); result_view = preprocessing_result; } else { - compilation_result = compile_shader(shader.get(), shaderStage, file_to_compile); + compilation_result = compile_shader(shader.get(), shaderStage, file_to_compile, depfileConfig); result_view = { (const char*)compilation_result->getContent()->getPointer(), compilation_result->getContent()->getSize() }; } auto end = std::chrono::high_resolution_clock::now(); @@ -291,6 +297,9 @@ class ShaderCompiler final : public system::IApplicationFramework return false; } + if (depfileConfig.enabled) + m_logger->log("Dependency file written to %s", ILogger::ELL_INFO, 
depfileConfig.path.c_str()); + return true; } else @@ -307,7 +316,49 @@ class ShaderCompiler final : public system::IApplicationFramework private: - std::string preprocess_shader(const IShader* shader, hlsl::ShaderStage shaderStage, std::string_view sourceIdentifier) { + struct DepfileConfig + { + bool enabled = false; + std::string path; + }; + + DepfileConfig parseDepfileArgs(std::vector& args) + { + DepfileConfig cfg; + for (auto it = args.begin(); it != args.end();) + { + const std::string& arg = *it; + if (arg == "-MD" || arg == "-M") + { + cfg.enabled = true; + it = args.erase(it); + continue; + } + if (arg == "-MF") + { + if (it + 1 == args.end()) + { + m_logger->log("Incorrect arguments. Expecting filename after -MF.", ILogger::ELL_ERROR); + return cfg; + } + cfg.enabled = true; + cfg.path = *(it + 1); + it = args.erase(it, it + 2); + continue; + } + if (arg.rfind("-MF", 0) == 0 && arg.size() > 3) + { + cfg.enabled = true; + cfg.path = arg.substr(3); + it = args.erase(it); + continue; + } + ++it; + } + return cfg; + } + + std::string preprocess_shader(const IShader* shader, hlsl::ShaderStage shaderStage, std::string_view sourceIdentifier, const DepfileConfig& depfileConfig) { smart_refctd_ptr hlslcompiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); CHLSLCompiler::SPreprocessorOptions options = {}; @@ -322,6 +373,8 @@ class ShaderCompiler final : public system::IApplicationFramework includeFinder->addSearchPath(it, includeLoader); options.includeFinder = includeFinder.get(); + options.depfile = depfileConfig.enabled; + options.depfilePath = depfileConfig.path; const char* code_ptr = (const char*)shader->getContent()->getPointer(); std::string_view code({ code_ptr, strlen(code_ptr)}); @@ -329,7 +382,7 @@ class ShaderCompiler final : public system::IApplicationFramework return hlslcompiler->preprocessShader(std::string(code), shaderStage, options, nullptr); } - core::smart_refctd_ptr compile_shader(const IShader* shader, hlsl::ShaderStage shaderStage, 
std::string_view sourceIdentifier) { + core::smart_refctd_ptr compile_shader(const IShader* shader, hlsl::ShaderStage shaderStage, std::string_view sourceIdentifier, const DepfileConfig& depfileConfig) { smart_refctd_ptr hlslcompiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); CHLSLCompiler::SOptions options = {}; @@ -348,6 +401,8 @@ class ShaderCompiler final : public system::IApplicationFramework includeFinder->addSearchPath(it, includeLoader); options.preprocessorOptions.includeFinder = includeFinder.get(); + options.preprocessorOptions.depfile = depfileConfig.enabled; + options.preprocessorOptions.depfilePath = depfileConfig.path; return hlslcompiler->compileToSPIRV((const char*)shader->getContent()->getPointer(), options); } From 682ba00a65dae272ff2e8821db3325c835c92a5b Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 2 Jan 2026 23:03:21 +0100 Subject: [PATCH 348/472] fix certain issues and update docs --- docs/nsc-prebuilds.md | 22 +++------------------- src/nbl/asset/utils/CHLSLCompiler.cpp | 4 ---- src/nbl/asset/utils/IShaderCompiler.cpp | 12 ++++-------- 3 files changed, 7 insertions(+), 31 deletions(-) diff --git a/docs/nsc-prebuilds.md b/docs/nsc-prebuilds.md index 4d57d7a8de..1f8c73e2ee 100644 --- a/docs/nsc-prebuilds.md +++ b/docs/nsc-prebuilds.md @@ -60,7 +60,7 @@ Keys are strings that match the output layout: - `INPUT` (string, required): path to `.hlsl` (relative to `CMAKE_CURRENT_SOURCE_DIR` or absolute). - `KEY` (string, required): base key (prefer without `.spv`; it is always appended, so using `foo.spv` will result in `foo.spv.spv`). - `COMPILE_OPTIONS` (array of strings, optional): per-input extra options (e.g. `["-T","cs_6_8"]`). -- `DEPENDS` (array of strings, optional): per-input dependencies (extra files that should trigger rebuild). +- `DEPENDS` (array of strings, optional): extra per-input dependencies that are not discovered via `#include` (see below). - `CAPS` (array, optional): permutation caps (see below). 
You can register many rules in a single call, and you can call the function multiple times to append rules to the same `TARGET`. @@ -87,18 +87,9 @@ The helper also exposes CMake options that append NSC debug flags **only for Deb ## Source files and rebuild dependencies (important) -Make sure shader inputs and includes are: +NSC supports depfiles and the CMake custom commands consume them, so **changes in any `#include`d HLSL file automatically trigger recompilation of the affected `.spv` outputs**. In most cases you no longer need to list includes manually. -1. Marked as header-only on your target (so the IDE shows them, but the build system doesn't try to compile them with default HLSL rules like `fxc`): - -```cmake -target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) -set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) -``` - -2. Listed as dependencies of the NSC custom commands (so editing any of them triggers a rebuild of the `.spv` outputs). - -This is what the `DEPENDS` argument of `NBL_CREATE_NSC_COMPILE_RULES` (and/or per-input JSON `DEPENDS`) is for. Always include the main `INPUT` file itself and any files it includes; otherwise the build system might not re-run `nsc` when you change them. +Use `DEPENDS` only for **extra** inputs that are not discovered via `#include` (e.g. a generated header that is not included, a config file read by a custom include generator, or any non-HLSL file that should trigger a rebuild). You can register those extra dependencies if you need them, but in most projects `DEPENDS` should stay empty. 
## Minimal usage (no permutations) @@ -106,12 +97,6 @@ Example pattern (as in `examples_tests/27_MPMCScheduler/CMakeLists.txt`): ```cmake set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") -set(DEPENDS - app_resources/common.hlsl - app_resources/shader.comp.hlsl -) -target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) -set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) set(JSON [=[ [ @@ -128,7 +113,6 @@ set(JSON [=[ NBL_CREATE_NSC_COMPILE_RULES( TARGET ${EXECUTABLE_NAME}SPIRV LINK_TO ${EXECUTABLE_NAME} - DEPENDS ${DEPENDS} BINARY_DIR ${OUTPUT_DIRECTORY} MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT COMMON_OPTIONS -I ${CMAKE_CURRENT_SOURCE_DIR} diff --git a/src/nbl/asset/utils/CHLSLCompiler.cpp b/src/nbl/asset/utils/CHLSLCompiler.cpp index e551d3c72b..2b98f9c192 100644 --- a/src/nbl/asset/utils/CHLSLCompiler.cpp +++ b/src/nbl/asset/utils/CHLSLCompiler.cpp @@ -428,11 +428,7 @@ std::string CHLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADE { IShaderCompiler::DepfileWriteParams params = {}; const std::string depfilePathString = preprocessOptions.depfilePath.generic_string(); - std::filesystem::path targetPath = preprocessOptions.depfilePath; - if (targetPath.extension() == ".d") - targetPath.replace_extension(); params.depfilePath = depfilePathString; - params.outputPath = targetPath.generic_string(); params.sourceIdentifier = preprocessOptions.sourceIdentifier; params.system = m_system.get(); if (!IShaderCompiler::writeDepfile(params, *dependenciesOut, preprocessOptions.includeFinder, preprocessOptions.logger)) diff --git a/src/nbl/asset/utils/IShaderCompiler.cpp b/src/nbl/asset/utils/IShaderCompiler.cpp index 1d0083f818..754229f83b 100644 --- a/src/nbl/asset/utils/IShaderCompiler.cpp +++ b/src/nbl/asset/utils/IShaderCompiler.cpp @@ -114,14 +114,14 @@ bool IShaderCompiler::writeDepfile( for (const char c : normalized) { if (c == ' ' || c == '#') - out.emplace_back('\\'); + out.push_back('\\'); if (c == '$') { - 
out.emplace_back('$'); - out.emplace_back('$'); + out.push_back('$'); + out.push_back('$'); continue; } - out.emplace_back(c); + out.push_back(c); } return out; }; @@ -217,10 +217,6 @@ core::smart_refctd_ptr nbl::asset::IShaderCompiler::compileToSPIRV(cons IShaderCompiler::DepfileWriteParams params = {}; const std::string depfilePathString = options.preprocessorOptions.depfilePath.generic_string(); params.depfilePath = depfilePathString; - auto targetPath = options.preprocessorOptions.depfilePath; - if (targetPath.extension() == ".d") - targetPath.replace_extension(); - params.outputPath = targetPath.generic_string(); params.sourceIdentifier = options.preprocessorOptions.sourceIdentifier; params.system = m_system.get(); return IShaderCompiler::writeDepfile(params, dependencies, options.preprocessorOptions.includeFinder, options.preprocessorOptions.logger); From f04d3a3692c441caef36cd371663997a0c43ae68 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 2 Jan 2026 23:10:44 +0100 Subject: [PATCH 349/472] update nsc rules, `-Wno-local-type-template-args` in REQUIRED_OPTIONS --- cmake/common.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/common.cmake b/cmake/common.cmake index 0d8b583f18..488ee7342b 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1181,6 +1181,7 @@ struct DeviceConfigCaps -fspv-target-env=vulkan1.3 -Wshadow -Wconversion + -Wno-local-type-template-args $<$:-O0> $<$:-O3> $<$:-O3> From d66fc339d832acbcf583f0cdbf25bc72299ddd5a Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 3 Jan 2026 17:13:10 +0100 Subject: [PATCH 350/472] finalize depfile & NSC rules updates --- cmake/common.cmake | 65 ++++++- docs/nsc-prebuilds.md | 15 +- examples_tests | 2 +- tools/nsc/CMakeLists.txt | 5 +- tools/nsc/main.cpp | 407 +++++++++++++++++++++++---------------- 5 files changed, 308 insertions(+), 186 deletions(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index 488ee7342b..c3901c6e42 100755 --- a/cmake/common.cmake 
+++ b/cmake/common.cmake @@ -1209,6 +1209,7 @@ struct DeviceConfigCaps if(NOT NBL_EMBED_BUILTIN_RESOURCES) list(APPEND REQUIRED_OPTIONS + -no-nbl-builtins -I "${NBL_ROOT_PATH}/include" -I "${NBL_ROOT_PATH}/3rdparty/dxc/dxc/external/SPIRV-Headers/include" -I "${NBL_ROOT_PATH}/3rdparty/boost/superproject/libs/preprocessor/include" @@ -1217,7 +1218,8 @@ struct DeviceConfigCaps endif() set(REQUIRED_SINGLE_ARGS TARGET BINARY_DIR OUTPUT_VAR INPUTS INCLUDE NAMESPACE MOUNT_POINT_DEFINE) - cmake_parse_arguments(IMPL "" "${REQUIRED_SINGLE_ARGS};LINK_TO" "COMMON_OPTIONS;DEPENDS" ${ARGV}) + set(OPTIONAL_SINGLE_ARGS GLOB_DIR) + cmake_parse_arguments(IMPL "DISCARD_DEFAULT_GLOB" "${REQUIRED_SINGLE_ARGS};${OPTIONAL_SINGLE_ARGS};LINK_TO" "COMMON_OPTIONS;DEPENDS" ${ARGV}) NBL_PARSE_REQUIRED(IMPL ${REQUIRED_SINGLE_ARGS}) if(NOT TARGET ${IMPL_TARGET}) @@ -1295,6 +1297,10 @@ namespace @IMPL_NAMESPACE@ { list(APPEND MP_DEFINES ${IMPL_MOUNT_POINT_DEFINE}="${IMPL_BINARY_DIR}") set_target_properties(${IMPL_TARGET} PROPERTIES NBL_MOUNT_POINT_DEFINES "${MP_DEFINES}") + set(RTE "NSC Rules") + set(IN "${RTE}/In") + set(OUT "${RTE}/Out") + string(JSON JSON_LENGTH LENGTH "${IMPL_INPUTS}") math(EXPR LAST_INDEX "${JSON_LENGTH} - 1") @@ -1482,6 +1488,7 @@ namespace @IMPL_NAMESPACE@ { set(FINAL_KEY_REL_PATH "$/${FINAL_KEY}") set(TARGET_OUTPUT "${IMPL_BINARY_DIR}/${FINAL_KEY_REL_PATH}") set(DEPFILE_PATH "${TARGET_OUTPUT}.d") + set(NBL_NSC_LOG_PATH "${TARGET_OUTPUT}.log") set(NBL_NSC_DEPFILE_ARGS "") if(NSC_USE_DEPFILE) @@ -1496,11 +1503,18 @@ namespace @IMPL_NAMESPACE@ { "${CONFIG_FILE}" ) + get_filename_component(NBL_NSC_INPUT_NAME "${TARGET_INPUT}" NAME) + set(NBL_NSC_BYPRODUCTS "${NBL_NSC_LOG_PATH}") + if(NSC_USE_DEPFILE) + list(APPEND NBL_NSC_BYPRODUCTS "${DEPFILE_PATH}") + endif() + set(NBL_NSC_CUSTOM_COMMAND_ARGS OUTPUT "${TARGET_OUTPUT}" + BYPRODUCTS ${NBL_NSC_BYPRODUCTS} COMMAND ${NBL_NSC_COMPILE_COMMAND} DEPENDS ${DEPENDS_ON} - COMMENT "Creating \"${TARGET_OUTPUT}\"" + COMMENT 
"${NBL_NSC_INPUT_NAME}" VERBATIM COMMAND_EXPAND_LISTS ) @@ -1508,15 +1522,37 @@ namespace @IMPL_NAMESPACE@ { list(APPEND NBL_NSC_CUSTOM_COMMAND_ARGS DEPFILE "${DEPFILE_PATH}") endif() add_custom_command(${NBL_NSC_CUSTOM_COMMAND_ARGS}) - set_source_files_properties("${TARGET_OUTPUT}" PROPERTIES GENERATED TRUE) + set(NBL_NSC_OUT_FILES "${TARGET_OUTPUT}" "${NBL_NSC_LOG_PATH}") + if(NSC_USE_DEPFILE) + list(APPEND NBL_NSC_OUT_FILES "${DEPFILE_PATH}") + endif() - set(HEADER_ONLY_LIKE "${CONFIG_FILE}" "${TARGET_INPUT}" "${TARGET_OUTPUT}") + set_source_files_properties(${NBL_NSC_OUT_FILES} PROPERTIES GENERATED TRUE) + + set(HEADER_ONLY_LIKE "${CONFIG_FILE}" "${TARGET_INPUT}" ${NBL_NSC_OUT_FILES}) target_sources(${IMPL_TARGET} PRIVATE ${HEADER_ONLY_LIKE}) set_source_files_properties(${HEADER_ONLY_LIKE} PROPERTIES HEADER_FILE_ONLY ON VS_TOOL_OVERRIDE None ) + if(CMAKE_CONFIGURATION_TYPES) + foreach(_CFG IN LISTS CMAKE_CONFIGURATION_TYPES) + set(TARGET_OUTPUT_IDE "${IMPL_BINARY_DIR}/${_CFG}/${FINAL_KEY}") + set(NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE}" "${TARGET_OUTPUT_IDE}.log") + if(NSC_USE_DEPFILE) + list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE}.d") + endif() + source_group("${OUT}/${_CFG}" FILES ${NBL_NSC_OUT_FILES_IDE}) + endforeach() + else() + set(TARGET_OUTPUT_IDE "${IMPL_BINARY_DIR}/${FINAL_KEY}") + set(NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE}" "${TARGET_OUTPUT_IDE}.log") + if(NSC_USE_DEPFILE) + list(APPEND NBL_NSC_OUT_FILES_IDE "${TARGET_OUTPUT_IDE}.d") + endif() + source_group("${OUT}" FILES ${NBL_NSC_OUT_FILES_IDE}) + endif() set_source_files_properties("${TARGET_OUTPUT}" PROPERTIES NBL_SPIRV_REGISTERED_INPUT "${TARGET_INPUT}" @@ -1558,12 +1594,23 @@ namespace @IMPL_NAMESPACE@ { list(APPEND KEYS ${ACCESS_KEY}) endforeach() - set(RTE "NSC Rules") - set(IN "${RTE}/In") - set(OUT "${RTE}/Out") - source_group("${IN}" FILES ${CONFIGS} ${INPUTS}) - source_group("${OUT}" FILES ${SPIRVs}) + if(NOT IMPL_DISCARD_DEFAULT_GLOB) + set(GLOB_ROOT 
"${CMAKE_CURRENT_SOURCE_DIR}") + if(IMPL_GLOB_DIR) + set(GLOB_ROOT "${IMPL_GLOB_DIR}") + endif() + get_filename_component(GLOB_ROOT "${GLOB_ROOT}" ABSOLUTE BASE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") + file(GLOB_RECURSE IMPL_HLSL_GLOB CONFIGURE_DEPENDS "${GLOB_ROOT}/*.hlsl") + if(IMPL_HLSL_GLOB) + target_sources(${IMPL_TARGET} PRIVATE ${IMPL_HLSL_GLOB}) + set_source_files_properties(${IMPL_HLSL_GLOB} PROPERTIES + HEADER_FILE_ONLY ON + VS_TOOL_OVERRIDE None + ) + source_group("HLSL Files" FILES ${IMPL_HLSL_GLOB}) + endif() + endif() set(${IMPL_OUTPUT_VAR} ${KEYS} PARENT_SCOPE) endfunction() diff --git a/docs/nsc-prebuilds.md b/docs/nsc-prebuilds.md index 1f8c73e2ee..400aff5eb7 100644 --- a/docs/nsc-prebuilds.md +++ b/docs/nsc-prebuilds.md @@ -91,6 +91,11 @@ NSC supports depfiles and the CMake custom commands consume them, so **changes i Use `DEPENDS` only for **extra** inputs that are not discovered via `#include` (e.g. a generated header that is not included, a config file read by a custom include generator, or any non-HLSL file that should trigger a rebuild). You can register those extra dependencies if you need them, but in most projects `DEPENDS` should stay empty. +By default `NBL_CREATE_NSC_COMPILE_RULES` also collects `*.hlsl` files for IDE visibility. It recursively scans the current source directory (or `GLOB_DIR` if provided), adds those files as header-only, and groups them under `HLSL Files`. If you do not want this behavior, pass `DISCARD_DEFAULT_GLOB`. + +- `GLOB_DIR` (optional): root directory for the default `*.hlsl` scan. +- `DISCARD_DEFAULT_GLOB` (flag): disables the default scan and IDE grouping. + ## Minimal usage (no permutations) Example pattern (as in `examples_tests/27_MPMCScheduler/CMakeLists.txt`): @@ -282,6 +287,8 @@ If the error looks like a preprocessing issue, note that we use Boost.Wave as th
NSC rules + archive + runtime key usage +NSC emits depfiles and the custom commands consume them, so changes in `#include`d HLSL files automatically trigger recompilation of the affected outputs. In most cases you do not need to list includes manually. Use `DEPENDS` only for extra inputs that are not discovered via `#include`. + ### CMake (`CMakeLists.txt`) ```cmake @@ -290,12 +297,6 @@ include(common) nbl_create_executable_project("" "" "" "") set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") -set(DEPENDS - app_resources/common.hlsl - app_resources/shader.hlsl -) -target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) -set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) set(JSON [=[ [ @@ -303,7 +304,6 @@ set(JSON [=[ "INPUT": "app_resources/shader.hlsl", "KEY": "shader", "COMPILE_OPTIONS": ["-T", "lib_6_8"], - "DEPENDS": [], "CAPS": [ { "kind": "limits", @@ -325,7 +325,6 @@ set(JSON [=[ NBL_CREATE_NSC_COMPILE_RULES( TARGET ${EXECUTABLE_NAME}SPIRV LINK_TO ${EXECUTABLE_NAME} - DEPENDS ${DEPENDS} BINARY_DIR ${OUTPUT_DIRECTORY} MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT COMMON_OPTIONS -I ${CMAKE_CURRENT_SOURCE_DIR} diff --git a/examples_tests b/examples_tests index 5df217517f..58b42cfc87 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 5df217517fd5af0964b6d170afb68d5194daf60d +Subproject commit 58b42cfc87274db606d593d5baec787b626bb945 diff --git a/tools/nsc/CMakeLists.txt b/tools/nsc/CMakeLists.txt index bcdcbca531..2765f02fa5 100644 --- a/tools/nsc/CMakeLists.txt +++ b/tools/nsc/CMakeLists.txt @@ -2,6 +2,9 @@ nbl_create_executable_project("" "" "" "") enable_testing() +add_dependencies(${EXECUTABLE_NAME} argparse) +target_include_directories(${EXECUTABLE_NAME} PRIVATE $) + set(GODBOLT_BINARY_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/compiler-explorer") set(GODBOLT_BINARY_PRETEST_DIRECTORY "${GODBOLT_BINARY_DIRECTORY}/pre-test") set(NBL_NSC_COMPILE_DIRECTORY 
"${GODBOLT_BINARY_PRETEST_DIRECTORY}/.compile/$") @@ -359,4 +362,4 @@ add_custom_target(run-compiler-explorer ALL add_dependencies(run-compiler-explorer nsc) set_target_properties(run-compiler-explorer PROPERTIES FOLDER "Godbolt") -endif() \ No newline at end of file +endif() diff --git a/tools/nsc/main.cpp b/tools/nsc/main.cpp index 3332b8377b..56152701b3 100644 --- a/tools/nsc/main.cpp +++ b/tools/nsc/main.cpp @@ -3,8 +3,11 @@ #include #include +#include #include #include +#include +#include #include "nbl/asset/metadata/CHLSLMetadata.h" #include "nlohmann/json.hpp" @@ -15,6 +18,41 @@ using namespace nbl::system; using namespace nbl::core; using namespace nbl::asset; +class NscLogger final : public system::IThreadsafeLogger +{ +public: + NscLogger(core::smart_refctd_ptr&& logFile, const core::bitflag logLevelMask, const core::bitflag consoleMask) + : IThreadsafeLogger(logLevelMask), m_logFile(std::move(logFile)), m_logPos(m_logFile ? m_logFile->getSize() : 0ull), m_consoleMask(consoleMask) + { + } + +private: + void threadsafeLog_impl(const std::string_view& fmt, E_LOG_LEVEL logLevel, va_list args) override + { + const auto line = constructLogString(fmt, logLevel, args); + size_t lineSize = line.size(); + while (lineSize > 0 && line[lineSize - 1] == '\0') + --lineSize; + if (lineSize == 0) + return; + if (m_logFile) + { + system::IFile::success_t succ; + m_logFile->write(succ, line.data(), m_logPos, lineSize); + m_logPos += succ.getBytesProcessed(); + } + if (logLevel & m_consoleMask.value) + { + std::fwrite(line.data(), 1, lineSize, stdout); + std::fflush(stdout); + } + } + + core::smart_refctd_ptr m_logFile; + size_t m_logPos = 0ull; + core::bitflag m_consoleMask; +}; + class ShaderCompiler final : public system::IApplicationFramework { using base_t = system::IApplicationFramework; @@ -24,71 +62,120 @@ class ShaderCompiler final : public system::IApplicationFramework bool onAppInitialized(smart_refctd_ptr&& system) override { - const auto argc = argv.size(); - 
const bool insufficientArguments = argc < 2; - - if (not insufficientArguments) + const auto rawArgs = std::vector(argv.begin(), argv.end()); + auto expandArgs = [](const std::vector& args) { - // 1) NOTE: imo each example should be able to dump build info & have such mode, maybe it could go straight to IApplicationFramework main - // 2) TODO: this whole "serialize" logic should go to the GitInfo struct and be static or something, it should be standardized - - if (argv[1] == "--dump-build-info") + std::vector expanded; + expanded.reserve(args.size()); + for (const auto& arg : args) { - json j; - - auto& modules = j["modules"]; - - auto serialize = [&](const gtml::GitInfo& info, std::string_view target) -> void + if (arg.rfind("-MF", 0) == 0 && arg.size() > 3) { - auto& s = modules[target.data()]; - - s["isPopulated"] = info.isPopulated; - if (info.hasUncommittedChanges.has_value()) - s["hasUncommittedChanges"] = info.hasUncommittedChanges.value(); - else - s["hasUncommittedChanges"] = "UNKNOWN, BUILT WITHOUT DIRTY-CHANGES CAPTURE"; - - s["commitAuthorName"] = info.commitAuthorName; - s["commitAuthorEmail"] = info.commitAuthorEmail; - s["commitHash"] = info.commitHash; - s["commitShortHash"] = info.commitShortHash; - s["commitDate"] = info.commitDate; - s["commitSubject"] = info.commitSubject; - s["commitBody"] = info.commitBody; - s["describe"] = info.describe; - s["branchName"] = info.branchName; - s["latestTag"] = info.latestTag; - s["latestTagName"] = info.latestTagName; - }; - - serialize(gtml::nabla_git_info, "nabla"); - serialize(gtml::dxc_git_info, "dxc"); - - const auto pretty = j.dump(4); - std::cout << pretty << std::endl; - - std::filesystem::path oPath = "build-info.json"; - - // TOOD: use argparse for it - if (argc > 3 && argv[2] == "--file") - oPath = argv[3]; - - std::ofstream outFile(oPath); - if (outFile.is_open()) + expanded.push_back("-MF"); + expanded.push_back(arg.substr(3)); + continue; + } + if (arg.rfind("-Fo", 0) == 0 && arg.size() > 3) { - 
outFile << pretty; - outFile.close(); - printf("Saved \"%s\"\n", oPath.string().c_str()); + expanded.push_back("-Fo"); + expanded.push_back(arg.substr(3)); + continue; } - else + if (arg.rfind("-Fc", 0) == 0 && arg.size() > 3) { - printf("Failed to open \"%s\" for writing\n", oPath.string().c_str()); - exit(-1); + expanded.push_back("-Fc"); + expanded.push_back(arg.substr(3)); + continue; } + expanded.push_back(arg); + } + return expanded; + }; - // in this mode terminate with 0 if all good - exit(0); + argparse::ArgumentParser program("nsc"); + program.add_argument("--dump-build-info").default_value(false).implicit_value(true); + program.add_argument("--file").default_value(std::string{}); + program.add_argument("-P").default_value(false).implicit_value(true); + program.add_argument("-no-nbl-builtins").default_value(false).implicit_value(true); + program.add_argument("-MD").default_value(false).implicit_value(true); + program.add_argument("-M").default_value(false).implicit_value(true); + program.add_argument("-MF").default_value(std::string{}); + program.add_argument("-Fo").default_value(std::string{}); + program.add_argument("-Fc").default_value(std::string{}); + program.add_argument("-log").default_value(std::string{}); + program.add_argument("-nolog").default_value(false).implicit_value(true); + program.add_argument("-quiet").default_value(false).implicit_value(true); + program.add_argument("-verbose").default_value(false).implicit_value(true); + + std::vector unknownArgs; + try + { + unknownArgs = program.parse_known_args(expandArgs(rawArgs)); + } + catch (const std::runtime_error& err) + { + std::cerr << err.what() << std::endl << program; + return false; + } + + if (program.get("--dump-build-info")) + { + json j; + + auto& modules = j["modules"]; + + auto serialize = [&](const gtml::GitInfo& info, std::string_view target) -> void + { + auto& s = modules[target.data()]; + + s["isPopulated"] = info.isPopulated; + if (info.hasUncommittedChanges.has_value()) + 
s["hasUncommittedChanges"] = info.hasUncommittedChanges.value(); + else + s["hasUncommittedChanges"] = "UNKNOWN, BUILT WITHOUT DIRTY-CHANGES CAPTURE"; + + s["commitAuthorName"] = info.commitAuthorName; + s["commitAuthorEmail"] = info.commitAuthorEmail; + s["commitHash"] = info.commitHash; + s["commitShortHash"] = info.commitShortHash; + s["commitDate"] = info.commitDate; + s["commitSubject"] = info.commitSubject; + s["commitBody"] = info.commitBody; + s["describe"] = info.describe; + s["branchName"] = info.branchName; + s["latestTag"] = info.latestTag; + s["latestTagName"] = info.latestTagName; + }; + + serialize(gtml::nabla_git_info, "nabla"); + serialize(gtml::dxc_git_info, "dxc"); + + const auto pretty = j.dump(4); + std::cout << pretty << std::endl; + + std::filesystem::path oPath = "build-info.json"; + + if (program.is_used("--file")) + { + const auto filePath = program.get("--file"); + if (!filePath.empty()) + oPath = filePath; + } + + std::ofstream outFile(oPath); + if (outFile.is_open()) + { + outFile << pretty; + outFile.close(); + printf("Saved \"%s\"\n", oPath.string().c_str()); + } + else + { + printf("Failed to open \"%s\" for writing\n", oPath.string().c_str()); + exit(-1); } + + exit(0); } if (not isAPILoaded()) @@ -105,102 +192,119 @@ class ShaderCompiler final : public system::IApplicationFramework if (!m_system) return false; - m_logger = make_smart_refctd_ptr(core::bitflag(ILogger::ELL_DEBUG) | ILogger::ELL_INFO | ILogger::ELL_WARNING | ILogger::ELL_PERFORMANCE | ILogger::ELL_ERROR); + const auto defaultConsoleMask = core::bitflag(ILogger::ELL_WARNING) | ILogger::ELL_ERROR; + m_logger = make_smart_refctd_ptr(defaultConsoleMask); - if (insufficientArguments) + if (rawArgs.size() < 2) { m_logger->log("Insufficient arguments.", ILogger::ELL_ERROR); return false; } + std::string file_to_compile = rawArgs.back(); - m_arguments = std::vector(argv.begin() + 1, argv.end()-1); // turn argv into vector for convenience + if 
(!m_system->exists(file_to_compile, IFileBase::ECF_READ)) + { + m_logger->log("Input shader file does not exist: %s", ILogger::ELL_ERROR, file_to_compile.c_str()); + return false; + } - std::string file_to_compile = argv.back(); + const bool preprocessOnly = program.get("-P"); + const bool outputFlagFc = program.is_used("-Fc"); + const bool outputFlagFo = program.is_used("-Fo"); + if (outputFlagFc && outputFlagFo) + { + m_logger->log("Invalid arguments. Passed both -Fo and -Fc.", ILogger::ELL_ERROR); + return false; + } + if (!outputFlagFc && !outputFlagFo) + { + m_logger->log("Missing arguments. Expecting `-Fc {filename}` or `-Fo {filename}`.", ILogger::ELL_ERROR); + return false; + } - if (!m_system->exists(file_to_compile, IFileBase::ECF_READ)) { - m_logger->log("Incorrect arguments. Expecting last argument to be filename of the shader intended to compile.", ILogger::ELL_ERROR); + std::string output_filepath = outputFlagFc ? program.get("-Fc") : program.get("-Fo"); + if (output_filepath.empty()) + { + m_logger->log("Invalid output file path.", ILogger::ELL_ERROR); return false; } - std::string output_filepath = ""; - auto builtin_flag_pos = std::find(m_arguments.begin(), m_arguments.end(), "-no-nbl-builtins"); - if (builtin_flag_pos != m_arguments.end()) { - m_logger->log("Unmounting builtins."); - m_system->unmountBuiltins(); - no_nbl_builtins = true; - m_arguments.erase(builtin_flag_pos); + const bool quietFlag = program.get("-quiet"); + const bool verboseFlag = program.get("-verbose"); + if (quietFlag && verboseFlag) + { + m_logger->log("Invalid arguments. 
Passed both -quiet and -verbose.", ILogger::ELL_ERROR); + return false; } - auto split = [&](const std::string& str, char delim) + LogConfig logConfig; + if (verboseFlag) + logConfig.quiet = false; + if (quietFlag) + logConfig.quiet = true; + + logConfig.noLog = program.get("-nolog"); + if (program.is_used("-log")) { - std::vector strings; - size_t start, end = 0; - - while ((start = str.find_first_not_of(delim, end)) != std::string::npos) + logConfig.path = program.get("-log"); + if (logConfig.path.empty()) { - end = str.find(delim, start); - strings.push_back(str.substr(start, end - start)); + m_logger->log("Incorrect arguments. Expecting filename after -log.", ILogger::ELL_ERROR); + return false; } - - return strings; - }; - - auto findOutputFlag = [&](const std::string_view& outputFlag) - { - return std::find_if(m_arguments.begin(), m_arguments.end(), [&](const std::string& argument) - { - return argument.find(outputFlag.data()) != std::string::npos; - }); - }; - - auto preprocessOnly = findOutputFlag("-P") != m_arguments.end(); - auto output_flag_pos_fc = findOutputFlag("-Fc"); - auto output_flag_pos_fo = findOutputFlag("-Fo"); - if (output_flag_pos_fc != m_arguments.end() && output_flag_pos_fo != m_arguments.end()) { - m_logger->log("Invalid arguments. Passed both -Fo and -Fc.", ILogger::ELL_ERROR); - return false; } - auto output_flag_pos = output_flag_pos_fc != m_arguments.end() ? output_flag_pos_fc : output_flag_pos_fo; - if (output_flag_pos == m_arguments.end()) + + if (logConfig.noLog && !logConfig.path.empty()) { - m_logger->log("Missing arguments. Expecting `-Fc {filename}` or `-Fo {filename}`.", ILogger::ELL_ERROR); + m_logger->log("Invalid arguments. Passed both -nolog and -log.", ILogger::ELL_ERROR); return false; } - else + + const auto consoleMask = logConfig.quiet ? 
(core::bitflag(ILogger::ELL_WARNING) | ILogger::ELL_ERROR) : core::bitflag(ILogger::ELL_ALL); + m_logger = make_smart_refctd_ptr(consoleMask); + + if (!logConfig.noLog) { - // we need to assume -Fc may be passed with output file name quoted together with "", so we split it (DXC does it) - const auto& outputFlag = *output_flag_pos; - auto outputFlagVector = split(outputFlag, ' '); - - if(outputFlag == "-Fc" || outputFlag == "-Fo") - { - if (output_flag_pos + 1 != m_arguments.end()) - { - output_filepath = *(output_flag_pos + 1); - } - else - { - m_logger->log("Incorrect arguments. Expecting filename after %s.", ILogger::ELL_ERROR, outputFlag); - return false; - } - } + const std::filesystem::path logPath = logConfig.path.empty() ? std::filesystem::path(output_filepath).concat(".log") : std::filesystem::path(logConfig.path); + const auto parentDirectory = logPath.parent_path(); + if (!parentDirectory.empty() && !std::filesystem::exists(parentDirectory)) + std::filesystem::create_directories(parentDirectory); + + m_system->deleteFile(logPath); + + system::ISystem::future_t> future; + m_system->createFile(future, logPath, system::IFileBase::ECF_WRITE); + core::smart_refctd_ptr logFile; + if (future.wait()) + future.acquire().move_into(logFile); + + if (logFile) + m_logger = make_smart_refctd_ptr(std::move(logFile), core::bitflag(ILogger::ELL_ALL), consoleMask); else - { - output_filepath = outputFlagVector[1]; - } - m_arguments.erase(output_flag_pos, output_flag_pos+2); + m_logger->log("Failed to open log file: %s", ILogger::ELL_ERROR, logPath.string().c_str()); + } - if (output_filepath.empty()) - { - m_logger->log("Invalid output file path!" + output_filepath, ILogger::ELL_ERROR); - return false; - } - - std::string outputType = preprocessOnly ? "Preprocessed" : "Compiled"; - m_logger->log(outputType + " shader code will be saved to " + output_filepath, ILogger::ELL_INFO); + const char* action = preprocessOnly ? 
"Preprocessing" : "Compiling"; + m_logger->log("%s %s", ILogger::ELL_INFO, action, file_to_compile.c_str()); + const char* outputType = preprocessOnly ? "Preprocessed" : "Compiled"; + m_logger->log("%s shader code will be saved to %s", ILogger::ELL_INFO, outputType, output_filepath.c_str()); + + m_arguments = std::move(unknownArgs); + if (!m_arguments.empty() && m_arguments.back() == file_to_compile) + m_arguments.pop_back(); + + no_nbl_builtins = program.get("-no-nbl-builtins"); + if (no_nbl_builtins) + { + m_logger->log("Unmounting builtins."); + m_system->unmountBuiltins(); } - DepfileConfig depfileConfig = parseDepfileArgs(m_arguments); + DepfileConfig depfileConfig; + if (program.get("-MD") || program.get("-M") || program.is_used("-MF")) + depfileConfig.enabled = true; + if (program.is_used("-MF")) + depfileConfig.path = program.get("-MF"); if (depfileConfig.enabled && depfileConfig.path.empty()) depfileConfig.path = output_filepath + ".d"; if (depfileConfig.enabled) @@ -215,12 +319,11 @@ class ShaderCompiler final : public system::IApplicationFramework #endif if (std::find(m_arguments.begin(), m_arguments.end(), "-E") == m_arguments.end()) { - //Insert '-E main' into arguments if no entry point is specified m_arguments.push_back("-E"); m_arguments.push_back("main"); } - for (size_t i = 0; i < m_arguments.size() - 1; ++i) // -I must be given with second arg, no need to include iteration over last one + for (size_t i = 0; i + 1 < m_arguments.size(); ++i) { const auto& arg = m_arguments[i]; if (arg == "-I") @@ -250,7 +353,6 @@ class ShaderCompiler final : public system::IApplicationFramework } auto end = std::chrono::high_resolution_clock::now(); - // write compiled/preprocessed shader to file as bytes std::string operationType = preprocessOnly ? "preprocessing" : "compilation"; const bool success = preprocessOnly ? 
preprocessing_result != std::string{} : bool(compilation_result); if (success) @@ -316,47 +418,18 @@ class ShaderCompiler final : public system::IApplicationFramework private: - struct DepfileConfig + struct LogConfig { - bool enabled = false; + bool quiet = true; + bool noLog = false; std::string path; }; - DepfileConfig parseDepfileArgs(std::vector& args) + struct DepfileConfig { - DepfileConfig cfg; - for (auto it = args.begin(); it != args.end();) - { - const std::string& arg = *it; - if (arg == "-MD" || arg == "-M") - { - cfg.enabled = true; - it = args.erase(it); - continue; - } - if (arg == "-MF") - { - if (it + 1 == args.end()) - { - m_logger->log("Incorrect arguments. Expecting filename after -MF.", ILogger::ELL_ERROR); - return cfg; - } - cfg.enabled = true; - cfg.path = *(it + 1); - it = args.erase(it, it + 2); - continue; - } - if (arg.rfind("-MF", 0) == 0 && arg.size() > 3) - { - cfg.enabled = true; - cfg.path = arg.substr(3); - it = args.erase(it); - continue; - } - ++it; - } - return cfg; - } + bool enabled = false; + std::string path; + }; std::string preprocess_shader(const IShader* shader, hlsl::ShaderStage shaderStage, std::string_view sourceIdentifier, const DepfileConfig& depfileConfig) { smart_refctd_ptr hlslcompiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); @@ -448,7 +521,7 @@ class ShaderCompiler final : public system::IApplicationFramework bool no_nbl_builtins{ false }; smart_refctd_ptr m_system; - smart_refctd_ptr m_logger; + smart_refctd_ptr m_logger; std::vector m_arguments, m_include_search_paths; core::smart_refctd_ptr m_assetMgr; From f4d5fde949d759a0a6ecf20525cde963549c2dde Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 4 Jan 2026 07:50:24 +0100 Subject: [PATCH 351/472] fix depfile generation normalize paths, have a .tmp before writing final file --- cmake/common.cmake | 32 ++++++++++--------- src/nbl/asset/utils/CHLSLCompiler.cpp | 3 ++ src/nbl/asset/utils/IShaderCompiler.cpp | 41 ++++++++++++++++++++++--- 
3 files changed, 57 insertions(+), 19 deletions(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index c3901c6e42..a4ce345096 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1222,6 +1222,16 @@ struct DeviceConfigCaps cmake_parse_arguments(IMPL "DISCARD_DEFAULT_GLOB" "${REQUIRED_SINGLE_ARGS};${OPTIONAL_SINGLE_ARGS};LINK_TO" "COMMON_OPTIONS;DEPENDS" ${ARGV}) NBL_PARSE_REQUIRED(IMPL ${REQUIRED_SINGLE_ARGS}) + set(IMPL_HLSL_GLOB "") + if(NOT IMPL_DISCARD_DEFAULT_GLOB) + set(GLOB_ROOT "${CMAKE_CURRENT_SOURCE_DIR}") + if(IMPL_GLOB_DIR) + set(GLOB_ROOT "${IMPL_GLOB_DIR}") + endif() + get_filename_component(GLOB_ROOT "${GLOB_ROOT}" ABSOLUTE BASE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") + file(GLOB_RECURSE IMPL_HLSL_GLOB CONFIGURE_DEPENDS "${GLOB_ROOT}/*.hlsl") + endif() + if(NOT TARGET ${IMPL_TARGET}) add_library(${IMPL_TARGET} INTERFACE) endif() @@ -1595,21 +1605,13 @@ namespace @IMPL_NAMESPACE@ { endforeach() source_group("${IN}" FILES ${CONFIGS} ${INPUTS}) - if(NOT IMPL_DISCARD_DEFAULT_GLOB) - set(GLOB_ROOT "${CMAKE_CURRENT_SOURCE_DIR}") - if(IMPL_GLOB_DIR) - set(GLOB_ROOT "${IMPL_GLOB_DIR}") - endif() - get_filename_component(GLOB_ROOT "${GLOB_ROOT}" ABSOLUTE BASE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") - file(GLOB_RECURSE IMPL_HLSL_GLOB CONFIGURE_DEPENDS "${GLOB_ROOT}/*.hlsl") - if(IMPL_HLSL_GLOB) - target_sources(${IMPL_TARGET} PRIVATE ${IMPL_HLSL_GLOB}) - set_source_files_properties(${IMPL_HLSL_GLOB} PROPERTIES - HEADER_FILE_ONLY ON - VS_TOOL_OVERRIDE None - ) - source_group("HLSL Files" FILES ${IMPL_HLSL_GLOB}) - endif() + if(IMPL_HLSL_GLOB) + target_sources(${IMPL_TARGET} PRIVATE ${IMPL_HLSL_GLOB}) + set_source_files_properties(${IMPL_HLSL_GLOB} PROPERTIES + HEADER_FILE_ONLY ON + VS_TOOL_OVERRIDE None + ) + source_group("HLSL Files" FILES ${IMPL_HLSL_GLOB}) endif() set(${IMPL_OUTPUT_VAR} ${KEYS} PARENT_SCOPE) diff --git a/src/nbl/asset/utils/CHLSLCompiler.cpp b/src/nbl/asset/utils/CHLSLCompiler.cpp index 2b98f9c192..1020fa9446 100644 --- 
a/src/nbl/asset/utils/CHLSLCompiler.cpp +++ b/src/nbl/asset/utils/CHLSLCompiler.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -430,6 +431,8 @@ std::string CHLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADE const std::string depfilePathString = preprocessOptions.depfilePath.generic_string(); params.depfilePath = depfilePathString; params.sourceIdentifier = preprocessOptions.sourceIdentifier; + if (!params.sourceIdentifier.empty()) + params.workingDirectory = std::filesystem::path(std::string(params.sourceIdentifier)).parent_path(); params.system = m_system.get(); if (!IShaderCompiler::writeDepfile(params, *dependenciesOut, preprocessOptions.includeFinder, preprocessOptions.logger)) return {}; diff --git a/src/nbl/asset/utils/IShaderCompiler.cpp b/src/nbl/asset/utils/IShaderCompiler.cpp index 754229f83b..a6cd95b441 100644 --- a/src/nbl/asset/utils/IShaderCompiler.cpp +++ b/src/nbl/asset/utils/IShaderCompiler.cpp @@ -54,13 +54,30 @@ bool IShaderCompiler::writeDepfile( std::vector depPaths; depPaths.reserve(dependencies.size() + 1); - auto addDepPath = [&depPaths](const std::filesystem::path& path) + auto addDepPath = [&depPaths, &params](std::filesystem::path path) { if (path.empty()) return; - if (!std::filesystem::exists(path)) + if (path.is_relative()) + { + if (params.workingDirectory.empty()) + return; + path = std::filesystem::path(params.workingDirectory) / path; + } + std::error_code ec; + std::filesystem::path normalized = std::filesystem::weakly_canonical(path, ec); + if (ec) + { + normalized = std::filesystem::absolute(path, ec); + if (ec) + return; + } + if (normalized.empty() || !std::filesystem::exists(normalized)) + return; + auto normalizedString = normalized.generic_string(); + if (normalizedString.find_first_of("\r\n") != std::string::npos) return; - depPaths.emplace_back(path.generic_string()); + depPaths.emplace_back(std::move(normalizedString)); }; if (!params.sourceIdentifier.empty()) @@ 
-132,10 +149,15 @@ bool IShaderCompiler::writeDepfile( return false; } + const auto depfilePath = std::filesystem::path(depfilePathString); + auto tempPath = depfilePath; + tempPath += ".tmp"; + params.system->deleteFile(tempPath); + core::smart_refctd_ptr depfile; { system::ISystem::future_t> future; - params.system->createFile(future, system::path(depfilePathString), system::IFileBase::ECF_WRITE); + params.system->createFile(future, tempPath, system::IFileBase::ECF_WRITE); if (!future.wait()) { logger.log("Failed to open depfile: %s", system::ILogger::ELL_ERROR, depfilePathString.c_str()); @@ -195,6 +217,15 @@ bool IShaderCompiler::writeDepfile( logger.log("Failed to write depfile: %s", system::ILogger::ELL_ERROR, depfilePathString.c_str()); return false; } + depfile = nullptr; + + params.system->deleteFile(depfilePath); + const std::error_code moveError = params.system->moveFileOrDirectory(tempPath, depfilePath); + if (moveError) + { + logger.log("Failed to replace depfile: %s", system::ILogger::ELL_ERROR, depfilePathString.c_str()); + return false; + } return true; } @@ -218,6 +249,8 @@ core::smart_refctd_ptr nbl::asset::IShaderCompiler::compileToSPIRV(cons const std::string depfilePathString = options.preprocessorOptions.depfilePath.generic_string(); params.depfilePath = depfilePathString; params.sourceIdentifier = options.preprocessorOptions.sourceIdentifier; + if (!params.sourceIdentifier.empty()) + params.workingDirectory = std::filesystem::path(std::string(params.sourceIdentifier)).parent_path(); params.system = m_system.get(); return IShaderCompiler::writeDepfile(params, dependencies, options.preprocessorOptions.includeFinder, options.preprocessorOptions.logger); }; From 67b9a40ba404eff91b976bbd13d6342977f43245 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 5 Jan 2026 00:12:39 +0100 Subject: [PATCH 352/472] add new flags to IFileBase, CSystemWin32 & polish NSC code --- include/nbl/system/IFileBase.h | 5 +- src/nbl/system/CSystemWin32.cpp | 
14 +- tools/nsc/main.cpp | 1015 ++++++++++++++++--------------- 3 files changed, 534 insertions(+), 500 deletions(-) diff --git a/include/nbl/system/IFileBase.h b/include/nbl/system/IFileBase.h index cb0170157e..58ab34fac5 100644 --- a/include/nbl/system/IFileBase.h +++ b/include/nbl/system/IFileBase.h @@ -25,7 +25,10 @@ class IFileBase : public core::IReferenceCounted ECF_READ_WRITE = 0b0011, ECF_MAPPABLE = 0b0100, //! Implies ECF_MAPPABLE - ECF_COHERENT = 0b1100 + ECF_COHERENT = 0b1100, + ECF_SHARE_READ = 0b10000, + ECF_SHARE_WRITE = 0b100000, + ECF_SHARE_DELETE = 0b1000000 }; //! Get size of file. diff --git a/src/nbl/system/CSystemWin32.cpp b/src/nbl/system/CSystemWin32.cpp index cab809c145..af33e1460d 100644 --- a/src/nbl/system/CSystemWin32.cpp +++ b/src/nbl/system/CSystemWin32.cpp @@ -43,6 +43,14 @@ core::smart_refctd_ptr CSystemWin32::CCaller::createFile(const std: { const bool writeAccess = flags.value&IFile::ECF_WRITE; const DWORD fileAccess = ((flags.value&IFile::ECF_READ) ? FILE_GENERIC_READ:0)|(writeAccess ? FILE_GENERIC_WRITE:0); + const bool hasShareFlags = flags.value & (IFile::ECF_SHARE_READ | IFile::ECF_SHARE_WRITE | IFile::ECF_SHARE_DELETE); + DWORD shareMode = hasShareFlags ? 0 : FILE_SHARE_READ; + if (flags.value & IFile::ECF_SHARE_READ) + shareMode |= FILE_SHARE_READ; + if (flags.value & IFile::ECF_SHARE_WRITE) + shareMode |= FILE_SHARE_WRITE; + if (flags.value & IFile::ECF_SHARE_DELETE) + shareMode |= FILE_SHARE_DELETE; SECURITY_ATTRIBUTES secAttribs{ sizeof(SECURITY_ATTRIBUTES), nullptr, FALSE }; @@ -51,8 +59,8 @@ core::smart_refctd_ptr CSystemWin32::CCaller::createFile(const std: p.make_preferred(); // Replace "/" separators with "\" // only write access should create new files if they don't exist - const auto creationDisposition = writeAccess ? 
OPEN_ALWAYS:OPEN_EXISTING; - HANDLE _native = CreateFileA(p.string().data(), fileAccess, FILE_SHARE_READ, &secAttribs, creationDisposition, FILE_ATTRIBUTE_NORMAL, nullptr); + const auto creationDisposition = writeAccess ? OPEN_ALWAYS : OPEN_EXISTING; + HANDLE _native = CreateFileA(p.string().data(), fileAccess, shareMode, &secAttribs, creationDisposition, FILE_ATTRIBUTE_NORMAL, nullptr); if (_native==INVALID_HANDLE_VALUE) { auto e = GetLastError(); @@ -107,4 +115,4 @@ bool isDebuggerAttached() return IsDebuggerPresent(); } -#endif \ No newline at end of file +#endif diff --git a/tools/nsc/main.cpp b/tools/nsc/main.cpp index 56152701b3..12738c038e 100644 --- a/tools/nsc/main.cpp +++ b/tools/nsc/main.cpp @@ -1,531 +1,554 @@ #include "nabla.h" #include "nbl/system/IApplicationFramework.h" - #include #include #include #include #include #include +#include +#include +#include +#include +#include +#include #include - #include "nbl/asset/metadata/CHLSLMetadata.h" #include "nlohmann/json.hpp" -using json = nlohmann::json; +using json = nlohmann::json; using namespace nbl; using namespace nbl::system; using namespace nbl::core; using namespace nbl::asset; -class NscLogger final : public system::IThreadsafeLogger +class TrimStdoutLogger final : public CStdoutLogger { public: - NscLogger(core::smart_refctd_ptr&& logFile, const core::bitflag logLevelMask, const core::bitflag consoleMask) - : IThreadsafeLogger(logLevelMask), m_logFile(std::move(logFile)), m_logPos(m_logFile ? 
m_logFile->getSize() : 0ull), m_consoleMask(consoleMask) - { - } + TrimStdoutLogger(const bitflag logLevelMask) : CStdoutLogger(logLevelMask) {} + +protected: + void threadsafeLog_impl(const std::string_view& fmt, E_LOG_LEVEL logLevel, va_list args) override + { + const auto str = constructLogString(fmt, logLevel, args); + size_t size = str.size(); + while (size && str[size - 1] == '\0') + --size; + if (!size) + return; + std::fwrite(str.data(), 1, size, stdout); + std::fflush(stdout); + } +}; + +class TrimFileLogger final : public CFileLogger +{ +public: + using CFileLogger::CFileLogger; + +protected: + void threadsafeLog_impl(const std::string_view& fmt, E_LOG_LEVEL logLevel, va_list args) override + { + const auto str = constructLogString(fmt, logLevel, args); + size_t size = str.size(); + while (size && str[size - 1] == '\0') + --size; + if (!size) + return; + IFile::success_t succ; + m_file->write(succ, str.data(), m_pos, size); + m_pos += succ.getBytesProcessed(); + } +}; + +class ShaderLogger final : public IThreadsafeLogger +{ +public: + ShaderLogger(smart_refctd_ptr system, path logPath, const bitflag fileMask, const bitflag consoleMask, const bool noLog) + : IThreadsafeLogger(fileMask | consoleMask), m_system(std::move(system)), m_logPath(std::move(logPath)), m_fileMask(fileMask), m_consoleMask(consoleMask), m_noLog(noLog) + { + m_stdoutLogger = make_smart_refctd_ptr(m_consoleMask); + beginBuild(); + } + + void beginBuild() + { + m_fileLogger = nullptr; + m_file = nullptr; + + if (m_noLog) + return; + if (!m_system || m_logPath.empty()) + return; + + const auto parent = std::filesystem::path(m_logPath).parent_path(); + if (!parent.empty() && !std::filesystem::exists(parent)) + std::filesystem::create_directories(parent); + + for (auto attempt = 0u; attempt < kDeleteRetries; ++attempt) + { + if (m_system->deleteFile(m_logPath)) + break; + std::this_thread::sleep_for(kDeleteDelay); + } + + ISystem::future_t> fut; + m_system->createFile(fut, m_logPath, 
kLogFlags); + + if (fut.wait()) + { + auto lk = fut.acquire(); + if (lk) + lk.move_into(m_file); + } + + if (!m_file) + return; + + m_fileLogger = make_smart_refctd_ptr(smart_refctd_ptr(m_file), true, m_fileMask); + } private: - void threadsafeLog_impl(const std::string_view& fmt, E_LOG_LEVEL logLevel, va_list args) override - { - const auto line = constructLogString(fmt, logLevel, args); - size_t lineSize = line.size(); - while (lineSize > 0 && line[lineSize - 1] == '\0') - --lineSize; - if (lineSize == 0) - return; - if (m_logFile) - { - system::IFile::success_t succ; - m_logFile->write(succ, line.data(), m_logPos, lineSize); - m_logPos += succ.getBytesProcessed(); - } - if (logLevel & m_consoleMask.value) - { - std::fwrite(line.data(), 1, lineSize, stdout); - std::fflush(stdout); - } - } - - core::smart_refctd_ptr m_logFile; - size_t m_logPos = 0ull; - core::bitflag m_consoleMask; + static constexpr auto kDeleteRetries = 3u; + static constexpr auto kDeleteDelay = std::chrono::milliseconds(100); + static constexpr auto kLogFlags = bitflag(IFileBase::ECF_WRITE) | IFileBase::ECF_SHARE_READ | IFileBase::ECF_SHARE_WRITE | IFileBase::ECF_SHARE_DELETE; + + static inline std::string formatMessageOnly(const std::string_view& fmt, va_list args) + { + va_list a; + va_copy(a, args); + const int n = std::vsnprintf(nullptr, 0, fmt.data(), a); + va_end(a); + if (n <= 0) + return {}; + std::string s(size_t(n) + 1u, '\0'); + std::vsnprintf(s.data(), s.size(), fmt.data(), args); + s.resize(size_t(n)); + return s; + } + + void threadsafeLog_impl(const std::string_view& fmt, E_LOG_LEVEL logLevel, va_list args) override + { + const auto msg = formatMessageOnly(fmt, args); + if (msg.empty()) + return; + + if (m_stdoutLogger && (logLevel & m_consoleMask.value)) + m_stdoutLogger->log("%s", logLevel, msg.c_str()); + + if (m_noLog || !(logLevel & m_fileMask.value) || !m_fileLogger) + return; + + m_fileLogger->log("%s", logLevel, msg.c_str()); + } + + smart_refctd_ptr m_system; + 
smart_refctd_ptr m_file; + smart_refctd_ptr m_stdoutLogger; + smart_refctd_ptr m_fileLogger; + path m_logPath; + bitflag m_fileMask; + bitflag m_consoleMask; + bool m_noLog = false; }; -class ShaderCompiler final : public system::IApplicationFramework +class ShaderCompiler final : public IApplicationFramework { - using base_t = system::IApplicationFramework; + using base_t = IApplicationFramework; public: - using base_t::base_t; - - bool onAppInitialized(smart_refctd_ptr&& system) override - { - const auto rawArgs = std::vector(argv.begin(), argv.end()); - auto expandArgs = [](const std::vector& args) - { - std::vector expanded; - expanded.reserve(args.size()); - for (const auto& arg : args) - { - if (arg.rfind("-MF", 0) == 0 && arg.size() > 3) - { - expanded.push_back("-MF"); - expanded.push_back(arg.substr(3)); - continue; - } - if (arg.rfind("-Fo", 0) == 0 && arg.size() > 3) - { - expanded.push_back("-Fo"); - expanded.push_back(arg.substr(3)); - continue; - } - if (arg.rfind("-Fc", 0) == 0 && arg.size() > 3) - { - expanded.push_back("-Fc"); - expanded.push_back(arg.substr(3)); - continue; - } - expanded.push_back(arg); - } - return expanded; - }; - - argparse::ArgumentParser program("nsc"); - program.add_argument("--dump-build-info").default_value(false).implicit_value(true); - program.add_argument("--file").default_value(std::string{}); - program.add_argument("-P").default_value(false).implicit_value(true); - program.add_argument("-no-nbl-builtins").default_value(false).implicit_value(true); - program.add_argument("-MD").default_value(false).implicit_value(true); - program.add_argument("-M").default_value(false).implicit_value(true); - program.add_argument("-MF").default_value(std::string{}); - program.add_argument("-Fo").default_value(std::string{}); - program.add_argument("-Fc").default_value(std::string{}); - program.add_argument("-log").default_value(std::string{}); - program.add_argument("-nolog").default_value(false).implicit_value(true); - 
program.add_argument("-quiet").default_value(false).implicit_value(true); - program.add_argument("-verbose").default_value(false).implicit_value(true); - - std::vector unknownArgs; - try - { - unknownArgs = program.parse_known_args(expandArgs(rawArgs)); - } - catch (const std::runtime_error& err) - { - std::cerr << err.what() << std::endl << program; - return false; - } - - if (program.get("--dump-build-info")) - { - json j; - - auto& modules = j["modules"]; - - auto serialize = [&](const gtml::GitInfo& info, std::string_view target) -> void - { - auto& s = modules[target.data()]; - - s["isPopulated"] = info.isPopulated; - if (info.hasUncommittedChanges.has_value()) - s["hasUncommittedChanges"] = info.hasUncommittedChanges.value(); - else - s["hasUncommittedChanges"] = "UNKNOWN, BUILT WITHOUT DIRTY-CHANGES CAPTURE"; - - s["commitAuthorName"] = info.commitAuthorName; - s["commitAuthorEmail"] = info.commitAuthorEmail; - s["commitHash"] = info.commitHash; - s["commitShortHash"] = info.commitShortHash; - s["commitDate"] = info.commitDate; - s["commitSubject"] = info.commitSubject; - s["commitBody"] = info.commitBody; - s["describe"] = info.describe; - s["branchName"] = info.branchName; - s["latestTag"] = info.latestTag; - s["latestTagName"] = info.latestTagName; - }; - - serialize(gtml::nabla_git_info, "nabla"); - serialize(gtml::dxc_git_info, "dxc"); - - const auto pretty = j.dump(4); - std::cout << pretty << std::endl; - - std::filesystem::path oPath = "build-info.json"; - - if (program.is_used("--file")) - { - const auto filePath = program.get("--file"); - if (!filePath.empty()) - oPath = filePath; - } - - std::ofstream outFile(oPath); - if (outFile.is_open()) - { - outFile << pretty; - outFile.close(); - printf("Saved \"%s\"\n", oPath.string().c_str()); - } - else - { - printf("Failed to open \"%s\" for writing\n", oPath.string().c_str()); - exit(-1); - } - - exit(0); - } - - if (not isAPILoaded()) - { - std::cerr << "Could not load Nabla API, terminating!"; - 
return false; - } - - if (system) - m_system = std::move(system); - else - m_system = system::IApplicationFramework::createSystem(); - - if (!m_system) - return false; - - const auto defaultConsoleMask = core::bitflag(ILogger::ELL_WARNING) | ILogger::ELL_ERROR; - m_logger = make_smart_refctd_ptr(defaultConsoleMask); - - if (rawArgs.size() < 2) - { - m_logger->log("Insufficient arguments.", ILogger::ELL_ERROR); - return false; - } - std::string file_to_compile = rawArgs.back(); - - if (!m_system->exists(file_to_compile, IFileBase::ECF_READ)) - { - m_logger->log("Input shader file does not exist: %s", ILogger::ELL_ERROR, file_to_compile.c_str()); - return false; - } - - const bool preprocessOnly = program.get("-P"); - const bool outputFlagFc = program.is_used("-Fc"); - const bool outputFlagFo = program.is_used("-Fo"); - if (outputFlagFc && outputFlagFo) - { - m_logger->log("Invalid arguments. Passed both -Fo and -Fc.", ILogger::ELL_ERROR); - return false; - } - if (!outputFlagFc && !outputFlagFo) - { - m_logger->log("Missing arguments. Expecting `-Fc {filename}` or `-Fo {filename}`.", ILogger::ELL_ERROR); - return false; - } - - std::string output_filepath = outputFlagFc ? program.get("-Fc") : program.get("-Fo"); - if (output_filepath.empty()) - { - m_logger->log("Invalid output file path.", ILogger::ELL_ERROR); - return false; - } - - const bool quietFlag = program.get("-quiet"); - const bool verboseFlag = program.get("-verbose"); - if (quietFlag && verboseFlag) - { - m_logger->log("Invalid arguments. Passed both -quiet and -verbose.", ILogger::ELL_ERROR); - return false; - } - - LogConfig logConfig; - if (verboseFlag) - logConfig.quiet = false; - if (quietFlag) - logConfig.quiet = true; - - logConfig.noLog = program.get("-nolog"); - if (program.is_used("-log")) - { - logConfig.path = program.get("-log"); - if (logConfig.path.empty()) - { - m_logger->log("Incorrect arguments. 
Expecting filename after -log.", ILogger::ELL_ERROR); - return false; - } - } - - if (logConfig.noLog && !logConfig.path.empty()) - { - m_logger->log("Invalid arguments. Passed both -nolog and -log.", ILogger::ELL_ERROR); - return false; - } - - const auto consoleMask = logConfig.quiet ? (core::bitflag(ILogger::ELL_WARNING) | ILogger::ELL_ERROR) : core::bitflag(ILogger::ELL_ALL); - m_logger = make_smart_refctd_ptr(consoleMask); - - if (!logConfig.noLog) - { - const std::filesystem::path logPath = logConfig.path.empty() ? std::filesystem::path(output_filepath).concat(".log") : std::filesystem::path(logConfig.path); - const auto parentDirectory = logPath.parent_path(); - if (!parentDirectory.empty() && !std::filesystem::exists(parentDirectory)) - std::filesystem::create_directories(parentDirectory); - - m_system->deleteFile(logPath); - - system::ISystem::future_t> future; - m_system->createFile(future, logPath, system::IFileBase::ECF_WRITE); - core::smart_refctd_ptr logFile; - if (future.wait()) - future.acquire().move_into(logFile); - - if (logFile) - m_logger = make_smart_refctd_ptr(std::move(logFile), core::bitflag(ILogger::ELL_ALL), consoleMask); - else - m_logger->log("Failed to open log file: %s", ILogger::ELL_ERROR, logPath.string().c_str()); - } - - const char* action = preprocessOnly ? "Preprocessing" : "Compiling"; - m_logger->log("%s %s", ILogger::ELL_INFO, action, file_to_compile.c_str()); - const char* outputType = preprocessOnly ? 
"Preprocessed" : "Compiled"; - m_logger->log("%s shader code will be saved to %s", ILogger::ELL_INFO, outputType, output_filepath.c_str()); - - m_arguments = std::move(unknownArgs); - if (!m_arguments.empty() && m_arguments.back() == file_to_compile) - m_arguments.pop_back(); - - no_nbl_builtins = program.get("-no-nbl-builtins"); - if (no_nbl_builtins) - { - m_logger->log("Unmounting builtins."); - m_system->unmountBuiltins(); - } - - DepfileConfig depfileConfig; - if (program.get("-MD") || program.get("-M") || program.is_used("-MF")) - depfileConfig.enabled = true; - if (program.is_used("-MF")) - depfileConfig.path = program.get("-MF"); - if (depfileConfig.enabled && depfileConfig.path.empty()) - depfileConfig.path = output_filepath + ".d"; - if (depfileConfig.enabled) - m_logger->log("Dependency file will be saved to %s", ILogger::ELL_INFO, depfileConfig.path.c_str()); + using base_t::base_t; + + bool onAppInitialized(smart_refctd_ptr&& system) override + { + const auto rawArgs = std::vector(argv.begin(), argv.end()); + const auto expandedArgs = expandJoinedArgs(rawArgs); + + argparse::ArgumentParser program("nsc"); + program.add_argument("--dump-build-info").default_value(false).implicit_value(true); + program.add_argument("--file").default_value(std::string{}); + program.add_argument("-P").default_value(false).implicit_value(true); + program.add_argument("-no-nbl-builtins").default_value(false).implicit_value(true); + program.add_argument("-MD").default_value(false).implicit_value(true); + program.add_argument("-M").default_value(false).implicit_value(true); + program.add_argument("-MF").default_value(std::string{}); + program.add_argument("-Fo").default_value(std::string{}); + program.add_argument("-Fc").default_value(std::string{}); + program.add_argument("-log").default_value(std::string{}); + program.add_argument("-nolog").default_value(false).implicit_value(true); + program.add_argument("-quiet").default_value(false).implicit_value(true); + 
program.add_argument("-verbose").default_value(false).implicit_value(true); + + std::vector unknownArgs; + try + { + unknownArgs = program.parse_known_args(expandedArgs); + } + catch (const std::runtime_error& err) + { + std::cerr << err.what() << std::endl << program; + return false; + } + + if (program.get("--dump-build-info")) + { + dumpBuildInfo(program); + std::exit(0); + } + + if (!isAPILoaded()) + { + std::cerr << "Could not load Nabla API, terminating!"; + return false; + } + + m_system = system ? std::move(system) : IApplicationFramework::createSystem(); + if (!m_system) + return false; + + if (rawArgs.size() < 2) + { + std::cerr << "Insufficient arguments.\n"; + return false; + } + + const std::string fileToCompile = rawArgs.back(); + if (!m_system->exists(fileToCompile, IFileBase::ECF_READ)) + { + std::cerr << "Input shader file does not exist: " << fileToCompile << "\n"; + return false; + } + + const bool preprocessOnly = program.get("-P"); + const bool hasFc = program.is_used("-Fc"); + const bool hasFo = program.is_used("-Fo"); + + if (hasFc == hasFo) + { + if (hasFc) + std::cerr << "Invalid arguments. Passed both -Fo and -Fc.\n"; + else + std::cerr << "Missing arguments. Expecting `-Fc {filename}` or `-Fo {filename}`.\n"; + return false; + } + + const std::string outputFilepath = hasFc ? program.get("-Fc") : program.get("-Fo"); + if (outputFilepath.empty()) + { + std::cerr << "Invalid output file path.\n"; + return false; + } + + const bool quiet = program.get("-quiet"); + const bool verbose = program.get("-verbose"); + if (quiet && verbose) + { + std::cerr << "Invalid arguments. Passed both -quiet and -verbose.\n"; + return false; + } + + const bool noLog = program.get("-nolog"); + const std::string logPathOverride = program.is_used("-log") ? program.get("-log") : std::string{}; + if (noLog && !logPathOverride.empty()) + { + std::cerr << "Invalid arguments. 
Passed both -nolog and -log.\n"; + return false; + } + + const auto logPath = logPathOverride.empty() ? std::filesystem::path(outputFilepath).concat(".log") : std::filesystem::path(logPathOverride); + const auto fileMask = bitflag(ILogger::ELL_ALL); + const auto consoleMask = bitflag(ILogger::ELL_WARNING) | ILogger::ELL_ERROR; + + m_logger = make_smart_refctd_ptr(m_system, logPath, fileMask, consoleMask, noLog); + + m_arguments = std::move(unknownArgs); + if (!m_arguments.empty() && m_arguments.back() == fileToCompile) + m_arguments.pop_back(); + + bool noNblBuiltins = program.get("-no-nbl-builtins"); + if (noNblBuiltins) + { + m_logger->log("Unmounting builtins."); + m_system->unmountBuiltins(); + } + + DepfileConfig dep; + if (program.get("-MD") || program.get("-M") || program.is_used("-MF")) + dep.enabled = true; + if (program.is_used("-MF")) + dep.path = program.get("-MF"); + if (dep.enabled && dep.path.empty()) + dep.path = outputFilepath + ".d"; + if (dep.enabled) + m_logger->log("Dependency file will be saved to %s", ILogger::ELL_INFO, dep.path.c_str()); #ifndef NBL_EMBED_BUILTIN_RESOURCES - if (!no_nbl_builtins) { - m_system->unmountBuiltins(); - no_nbl_builtins = true; - m_logger->log("nsc.exe was compiled with builtin resources disabled. Force enabling -no-nbl-builtins.", ILogger::ELL_WARNING); - } + if (!noNblBuiltins) + { + m_system->unmountBuiltins(); + noNblBuiltins = true; + m_logger->log("nsc.exe was compiled with builtin resources disabled. 
Force enabling -no-nbl-builtins.", ILogger::ELL_WARNING); + } #endif - if (std::find(m_arguments.begin(), m_arguments.end(), "-E") == m_arguments.end()) - { - m_arguments.push_back("-E"); - m_arguments.push_back("main"); - } - - for (size_t i = 0; i + 1 < m_arguments.size(); ++i) - { - const auto& arg = m_arguments[i]; - if (arg == "-I") - m_include_search_paths.emplace_back(m_arguments[i + 1]); - } - - auto [shader, shaderStage] = open_shader_file(file_to_compile); - if (shader->getContentType() != IShader::E_CONTENT_TYPE::ECT_HLSL) - { - m_logger->log("Error. Loaded shader file content is not HLSL.", ILogger::ELL_ERROR); - return false; - } - - auto start = std::chrono::high_resolution_clock::now(); - smart_refctd_ptr compilation_result; - std::string preprocessing_result; - std::string_view result_view; - if (preprocessOnly) - { - preprocessing_result = preprocess_shader(shader.get(), shaderStage, file_to_compile, depfileConfig); - result_view = preprocessing_result; - } - else - { - compilation_result = compile_shader(shader.get(), shaderStage, file_to_compile, depfileConfig); - result_view = { (const char*)compilation_result->getContent()->getPointer(), compilation_result->getContent()->getSize() }; - } - auto end = std::chrono::high_resolution_clock::now(); - - std::string operationType = preprocessOnly ? "preprocessing" : "compilation"; - const bool success = preprocessOnly ? 
preprocessing_result != std::string{} : bool(compilation_result); - if (success) - { - m_logger->log("Shader " + operationType + " successful.", ILogger::ELL_INFO); - const auto took = std::to_string(std::chrono::duration_cast(end - start).count()); - m_logger->log("Took %s ms.", ILogger::ELL_PERFORMANCE, took.c_str()); - { - const auto location = std::filesystem::path(output_filepath); - const auto parentDirectory = location.parent_path(); - - if (!std::filesystem::exists(parentDirectory)) - { - if (!std::filesystem::create_directories(parentDirectory)) - { - m_logger->log("Failed to create parent directory for the " + output_filepath + "output!", ILogger::ELL_ERROR); - return false; - } - } - } - - std::fstream output_file(output_filepath, std::ios::out | std::ios::binary); - - if (!output_file.is_open()) - { - m_logger->log("Failed to open output file: " + output_filepath, ILogger::ELL_ERROR); - return false; - } - - output_file.write(result_view.data(), result_view.size()); - - if (output_file.fail()) - { - m_logger->log("Failed to write to output file: " + output_filepath, ILogger::ELL_ERROR); - output_file.close(); - return false; - } - - output_file.close(); - - if (output_file.fail()) - { - m_logger->log("Failed to close output file: " + output_filepath, ILogger::ELL_ERROR); - return false; - } - - if (depfileConfig.enabled) - m_logger->log("Dependency file written to %s", ILogger::ELL_INFO, depfileConfig.path.c_str()); - - return true; - } - else - { - m_logger->log("Shader " + operationType + " failed.", ILogger::ELL_ERROR); - return false; - } - } - - void workLoopBody() override {} - - bool keepRunning() override { return false; } + if (std::find(m_arguments.begin(), m_arguments.end(), "-E") == m_arguments.end()) + { + m_arguments.push_back("-E"); + m_arguments.push_back("main"); + } + + for (size_t i = 0; i + 1 < m_arguments.size(); ++i) + { + if (m_arguments[i] == "-I") + m_include_search_paths.emplace_back(m_arguments[i + 1]); + } + + const char* 
const action = preprocessOnly ? "Preprocessing" : "Compiling"; + const char* const outType = preprocessOnly ? "Preprocessed" : "Compiled"; + m_logger->log("%s %s", ILogger::ELL_INFO, action, fileToCompile.c_str()); + m_logger->log("%s shader code will be saved to %s", ILogger::ELL_INFO, outType, outputFilepath.c_str()); + + auto [shader, shaderStage] = open_shader_file(fileToCompile); + if (!shader || shader->getContentType() != IShader::E_CONTENT_TYPE::ECT_HLSL) + { + m_logger->log("Error. Loaded shader file content is not HLSL.", ILogger::ELL_ERROR); + return false; + } + + const auto start = std::chrono::high_resolution_clock::now(); + const auto job = runShaderJob(shader.get(), shaderStage, fileToCompile, dep, preprocessOnly); + const auto end = std::chrono::high_resolution_clock::now(); + + const char* const op = preprocessOnly ? "preprocessing" : "compilation"; + if (!job.ok) + { + m_logger->log("Shader %s failed.", ILogger::ELL_ERROR, op); + return false; + } + + const auto took = std::to_string(std::chrono::duration_cast(end - start).count()); + m_logger->log("Shader %s successful.", ILogger::ELL_INFO, op); + m_logger->log("Took %s ms.", ILogger::ELL_PERFORMANCE, took.c_str()); + + const auto outParent = std::filesystem::path(outputFilepath).parent_path(); + if (!outParent.empty() && !std::filesystem::exists(outParent)) + { + if (!std::filesystem::create_directories(outParent)) + { + m_logger->log("Failed to create parent directory for output %s.", ILogger::ELL_ERROR, outputFilepath.c_str()); + return false; + } + } + + std::fstream out(outputFilepath, std::ios::out | std::ios::binary); + if (!out.is_open()) + { + m_logger->log("Failed to open output file: %s", ILogger::ELL_ERROR, outputFilepath.c_str()); + return false; + } + + out.write(job.view.data(), job.view.size()); + if (out.fail()) + { + m_logger->log("Failed to write to output file: %s", ILogger::ELL_ERROR, outputFilepath.c_str()); + out.close(); + return false; + } + + out.close(); + if 
(out.fail()) + { + m_logger->log("Failed to close output file: %s", ILogger::ELL_ERROR, outputFilepath.c_str()); + return false; + } + + if (dep.enabled) + m_logger->log("Dependency file written to %s", ILogger::ELL_INFO, dep.path.c_str()); + + return true; + } + + void workLoopBody() override {} + bool keepRunning() override { return false; } private: - - struct LogConfig - { - bool quiet = true; - bool noLog = false; - std::string path; - }; - - struct DepfileConfig - { - bool enabled = false; - std::string path; - }; - - std::string preprocess_shader(const IShader* shader, hlsl::ShaderStage shaderStage, std::string_view sourceIdentifier, const DepfileConfig& depfileConfig) { - smart_refctd_ptr hlslcompiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); - - CHLSLCompiler::SPreprocessorOptions options = {}; - options.sourceIdentifier = sourceIdentifier; - options.logger = m_logger.get(); - - auto includeFinder = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); - auto includeLoader = includeFinder->getDefaultFileSystemLoader(); - - // because before real compilation we do preprocess the input it doesn't really matter we proxy include search direcotries further with dxcOptions since at the end all includes are resolved to single file - for (const auto& it : m_include_search_paths) - includeFinder->addSearchPath(it, includeLoader); - - options.includeFinder = includeFinder.get(); - options.depfile = depfileConfig.enabled; - options.depfilePath = depfileConfig.path; - - const char* code_ptr = (const char*)shader->getContent()->getPointer(); - std::string_view code({ code_ptr, strlen(code_ptr)}); - - return hlslcompiler->preprocessShader(std::string(code), shaderStage, options, nullptr); - } - - core::smart_refctd_ptr compile_shader(const IShader* shader, hlsl::ShaderStage shaderStage, std::string_view sourceIdentifier, const DepfileConfig& depfileConfig) { - smart_refctd_ptr hlslcompiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); - - 
CHLSLCompiler::SOptions options = {}; - options.stage = shaderStage; - options.preprocessorOptions.sourceIdentifier = sourceIdentifier; - options.preprocessorOptions.logger = m_logger.get(); - - options.debugInfoFlags = core::bitflag(asset::IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_TOOL_BIT); - options.dxcOptions = std::span(m_arguments); - - auto includeFinder = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); - auto includeLoader = includeFinder->getDefaultFileSystemLoader(); - - // because before real compilation we do preprocess the input it doesn't really matter we proxy include search direcotries further with dxcOptions since at the end all includes are resolved to single file - for(const auto& it : m_include_search_paths) - includeFinder->addSearchPath(it, includeLoader); - - options.preprocessorOptions.includeFinder = includeFinder.get(); - options.preprocessorOptions.depfile = depfileConfig.enabled; - options.preprocessorOptions.depfilePath = depfileConfig.path; - - return hlslcompiler->compileToSPIRV((const char*)shader->getContent()->getPointer(), options); - } - - - std::tuple, hlsl::ShaderStage> open_shader_file(std::string filepath) { - - m_assetMgr = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); - - IAssetLoader::SAssetLoadParams lp = {}; - lp.logger = m_logger.get(); - lp.workingDirectory = localInputCWD; - auto assetBundle = m_assetMgr->getAsset(filepath, lp); - const auto assets = assetBundle.getContents(); - const auto* metadata = assetBundle.getMetadata(); - if (assets.empty()) { - m_logger->log("Could not load shader %s", ILogger::ELL_ERROR, filepath); - return {nullptr, hlsl::ShaderStage::ESS_UNKNOWN}; - } - assert(assets.size() == 1); - - // could happen when the file is missing an extension and we can't deduce its a shader - if (assetBundle.getAssetType() == IAsset::ET_BUFFER) - { - auto buf = IAsset::castDown(assets[0]); - std::string source; source.resize(buf->getSize()+1); - memcpy(source.data(),buf->getPointer(),buf->getSize()); - 
return { core::make_smart_refctd_ptr(source.data(), IShader::E_CONTENT_TYPE::ECT_HLSL, std::move(filepath)), hlsl::ShaderStage::ESS_UNKNOWN}; - } - else if (assetBundle.getAssetType() == IAsset::ET_SHADER) - { - const auto hlslMetadata = static_cast(metadata); - return { smart_refctd_ptr_static_cast(assets[0]), hlslMetadata->shaderStages->front()}; - } - else - { - m_logger->log("file '%s' is an asset that is neither a buffer or a shader.", ILogger::ELL_ERROR, filepath); - } - - return {nullptr, hlsl::ShaderStage::ESS_UNKNOWN}; - } - - - bool no_nbl_builtins{ false }; - smart_refctd_ptr m_system; - smart_refctd_ptr m_logger; - std::vector m_arguments, m_include_search_paths; - core::smart_refctd_ptr m_assetMgr; - - + struct DepfileConfig + { + bool enabled = false; + std::string path; + }; + + struct RunResult + { + bool ok = false; + std::string text; + smart_refctd_ptr compiled; + std::string_view view; + }; + + static std::vector expandJoinedArgs(const std::vector& args) + { + std::vector out; + out.reserve(args.size()); + + auto split = [&](const std::string& a, const char* p) + { + const size_t n = std::strlen(p); + if (a.rfind(p, 0) == 0 && a.size() > n) + { + out.emplace_back(p); + out.emplace_back(a.substr(n)); + return true; + } + return false; + }; + + for (const auto& a : args) + { + if (split(a, "-MF")) continue; + if (split(a, "-Fo")) continue; + if (split(a, "-Fc")) continue; + out.push_back(a); + } + + return out; + } + + static void dumpBuildInfo(const argparse::ArgumentParser& program) + { + json j; + auto& modules = j["modules"]; + + auto serialize = [&](const gtml::GitInfo& info, std::string_view target) + { + auto& s = modules[target.data()]; + s["isPopulated"] = info.isPopulated; + s["hasUncommittedChanges"] = info.hasUncommittedChanges.has_value() ? 
json(info.hasUncommittedChanges.value()) : json("UNKNOWN, BUILT WITHOUT DIRTY-CHANGES CAPTURE"); + s["commitAuthorName"] = info.commitAuthorName; + s["commitAuthorEmail"] = info.commitAuthorEmail; + s["commitHash"] = info.commitHash; + s["commitShortHash"] = info.commitShortHash; + s["commitDate"] = info.commitDate; + s["commitSubject"] = info.commitSubject; + s["commitBody"] = info.commitBody; + s["describe"] = info.describe; + s["branchName"] = info.branchName; + s["latestTag"] = info.latestTag; + s["latestTagName"] = info.latestTagName; + }; + + serialize(gtml::nabla_git_info, "nabla"); + serialize(gtml::dxc_git_info, "dxc"); + + const auto pretty = j.dump(4); + std::cout << pretty << std::endl; + + std::filesystem::path oPath = "build-info.json"; + if (program.is_used("--file")) + { + const auto filePath = program.get("--file"); + if (!filePath.empty()) + oPath = filePath; + } + + std::ofstream outFile(oPath); + if (!outFile.is_open()) + { + std::printf("Failed to open \"%s\" for writing\n", oPath.string().c_str()); + std::exit(-1); + } + + outFile << pretty; + std::printf("Saved \"%s\"\n", oPath.string().c_str()); + } + + RunResult runShaderJob(const IShader* shader, hlsl::ShaderStage shaderStage, std::string_view sourceIdentifier, const DepfileConfig& dep, const bool preprocessOnly) + { + RunResult r; + auto hlslcompiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + + auto includeFinder = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + auto includeLoader = includeFinder->getDefaultFileSystemLoader(); + for (const auto& p : m_include_search_paths) + includeFinder->addSearchPath(p, includeLoader); + + if (preprocessOnly) + { + CHLSLCompiler::SPreprocessorOptions opt = {}; + opt.sourceIdentifier = sourceIdentifier; + opt.logger = m_logger.get(); + opt.includeFinder = includeFinder.get(); + opt.depfile = dep.enabled; + opt.depfilePath = dep.path; + + const char* codePtr = (const char*)shader->getContent()->getPointer(); + std::string_view 
code(codePtr, std::strlen(codePtr)); + + r.text = hlslcompiler->preprocessShader(std::string(code), shaderStage, opt, nullptr); + r.ok = !r.text.empty(); + r.view = r.text; + return r; + } + + CHLSLCompiler::SOptions opt = {}; + opt.stage = shaderStage; + opt.preprocessorOptions.sourceIdentifier = sourceIdentifier; + opt.preprocessorOptions.logger = m_logger.get(); + opt.preprocessorOptions.includeFinder = includeFinder.get(); + opt.preprocessorOptions.depfile = dep.enabled; + opt.preprocessorOptions.depfilePath = dep.path; + opt.debugInfoFlags = bitflag(IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_TOOL_BIT); + opt.dxcOptions = std::span(m_arguments); + + r.compiled = hlslcompiler->compileToSPIRV((const char*)shader->getContent()->getPointer(), opt); + r.ok = bool(r.compiled); + if (r.ok) + r.view = { (const char*)r.compiled->getContent()->getPointer(), r.compiled->getContent()->getSize() }; + + return r; + } + + std::tuple, hlsl::ShaderStage> open_shader_file(std::string filepath) + { + m_assetMgr = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = m_logger.get(); + lp.workingDirectory = localInputCWD; + + auto bundle = m_assetMgr->getAsset(filepath, lp); + const auto assets = bundle.getContents(); + const auto* metadata = bundle.getMetadata(); + + if (assets.empty()) + { + m_logger->log("Could not load shader %s", ILogger::ELL_ERROR, filepath.c_str()); + return { nullptr, hlsl::ShaderStage::ESS_UNKNOWN }; + } + + if (bundle.getAssetType() == IAsset::ET_BUFFER) + { + auto buf = IAsset::castDown(assets[0]); + std::string source; + source.resize(buf->getSize() + 1); + std::memcpy(source.data(), buf->getPointer(), buf->getSize()); + return { make_smart_refctd_ptr(source.data(), IShader::E_CONTENT_TYPE::ECT_HLSL, std::move(filepath)), hlsl::ShaderStage::ESS_UNKNOWN }; + } + + if (bundle.getAssetType() == IAsset::ET_SHADER) + { + const auto hlslMetadata = static_cast(metadata); + return { 
smart_refctd_ptr_static_cast(assets[0]), hlslMetadata->shaderStages->front() }; + } + + m_logger->log("file '%s' is an asset that is neither a buffer or a shader.", ILogger::ELL_ERROR, filepath.c_str()); + return { nullptr, hlsl::ShaderStage::ESS_UNKNOWN }; + } + + smart_refctd_ptr m_system; + smart_refctd_ptr m_logger; + std::vector m_arguments, m_include_search_paths; + smart_refctd_ptr m_assetMgr; }; NBL_MAIN_FUNC(ShaderCompiler) From 00a613d1166f919254a03d56fce5dfab2b946f9a Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 5 Jan 2026 01:04:38 +0100 Subject: [PATCH 353/472] cmake: show config name in NSC comment --- cmake/common.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index a4ce345096..2de6dc758f 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1514,6 +1514,7 @@ namespace @IMPL_NAMESPACE@ { ) get_filename_component(NBL_NSC_INPUT_NAME "${TARGET_INPUT}" NAME) + get_filename_component(NBL_NSC_CONFIG_NAME "${CONFIG_FILE}" NAME) set(NBL_NSC_BYPRODUCTS "${NBL_NSC_LOG_PATH}") if(NSC_USE_DEPFILE) list(APPEND NBL_NSC_BYPRODUCTS "${DEPFILE_PATH}") @@ -1524,7 +1525,7 @@ namespace @IMPL_NAMESPACE@ { BYPRODUCTS ${NBL_NSC_BYPRODUCTS} COMMAND ${NBL_NSC_COMPILE_COMMAND} DEPENDS ${DEPENDS_ON} - COMMENT "${NBL_NSC_INPUT_NAME}" + COMMENT "${NBL_NSC_CONFIG_NAME} (${NBL_NSC_INPUT_NAME})" VERBATIM COMMAND_EXPAND_LISTS ) From dc13c4544e74d89a2807fa20a954d45e21a7124d Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 5 Jan 2026 07:19:12 +0100 Subject: [PATCH 354/472] ECF_SHARE_READ_WRITE for backwards compatibility --- include/nbl/system/IFileBase.h | 3 +-- src/nbl/system/CSystemWin32.cpp | 7 ++----- tools/nsc/main.cpp | 2 +- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/include/nbl/system/IFileBase.h b/include/nbl/system/IFileBase.h index 58ab34fac5..c9ceb13a04 100644 --- a/include/nbl/system/IFileBase.h +++ b/include/nbl/system/IFileBase.h @@ -26,8 +26,7 @@ 
class IFileBase : public core::IReferenceCounted ECF_MAPPABLE = 0b0100, //! Implies ECF_MAPPABLE ECF_COHERENT = 0b1100, - ECF_SHARE_READ = 0b10000, - ECF_SHARE_WRITE = 0b100000, + ECF_SHARE_READ_WRITE = 0b100000, ECF_SHARE_DELETE = 0b1000000 }; diff --git a/src/nbl/system/CSystemWin32.cpp b/src/nbl/system/CSystemWin32.cpp index af33e1460d..2798b4fb27 100644 --- a/src/nbl/system/CSystemWin32.cpp +++ b/src/nbl/system/CSystemWin32.cpp @@ -43,11 +43,8 @@ core::smart_refctd_ptr CSystemWin32::CCaller::createFile(const std: { const bool writeAccess = flags.value&IFile::ECF_WRITE; const DWORD fileAccess = ((flags.value&IFile::ECF_READ) ? FILE_GENERIC_READ:0)|(writeAccess ? FILE_GENERIC_WRITE:0); - const bool hasShareFlags = flags.value & (IFile::ECF_SHARE_READ | IFile::ECF_SHARE_WRITE | IFile::ECF_SHARE_DELETE); - DWORD shareMode = hasShareFlags ? 0 : FILE_SHARE_READ; - if (flags.value & IFile::ECF_SHARE_READ) - shareMode |= FILE_SHARE_READ; - if (flags.value & IFile::ECF_SHARE_WRITE) + DWORD shareMode = FILE_SHARE_READ; + if (flags.value & IFile::ECF_SHARE_READ_WRITE) shareMode |= FILE_SHARE_WRITE; if (flags.value & IFile::ECF_SHARE_DELETE) shareMode |= FILE_SHARE_DELETE; diff --git a/tools/nsc/main.cpp b/tools/nsc/main.cpp index 12738c038e..5ab01d72e5 100644 --- a/tools/nsc/main.cpp +++ b/tools/nsc/main.cpp @@ -111,7 +111,7 @@ class ShaderLogger final : public IThreadsafeLogger private: static constexpr auto kDeleteRetries = 3u; static constexpr auto kDeleteDelay = std::chrono::milliseconds(100); - static constexpr auto kLogFlags = bitflag(IFileBase::ECF_WRITE) | IFileBase::ECF_SHARE_READ | IFileBase::ECF_SHARE_WRITE | IFileBase::ECF_SHARE_DELETE; + static constexpr auto kLogFlags = bitflag(IFileBase::ECF_WRITE) | IFileBase::ECF_SHARE_READ_WRITE | IFileBase::ECF_SHARE_DELETE; static inline std::string formatMessageOnly(const std::string_view& fmt, va_list args) { From 9b008be07cbead907ea36ff687c035d50abb7cb8 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 5 
Jan 2026 08:19:39 +0100 Subject: [PATCH 355/472] post-merge examples_tests submodule update --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 58b42cfc87..4c4e5e803e 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 58b42cfc87274db606d593d5baec787b626bb945 +Subproject commit 4c4e5e803e81e043390699f76cc51c6c360908d1 From 232fc7b8e64353b940fef265b702735cd8ec52eb Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 5 Jan 2026 21:50:30 +0100 Subject: [PATCH 356/472] Refactored CArchiveLoaderZIP --- src/nbl/system/CArchiveLoaderZip.cpp | 320 ++++++++++++++++++++++++--- src/nbl/system/CArchiveLoaderZip.h | 12 + 2 files changed, 306 insertions(+), 26 deletions(-) diff --git a/src/nbl/system/CArchiveLoaderZip.cpp b/src/nbl/system/CArchiveLoaderZip.cpp index 9f22e60790..7b949893d6 100644 --- a/src/nbl/system/CArchiveLoaderZip.cpp +++ b/src/nbl/system/CArchiveLoaderZip.cpp @@ -35,7 +35,16 @@ struct SZIPFileCentralDirFileHeader // extra field (variable size) // file comment (variable size) + static constexpr uint32_t ExpectedSignature = 0x02014b50u; + + size_t calcSize() const + { + return sizeof(SZIPFileCentralDirFileHeader) + FilenameLength + ExtraFieldLength + FileCommentLength; + } } PACK_STRUCT; + +static_assert(sizeof(SZIPFileCentralDirFileHeader) == 46); + struct SZIPFileCentralDirEnd { static inline constexpr uint32_t ExpectedSig = 0x06054b50u; @@ -65,9 +74,13 @@ struct SGZIPMemberHeader uint32_t time; uint8_t extraFlags; // slow compress = 2, fast compress = 4 uint8_t operatingSystem; + + static constexpr uint16_t ExpectedSignature = 0x8b1fu; } PACK_STRUCT; #include "nbl/nblunpack.h" +static_assert(sizeof(SGZIPMemberHeader) == 10); + enum E_GZIP_FLAGS { EGZF_TEXT_DAT = 1, @@ -92,7 +105,288 @@ constexpr int16_t ZIP_INFO_IN_DATA_DESCRIPTOR = 0x0008; using namespace nbl; using namespace nbl::system; +core::smart_refctd_ptr 
CArchiveLoaderZip::createArchiveFromGZIP(core::smart_refctd_ptr&& file, const std::string_view& password) const +{ + std::shared_ptr> items = std::make_shared>(); + core::vector itemsMetadata; + + items->reserve(1u); + itemsMetadata.reserve(1u); + auto addItem = [&items, &itemsMetadata](const std::string& _path, const size_t offset, const SZIPFileHeader& meta) -> void + { + // we need to have a filename or we skip + if (_path.empty()) + return; + + auto& item = items->emplace_back(); + item.pathRelativeToArchive = _path; + item.size = meta.DataDescriptor.UncompressedSize; + item.offset = offset; + item.ID = itemsMetadata.size(); + item.allocatorType = meta.CompressionMethod ? IFileArchive::EAT_VIRTUAL_ALLOC : IFileArchive::EAT_NULL; + itemsMetadata.push_back(meta); + }; + + std::string filename; + size_t gzipFileOffset = 0ull; + auto readStringFromFile = [&file, &gzipFileOffset](auto charCallback) -> bool + { + char c = 0x45; // make sure we start with non-zero char + while (c) + { + IFile::success_t success; + file->read(success, &c, gzipFileOffset, sizeof(c)); + if (!success) + return false; + gzipFileOffset += success.getBytesToProcess(); + charCallback(c); + } + // if string is not null terminated, something went wrong reading the file + return !c; + }; + + SGZIPMemberHeader gzipHeader; + { + IFile::success_t success; + file->read(success, &gzipHeader, gzipFileOffset, sizeof(gzipHeader)); + if (!success) + return nullptr; + gzipFileOffset += sizeof(gzipHeader); + } + + //! The gzip file format seems to think that there can be multiple files in a gzip file + //! TODO: But OLD Irrlicht Impl doesn't honor it!? 
+ if (gzipHeader.sig != SGZIPMemberHeader::ExpectedSignature) + return nullptr; + + // now get the file info + if (gzipHeader.flags & EGZF_EXTRA_FIELDS) + { + // read lenth of extra data + uint16_t dataLen; + IFile::success_t success; + file->read(success, &dataLen, gzipFileOffset, sizeof(dataLen)); + if (!success) + return nullptr; + gzipFileOffset += success.getBytesToProcess(); + // skip the extra data + gzipFileOffset += dataLen; + } + // + if (gzipHeader.flags & EGZF_FILE_NAME) + { + filename.clear(); + if (!readStringFromFile([&](const char c) {filename.push_back(c); })) + return nullptr; + } + // + if (gzipHeader.flags & EGZF_COMMENT) + { + if (!readStringFromFile([](const char c) {})) + return nullptr; + } + // skip crc16 + if (gzipHeader.flags & EGZF_CRC16) + gzipFileOffset += 2; + + + SZIPFileHeader header{}; + header.FilenameLength = filename.length(); + header.CompressionMethod = gzipHeader.compressionMethod; + header.DataDescriptor.CompressedSize = file->getSize() - (gzipFileOffset + sizeof(uint64_t)); + + const size_t itemOffset = gzipFileOffset; + + gzipFileOffset += header.DataDescriptor.CompressedSize; + // read CRC + { + IFile::success_t success; + file->read(success, &header.DataDescriptor.CRC32, gzipFileOffset, sizeof(header.DataDescriptor.CRC32)); + if (!success) + return nullptr; + gzipFileOffset += success.getBytesToProcess(); + } + // read uncompressed size + { + IFile::success_t success; + file->read(success, &header.DataDescriptor.UncompressedSize, gzipFileOffset, sizeof(header.DataDescriptor.UncompressedSize)); + if (!success) + return nullptr; + gzipFileOffset += success.getBytesToProcess(); + } + + // + addItem(filename, itemOffset, header); + + assert(items->size() == itemsMetadata.size()); + if (items->empty()) + return nullptr; + + return core::make_smart_refctd_ptr(std::move(file), core::smart_refctd_ptr(m_logger.get()), items, std::move(itemsMetadata)); +} +core::smart_refctd_ptr 
CArchiveLoaderZip::createArchiveFromZIP(core::smart_refctd_ptr&& file, const std::string_view& password) const +{ + SZIPFileCentralDirEnd dirEnd; + { + dirEnd.Sig = 0u; + // First place where the end record could be stored + size_t endOfCentralDirectoryOffset = file->getSize() - sizeof(SZIPFileCentralDirEnd) + 1ull; + while (dirEnd.Sig != SZIPFileCentralDirEnd::ExpectedSig) + { + IFile::success_t success; + file->read(success, &dirEnd, --endOfCentralDirectoryOffset, sizeof(dirEnd)); + if (!success) + return nullptr; + } + } + + // multiple disks are not supported + if (dirEnd.NumberDisk != 0) + { + assert(false); + return nullptr; + } + + std::shared_ptr> items = std::make_shared>(); + core::vector itemsMetadata; + + items->reserve(dirEnd.TotalEntries); + itemsMetadata.reserve(dirEnd.TotalEntries); + auto addItem = [&items, &itemsMetadata](const std::string& _path, const size_t offset, const SZIPFileHeader& meta) -> void + { + // we need to have a filename or we skip + if (_path.empty()) + return; + + auto& item = items->emplace_back(); + item.pathRelativeToArchive = _path; + item.size = meta.DataDescriptor.UncompressedSize; + item.offset = offset; + item.ID = itemsMetadata.size(); + item.allocatorType = meta.CompressionMethod ? 
IFileArchive::EAT_VIRTUAL_ALLOC : IFileArchive::EAT_NULL; + itemsMetadata.push_back(meta); + }; + + size_t centralDirectoryOffset = dirEnd.Offset; + for (int i = 0; i < dirEnd.TotalEntries; ++i) + { + SZIPFileCentralDirFileHeader centralDirectoryHeader; + { + IFile::success_t success; + file->read(success, ¢ralDirectoryHeader, centralDirectoryOffset, sizeof(SZIPFileCentralDirFileHeader)); + if (!success) + return nullptr; + } + centralDirectoryOffset += centralDirectoryHeader.calcSize(); + + if (centralDirectoryHeader.Sig != SZIPFileCentralDirFileHeader::ExpectedSignature) + { + // .zip file is corrupted + assert(false); + return nullptr; + } + + SZIPFileHeader localFileHeader; + { + IFile::success_t success; + file->read(success, &localFileHeader, centralDirectoryHeader.RelativeOffsetOfLocalHeader, sizeof(SZIPFileHeader)); + if (!success) + return nullptr; + } + + std::string filename; + filename.resize(localFileHeader.FilenameLength); + { + IFile::success_t success; + const size_t filenameOffset = centralDirectoryHeader.RelativeOffsetOfLocalHeader + sizeof(SZIPFileHeader); + file->read(success, filename.data(), filenameOffset, localFileHeader.FilenameLength); + // TODO: assertion + if (!success) + return nullptr; + } + + // AES encryption + if ((localFileHeader.GeneralBitFlag & ZIP_FILE_ENCRYPTED) && (localFileHeader.CompressionMethod == 99)) + { + SZipFileExtraHeader extraHeader; + SZipFileAESExtraData data; + + size_t localOffset = centralDirectoryHeader.RelativeOffsetOfLocalHeader + sizeof(SZIPFileHeader) + localFileHeader.FilenameLength; + size_t offset = localOffset + localFileHeader.ExtraFieldLength; + while (true) + { + { + IFile::success_t success; + file->read(success, &extraHeader, localOffset, sizeof(extraHeader)); + if (!success) + break; + localOffset += success.getBytesToProcess(); + if (localOffset > offset) + break; + } + + if (extraHeader.ID != 0x9901u) + continue; + + { + IFile::success_t success; + file->read(success, &data, localOffset, 
sizeof(data)); + if (!success) + break; + localOffset += success.getBytesToProcess(); + if (localOffset > offset) + break; + } + if (data.Vendor[0] == 'A' && data.Vendor[1] == 'E') + { +#ifdef _NBL_COMPILE_WITH_ZIP_ENCRYPTION_ + // encode values into Sig + // AE-Version | Strength | ActualMode + localFileHeader.Sig = + ((data.Version & 0xff) << 24) | + (data.EncryptionStrength << 16) | + (data.CompressionMode); + break; +#else + filename.clear(); // no support, can't decrypt +#endif + } + } + } + + const size_t fileDataOffset = centralDirectoryHeader.RelativeOffsetOfLocalHeader + localFileHeader.calcSize(); + addItem(filename, fileDataOffset, localFileHeader); + } + + assert(items->size() == itemsMetadata.size()); + if (items->empty()) + return nullptr; + + return core::make_smart_refctd_ptr(std::move(file), core::smart_refctd_ptr(m_logger.get()), items, std::move(itemsMetadata)); +} + +core::smart_refctd_ptr CArchiveLoaderZip::createArchive_impl(core::smart_refctd_ptr&& file, const std::string_view& password) const +{ + if (!file) + return nullptr; + + uint16_t sig; + IFile::success_t success; + file->read(success, &sig, 0, sizeof(sig)); + if (!success) + return nullptr; + const bool isGZIP = sig == SGZIPMemberHeader::ExpectedSignature; + if (isGZIP) + { + return createArchiveFromGZIP(std::move(file), password); + } + else + { + return createArchiveFromZIP(std::move(file), password); + } +} +#if 0 core::smart_refctd_ptr CArchiveLoaderZip::createArchive_impl(core::smart_refctd_ptr&& file, const std::string_view& password) const { if (!file) @@ -339,32 +633,6 @@ core::smart_refctd_ptr CArchiveLoaderZip::createArchive_impl(core: return core::make_smart_refctd_ptr(std::move(file),core::smart_refctd_ptr(m_logger.get()), items, std::move(itemsMetadata)); } - -#if 0 -bool CFileArchiveZip::scanCentralDirectoryHeader(size_t& offset) -{ - std::filesystem::path ZipFileName = ""; - SZIPFileCentralDirFileHeader entry; - { - system::future fut; - m_file->read(fut, &entry, 
offset, sizeof(SZIPFileCentralDirFileHeader)); - fut.get(); - offset += sizeof(SZIPFileCentralDirFileHeader); - } - - if (entry.Sig != 0x02014b50) - return false; // central dir headers end here. - - const long pos = offset; - offset = entry.RelativeOffsetOfLocalHeader; - scanZipHeader(offset, true); - offset = pos + entry.FilenameLength + entry.ExtraFieldLength + entry.FileCommentLength; - m_fileInfo.back().header.DataDescriptor.CompressedSize = entry.CompressedSize; - m_fileInfo.back().header.DataDescriptor.UncompressedSize = entry.UncompressedSize; - m_fileInfo.back().header.DataDescriptor.CRC32 = entry.CRC32; - m_files.back().size = entry.UncompressedSize; - return true; -} #endif CFileArchive::file_buffer_t CArchiveLoaderZip::CArchive::getFileBuffer(const IFileArchive::SFileList::found_t& item) diff --git a/src/nbl/system/CArchiveLoaderZip.h b/src/nbl/system/CArchiveLoaderZip.h index 0a57b62ec9..1524bbe22d 100644 --- a/src/nbl/system/CArchiveLoaderZip.h +++ b/src/nbl/system/CArchiveLoaderZip.h @@ -31,8 +31,18 @@ class CArchiveLoaderZip final : public IArchiveLoader int16_t ExtraFieldLength; // filename (variable size) // extra field (variable size ) + + static constexpr uint32_t ExpectedSignature = 0x04034b50u; + + size_t calcSize() const + { + return sizeof(SZIPFileHeader) + FilenameLength + ExtraFieldLength; + } } PACK_STRUCT; #include "nbl/nblunpack.h" + + static_assert(sizeof(SZIPFileHeader) == 30); + class CArchive final : public CFileArchive { public: @@ -75,6 +85,8 @@ class CArchiveLoaderZip final : public IArchiveLoader private: core::smart_refctd_ptr createArchive_impl(core::smart_refctd_ptr&& file, const std::string_view& password) const override; + core::smart_refctd_ptr createArchiveFromGZIP(core::smart_refctd_ptr&& file, const std::string_view& password) const; + core::smart_refctd_ptr createArchiveFromZIP(core::smart_refctd_ptr&& file, const std::string_view& password) const; }; } From a8bc4e42c7615f41123e6b7af99bee049586af8c Mon Sep 17 00:00:00 
2001 From: Przemog1 Date: Mon, 5 Jan 2026 22:36:13 +0100 Subject: [PATCH 357/472] Removed old code --- src/nbl/system/CArchiveLoaderZip.cpp | 248 --------------------------- 1 file changed, 248 deletions(-) diff --git a/src/nbl/system/CArchiveLoaderZip.cpp b/src/nbl/system/CArchiveLoaderZip.cpp index 7b949893d6..e7ac7ceb3e 100644 --- a/src/nbl/system/CArchiveLoaderZip.cpp +++ b/src/nbl/system/CArchiveLoaderZip.cpp @@ -386,254 +386,6 @@ core::smart_refctd_ptr CArchiveLoaderZip::createArchive_impl(core: return createArchiveFromZIP(std::move(file), password); } } -#if 0 -core::smart_refctd_ptr CArchiveLoaderZip::createArchive_impl(core::smart_refctd_ptr&& file, const std::string_view& password) const -{ - if (!file) - return nullptr; - - uint16_t sig; - { - IFile::success_t success; - file->read(success,&sig,0,sizeof(sig)); - if (!success) - return nullptr; - } - - std::shared_ptr> items = std::make_shared>(); - core::vector itemsMetadata; - // load file entries - { - const bool isGZip = sig==0x8b1fu; - - // - auto addItem = [&items,&itemsMetadata](const std::string& _path, const size_t offset, const SZIPFileHeader& meta) -> void - { - // we need to have a filename or we skip - if (_path.empty()) - return; - - auto& item = items->emplace_back(); - item.pathRelativeToArchive = _path; - item.size = meta.DataDescriptor.UncompressedSize; - item.offset = offset; - item.ID = itemsMetadata.size(); - item.allocatorType = meta.CompressionMethod ? 
IFileArchive::EAT_VIRTUAL_ALLOC:IFileArchive::EAT_NULL; - itemsMetadata.push_back(meta); - }; - - // - size_t offset = 0ull; - auto readStringFromFile = [&file,&offset](auto charCallback) -> bool - { - char c = 0x45; // make sure we start with non-zero char - while (c) - { - IFile::success_t success; - file->read(success,&c,offset,sizeof(c)); - if (!success) - return false; - offset += success.getBytesToProcess(); - charCallback(c); - } - // if string is not null terminated, something went wrong reading the file - return !c; - }; - - // - std::string filename; - filename.reserve(ISystem::MAX_FILENAME_LENGTH); - if (isGZip) - { - SGZIPMemberHeader gzipHeader; - { - IFile::success_t success; - file->read(success,&gzipHeader,0ull,sizeof(gzipHeader)); - if (!success) - return nullptr; - offset += success.getBytesToProcess(); - } - - //! The gzip file format seems to think that there can be multiple files in a gzip file - //! TODO: But OLD Irrlicht Impl doesn't honor it!? - if (gzipHeader.sig!=0x8b1fu) - return nullptr; - - // now get the file info - if (gzipHeader.flags&EGZF_EXTRA_FIELDS) - { - // read lenth of extra data - uint16_t dataLen; - IFile::success_t success; - file->read(success,&dataLen,offset,sizeof(dataLen)); - if (!success) - return nullptr; - offset += success.getBytesToProcess(); - // skip the extra data - offset += dataLen; - } - // - if (gzipHeader.flags&EGZF_FILE_NAME) - { - filename.clear(); - if (!readStringFromFile([&](const char c){filename.push_back(c);})) - return nullptr; - } - // - if (gzipHeader.flags&EGZF_COMMENT) - { - if (!readStringFromFile([](const char c){})) - return nullptr; - } - // skip crc16 - if (gzipHeader.flags&EGZF_CRC16) - offset += 2; - - - SZIPFileHeader header; - memset(&header,0,sizeof(SZIPFileHeader)); - header.FilenameLength = filename.length(); - header.CompressionMethod = gzipHeader.compressionMethod; - header.DataDescriptor.CompressedSize = file->getSize()-(offset+sizeof(uint64_t)); - - const size_t itemOffset = 
offset; - - offset += header.DataDescriptor.CompressedSize; - // read CRC - { - IFile::success_t success; - file->read(success,&header.DataDescriptor.CRC32,offset,sizeof(header.DataDescriptor.CRC32)); - if (!success) - return nullptr; - offset += success.getBytesToProcess(); - } - // read uncompressed size - { - IFile::success_t success; - file->read(success,&header.DataDescriptor.UncompressedSize,offset,sizeof(header.DataDescriptor.UncompressedSize)); - if (!success) - return nullptr; - offset += success.getBytesToProcess(); - } - - // - addItem(filename,itemOffset,header); - } - else - { - while (true) - { - SZIPFileHeader zipHeader; - { - IFile::success_t success; - file->read(success,&zipHeader,offset,sizeof(zipHeader)); - if (!success) - break; - offset += success.getBytesToProcess(); - } - - if (zipHeader.Sig!=0x04034b50u) - break; - - filename.resize(zipHeader.FilenameLength); - { - IFile::success_t success; - file->read(success,filename.data(),offset,zipHeader.FilenameLength); - if (!success) - break; - offset += success.getBytesToProcess(); - } - - // AES encryption - if ((zipHeader.GeneralBitFlag&ZIP_FILE_ENCRYPTED) && (zipHeader.CompressionMethod==99)) - { - SZipFileExtraHeader extraHeader; - SZipFileAESExtraData data; - - size_t localOffset = offset; - offset += zipHeader.ExtraFieldLength; - while (true) - { - { - IFile::success_t success; - file->read(success,&extraHeader,localOffset,sizeof(extraHeader)); - if (!success) - break; - localOffset += success.getBytesToProcess(); - if (localOffset>offset) - break; - } - - if (extraHeader.ID!=0x9901u) - continue; - - { - IFile::success_t success; - file->read(success,&data,localOffset,sizeof(data)); - if (!success) - break; - localOffset += success.getBytesToProcess(); - if (localOffset>offset) - break; - } - if (data.Vendor[0]=='A' && data.Vendor[1]=='E') - { - #ifdef _NBL_COMPILE_WITH_ZIP_ENCRYPTION_ - // encode values into Sig - // AE-Version | Strength | ActualMode - zipHeader.Sig = - ((data.Version & 
0xff) << 24) | - (data.EncryptionStrength << 16) | - (data.CompressionMode); - break; - #else - filename.clear(); // no support, can't decrypt - #endif - } - } - } - else - offset += zipHeader.ExtraFieldLength; - - // if bit 3 was set, use CentralDirectory for setup - if (zipHeader.GeneralBitFlag&ZIP_INFO_IN_DATA_DESCRIPTOR) - { - SZIPFileCentralDirEnd dirEnd; - dirEnd.Sig = 0u; - - // First place where the end record could be stored - offset = file->getSize()-sizeof(SZIPFileCentralDirEnd)+1ull; - while (dirEnd.Sig!=SZIPFileCentralDirEnd::ExpectedSig) - { - IFile::success_t success; - file->read(success,&dirEnd,--offset,sizeof(dirEnd)); - if (!success) - return nullptr; - } - items->reserve(dirEnd.TotalEntries); - itemsMetadata.reserve(dirEnd.TotalEntries); - offset = dirEnd.Offset; - #if 0 - while (scanCentralDirectoryHeader(offset)) {} - #endif - assert(false); // if you ever hit this, msg @devsh - break; - } - - addItem(filename,offset,zipHeader); - // move forward length of data - offset += zipHeader.DataDescriptor.CompressedSize; - } - } - } - - assert(items->size()==itemsMetadata.size()); - if (items->empty()) - return nullptr; - - return core::make_smart_refctd_ptr(std::move(file),core::smart_refctd_ptr(m_logger.get()), items, std::move(itemsMetadata)); -} -#endif CFileArchive::file_buffer_t CArchiveLoaderZip::CArchive::getFileBuffer(const IFileArchive::SFileList::found_t& item) { From 2f33aa03cbcdfaf20df6f26c6b6ffac39fb20dfd Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 6 Jan 2026 16:26:43 +0700 Subject: [PATCH 358/472] some fixes to quaternions --- include/nbl/builtin/hlsl/math/quaternions.hlsl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index 91ee4975e3..be3b7b8ede 100644 --- a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -184,21 +184,21 @@ struct quaternion 
return retval; } - this_t operator*(scalar_type scalar) + this_t operator*(scalar_type scalar) NBL_CONST_MEMBER_FUNC { this_t output; output.data = data * scalar; return output; } - this_t operator*(NBL_CONST_REF_ARG(this_t) other) + this_t operator*(NBL_CONST_REF_ARG(this_t) other) NBL_CONST_MEMBER_FUNC { this_t retval; retval.data = data_type( - data.w * other.data.w - data.x * other.x - data.y * other.data.y - data.z * other.data.z, - data.w * other.data.x + data.x * other.w + data.y * other.data.z - data.z * other.data.y, - data.w * other.data.y - data.x * other.z + data.y * other.data.w + data.z * other.data.x, - data.w * other.data.z + data.x * other.y - data.y * other.data.x + data.z * other.data.w + data.w * other.data.x + data.x * other.data.w + data.y * other.data.z - data.z * other.data.y, + data.w * other.data.y - data.x * other.data.z + data.y * other.data.w + data.z * other.data.x, + data.w * other.data.z + data.x * other.data.y - data.y * other.data.x + data.z * other.data.w, + data.w * other.data.w - data.x * other.data.x - data.y * other.data.y - data.z * other.data.z ); return retval; } @@ -270,7 +270,7 @@ struct quaternion mat[0] = mat[0] * scalar_type(2.0); mat[1] = mat[1] * scalar_type(2.0); mat[2] = mat[2] * scalar_type(2.0); - return mat;// hlsl::transpose(mat); // TODO: double check transpose? 
+ return mat; } static vector3_type slerp_delta(const vector3_type start, const vector3_type preScaledWaypoint, scalar_type cosAngleFromStart) @@ -335,7 +335,7 @@ struct static_cast_helper, math::truncated_quaternion > { math::quaternion retval; retval.data.xyz = q.data; - retval.data.w = hlsl::sqrt(scalar_type(1.0) - hlsl::dot(q.data, q.data)); + retval.data.w = hlsl::sqrt(T(1.0) - hlsl::dot(q.data, q.data)); return retval; } }; From be4630bb21d9731a40fac24bcefda40ba8f1efff Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 6 Jan 2026 22:03:07 +0700 Subject: [PATCH 359/472] Fix sqDist calculation --- src/nbl/asset/utils/COBBGenerator.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/nbl/asset/utils/COBBGenerator.cpp b/src/nbl/asset/utils/COBBGenerator.cpp index d869a89cc6..db8d0dc123 100644 --- a/src/nbl/asset/utils/COBBGenerator.cpp +++ b/src/nbl/asset/utils/COBBGenerator.cpp @@ -151,14 +151,18 @@ hlsl::shapes::OBB<> COBBGenerator::compute(const VertexCollection& vertices) return result; }; + static auto getSqDist = [](hlsl::float32_t3 a, hlsl::float32_t3 b) -> hlsl::float32_t + { + return hlsl::dot(a - b, a - b); + }; static auto findFurthestPointPair = [](const ExtremalVertices& extremalVertices) -> std::pair { int indexFurthestPair = 0; - auto maxSqDist = hlsl::dot(extremalVertices.maxPtr()[0], extremalVertices.minPtr()[0]); + auto maxSqDist = getSqDist(extremalVertices.maxPtr()[0], extremalVertices.minPtr()[0]); for (int k = 1; k < SAMPLE_DIR_COUNT; k++) { - const auto sqDist = hlsl::dot(extremalVertices.maxPtr()[k], extremalVertices.minPtr()[k]); + const auto sqDist = getSqDist(extremalVertices.maxPtr()[k], extremalVertices.minPtr()[k]); if (sqDist > maxSqDist) { maxSqDist = sqDist; indexFurthestPair = k; } } return { @@ -167,7 +171,7 @@ hlsl::shapes::OBB<> COBBGenerator::compute(const VertexCollection& vertices) }; }; - static auto sqDistPointInfiniteEdge = [](const hlsl::float32_t3& q, const hlsl::float32_t3& p0, 
const hlsl::float32_t3& v) -> hlsl::float32_t + static auto getSqDistPointInfiniteEdge = [](const hlsl::float32_t3& q, const hlsl::float32_t3& p0, const hlsl::float32_t3& v) -> hlsl::float32_t { const auto u0 = q - p0; const auto t = dot(v, u0); @@ -177,11 +181,11 @@ hlsl::shapes::OBB<> COBBGenerator::compute(const VertexCollection& vertices) static auto findFurthestPointFromInfiniteEdge = [](const hlsl::float32_t3& p0, const hlsl::float32_t3& e0, const VertexCollection& vertices) { - auto maxSqDist = sqDistPointInfiniteEdge(vertices[0], p0, e0); + auto maxSqDist = getSqDistPointInfiniteEdge(vertices[0], p0, e0); int maxIndex = 0; for (size_t i = 1; i < vertices.size; i++) { - const auto sqDist = sqDistPointInfiniteEdge(vertices[i], p0, e0); + const auto sqDist = getSqDistPointInfiniteEdge(vertices[i], p0, e0); if (sqDist > maxSqDist) { maxSqDist = sqDist; maxIndex = i; @@ -314,7 +318,7 @@ hlsl::shapes::OBB<> COBBGenerator::compute(const VertexCollection& vertices) // Degenerate case 1: // If the found furthest points are located very close, return OBB aligned with the initial AABB - if (hlsl::dot(baseTriangleVertices[0], baseTriangleVertices[1]) < eps) + if (getSqDist(baseTriangleVertices[0], baseTriangleVertices[1]) < eps) { return { .vertices = baseTriangleVertices, From 00affbc4073c9418907b0e4bd123b7474c7b2851 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 6 Jan 2026 22:05:40 +0700 Subject: [PATCH 360/472] Delete reimplemented code --- .../utils/CPolygonGeometryManipulator.cpp | 184 ------------------ 1 file changed, 184 deletions(-) diff --git a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp index 74169eb0d8..8941aa68d6 100644 --- a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp +++ b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp @@ -584,190 +584,6 @@ core::smart_refctd_ptr IMeshManipulator::calculateSmoothNormals( return outbuffer; } -// Used by createMeshBufferWelded only -static bool 
cmpVertices(ICPUMeshBuffer* _inbuf, const void* _va, const void* _vb, size_t _vsize, const IMeshManipulator::SErrorMetric* _errMetrics) -{ - auto cmpInteger = [](uint32_t* _a, uint32_t* _b, size_t _n) -> bool { - return !memcmp(_a, _b, _n*4); - }; - - constexpr uint32_t MAX_ATTRIBS = ICPUMeshBuffer::MAX_VERTEX_ATTRIB_COUNT; - - const uint8_t* va = reinterpret_cast(_va), *vb = reinterpret_cast(_vb); - for (size_t i = 0u; i < MAX_ATTRIBS; ++i) - { - if (!_inbuf->isAttributeEnabled(i)) - continue; - - const auto atype = _inbuf->getAttribFormat(i); - const auto cpa = getFormatChannelCount(atype); - - if (isIntegerFormat(atype) || isScaledFormat(atype)) - { - uint32_t attr[8]; - ICPUMeshBuffer::getAttribute(attr, va, atype); - ICPUMeshBuffer::getAttribute(attr+4, vb, atype); - if (!cmpInteger(attr, attr+4, cpa)) - return false; - } - else - { - core::vectorSIMDf attr[2]; - ICPUMeshBuffer::getAttribute(attr[0], va, atype); - ICPUMeshBuffer::getAttribute(attr[1], vb, atype); - if (!IMeshManipulator::compareFloatingPointAttribute(attr[0], attr[1], cpa, _errMetrics[i])) - return false; - } - - const uint32_t sz = getTexelOrBlockBytesize(atype); - va += sz; - vb += sz; - } - - return true; -} - -//! 
Creates a copy of a mesh, which will have identical vertices welded together -core::smart_refctd_ptr IMeshManipulator::createMeshBufferWelded(ICPUMeshBuffer *inbuffer, const SErrorMetric* _errMetrics, const bool& optimIndexType, const bool& makeNewMesh) -{ - if (!inbuffer || !inbuffer->getPipeline()) - return nullptr; - - constexpr uint32_t MAX_ATTRIBS = ICPUMeshBuffer::MAX_VERTEX_ATTRIB_COUNT; - - bool bufferPresent[MAX_ATTRIBS]; - - size_t vertexAttrSize[MAX_ATTRIBS]; - size_t vertexSize = 0; - for (size_t i=0; igetAttribBoundBuffer(i).buffer; - bufferPresent[i] = inbuffer->isAttributeEnabled(i); - if (bufferPresent[i] && buf) - { - const E_FORMAT componentType = inbuffer->getAttribFormat(i); - vertexAttrSize[i] = getTexelOrBlockBytesize(componentType); - vertexSize += vertexAttrSize[i]; - } - } - - auto cmpfunc = [&, inbuffer, vertexSize, _errMetrics](const void* _va, const void* _vb) { - return cmpVertices(inbuffer, _va, _vb, vertexSize, _errMetrics); - }; - - const uint32_t vertexCount = IMeshManipulator::upperBoundVertexID(inbuffer); - const E_INDEX_TYPE oldIndexType = inbuffer->getIndexType(); - - if (!vertexCount) - return nullptr; - - // reset redirect list - uint32_t* redirects = new uint32_t[vertexCount]; - - uint32_t maxRedirect = 0; - - uint8_t* epicData = reinterpret_cast(_NBL_ALIGNED_MALLOC(vertexSize*vertexCount,_NBL_SIMD_ALIGNMENT)); - for (auto i=0u; igetAttribStride(k); - uint8_t* sourcePtr = inbuffer->getAttribPointer(k) + i*stride; - memcpy(currentVertexPtr,sourcePtr,vertexAttrSize[k]); - currentVertexPtr += vertexAttrSize[k]; - } - } - - for (auto i=0u; imaxRedirect) - maxRedirect = redir; - } - _NBL_ALIGNED_FREE(epicData); - - void* oldIndices = inbuffer->getIndices(); - core::smart_refctd_ptr clone; - if (makeNewMesh) - clone = core::smart_refctd_ptr_static_cast(inbuffer->clone(0u)); - else - { - if (!oldIndices) - { - inbuffer->setIndexBufferBinding({ 0u, ICPUBuffer::create({ (maxRedirect >= 0x10000u ? 
sizeof(uint32_t) : sizeof(uint16_t)) * inbuffer->getIndexCount() }) }); - inbuffer->setIndexType(maxRedirect>=0x10000u ? EIT_32BIT:EIT_16BIT); - } - } - - // TODO: reduce the code duplication via the use of a generic lambda (with a `auto*`) - if (oldIndexType==EIT_16BIT) - { - uint16_t* indicesIn = reinterpret_cast(oldIndices); - if ((makeNewMesh ? clone.get():inbuffer)->getIndexType()==EIT_32BIT) - { - uint32_t* indicesOut = reinterpret_cast((makeNewMesh ? clone.get():inbuffer)->getIndices()); - for (size_t i=0; igetIndexCount(); i++) - indicesOut[i] = redirects[indicesIn[i]]; - } - else if ((makeNewMesh ? clone.get():inbuffer)->getIndexType()==EIT_16BIT) - { - uint16_t* indicesOut = reinterpret_cast((makeNewMesh ? clone.get():inbuffer)->getIndices()); - for (size_t i=0; igetIndexCount(); i++) - indicesOut[i] = redirects[indicesIn[i]]; - } - } - else if (oldIndexType==EIT_32BIT) - { - uint32_t* indicesIn = reinterpret_cast(oldIndices); - if ((makeNewMesh ? clone.get():inbuffer)->getIndexType()==EIT_32BIT) - { - uint32_t* indicesOut = reinterpret_cast((makeNewMesh ? clone.get():inbuffer)->getIndices()); - for (size_t i=0; igetIndexCount(); i++) - indicesOut[i] = redirects[indicesIn[i]]; - } - else if ((makeNewMesh ? clone.get():inbuffer)->getIndexType()==EIT_16BIT) - { - uint16_t* indicesOut = reinterpret_cast((makeNewMesh ? clone.get():inbuffer)->getIndices()); - for (size_t i=0; igetIndexCount(); i++) - indicesOut[i] = redirects[indicesIn[i]]; - } - } - else if ((makeNewMesh ? clone.get():inbuffer)->getIndexType()==EIT_32BIT) - { - uint32_t* indicesOut = reinterpret_cast((makeNewMesh ? clone.get():inbuffer)->getIndices()); - for (size_t i=0; igetIndexCount(); i++) - indicesOut[i] = redirects[i]; - } - else if ((makeNewMesh ? clone.get():inbuffer)->getIndexType()==EIT_16BIT) - { - uint16_t* indicesOut = reinterpret_cast((makeNewMesh ? 
clone.get():inbuffer)->getIndices()); - for (size_t i=0; igetIndexCount(); i++) - indicesOut[i] = redirects[i]; - } - delete [] redirects; - - if (makeNewMesh) - return clone; - else - return core::smart_refctd_ptr(inbuffer); -} - core::smart_refctd_ptr IMeshManipulator::createOptimizedMeshBuffer(const ICPUMeshBuffer* _inbuffer, const SErrorMetric* _errMetric) { if (!_inbuffer) From d79c703bce9ded0271782b9dd339a33682429d3e Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 6 Jan 2026 22:06:43 +0700 Subject: [PATCH 361/472] Remove reimplemented code --- .../utils/CPolygonGeometryManipulator.cpp | 56 ------------------- 1 file changed, 56 deletions(-) diff --git a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp index 8941aa68d6..35d64d4d68 100644 --- a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp +++ b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp @@ -528,62 +528,6 @@ core::smart_refctd_ptr IMeshManipulator::createMeshBufferUniqueP return clone; } -// -core::smart_refctd_ptr IMeshManipulator::calculateSmoothNormals(ICPUMeshBuffer* inbuffer, bool makeNewMesh, float epsilon, uint32_t normalAttrID, VxCmpFunction vxcmp) -{ - if (inbuffer == nullptr) - { - _NBL_DEBUG_BREAK_IF(true); - return nullptr; - } - - //Mesh has to have unique primitives - if (inbuffer->getIndexType() != E_INDEX_TYPE::EIT_UNKNOWN) - { - _NBL_DEBUG_BREAK_IF(true); - return nullptr; - } - - core::smart_refctd_ptr outbuffer; - if (makeNewMesh) - { - outbuffer = core::move_and_static_cast(inbuffer->clone(0u)); - - const auto normalAttr = inbuffer->getNormalAttributeIx(); - auto normalBinding = inbuffer->getBindingNumForAttribute(normalAttr); - const auto oldPipeline = inbuffer->getPipeline(); - auto vertexParams = oldPipeline->getCachedCreationParams().vertexInput; - bool notUniqueBinding = false; - for (uint16_t attr=0u; attr0 && firstBindingNotUsed(firstBindingNotUsed); - - vertexParams.attributes[normalAttr].binding = 
normalBinding; - vertexParams.enabledBindingFlags |= 0x1u<getAttribFormat(normalAttr)); - auto normalBuf = ICPUBuffer::create({ normalFormatBytesize*IMeshManipulator::upperBoundVertexID(inbuffer) }); - outbuffer->setVertexBufferBinding({0ull,std::move(normalBuf)},normalBinding); - - auto pipeline = core::move_and_static_cast(oldPipeline->clone(0u)); - vertexParams.bindings[normalBinding].stride = normalFormatBytesize; - vertexParams.attributes[normalAttr].relativeOffset = 0u; - pipeline->getCachedCreationParams().vertexInput = vertexParams; - outbuffer->setPipeline(std::move(pipeline)); - } - else - outbuffer = core::smart_refctd_ptr(inbuffer); - CSmoothNormalGenerator::calculateNormals(outbuffer.get(), epsilon, normalAttrID, vxcmp); - - return outbuffer; -} - core::smart_refctd_ptr IMeshManipulator::createOptimizedMeshBuffer(const ICPUMeshBuffer* _inbuffer, const SErrorMetric* _errMetric) { if (!_inbuffer) From 9f18a1f34606abb4ddab46dd48cfba9d5192e180 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 6 Jan 2026 22:09:06 +0700 Subject: [PATCH 362/472] Fix indentation --- include/nbl/asset/utils/COBBGenerator.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/include/nbl/asset/utils/COBBGenerator.h b/include/nbl/asset/utils/COBBGenerator.h index 2b0d408342..f36ebbf466 100644 --- a/include/nbl/asset/utils/COBBGenerator.h +++ b/include/nbl/asset/utils/COBBGenerator.h @@ -11,15 +11,17 @@ namespace nbl::asset { - class COBBGenerator - { - public: - using VertexCollection = CPolygonGeometryManipulator::VertexCollection; +class COBBGenerator +{ + public: + + using VertexCollection = CPolygonGeometryManipulator::VertexCollection; + + static hlsl::shapes::OBB<> compute(const VertexCollection& vertices); - static hlsl::shapes::OBB<> compute(const VertexCollection& vertices); +}; - }; } #endif From 27aad5c4d57aa316203120a50874e0f7ea493224 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Wed, 7 Jan 2026 00:37:01 +0300 Subject: [PATCH 363/472] 
Added `SRasterizationParams` to the full screen triangle pipeline creation arguments, also used promote instead of _static_cast(not sure if this is fine), also updated examples submodule --- examples_tests | 2 +- include/nbl/builtin/hlsl/math/functions.hlsl | 2 +- include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/examples_tests b/examples_tests index 086af9e659..15e4d5d044 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 086af9e6590119bd394f2622db80ab0054445502 +Subproject commit 15e4d5d044d0b682279fcce5486a841e1f3d3541 diff --git a/include/nbl/builtin/hlsl/math/functions.hlsl b/include/nbl/builtin/hlsl/math/functions.hlsl index a1c51d4e51..f7db44b9fb 100644 --- a/include/nbl/builtin/hlsl/math/functions.hlsl +++ b/include/nbl/builtin/hlsl/math/functions.hlsl @@ -152,7 +152,7 @@ struct conditionalAbsOrMax_helper(x); - const Uint32VectorWithDimensionOfT mask = cond ? _static_cast(numeric_limits::max >> 1) : _static_cast(numeric_limits::max); + const Uint32VectorWithDimensionOfT mask = cond ? 
promote(numeric_limits::max >> 1) : promote(numeric_limits::max); const Uint32VectorWithDimensionOfT condAbsAsUint = xAsUintVec & mask; T condAbs = bit_cast(condAbsAsUint); diff --git a/include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h b/include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h index 1abebf23ea..f537994450 100644 --- a/include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h +++ b/include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h @@ -45,6 +45,7 @@ struct ProtoPipeline final video::IGPURenderpass* renderpass, const uint32_t subpassIx=0, asset::SBlendParams blendParams = {}, + asset::SRasterizationParams rasterizationParams = DefaultRasterParams, const hlsl::SurfaceTransform::FLAG_BITS swapchainTransform=hlsl::SurfaceTransform::FLAG_BITS::IDENTITY_BIT ) { @@ -68,7 +69,7 @@ struct ProtoPipeline final params[0].cached = { .vertexInput = {}, // The Full Screen Triangle doesn't use any HW vertex input state .primitiveAssembly = {}, - .rasterization = DefaultRasterParams, + .rasterization = rasterizationParams, .blend = blendParams, .subpassIx = subpassIx }; From a22d46ae9506112f5ba5830cb81a9b7dbe9b3f81 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 7 Jan 2026 12:08:16 +0700 Subject: [PATCH 364/472] implement quaternion slerp (might need optimizing?) 
--- .../nbl/builtin/hlsl/math/quaternions.hlsl | 29 +++++++++++++++++-- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index be3b7b8ede..b54e1ad619 100644 --- a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -206,10 +206,10 @@ struct quaternion static this_t unnormLerp(const this_t start, const this_t end, const scalar_type fraction, const scalar_type totalPseudoAngle) { // TODO: benchmark uint sign flip vs just *sign(totalPseudoAngle) - const data_type adjEnd = ieee754::flipSignIfRHSNegative(end.data, totalPseudoAngle); + const data_type adjEnd = ieee754::flipSignIfRHSNegative(end.data, hlsl::promote(totalPseudoAngle)); this_t retval; - retval.data = hlsl::mix(start.data, adjEnd, fraction); + retval.data = hlsl::mix(start.data, adjEnd, hlsl::promote(fraction)); return retval; } @@ -287,10 +287,33 @@ struct quaternion return precompPart * cosAngle + hlsl::cross(planeNormal, precompPart); } + static this_t slerp(const this_t start, const this_t end, const scalar_type fraction, const scalar_type threshold = numeric_limits::epsilon) + { + const scalar_type totalPseudoAngle = hlsl::dot(start.data, end.data); + + // make sure we use the short rotation + const scalar_type cosA = ieee754::flipSignIfRHSNegative(totalPseudoAngle, totalPseudoAngle); + if (cosA <= (scalar_type(1.0) - threshold)) // spherical interpolation + { + this_t retval; + + const scalar_type A = hlsl::acos(cosA); + const scalar_type sinARcp = scalar_type(1.0) / hlsl::sqrt(scalar_type(1.0) - cosA * cosA); + const scalar_type sinAt = hlsl::sin(fraction * A); + const data_type adjEnd = ieee754::flipSignIfRHSNegative(end.data, hlsl::promote(totalPseudoAngle)); + retval.data = (hlsl::sin((scalar_type(1.0) - fraction) * A) * start.data + sinAt * adjEnd) * sinARcp; + + return retval; + } + else + return unnormLerp(start, end, fraction, 
totalPseudoAngle); + // return hlsl::normalize(unnormLerp(start, end, fraction, totalPseudoAngle)); + } + this_t inverse() NBL_CONST_MEMBER_FUNC { this_t retval; - retval.data.xyz = -retval.data.xyz; + retval.data.xyz = -data.xyz; retval.data.w = data.w; return retval; } From f71cca19d9d18f99cb4265c6d183999cd618c568 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 7 Jan 2026 13:56:40 +0700 Subject: [PATCH 365/472] minor optimization to slerp --- include/nbl/builtin/hlsl/math/quaternions.hlsl | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index b54e1ad619..b7f39f19fe 100644 --- a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -296,18 +296,17 @@ struct quaternion if (cosA <= (scalar_type(1.0) - threshold)) // spherical interpolation { this_t retval; - - const scalar_type A = hlsl::acos(cosA); const scalar_type sinARcp = scalar_type(1.0) / hlsl::sqrt(scalar_type(1.0) - cosA * cosA); - const scalar_type sinAt = hlsl::sin(fraction * A); + const scalar_type sinAt = hlsl::sin(fraction * hlsl::acos(cosA)); + const scalar_type sinAt_over_sinA = sinAt*sinARcp; + const scalar_type scale = hlsl::sqrt(scalar_type(1.0)-sinAt*sinAt) - sinAt_over_sinA*cosA; //cosAt-cos(A)sin(tA)/sin(A) = (sin(A)cos(tA)-cos(A)sin(tA))/sin(A) const data_type adjEnd = ieee754::flipSignIfRHSNegative(end.data, hlsl::promote(totalPseudoAngle)); - retval.data = (hlsl::sin((scalar_type(1.0) - fraction) * A) * start.data + sinAt * adjEnd) * sinARcp; + retval.data = scale * start.data + sinAt_over_sinA * adjEnd; return retval; } else return unnormLerp(start, end, fraction, totalPseudoAngle); - // return hlsl::normalize(unnormLerp(start, end, fraction, totalPseudoAngle)); } this_t inverse() NBL_CONST_MEMBER_FUNC From c39c78a8e7b8d9709ddbc9de602d6dff5573d0da Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 7 Jan 2026 
15:26:01 +0700 Subject: [PATCH 366/472] fix create from rotation matrix --- .../nbl/builtin/hlsl/math/quaternions.hlsl | 27 +++++++++++-------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index b7f39f19fe..1f720b0247 100644 --- a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -140,42 +140,47 @@ struct quaternion const data_type Qy = data_type(m11, neg_m11, m11, neg_m11); const data_type Qz = data_type(m22, neg_m22, neg_m22, m22); - const data_type tmp = hlsl::promote(1.0) + Qx + Qy + Qz; + // const data_type tmp = hlsl::promote(1.0) + Qx + Qy + Qz; + const data_type tmp = Qx + Qy + Qz; // TODO: speed this up this_t retval; if (tmp.x > scalar_type(0.0)) { - const scalar_type invscales = scalar_type(0.5) / hlsl::sqrt(tmp.x); + const scalar_type scales = hlsl::sqrt(tmp.x + scalar_type(1.0)); + const scalar_type invscales = scalar_type(0.5) / scales; retval.data.x = (m[2][1] - m[1][2]) * invscales; retval.data.y = (m[0][2] - m[2][0]) * invscales; retval.data.z = (m[1][0] - m[0][1]) * invscales; - retval.data.w = tmp.x * invscales * scalar_type(0.5); + retval.data.w = scales * scalar_type(0.5); } else { if (tmp.y > scalar_type(0.0)) { - const scalar_type invscales = scalar_type(0.5) / hlsl::sqrt(tmp.y); - retval.data.x = tmp.y * invscales * scalar_type(0.5); + const scalar_type scales = hlsl::sqrt(tmp.y + scalar_type(1.0)); + const scalar_type invscales = scalar_type(0.5) / scales; + retval.data.x = scales * scalar_type(0.5); retval.data.y = (m[0][1] + m[1][0]) * invscales; retval.data.z = (m[2][0] + m[0][2]) * invscales; retval.data.w = (m[2][1] - m[1][2]) * invscales; } else if (tmp.z > scalar_type(0.0)) { - const scalar_type invscales = scalar_type(0.5) / hlsl::sqrt(tmp.z); + const scalar_type scales = hlsl::sqrt(tmp.z + scalar_type(1.0)); + const scalar_type invscales = scalar_type(0.5) / scales; 
retval.data.x = (m[0][1] + m[1][0]) * invscales; - retval.data.y = tmp.z * invscales * scalar_type(0.5); - retval.data.z = (m[0][2] - m[2][0]) * invscales; - retval.data.w = (m[1][2] + m[2][1]) * invscales; + retval.data.y = scales * scalar_type(0.5); + retval.data.z = (m[1][2] + m[2][1]) * invscales; + retval.data.w = (m[0][2] - m[2][0]) * invscales; } else { - const scalar_type invscales = scalar_type(0.5) / hlsl::sqrt(tmp.w); + const scalar_type scales = hlsl::sqrt(tmp.w + scalar_type(1.0)); + const scalar_type invscales = scalar_type(0.5) / scales; retval.data.x = (m[0][2] + m[2][0]) * invscales; retval.data.y = (m[1][2] + m[2][1]) * invscales; - retval.data.z = tmp.w * invscales * scalar_type(0.5); + retval.data.z = scales * scalar_type(0.5); retval.data.w = (m[1][0] - m[0][1]) * invscales; } } From 0b180c88b65d37acf0ccc0817d5d5d97cd6fdf74 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 7 Jan 2026 16:20:40 +0700 Subject: [PATCH 367/472] force constructor type with requires to avoid dxc implicit conversions --- include/nbl/builtin/hlsl/math/quaternions.hlsl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index 1f720b0247..4be73482bb 100644 --- a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -51,12 +51,13 @@ struct quaternion // angle: Rotation angle expressed in radians. // axis: Rotation axis, must be normalized. 
- static this_t create(const vector3_type axis, scalar_type angle) + template && is_same_v) + static this_t create(const U axis, const F angle, const F uniformScale = scalar_type(1.0)) { this_t q; const scalar_type sinTheta = hlsl::sin(angle * 0.5); const scalar_type cosTheta = hlsl::cos(angle * 0.5); - q.data = data_type(axis * sinTheta, cosTheta); + q.data = data_type(axis * sinTheta, cosTheta) * uniformScale; return q; } @@ -301,7 +302,7 @@ struct quaternion if (cosA <= (scalar_type(1.0) - threshold)) // spherical interpolation { this_t retval; - const scalar_type sinARcp = scalar_type(1.0) / hlsl::sqrt(scalar_type(1.0) - cosA * cosA); + const scalar_type sinARcp = scalar_type(1.0) / hlsl::sqrt(scalar_type(1.0) - cosA * cosA); const scalar_type sinAt = hlsl::sin(fraction * hlsl::acos(cosA)); const scalar_type sinAt_over_sinA = sinAt*sinARcp; const scalar_type scale = hlsl::sqrt(scalar_type(1.0)-sinAt*sinAt) - sinAt_over_sinA*cosA; //cosAt-cos(A)sin(tA)/sin(A) = (sin(A)cos(tA)-cos(A)sin(tA))/sin(A) From de1b0d1f5aa4cb4e1b3bd73330315f776de1c108 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 7 Jan 2026 17:13:56 +0700 Subject: [PATCH 368/472] fixes to transformVector and other minor fixes --- .../nbl/builtin/hlsl/math/quaternions.hlsl | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index 4be73482bb..34abb042fb 100644 --- a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -134,9 +134,9 @@ struct quaternion } const scalar_type m00 = m[0][0], m11 = m[1][1], m22 = m[2][2]; - const scalar_type neg_m00 = bit_cast(bit_cast(m00)^0x80000000u); - const scalar_type neg_m11 = bit_cast(bit_cast(m11)^0x80000000u); - const scalar_type neg_m22 = bit_cast(bit_cast(m22)^0x80000000u); + const scalar_type neg_m00 = -m00; + const scalar_type neg_m11 = -m11; + const scalar_type neg_m22 = -m22; 
const data_type Qx = data_type(m00, m00, neg_m00, neg_m00); const data_type Qy = data_type(m11, neg_m11, m11, neg_m11); const data_type Qz = data_type(m22, neg_m22, neg_m22, m22); @@ -186,7 +186,7 @@ struct quaternion } } - retval.data = hlsl::normalize(retval.data); + retval.data = hlsl::normalize(retval.data) / hlsl::sqrt(hlsl::dot(m[0], m[0])); // restore uniform scale return retval; } @@ -211,6 +211,8 @@ struct quaternion static this_t unnormLerp(const this_t start, const this_t end, const scalar_type fraction, const scalar_type totalPseudoAngle) { + assert(hlsl::length(start.data) == scalar_type(1.0)); + assert(hlsl::length(end.data) == scalar_type(1.0)); // TODO: benchmark uint sign flip vs just *sign(totalPseudoAngle) const data_type adjEnd = ieee754::flipSignIfRHSNegative(end.data, hlsl::promote(totalPseudoAngle)); @@ -241,6 +243,9 @@ struct quaternion static this_t unnormFlerp(const this_t start, const this_t end, const scalar_type fraction) { + assert(hlsl::length(start.data) == scalar_type(1.0)); + assert(hlsl::length(end.data) == scalar_type(1.0)); + const scalar_type pseudoAngle = hlsl::dot(start.data,end.data); const scalar_type interpolantPrecalcTerm = fraction - scalar_type(0.5); const scalar_type interpolantPrecalcTerm3 = fraction * interpolantPrecalcTerm * (fraction - scalar_type(1.0)); @@ -259,9 +264,10 @@ struct quaternion vector3_type transformVector(const vector3_type v, const bool assumeNoScale=false) NBL_CONST_MEMBER_FUNC { - scalar_type scale = hlsl::mix(hlsl::length(data), scalar_type(1.0), assumeNoScale); - vector3_type direction = data.xyz; - return v * scale + hlsl::cross(direction, v * data.w + hlsl::cross(direction, v)) * scalar_type(2.0); + const scalar_type scaleRcp = scalar_type(1.0) / hlsl::sqrt(hlsl::dot(data, data)); + const vector3_type modV = v * scalar_type(2.0) * scaleRcp; + const vector3_type direction = data.xyz; + return v / scaleRcp + hlsl::cross(direction, modV * data.w + hlsl::cross(direction, modV)); } matrix_type 
constructMatrix() NBL_CONST_MEMBER_FUNC @@ -336,8 +342,10 @@ struct normalize_helper > { static inline math::truncated_quaternion __call(const math::truncated_quaternion q) { + assert(hlsl::length(q.data) == scalar_type(1.0)); + math::truncated_quaternion retval; - retval.data = hlsl::normalize(q.data); + retval.data = q.data; // should be normalized by definition (dropped component should be 1.0) return retval; } }; From 92f804075932f1db7bd47f207861d4df113a13eb Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Wed, 7 Jan 2026 15:10:32 +0100 Subject: [PATCH 369/472] Fixed zip local header data descriptors --- examples_tests | 2 +- src/nbl/system/CArchiveLoaderZip.cpp | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/examples_tests b/examples_tests index 4c4e5e803e..194df8aa43 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 4c4e5e803e81e043390699f76cc51c6c360908d1 +Subproject commit 194df8aa43f5d753da25fecdff75f72af10d2d96 diff --git a/src/nbl/system/CArchiveLoaderZip.cpp b/src/nbl/system/CArchiveLoaderZip.cpp index e7ac7ceb3e..853287b25a 100644 --- a/src/nbl/system/CArchiveLoaderZip.cpp +++ b/src/nbl/system/CArchiveLoaderZip.cpp @@ -20,9 +20,7 @@ struct SZIPFileCentralDirFileHeader uint16_t CompressionMethod; uint16_t LastModFileTime; uint16_t LastModFileDate; - uint32_t CRC32; - uint32_t CompressedSize; - uint32_t UncompressedSize; + nbl::system::CArchiveLoaderZip::SZIPFileDataDescriptor DataDescriptor; uint16_t FilenameLength; uint16_t ExtraFieldLength; uint16_t FileCommentLength; @@ -354,6 +352,9 @@ core::smart_refctd_ptr CArchiveLoaderZip::createArchiveFromZIP(cor } } + // copying the data descriptor from the central directory header because it is always valid (data descriptor from local file header may be invalid when bit 3 in general purpose bit flag is set) + localFileHeader.DataDescriptor = centralDirectoryHeader.DataDescriptor; + const size_t fileDataOffset = 
centralDirectoryHeader.RelativeOffsetOfLocalHeader + localFileHeader.calcSize(); addItem(filename, fileDataOffset, localFileHeader); } From 960a9f29a6c2e3e455f707b9581ff4caf66d0b9a Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Wed, 7 Jan 2026 16:24:40 +0100 Subject: [PATCH 370/472] Updated examples --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 194df8aa43..6df2ffb548 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 194df8aa43f5d753da25fecdff75f72af10d2d96 +Subproject commit 6df2ffb548b43a8c69702543cc4a949efe5bc09a From a76f40cdad87abd7b3b05d9f497f7b3e1ff24afe Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 7 Jan 2026 22:32:40 +0700 Subject: [PATCH 371/472] Change calculateOBB interface to use template --- include/nbl/asset/utils/COBBGenerator.h | 501 +++++++++++++++++- .../asset/utils/CPolygonGeometryManipulator.h | 26 +- src/nbl/CMakeLists.txt | 1 - src/nbl/asset/utils/COBBGenerator.cpp | 482 ----------------- .../utils/CPolygonGeometryManipulator.cpp | 4 - 5 files changed, 505 insertions(+), 509 deletions(-) delete mode 100644 src/nbl/asset/utils/COBBGenerator.cpp diff --git a/include/nbl/asset/utils/COBBGenerator.h b/include/nbl/asset/utils/COBBGenerator.h index f36ebbf466..70180cba77 100644 --- a/include/nbl/asset/utils/COBBGenerator.h +++ b/include/nbl/asset/utils/COBBGenerator.h @@ -6,7 +6,6 @@ #ifndef _NBL_ASSET_C_OBB_GENERATOR_H_INCLUDED_ #define _NBL_ASSET_C_OBB_GENERATOR_H_INCLUDED_ -#include "nbl/asset/utils/CPolygonGeometryManipulator.h" #include "nbl/builtin/hlsl/shapes/obb.hlsl" namespace nbl::asset @@ -14,11 +13,507 @@ namespace nbl::asset class COBBGenerator { + private: + template + struct Extremals + { + std::array values; + + T* minPtr() + { + return values.data(); + } + + const T* minPtr() const + { + return values.data(); + } + + T* maxPtr() + { + return values.data() + CountV; + } + + const T* maxPtr() const + { + return values.data() + 
CountV; + } + + }; public: - using VertexCollection = CPolygonGeometryManipulator::VertexCollection; + template + requires (std::same_as, hlsl::float32_t3>) + static hlsl::shapes::OBB<> compute(size_t vertexCount, FetchVertexFn&& fetchFn) + { + constexpr size_t SAMPLE_DIR_COUNT = 7; // Number of sample directions + constexpr size_t SAMPLE_COUNT = SAMPLE_DIR_COUNT * 2; + + struct VertexCollection + { + using FetchFn = std::function; + FetchFn fetch; + size_t size; + + static auto fromSpan(std::span vertices) -> VertexCollection + { + return VertexCollection{ + .fetch = [data = vertices.data()](size_t vertexIndex)-> hlsl::float32_t3 + { + return data[vertexIndex]; + }, + .size = vertices.size() + }; + } + + hlsl::float32_t3 operator[](size_t index) const { return fetch(index); } + }; + + VertexCollection vertices = { + .fetch = std::forward(fetchFn), + .size = vertexCount, + }; + + if (vertices.size <= 0) + { + return hlsl::shapes::OBB<>::createAxisAligned({}, {}); + } + + static auto getQualityValue = [](hlsl::float32_t3 len) -> hlsl::float32_t + { + return len.x * len.y + len.x * len.z + len.y * len.z; //half box area + }; + + using ExtremalVertices = Extremals; + using ExtremalProjections = Extremals; + using Axes = std::array; + using Edges = std::array; + + struct ExtremalSamples + { + ExtremalVertices vertices; + ExtremalProjections projections; + }; + + struct LargeBaseTriangle + { + hlsl::float32_t3 normal = {}; + Axes vertices = {}; + Edges edges = {}; + enum Flag + { + NORMAL, + SECOND_POINT_CLOSE, + THIRD_POINT_CLOSE + } flag; + }; + + static auto findExtremals_7FixedDirs = [](const VertexCollection& vertices)-> ExtremalSamples + { + ExtremalSamples result; + hlsl::float32_t proj; + + const auto firstVertex = vertices.fetch(0); + + auto* minProjections = result.projections.minPtr(); + auto* maxProjections = result.projections.maxPtr(); + + auto* minVertices = result.vertices.minPtr(); + auto* maxVertices = result.vertices.maxPtr(); + + // Slab 0: dir {1, 
0, 0} + proj = firstVertex.x; + minProjections[0] = minProjections[0] = proj; + minVertices[0] = firstVertex; maxVertices[0] = firstVertex; + // Slab 1: dir {0, 1, 0} + proj = firstVertex.y; + minProjections[1] = maxProjections[1] = proj; + minVertices[1] = firstVertex; maxVertices[1] = firstVertex; + // Slab 2: dir {0, 0, 1} + proj = firstVertex.z; + minProjections[2] = maxProjections[2] = proj; + minVertices[2] = firstVertex; maxVertices[2] = firstVertex; + // Slab 3: dir {1, 1, 1} + proj = firstVertex.x + firstVertex.y + firstVertex.z; + minProjections[3] = maxProjections[3] = proj; + minVertices[3] = firstVertex; maxVertices[3] = firstVertex; + // Slab 4: dir {1, 1, -1} + proj = firstVertex.x + firstVertex.y - firstVertex.z; + minProjections[4] = maxProjections[4] = proj; + minVertices[4] = firstVertex; maxVertices[4] = firstVertex; + // Slab 5: dir {1, -1, 1} + proj = firstVertex.x - firstVertex.y + firstVertex.z; + minProjections[5] = maxProjections[5] = proj; + minVertices[5] = firstVertex; maxVertices[5] = firstVertex; + // Slab 6: dir {1, -1, -1} + proj = firstVertex.x - firstVertex.y - firstVertex.z; + minProjections[6] = maxProjections[6] = proj; + minVertices[6] = firstVertex; maxVertices[6] = firstVertex; + + for (size_t vertex_i = 1; vertex_i < vertices.size; vertex_i++) + { + const auto vertex = vertices.fetch(vertex_i); + // Slab 0: dir {1, 0, 0} + proj = vertices.fetch(vertex_i).x; + if (proj < minProjections[0]) { minProjections[0] = proj; minVertices[0] = vertices.fetch(vertex_i); } + if (proj > maxProjections[0]) { maxProjections[0] = proj; maxVertices[0] = vertices.fetch(vertex_i); } + // Slab 1: dir {0, 1, 0} + proj = vertices.fetch(vertex_i).y; + if (proj < minProjections[1]) { minProjections[1] = proj; minVertices[1] = vertices.fetch(vertex_i); } + if (proj > maxProjections[1]) { maxProjections[1] = proj; maxVertices[1] = vertices.fetch(vertex_i); } + // Slab 2: dir {0, 0, 1} + proj = vertices.fetch(vertex_i).z; + if (proj < 
minProjections[2]) { minProjections[2] = proj; minVertices[2] = vertices.fetch(vertex_i); } + if (proj > maxProjections[2]) { maxProjections[2] = proj; maxVertices[2] = vertices.fetch(vertex_i); } + // Slab 3: dir {1, 1, 1} + proj = vertices.fetch(vertex_i).x + vertices.fetch(vertex_i).y + vertices.fetch(vertex_i).z; + if (proj < minProjections[3]) { minProjections[3] = proj; minVertices[3] = vertices.fetch(vertex_i); } + if (proj > maxProjections[3]) { maxProjections[3] = proj; maxVertices[3] = vertices.fetch(vertex_i); } + // Slab 4: dir {1, 1, -1} + proj = vertices.fetch(vertex_i).x + vertices.fetch(vertex_i).y - vertices.fetch(vertex_i).z; + if (proj < minProjections[4]) { minProjections[4] = proj; minVertices[4] = vertices.fetch(vertex_i); } + if (proj > maxProjections[4]) { maxProjections[4] = proj; maxVertices[4] = vertices.fetch(vertex_i); } + // Slab 5: dir {1, -1, 1} + proj = vertices.fetch(vertex_i).x - vertices.fetch(vertex_i).y + vertices.fetch(vertex_i).z; + if (proj < minProjections[5]) { minProjections[5] = proj; minVertices[5] = vertices.fetch(vertex_i); } + if (proj > maxProjections[5]) { maxProjections[5] = proj; maxVertices[5] = vertices.fetch(vertex_i); } + // Slab 6: dir {1, -1, -1} + proj = vertices.fetch(vertex_i).x - vertices.fetch(vertex_i).y - vertices.fetch(vertex_i).z; + if (proj < minProjections[6]) { minProjections[6] = proj; minVertices[6] = vertices.fetch(vertex_i); } + if (proj > maxProjections[6]) { maxProjections[6] = proj; maxVertices[6] = vertices.fetch(vertex_i); } + } + + return result; + }; + + static auto getSqDist = [](hlsl::float32_t3 a, hlsl::float32_t3 b) -> hlsl::float32_t + { + return hlsl::dot(a - b, a - b); + }; + + static auto findFurthestPointPair = [](const ExtremalVertices& extremalVertices) -> std::pair + { + int indexFurthestPair = 0; + auto maxSqDist = getSqDist(extremalVertices.maxPtr()[0], extremalVertices.minPtr()[0]); + for (int k = 1; k < SAMPLE_DIR_COUNT; k++) + { + const auto sqDist = 
getSqDist(extremalVertices.maxPtr()[k], extremalVertices.minPtr()[k]); + if (sqDist > maxSqDist) { maxSqDist = sqDist; indexFurthestPair = k; } + } + return { + extremalVertices.minPtr()[indexFurthestPair], + extremalVertices.maxPtr()[indexFurthestPair] + }; + }; + + static auto getSqDistPointInfiniteEdge = [](const hlsl::float32_t3& q, const hlsl::float32_t3& p0, const hlsl::float32_t3& v) -> hlsl::float32_t + { + const auto u0 = q - p0; + const auto t = dot(v, u0); + const auto sqLen_v = hlsl::dot(v, v); + return hlsl::dot(u0, u0) - (t * t) / sqLen_v; + }; + + static auto findFurthestPointFromInfiniteEdge = [](const hlsl::float32_t3& p0, const hlsl::float32_t3& e0, const VertexCollection& vertices) + { + auto maxSqDist = getSqDistPointInfiniteEdge(vertices[0], p0, e0); + int maxIndex = 0; + for (size_t i = 1; i < vertices.size; i++) + { + const auto sqDist = getSqDistPointInfiniteEdge(vertices[i], p0, e0); + if (sqDist > maxSqDist) + { maxSqDist = sqDist; + maxIndex = i; + } + } + + struct Result + { + hlsl::float32_t3 point; + hlsl::float32_t sqDist; + }; + return Result{ + vertices[maxIndex], + maxSqDist + }; + }; + + static auto findExtremalProjs_OneDir = [](const hlsl::float32_t3& normal, const VertexCollection& vertices) + { + const auto firstProj = hlsl::dot(vertices[0], normal); + auto tMinProj = firstProj, tMaxProj = firstProj; + + for (int i = 1; i < vertices.size; i++) + { + const auto proj = hlsl::dot(vertices[i], normal); + if (proj < tMinProj) { tMinProj = proj; } + if (proj > tMaxProj) { tMaxProj = proj; } + } + + struct Result + { + hlsl::float32_t minProj; + hlsl::float32_t maxProj; + }; + return Result{ tMinProj, tMaxProj }; + }; + + static auto findExtremalPoints_OneDir = [](const hlsl::float32_t3& normal, const VertexCollection& vertices) + { + const auto firstProj = dot(vertices[0], normal); + + auto tMinProj = firstProj, tMaxProj = firstProj; + auto tMinVert = vertices[0], tMaxVert = vertices[0]; + + for (int i = 1; i < vertices.size; i++) + 
{ + const auto proj = hlsl::dot(vertices[i], normal); + if (proj < tMinProj) { tMinProj = proj; tMinVert = vertices[i]; } + if (proj > tMaxProj) { tMaxProj = proj; tMaxVert = vertices[i]; } + } + + struct Result + { + hlsl::float32_t minProj; + hlsl::float32_t maxProj; + hlsl::float32_t3 minVert; + hlsl::float32_t3 maxVert; + }; + return Result{ tMinProj, tMaxProj, tMinVert, tMaxVert }; + }; + + static auto findUpperLowerTetraPoints = []( + const hlsl::float32_t3& n, + const VertexCollection& vertices, + const hlsl::float32_t3& p0) + { + const auto eps = 0.000001f; + const auto extremalPoints = findExtremalPoints_OneDir(n, vertices); + const auto triProj = hlsl::dot(p0, n); + + const auto maxVert = extremalPoints.maxProj - eps > triProj ? std::optional(extremalPoints.maxVert) : std::nullopt; + const auto minVert = extremalPoints.minProj + eps < triProj ? std::optional(extremalPoints.minVert) : std::nullopt; + + struct Result + { + std::optional minVert; + std::optional maxVert; + }; + return Result{ + minVert, + maxVert + }; + }; + + static auto findBestObbAxesFromTriangleNormalAndEdgeVectors = []( + const VertexCollection& vertices, + const hlsl::float32_t3 normal, + const std::array& edges, + Axes& bestAxes, + hlsl::float32_t& bestVal) + { + // The operands are assumed to be orthogonal and unit normals + const auto yExtremeProjs = findExtremalProjs_OneDir(normal, vertices); + const auto yLen = yExtremeProjs.maxProj - yExtremeProjs.minProj; + + for (const auto& edge : edges) + { + const auto binormal = hlsl::cross(edge, normal); + + const auto xExtremeProjs = findExtremalProjs_OneDir(edge, vertices); + const auto xLen = xExtremeProjs.maxProj - xExtremeProjs.minProj; + + const auto zExtremeProjs = findExtremalProjs_OneDir(binormal, vertices); + const auto zLen = zExtremeProjs.maxProj - zExtremeProjs.minProj; + + const auto quality = getQualityValue({xLen, yLen, zLen}); + if (quality < bestVal) + { + bestVal = quality; + bestAxes = { + edge, + normal, + binormal + 
}; + } + } + + }; + + + static auto findBaseTriangle = [](const ExtremalVertices& extremalVertices, const VertexCollection& vertices)-> LargeBaseTriangle + { + constexpr hlsl::float32_t eps = 0.000001f; + + std::array baseTriangleVertices; + Edges edges; + + // Find the furthest point pair among the selected min and max point pairs + std::tie(baseTriangleVertices[0], baseTriangleVertices[1]) = findFurthestPointPair(extremalVertices); + + // Degenerate case 1: + // If the found furthest points are located very close, return OBB aligned with the initial AABB + if (getSqDist(baseTriangleVertices[0], baseTriangleVertices[1]) < eps) + { + return { + .vertices = baseTriangleVertices, + .flag = LargeBaseTriangle::SECOND_POINT_CLOSE + }; + } + + // Compute edge vector of the line segment p0, p1 + edges[0] = hlsl::normalize(baseTriangleVertices[0] - baseTriangleVertices[1]); + + // Find a third point furthest away from line given by p0, e0 to define the large base triangle + const auto furthestPointRes = findFurthestPointFromInfiniteEdge(vertices[0], edges[0], vertices); + + // Degenerate case 2: + // If the third point is located very close to the line, return an OBB aligned with the line + if (furthestPointRes.sqDist < eps) + { + return { + .vertices = baseTriangleVertices, + .edges = edges, + .flag = LargeBaseTriangle::THIRD_POINT_CLOSE + }; + } + + // Compute the two remaining edge vectors and the normal vector of the base triangle + edges[1] = hlsl::normalize(baseTriangleVertices[1] - baseTriangleVertices[2]); + edges[2] = hlsl::normalize(baseTriangleVertices[2] - baseTriangleVertices[0]); + const auto normal = hlsl::normalize(hlsl::cross(edges[1], edges[0])); + + return { + .normal = normal, + .vertices = baseTriangleVertices, + .edges = edges, + .flag = LargeBaseTriangle::NORMAL + }; + }; + + auto findImprovedObbAxesFromUpperAndLowerTetrasOfBaseTriangle = [](const VertexCollection& vertices, + const LargeBaseTriangle& baseTriangle, + Axes& bestAxes, hlsl::float32_t& 
bestVal) + { + + // Find furthest points above and below the plane of the base triangle for tetra constructions + // For each found valid point, search for the best OBB axes based on the 3 arising triangles + const auto upperLowerTetraVertices = findUpperLowerTetraPoints(baseTriangle.normal, vertices, baseTriangle.vertices[0]); + if (upperLowerTetraVertices.minVert) + { + const auto minVert = *upperLowerTetraVertices.minVert; + const auto f0 = normalize(minVert - baseTriangle.vertices[0]); + const auto f1 = normalize(minVert - baseTriangle.vertices[1]); + const auto f2 = normalize(minVert - baseTriangle.vertices[2]); + const auto n0 = normalize(cross(f1, baseTriangle.edges[0])); + const auto n1 = normalize(cross(f2, baseTriangle.edges[1])); + const auto n2 = normalize(cross(f0, baseTriangle.edges[2])); + findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, n0, { baseTriangle.edges[0], f1, f0 }, bestAxes, bestVal); + findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, n1, { baseTriangle.edges[1], f2, f1 }, bestAxes, bestVal); + findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, n2, { baseTriangle.edges[2], f0, f2 }, bestAxes, bestVal); + } + if (upperLowerTetraVertices.maxVert) + { + const auto maxVert = *upperLowerTetraVertices.maxVert; + const auto f0 = normalize(maxVert - baseTriangle.vertices[0]); + const auto f1 = normalize(maxVert - baseTriangle.vertices[1]); + const auto f2 = normalize(maxVert - baseTriangle.vertices[2]); + const auto n0 = normalize(cross(f1, baseTriangle.edges[0])); + const auto n1 = normalize(cross(f2, baseTriangle.edges[1])); + const auto n2 = normalize(cross(f0, baseTriangle.edges[2])); + findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, n0, { baseTriangle.edges[0], f1, f0 }, bestAxes, bestVal); + findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, n1, { baseTriangle.edges[1], f2, f1 }, bestAxes, bestVal); + findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, n2, { baseTriangle.edges[2], f0, f2 }, 
bestAxes, bestVal); + } + }; + + static auto buildObbFromAxesAndLocalMinMax = []( + const Axes& axes, + const hlsl::float32_t3& localMin, + const hlsl::float32_t3& localMax) -> hlsl::shapes::OBB<3, hlsl::float32_t> + { + const auto localMid = 0.5f * (localMin + localMax); + return { + .mid = axes[0] * localMid.x + axes[1] * localMid.y + axes[2] * localMid.z, + .axes = axes, + .ext = 0.5f * (localMax - localMin) + }; + }; + + static auto computeObb = [](const Axes& axes, const VertexCollection& vertices) + { + const auto extremalX = findExtremalProjs_OneDir(axes[0], vertices); + const auto extremalY = findExtremalProjs_OneDir(axes[1], vertices); + const auto extremalZ = findExtremalProjs_OneDir(axes[2], vertices); + const auto localMin = hlsl::float32_t3{ extremalX.minProj, extremalY.minProj, extremalZ.minProj }; + const auto localMax = hlsl::float32_t3{ extremalX.maxProj, extremalY.maxProj, extremalZ.maxProj }; + return buildObbFromAxesAndLocalMinMax(axes, localMin, localMax); + }; + + static auto computeLineAlignedObb = [](const hlsl::float32_t3& u, const VertexCollection& vertices) + { + // Given u, build any orthonormal base u, v, w + + // Make sure r is not equal to u + auto r = u; + if (fabs(u.x) > fabs(u.y) && fabs(u.x) > fabs(u.z)) { r.x = 0; } + else if (fabs(u.y) > fabs(u.z)) { r.y = 0; } + else { r.z = 0; } + + const auto sqLen = hlsl::dot(r, r); + if (sqLen < FLT_EPSILON) { r.x = r.y = r.z = 1; } + + const auto v = normalize(cross(u, r)); + const auto w = normalize(cross(u, v)); + return computeObb({ u, v, w }, vertices); + }; + + const auto extremals = findExtremals_7FixedDirs(vertices); + + const auto* minProj = extremals.projections.minPtr(); + const auto* maxProj = extremals.projections.maxPtr(); + + // Determine which points to use in the iterations below + const auto selectedVertices = [&] + { + if (vertices.size < SAMPLE_COUNT) { return vertices; } + return VertexCollection::fromSpan(extremals.vertices.values); + }(); + + // Compute size of AABB 
(max and min projections of vertices are already computed as slabs 0-2) + auto alMid = hlsl::float32_t3((minProj[0] + maxProj[0]) * 0.5f, (minProj[1] + maxProj[1]) * 0.5f, (minProj[2] + maxProj[2]) * 0.5f); + auto alLen = hlsl::float32_t3(maxProj[0] - minProj[0], maxProj[1] - minProj[1], maxProj[2] - minProj[2]); + auto alVal = getQualityValue(alLen); + + + const auto baseTriangle = findBaseTriangle(extremals.vertices, vertices); + + if (baseTriangle.flag == LargeBaseTriangle::SECOND_POINT_CLOSE) + return hlsl::shapes::OBB<>::createAxisAligned(alMid, alLen); + if (baseTriangle.flag == LargeBaseTriangle::THIRD_POINT_CLOSE) + return computeLineAlignedObb(baseTriangle.edges[0], vertices); + + + Axes bestAxes = { + hlsl::float32_t3{1.f, 0.f, 0.f}, + {0.f, 1.f, 0.f}, + {0.f, 0.f, 1.f}, + }; + auto bestVal = alVal; + // Find best OBB axes based on the base triangle + findBestObbAxesFromTriangleNormalAndEdgeVectors(selectedVertices, baseTriangle.normal, baseTriangle.edges, bestAxes, bestVal); + + // Find improved OBB axes based on constructed di-tetrahedral shape raised from base triangle + findImprovedObbAxesFromUpperAndLowerTetrasOfBaseTriangle(selectedVertices, baseTriangle, bestAxes, bestVal); + + const auto obb = computeObb(bestAxes, vertices); + + // Check if the OBB extent is still smaller than the intial AABB + if (getQualityValue(2.f * obb.ext) < alVal) + return obb; + return hlsl::shapes::OBB<>::createAxisAligned(alMid, alLen); - static hlsl::shapes::OBB<> compute(const VertexCollection& vertices); + } }; diff --git a/include/nbl/asset/utils/CPolygonGeometryManipulator.h b/include/nbl/asset/utils/CPolygonGeometryManipulator.h index 7b953b4fbd..be1167c406 100644 --- a/include/nbl/asset/utils/CPolygonGeometryManipulator.h +++ b/include/nbl/asset/utils/CPolygonGeometryManipulator.h @@ -10,6 +10,7 @@ #include "nbl/asset/ICPUPolygonGeometry.h" #include "nbl/asset/utils/CGeometryManipulator.h" #include "nbl/asset/utils/CSmoothNormalGenerator.h" +#include 
"nbl/asset/utils/COBBGenerator.h" #include "nbl/builtin/hlsl/shapes/obb.hlsl" namespace nbl::asset @@ -232,26 +233,13 @@ class NBL_API2 CPolygonGeometryManipulator EEM_COUNT }; - struct VertexCollection - { - using FetchFn = std::function; - FetchFn fetch; - size_t size; - - static auto fromSpan(std::span vertices) -> VertexCollection - { - return VertexCollection{ - .fetch = [data = vertices.data()](size_t vertexIndex)-> hlsl::float32_t3 - { - return data[vertexIndex]; - }, - .size = vertices.size() - }; - } + template + requires (std::same_as, hlsl::float32_t3>) + static hlsl::shapes::OBB<3, hlsl::float32_t> calculateOBB(size_t vertexCount, FetchVertexFn&& fetchFn) + { + return COBBGenerator::compute(vertexCount, std::forward(fetchFn)); + } - hlsl::float32_t3 operator[](size_t index) const { return fetch(index); } - }; - static hlsl::shapes::OBB<3, hlsl::float32_t> calculateOBB(const VertexCollection& vertexCollection); static core::smart_refctd_ptr createUnweldedList(const ICPUPolygonGeometry* inGeo); using SSNGVertexData = CSmoothNormalGenerator::VertexData; diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 13e5b44728..76e046848c 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -186,7 +186,6 @@ set(NBL_ASSET_SOURCES # Meshes asset/utils/CForsythVertexCacheOptimizer.cpp asset/utils/CSmoothNormalGenerator.cpp - asset/utils/COBBGenerator.cpp asset/utils/CGeometryCreator.cpp asset/utils/CPolygonGeometryManipulator.cpp asset/utils/COverdrawPolygonGeometryOptimizer.cpp diff --git a/src/nbl/asset/utils/COBBGenerator.cpp b/src/nbl/asset/utils/COBBGenerator.cpp deleted file mode 100644 index db8d0dc123..0000000000 --- a/src/nbl/asset/utils/COBBGenerator.cpp +++ /dev/null @@ -1,482 +0,0 @@ -#include "nbl/asset/utils/COBBGenerator.h" - -namespace nbl::asset -{ - -namespace -{ - -template -struct Extremals -{ - std::array values; - - T* minPtr() - { - return values.data(); - } - - const T* minPtr() const - { - return values.data(); - } - 
- T* maxPtr() - { - return values.data() + CountV; - } - - const T* maxPtr() const - { - return values.data() + CountV; - } - -}; -} - -hlsl::shapes::OBB<> COBBGenerator::compute(const VertexCollection& vertices) -{ - constexpr size_t SAMPLE_DIR_COUNT = 7; // Number of sample directions - constexpr size_t SAMPLE_COUNT = SAMPLE_DIR_COUNT * 2; - - if (vertices.size <= 0) - { - return hlsl::shapes::OBB<>::createAxisAligned({}, {}); - } - - static auto getQualityValue = [](hlsl::float32_t3 len) -> hlsl::float32_t - { - return len.x * len.y + len.x * len.z + len.y * len.z; //half box area - }; - - using ExtremalVertices = Extremals; - using ExtremalProjections = Extremals; - using Axes = std::array; - using Edges = std::array; - - struct ExtremalSamples - { - ExtremalVertices vertices; - ExtremalProjections projections; - }; - - struct LargeBaseTriangle - { - hlsl::float32_t3 normal = {}; - Axes vertices = {}; - Edges edges = {}; - enum Flag - { - NORMAL, - SECOND_POINT_CLOSE, - THIRD_POINT_CLOSE - } flag; - }; - - static auto findExtremals_7FixedDirs = [](const VertexCollection& vertices)-> ExtremalSamples - { - ExtremalSamples result; - hlsl::float32_t proj; - - const auto firstVertex = vertices.fetch(0); - - auto* minProjections = result.projections.minPtr(); - auto* maxProjections = result.projections.maxPtr(); - - auto* minVertices = result.vertices.minPtr(); - auto* maxVertices = result.vertices.maxPtr(); - - // Slab 0: dir {1, 0, 0} - proj = firstVertex.x; - minProjections[0] = minProjections[0] = proj; - minVertices[0] = firstVertex; maxVertices[0] = firstVertex; - // Slab 1: dir {0, 1, 0} - proj = firstVertex.y; - minProjections[1] = maxProjections[1] = proj; - minVertices[1] = firstVertex; maxVertices[1] = firstVertex; - // Slab 2: dir {0, 0, 1} - proj = firstVertex.z; - minProjections[2] = maxProjections[2] = proj; - minVertices[2] = firstVertex; maxVertices[2] = firstVertex; - // Slab 3: dir {1, 1, 1} - proj = firstVertex.x + firstVertex.y + firstVertex.z; - 
minProjections[3] = maxProjections[3] = proj; - minVertices[3] = firstVertex; maxVertices[3] = firstVertex; - // Slab 4: dir {1, 1, -1} - proj = firstVertex.x + firstVertex.y - firstVertex.z; - minProjections[4] = maxProjections[4] = proj; - minVertices[4] = firstVertex; maxVertices[4] = firstVertex; - // Slab 5: dir {1, -1, 1} - proj = firstVertex.x - firstVertex.y + firstVertex.z; - minProjections[5] = maxProjections[5] = proj; - minVertices[5] = firstVertex; maxVertices[5] = firstVertex; - // Slab 6: dir {1, -1, -1} - proj = firstVertex.x - firstVertex.y - firstVertex.z; - minProjections[6] = maxProjections[6] = proj; - minVertices[6] = firstVertex; maxVertices[6] = firstVertex; - - for (size_t vertex_i = 1; vertex_i < vertices.size; vertex_i++) - { - const auto vertex = vertices.fetch(vertex_i); - // Slab 0: dir {1, 0, 0} - proj = vertices.fetch(vertex_i).x; - if (proj < minProjections[0]) { minProjections[0] = proj; minVertices[0] = vertices.fetch(vertex_i); } - if (proj > maxProjections[0]) { maxProjections[0] = proj; maxVertices[0] = vertices.fetch(vertex_i); } - // Slab 1: dir {0, 1, 0} - proj = vertices.fetch(vertex_i).y; - if (proj < minProjections[1]) { minProjections[1] = proj; minVertices[1] = vertices.fetch(vertex_i); } - if (proj > maxProjections[1]) { maxProjections[1] = proj; maxVertices[1] = vertices.fetch(vertex_i); } - // Slab 2: dir {0, 0, 1} - proj = vertices.fetch(vertex_i).z; - if (proj < minProjections[2]) { minProjections[2] = proj; minVertices[2] = vertices.fetch(vertex_i); } - if (proj > maxProjections[2]) { maxProjections[2] = proj; maxVertices[2] = vertices.fetch(vertex_i); } - // Slab 3: dir {1, 1, 1} - proj = vertices.fetch(vertex_i).x + vertices.fetch(vertex_i).y + vertices.fetch(vertex_i).z; - if (proj < minProjections[3]) { minProjections[3] = proj; minVertices[3] = vertices.fetch(vertex_i); } - if (proj > maxProjections[3]) { maxProjections[3] = proj; maxVertices[3] = vertices.fetch(vertex_i); } - // Slab 4: dir {1, 1, -1} - proj 
= vertices.fetch(vertex_i).x + vertices.fetch(vertex_i).y - vertices.fetch(vertex_i).z; - if (proj < minProjections[4]) { minProjections[4] = proj; minVertices[4] = vertices.fetch(vertex_i); } - if (proj > maxProjections[4]) { maxProjections[4] = proj; maxVertices[4] = vertices.fetch(vertex_i); } - // Slab 5: dir {1, -1, 1} - proj = vertices.fetch(vertex_i).x - vertices.fetch(vertex_i).y + vertices.fetch(vertex_i).z; - if (proj < minProjections[5]) { minProjections[5] = proj; minVertices[5] = vertices.fetch(vertex_i); } - if (proj > maxProjections[5]) { maxProjections[5] = proj; maxVertices[5] = vertices.fetch(vertex_i); } - // Slab 6: dir {1, -1, -1} - proj = vertices.fetch(vertex_i).x - vertices.fetch(vertex_i).y - vertices.fetch(vertex_i).z; - if (proj < minProjections[6]) { minProjections[6] = proj; minVertices[6] = vertices.fetch(vertex_i); } - if (proj > maxProjections[6]) { maxProjections[6] = proj; maxVertices[6] = vertices.fetch(vertex_i); } - } - - return result; - }; - - static auto getSqDist = [](hlsl::float32_t3 a, hlsl::float32_t3 b) -> hlsl::float32_t - { - return hlsl::dot(a - b, a - b); - }; - - static auto findFurthestPointPair = [](const ExtremalVertices& extremalVertices) -> std::pair - { - int indexFurthestPair = 0; - auto maxSqDist = getSqDist(extremalVertices.maxPtr()[0], extremalVertices.minPtr()[0]); - for (int k = 1; k < SAMPLE_DIR_COUNT; k++) - { - const auto sqDist = getSqDist(extremalVertices.maxPtr()[k], extremalVertices.minPtr()[k]); - if (sqDist > maxSqDist) { maxSqDist = sqDist; indexFurthestPair = k; } - } - return { - extremalVertices.minPtr()[indexFurthestPair], - extremalVertices.maxPtr()[indexFurthestPair] - }; - }; - - static auto getSqDistPointInfiniteEdge = [](const hlsl::float32_t3& q, const hlsl::float32_t3& p0, const hlsl::float32_t3& v) -> hlsl::float32_t - { - const auto u0 = q - p0; - const auto t = dot(v, u0); - const auto sqLen_v = hlsl::dot(v, v); - return hlsl::dot(u0, u0) - (t * t) / sqLen_v; - }; - - static auto 
findFurthestPointFromInfiniteEdge = [](const hlsl::float32_t3& p0, const hlsl::float32_t3& e0, const VertexCollection& vertices) - { - auto maxSqDist = getSqDistPointInfiniteEdge(vertices[0], p0, e0); - int maxIndex = 0; - for (size_t i = 1; i < vertices.size; i++) - { - const auto sqDist = getSqDistPointInfiniteEdge(vertices[i], p0, e0); - if (sqDist > maxSqDist) - { maxSqDist = sqDist; - maxIndex = i; - } - } - - struct Result - { - hlsl::float32_t3 point; - hlsl::float32_t sqDist; - }; - return Result{ - vertices[maxIndex], - maxSqDist - }; - }; - - static auto findExtremalProjs_OneDir = [](const hlsl::float32_t3& normal, const VertexCollection& vertices) - { - const auto firstProj = hlsl::dot(vertices[0], normal); - auto tMinProj = firstProj, tMaxProj = firstProj; - - for (int i = 1; i < vertices.size; i++) - { - const auto proj = hlsl::dot(vertices[i], normal); - if (proj < tMinProj) { tMinProj = proj; } - if (proj > tMaxProj) { tMaxProj = proj; } - } - - struct Result - { - hlsl::float32_t minProj; - hlsl::float32_t maxProj; - }; - return Result{ tMinProj, tMaxProj }; - }; - - static auto findExtremalPoints_OneDir = [](const hlsl::float32_t3& normal, const VertexCollection& vertices) - { - const auto firstProj = dot(vertices[0], normal); - - auto tMinProj = firstProj, tMaxProj = firstProj; - auto tMinVert = vertices[0], tMaxVert = vertices[0]; - - for (int i = 1; i < vertices.size; i++) - { - const auto proj = hlsl::dot(vertices[i], normal); - if (proj < tMinProj) { tMinProj = proj; tMinVert = vertices[i]; } - if (proj > tMaxProj) { tMaxProj = proj; tMaxVert = vertices[i]; } - } - - struct Result - { - hlsl::float32_t minProj; - hlsl::float32_t maxProj; - hlsl::float32_t3 minVert; - hlsl::float32_t3 maxVert; - }; - return Result{ tMinProj, tMaxProj, tMinVert, tMaxVert }; - }; - - static auto findUpperLowerTetraPoints = []( - const hlsl::float32_t3& n, - const VertexCollection& vertices, - const hlsl::float32_t3& p0) - { - const auto eps = 0.000001f; - const 
auto extremalPoints = findExtremalPoints_OneDir(n, vertices); - const auto triProj = hlsl::dot(p0, n); - - const auto maxVert = extremalPoints.maxProj - eps > triProj ? std::optional(extremalPoints.maxVert) : std::nullopt; - const auto minVert = extremalPoints.minProj + eps < triProj ? std::optional(extremalPoints.minVert) : std::nullopt; - - struct Result - { - std::optional minVert; - std::optional maxVert; - }; - return Result{ - minVert, - maxVert - }; - }; - - static auto findBestObbAxesFromTriangleNormalAndEdgeVectors = []( - const VertexCollection& vertices, - const hlsl::float32_t3 normal, - const std::array& edges, - Axes& bestAxes, - hlsl::float32_t& bestVal) - { - // The operands are assumed to be orthogonal and unit normals - const auto yExtremeProjs = findExtremalProjs_OneDir(normal, vertices); - const auto yLen = yExtremeProjs.maxProj - yExtremeProjs.minProj; - - for (const auto& edge : edges) - { - const auto binormal = hlsl::cross(edge, normal); - - const auto xExtremeProjs = findExtremalProjs_OneDir(edge, vertices); - const auto xLen = xExtremeProjs.maxProj - xExtremeProjs.minProj; - - const auto zExtremeProjs = findExtremalProjs_OneDir(binormal, vertices); - const auto zLen = zExtremeProjs.maxProj - zExtremeProjs.minProj; - - const auto quality = getQualityValue({xLen, yLen, zLen}); - if (quality < bestVal) - { - bestVal = quality; - bestAxes = { - edge, - normal, - binormal - }; - } - } - - }; - - - static auto findBaseTriangle = [](const ExtremalVertices& extremalVertices, const VertexCollection& vertices)-> LargeBaseTriangle - { - constexpr hlsl::float32_t eps = 0.000001f; - - std::array baseTriangleVertices; - Edges edges; - - // Find the furthest point pair among the selected min and max point pairs - std::tie(baseTriangleVertices[0], baseTriangleVertices[1]) = findFurthestPointPair(extremalVertices); - - // Degenerate case 1: - // If the found furthest points are located very close, return OBB aligned with the initial AABB - if 
(getSqDist(baseTriangleVertices[0], baseTriangleVertices[1]) < eps) - { - return { - .vertices = baseTriangleVertices, - .flag = LargeBaseTriangle::SECOND_POINT_CLOSE - }; - } - - // Compute edge vector of the line segment p0, p1 - edges[0] = hlsl::normalize(baseTriangleVertices[0] - baseTriangleVertices[1]); - - // Find a third point furthest away from line given by p0, e0 to define the large base triangle - const auto furthestPointRes = findFurthestPointFromInfiniteEdge(vertices[0], edges[0], vertices); - - // Degenerate case 2: - // If the third point is located very close to the line, return an OBB aligned with the line - if (furthestPointRes.sqDist < eps) - { - return { - .vertices = baseTriangleVertices, - .edges = edges, - .flag = LargeBaseTriangle::THIRD_POINT_CLOSE - }; - } - - // Compute the two remaining edge vectors and the normal vector of the base triangle - edges[1] = hlsl::normalize(baseTriangleVertices[1] - baseTriangleVertices[2]); - edges[2] = hlsl::normalize(baseTriangleVertices[2] - baseTriangleVertices[0]); - const auto normal = hlsl::normalize(hlsl::cross(edges[1], edges[0])); - - return { - .normal = normal, - .vertices = baseTriangleVertices, - .edges = edges, - .flag = LargeBaseTriangle::NORMAL - }; - }; - - auto findImprovedObbAxesFromUpperAndLowerTetrasOfBaseTriangle = [](const VertexCollection& vertices, - const LargeBaseTriangle& baseTriangle, - Axes& bestAxes, hlsl::float32_t& bestVal) - { - - // Find furthest points above and below the plane of the base triangle for tetra constructions - // For each found valid point, search for the best OBB axes based on the 3 arising triangles - const auto upperLowerTetraVertices = findUpperLowerTetraPoints(baseTriangle.normal, vertices, baseTriangle.vertices[0]); - if (upperLowerTetraVertices.minVert) - { - const auto minVert = *upperLowerTetraVertices.minVert; - const auto f0 = normalize(minVert - baseTriangle.vertices[0]); - const auto f1 = normalize(minVert - baseTriangle.vertices[1]); - const 
auto f2 = normalize(minVert - baseTriangle.vertices[2]); - const auto n0 = normalize(cross(f1, baseTriangle.edges[0])); - const auto n1 = normalize(cross(f2, baseTriangle.edges[1])); - const auto n2 = normalize(cross(f0, baseTriangle.edges[2])); - findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, n0, { baseTriangle.edges[0], f1, f0 }, bestAxes, bestVal); - findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, n1, { baseTriangle.edges[1], f2, f1 }, bestAxes, bestVal); - findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, n2, { baseTriangle.edges[2], f0, f2 }, bestAxes, bestVal); - } - if (upperLowerTetraVertices.maxVert) - { - const auto maxVert = *upperLowerTetraVertices.maxVert; - const auto f0 = normalize(maxVert - baseTriangle.vertices[0]); - const auto f1 = normalize(maxVert - baseTriangle.vertices[1]); - const auto f2 = normalize(maxVert - baseTriangle.vertices[2]); - const auto n0 = normalize(cross(f1, baseTriangle.edges[0])); - const auto n1 = normalize(cross(f2, baseTriangle.edges[1])); - const auto n2 = normalize(cross(f0, baseTriangle.edges[2])); - findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, n0, { baseTriangle.edges[0], f1, f0 }, bestAxes, bestVal); - findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, n1, { baseTriangle.edges[1], f2, f1 }, bestAxes, bestVal); - findBestObbAxesFromTriangleNormalAndEdgeVectors(vertices, n2, { baseTriangle.edges[2], f0, f2 }, bestAxes, bestVal); - } - }; - - static auto buildObbFromAxesAndLocalMinMax = []( - const Axes& axes, - const hlsl::float32_t3& localMin, - const hlsl::float32_t3& localMax) -> hlsl::shapes::OBB<3, hlsl::float32_t> - { - const auto localMid = 0.5f * (localMin + localMax); - return { - .mid = axes[0] * localMid.x + axes[1] * localMid.y + axes[2] * localMid.z, - .axes = axes, - .ext = 0.5f * (localMax - localMin) - }; - }; - - static auto computeObb = [](const Axes& axes, const VertexCollection& vertices) - { - const auto extremalX = findExtremalProjs_OneDir(axes[0], 
vertices); - const auto extremalY = findExtremalProjs_OneDir(axes[1], vertices); - const auto extremalZ = findExtremalProjs_OneDir(axes[2], vertices); - const auto localMin = hlsl::float32_t3{ extremalX.minProj, extremalY.minProj, extremalZ.minProj }; - const auto localMax = hlsl::float32_t3{ extremalX.maxProj, extremalY.maxProj, extremalZ.maxProj }; - return buildObbFromAxesAndLocalMinMax(axes, localMin, localMax); - }; - - static auto computeLineAlignedObb = [](const hlsl::float32_t3& u, const VertexCollection& vertices) - { - // Given u, build any orthonormal base u, v, w - - // Make sure r is not equal to u - auto r = u; - if (fabs(u.x) > fabs(u.y) && fabs(u.x) > fabs(u.z)) { r.x = 0; } - else if (fabs(u.y) > fabs(u.z)) { r.y = 0; } - else { r.z = 0; } - - const auto sqLen = hlsl::dot(r, r); - if (sqLen < FLT_EPSILON) { r.x = r.y = r.z = 1; } - - const auto v = normalize(cross(u, r)); - const auto w = normalize(cross(u, v)); - return computeObb({ u, v, w }, vertices); - }; - - const auto extremals = findExtremals_7FixedDirs(vertices); - - const auto* minProj = extremals.projections.minPtr(); - const auto* maxProj = extremals.projections.maxPtr(); - - // Determine which points to use in the iterations below - const auto selectedVertices = [&] - { - if (vertices.size < SAMPLE_COUNT) { return vertices; } - return VertexCollection::fromSpan(extremals.vertices.values); - }(); - - // Compute size of AABB (max and min projections of vertices are already computed as slabs 0-2) - auto alMid = hlsl::float32_t3((minProj[0] + maxProj[0]) * 0.5f, (minProj[1] + maxProj[1]) * 0.5f, (minProj[2] + maxProj[2]) * 0.5f); - auto alLen = hlsl::float32_t3(maxProj[0] - minProj[0], maxProj[1] - minProj[1], maxProj[2] - minProj[2]); - auto alVal = getQualityValue(alLen); - - - const auto baseTriangle = findBaseTriangle(extremals.vertices, vertices); - - if (baseTriangle.flag == LargeBaseTriangle::SECOND_POINT_CLOSE) - return hlsl::shapes::OBB<>::createAxisAligned(alMid, alLen); - if 
(baseTriangle.flag == LargeBaseTriangle::THIRD_POINT_CLOSE) - return computeLineAlignedObb(baseTriangle.edges[0], vertices); - - - Axes bestAxes = { - hlsl::float32_t3{1.f, 0.f, 0.f}, - {0.f, 1.f, 0.f}, - {0.f, 0.f, 1.f}, - }; - auto bestVal = alVal; - // Find best OBB axes based on the base triangle - findBestObbAxesFromTriangleNormalAndEdgeVectors(selectedVertices, baseTriangle.normal, baseTriangle.edges, bestAxes, bestVal); - - // Find improved OBB axes based on constructed di-tetrahedral shape raised from base triangle - findImprovedObbAxesFromUpperAndLowerTetrasOfBaseTriangle(selectedVertices, baseTriangle, bestAxes, bestVal); - - const auto obb = computeObb(bestAxes, vertices); - - // Check if the OBB extent is still smaller than the intial AABB - if (getQualityValue(2.f * obb.ext) < alVal) - return obb; - return hlsl::shapes::OBB<>::createAxisAligned(alMid, alLen); -} - -} diff --git a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp index 35d64d4d68..0e3c425e78 100644 --- a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp +++ b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp @@ -19,10 +19,6 @@ namespace nbl::asset { -hlsl::shapes::OBB<> CPolygonGeometryManipulator::calculateOBB(const VertexCollection& vertices) -{ - return COBBGenerator::compute(vertices); -} core::smart_refctd_ptr CPolygonGeometryManipulator::createUnweldedList(const ICPUPolygonGeometry* inGeo) { From 2adc7f4cb2dfc9dcac1beb6440cf21ca8a04e73e Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 7 Jan 2026 23:17:47 +0700 Subject: [PATCH 372/472] Add inline specifier --- include/nbl/asset/utils/CPolygonGeometryManipulator.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/asset/utils/CPolygonGeometryManipulator.h b/include/nbl/asset/utils/CPolygonGeometryManipulator.h index be1167c406..71aecc3356 100644 --- a/include/nbl/asset/utils/CPolygonGeometryManipulator.h +++ 
b/include/nbl/asset/utils/CPolygonGeometryManipulator.h @@ -235,7 +235,7 @@ class NBL_API2 CPolygonGeometryManipulator template requires (std::same_as, hlsl::float32_t3>) - static hlsl::shapes::OBB<3, hlsl::float32_t> calculateOBB(size_t vertexCount, FetchVertexFn&& fetchFn) + static inline hlsl::shapes::OBB<3, hlsl::float32_t> calculateOBB(size_t vertexCount, FetchVertexFn&& fetchFn) { return COBBGenerator::compute(vertexCount, std::forward(fetchFn)); } From d23cde3780077172c656fd7a25f767b8d4cede97 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 8 Jan 2026 01:58:30 +0700 Subject: [PATCH 373/472] Change obb to use matrix as member --- include/nbl/asset/utils/COBBGenerator.h | 22 +++++++----- include/nbl/builtin/hlsl/shapes/obb.hlsl | 43 +++++++++++++++++++----- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 10 +----- 3 files changed, 48 insertions(+), 27 deletions(-) diff --git a/include/nbl/asset/utils/COBBGenerator.h b/include/nbl/asset/utils/COBBGenerator.h index 70180cba77..573ce30232 100644 --- a/include/nbl/asset/utils/COBBGenerator.h +++ b/include/nbl/asset/utils/COBBGenerator.h @@ -433,20 +433,22 @@ class COBBGenerator const hlsl::float32_t3& localMax) -> hlsl::shapes::OBB<3, hlsl::float32_t> { const auto localMid = 0.5f * (localMin + localMax); - return { - .mid = axes[0] * localMid.x + axes[1] * localMid.y + axes[2] * localMid.z, - .axes = axes, - .ext = 0.5f * (localMax - localMin) - }; + const hlsl::float32_t3 axesArray[3] = {axes[0], axes[1], axes[2]}; + return hlsl::shapes::OBB<3, hlsl::float32_t>::create( + axes[0] * localMid.x + axes[1] * localMid.y + axes[2] * localMid.z, + 0.5f * (localMax - localMin), + axesArray + ); }; - static auto computeObb = [](const Axes& axes, const VertexCollection& vertices) + static auto computeObb = [](const Axes& axes, const VertexCollection& vertices, hlsl::float32_t& quality) { const auto extremalX = findExtremalProjs_OneDir(axes[0], vertices); const auto extremalY = findExtremalProjs_OneDir(axes[1], vertices); const auto 
extremalZ = findExtremalProjs_OneDir(axes[2], vertices); const auto localMin = hlsl::float32_t3{ extremalX.minProj, extremalY.minProj, extremalZ.minProj }; const auto localMax = hlsl::float32_t3{ extremalX.maxProj, extremalY.maxProj, extremalZ.maxProj }; + quality = getQualityValue(localMax - localMin); return buildObbFromAxesAndLocalMinMax(axes, localMin, localMax); }; @@ -465,7 +467,8 @@ class COBBGenerator const auto v = normalize(cross(u, r)); const auto w = normalize(cross(u, v)); - return computeObb({ u, v, w }, vertices); + hlsl::float32_t quality; + return computeObb({ u, v, w }, vertices, quality); }; const auto extremals = findExtremals_7FixedDirs(vertices); @@ -506,10 +509,11 @@ class COBBGenerator // Find improved OBB axes based on constructed di-tetrahedral shape raised from base triangle findImprovedObbAxesFromUpperAndLowerTetrasOfBaseTriangle(selectedVertices, baseTriangle, bestAxes, bestVal); - const auto obb = computeObb(bestAxes, vertices); + hlsl::float32_t improvedObbQuality; + const auto obb = computeObb(bestAxes, vertices, improvedObbQuality); // Check if the OBB extent is still smaller than the intial AABB - if (getQualityValue(2.f * obb.ext) < alVal) + if (improvedObbQuality < alVal) return obb; return hlsl::shapes::OBB<>::createAxisAligned(alMid, alLen); diff --git a/include/nbl/builtin/hlsl/shapes/obb.hlsl b/include/nbl/builtin/hlsl/shapes/obb.hlsl index 45873cbc7b..bdddc48ebf 100644 --- a/include/nbl/builtin/hlsl/shapes/obb.hlsl +++ b/include/nbl/builtin/hlsl/shapes/obb.hlsl @@ -17,22 +17,47 @@ struct OBB using scalar_t = Scalar; using point_t = vector; - static OBB createAxisAligned(point_t mid, point_t len) + NBL_CONSTEXPR_STATIC_INLINE OBB create(point_t mid, point_t ext, const point_t axes[D]) { + point_t obbScale = ext * 2.0f; + point_t axesScale[D]; + for (int dim_i = 0; dim_i < D; dim_i++) + { + axesScale[dim_i] = axes[dim_i] * obbScale[dim_i]; + } OBB ret; - ret.mid = mid; - ret.ext = len * 0.5f; - for (auto dim_i = 0; dim_i < D; 
dim_i++) + for (int row_i = 0; row_i < D; row_i++) { - ret.axes[dim_i] = point_t(0); - ret.axes[dim_i][dim_i] = 1; + for (int col_i = 0; col_i < D; col_i++) + { + ret.transform[row_i][col_i] = axesScale[col_i][row_i]; + } + } + for (int dim_i = 0; dim_i < D; dim_i++) + { + scalar_t sum = 0; + for (int dim_j = 0; dim_j < D; dim_j++) + { + sum += axesScale[dim_j][dim_i]; + } + ret.transform[dim_i][D] = mid[dim_i] - (0.5 * sum); } return ret; + + } + + NBL_CONSTEXPR_STATIC_INLINE OBB createAxisAligned(point_t mid, point_t len) + { + point_t axes[D]; + for (auto dim_i = 0; dim_i < D; dim_i++) + { + axes[dim_i] = point_t(0); + axes[dim_i][dim_i] = 1; + } + return create(mid, len * 0.5f, axes); } - point_t mid; - std::array axes; - point_t ext; + matrix transform; }; } diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index a6a1cb1bce..5e26f0a257 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -369,15 +369,7 @@ bool DrawAABB::renderSingle(const DrawParameters& params, const hlsl::shapes::AA hlsl::float32_t3x4 DrawAABB::getTransformFromOBB(const hlsl::shapes::OBB<3, float>& obb) { - const auto obbScale = obb.ext * 2.0f; - const auto axesScaleX = obb.axes[0] * obbScale.x; - const auto axesScaleY = obb.axes[1] * obbScale.y; - const auto axesScaleZ = obb.axes[2] * obbScale.z; - return float32_t3x4{ - axesScaleX.x, axesScaleY.x, axesScaleZ.x, obb.mid.x - (0.5 * (axesScaleX.x + axesScaleY.x + axesScaleZ.x)), - axesScaleX.y, axesScaleY.y, axesScaleZ.y, obb.mid.y - (0.5 * (axesScaleX.y + axesScaleY.y + axesScaleZ.y)), - axesScaleX.z, axesScaleY.z, axesScaleZ.z, obb.mid.z - (0.5 * (axesScaleX.z + axesScaleY.z + axesScaleZ.z)), - }; + return obb.transform; } } From 02f5bfe689764e79eca9fc892d5516ac60f9d992 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 8 Jan 2026 12:07:35 +0700 Subject: [PATCH 374/472] Remove CDrawAABB::getTransformFromOBB --- include/nbl/ext/DebugDraw/CDrawAABB.h | 2 -- 
src/nbl/ext/DebugDraw/CDrawAABB.cpp | 5 ----- 2 files changed, 7 deletions(-) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 6263378024..126731f425 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -208,8 +208,6 @@ namespace nbl::ext::debug_draw return transform; } - static hlsl::float32_t3x4 getTransformFromOBB(const hlsl::shapes::OBB<3, float>& aabb); - protected: struct ConstructorParams { diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index 5e26f0a257..ca82da688a 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -367,9 +367,4 @@ bool DrawAABB::renderSingle(const DrawParameters& params, const hlsl::shapes::AA return true; } -hlsl::float32_t3x4 DrawAABB::getTransformFromOBB(const hlsl::shapes::OBB<3, float>& obb) -{ - return obb.transform; -} - } From 96ef95d82251abfcf85f194afa43b2e46982b87a Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 8 Jan 2026 14:09:22 +0700 Subject: [PATCH 375/472] added matrix runtime traits for checking orthogonality, uniform scale --- .../math/linalg/matrix_runtime_traits.hlsl | 66 +++++++++++++++++++ .../nbl/builtin/hlsl/math/quaternions.hlsl | 24 +------ src/nbl/builtin/CMakeLists.txt | 1 + 3 files changed, 70 insertions(+), 21 deletions(-) create mode 100644 include/nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl diff --git a/include/nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl b/include/nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl new file mode 100644 index 0000000000..fc19b2cb3e --- /dev/null +++ b/include/nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl @@ -0,0 +1,66 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_BUILTIN_HLSL_MATH_LINALG_MATRIX_RUNTIME_TRAITS_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MATH_LINALG_MATRIX_RUNTIME_TRAITS_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/tgmath.hlsl" +#include "nbl/builtin/hlsl/testing/relative_approx_compare.hlsl" +#include "nbl/builtin/hlsl/concepts/matrix.hlsl" +#include "nbl/builtin/hlsl/matrix_utils/matrix_traits.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace math +{ +namespace linalg +{ + +template && matrix_traits::Square) +struct RuntimeTraits +{ + using matrix_t = T; + using scalar_t = typename matrix_traits::scalar_type; + NBL_CONSTEXPR_STATIC_INLINE uint16_t N = matrix_traits::RowCount; + + static RuntimeTraits create(const matrix_t m) + { + RuntimeTraits retval; + retval.invertible = !testing::relativeApproxCompare(hlsl::determinant(m), scalar_t(0.0), 1e-5); + { + bool orthogonal = true; + NBL_UNROLL for (uint16_t i = 0; i < N; i++) + orthogonal = testing::relativeApproxCompare(hlsl::dot(m[i], m[(i+1)%N]), scalar_t(0.0), 1e-4) && orthogonal; + retval.orthogonal = orthogonal; + } + { + const matrix_t m_T = hlsl::transpose(m); + scalar_t dots[N]; + NBL_UNROLL for (uint16_t i = 0; i < N; i++) + dots[i] = hlsl::dot(m[i], m[i]); + + bool uniformScale = true; + NBL_UNROLL for (uint16_t i = 0; i < N-1; i++) + uniformScale = testing::relativeApproxCompare(dots[i], dots[i+1], 1e-4) && uniformScale; + + retval.uniformScale = uniformScale; + retval.orthonormal = uniformScale && retval.orthogonal && testing::relativeApproxCompare(dots[0], scalar_t(1.0), 1e-5); + } + return retval; + } + + bool invertible; + bool orthogonal; + bool uniformScale; + bool orthonormal; +}; + +} +} +} +} + +#endif diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index 34abb042fb..59f2eea243 100644 --- a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ 
b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -6,6 +6,7 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" #include "nbl/builtin/hlsl/tgmath.hlsl" +#include "nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl" namespace nbl { @@ -96,31 +97,12 @@ struct quaternion ); } - static bool __isEqual(const scalar_type a, const scalar_type b) - { - return hlsl::max(a/b, b/a) <= scalar_type(1e-4); - } - static bool __dotIsZero(const vector3_type a, const vector3_type b) - { - const scalar_type ab = hlsl::dot(a, b); - return hlsl::abs(ab) <= scalar_type(1e-4); - } - static this_t create(NBL_CONST_REF_ARG(matrix_type) m, const bool dontAssertValidMatrix=false) { { // only orthogonal and uniform scale mats can be converted - bool valid = __dotIsZero(m[0], m[1]); - valid = __dotIsZero(m[1], m[2]) && valid; - valid = __dotIsZero(m[0], m[2]) && valid; - - const matrix_type m_T = hlsl::transpose(m); - const scalar_type dotCol0 = hlsl::dot(m_T[0],m_T[0]); - const scalar_type dotCol1 = hlsl::dot(m_T[1],m_T[1]); - const scalar_type dotCol2 = hlsl::dot(m_T[2],m_T[2]); - valid = __isEqual(dotCol0, dotCol1) && valid; - valid = __isEqual(dotCol1, dotCol2) && valid; - valid = __isEqual(dotCol0, dotCol2) && valid; + linalg::RuntimeTraits traits = linalg::RuntimeTraits::create(m); + bool valid = traits.orthogonal && traits.uniformScale; if (dontAssertValidMatrix) if (!valid) diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 085ed3c923..038ac2573d 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -223,6 +223,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/format.hlsl") #linear algebra LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/fast_affine.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/transform.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/matrix_runtime_traits.hlsl") # TODO: rename `equations` to `polynomials` probably 
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/functions.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/geometry.hlsl") From e775eac513058bff10898952416f42bcb3eb871b Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Thu, 8 Jan 2026 14:34:52 +0100 Subject: [PATCH 376/472] Fixed preprocessor bug which caused incorrect handling of the -D option --- src/nbl/asset/utils/CWaveStringResolver.cpp | 9 ++++- tools/nsc/main.cpp | 43 +++++++++++++++++++++ 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/src/nbl/asset/utils/CWaveStringResolver.cpp b/src/nbl/asset/utils/CWaveStringResolver.cpp index a2165972e5..8d8b81da79 100644 --- a/src/nbl/asset/utils/CWaveStringResolver.cpp +++ b/src/nbl/asset/utils/CWaveStringResolver.cpp @@ -58,15 +58,20 @@ namespace nbl::wave // now define them as "NBL_GLSL_LIMIT_MAX_IMAGE_DIMENSION_1D=32768" // to match boost wave syntax // https://www.boost.org/doc/libs/1_82_0/libs/wave/doc/class_reference_context.html#:~:text=Maintain%20defined%20macros-,add_macro_definition,-bool%20add_macro_definition + for (const auto& define : preprocessOptions.extraDefines) - context.add_macro_definition(define.identifier.data() + core::string("=") + define.definition.data()); + { + const std::string macroDefinition = define.identifier.data() + core::string("=") + define.definition.data(); + const bool isMacroAdded = context.add_macro_definition(macroDefinition); + assert(isMacroAdded); + } // preprocess core::string resolvedString; try { auto stream = std::stringstream(); - for (auto i= context.begin(); i!= context.end(); i++) + for (auto i = context.begin(); i != context.end(); i++) stream << i->get_value(); resolvedString = stream.str(); } diff --git a/tools/nsc/main.cpp b/tools/nsc/main.cpp index 5ab01d72e5..806af7f6e7 100644 --- a/tools/nsc/main.cpp +++ b/tools/nsc/main.cpp @@ -500,6 +500,49 @@ class ShaderCompiler final : public IApplicationFramework opt.debugInfoFlags = 
bitflag(IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_TOOL_BIT); opt.dxcOptions = std::span(m_arguments); + // need this struct becuase fields of IShaderCompiler::SMacroDefinition are string views + struct SMacroDefinitionBuffer + { + std::string identifier; + std::string definition; + }; + + core::vector macroDefinitionBuffers; + core::vector macroDefinitions; + { + for (const auto& argument : m_arguments) + { + if (argument.rfind("-D", 0) != 0) + continue; + + std::string argumentTmp = argument.substr(2); + + std::string identifier; + std::string definition; + + const size_t equalPos = argumentTmp.find('='); + if (equalPos == std::string::npos) + { + identifier = argumentTmp; + definition = ""; + } + else + { + identifier = argumentTmp.substr(0, equalPos); + definition = argumentTmp.substr(equalPos + 1); + } + + macroDefinitionBuffers.emplace_back(identifier, definition); + } + + macroDefinitions.reserve(macroDefinitionBuffers.size()); + + for (const auto& macroDefinitionBuffer : macroDefinitionBuffers) + macroDefinitions.emplace_back(macroDefinitionBuffer.identifier, macroDefinitionBuffer.definition); + } + + opt.preprocessorOptions.extraDefines = macroDefinitions; + r.compiled = hlslcompiler->compileToSPIRV((const char*)shader->getContent()->getPointer(), opt); r.ok = bool(r.compiled); if (r.ok) From c53d237e84951392470e20a96b7cb3ec565ec938 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Thu, 8 Jan 2026 16:36:31 +0100 Subject: [PATCH 377/472] Updated examples_tests --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 4c4e5e803e..671d1f16b0 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 4c4e5e803e81e043390699f76cc51c6c360908d1 +Subproject commit 671d1f16b0837a70c3016c2472864528f35db0bc From df94ffe689ccdc6f273dddd0f6710e9c310f0733 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 9 Jan 2026 10:25:45 +0700 Subject: [PATCH 378/472] changed unormconstant template 
params to include float --- .../hlsl/sampling/quantized_sequence.hlsl | 83 +++++++++---------- 1 file changed, 40 insertions(+), 43 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index 8929609c34..b6f1e0d2e5 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -22,22 +22,22 @@ struct QuantizedSequence; namespace impl { -template +template struct unorm_constant; template<> -struct unorm_constant<4> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3d888889u; }; +struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3d888889u; }; template<> -struct unorm_constant<5> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3d042108u; }; +struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3d042108u; }; template<> -struct unorm_constant<8> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3b808081u; }; +struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3b808081u; }; template<> -struct unorm_constant<10> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3a802008u; }; +struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3a802008u; }; template<> -struct unorm_constant<16> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x37800080u; }; +struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x37800080u; }; template<> -struct unorm_constant<21> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x35000004u; }; +struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x35000004u; }; template<> -struct unorm_constant<32> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x2f800004u; }; +struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x2f800004u; }; template struct decode_helper; @@ -46,30 +46,28 @@ template struct decode_helper { using scalar_type = typename vector_traits::scalar_type; - using 
fp_type = typename float_of_size::type; - using uvec_type = vector; + using uvec_type = vector; using sequence_type = QuantizedSequence; - using return_type = vector; - NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant<8u*sizeof(scalar_type)>::value; + using return_type = vector; + NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant::value; static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uvec_type scrambleKey) { uvec_type seqVal; NBL_UNROLL for(uint16_t i = 0; i < D; i++) seqVal[i] = val.get(i) ^ scrambleKey[i]; - return return_type(seqVal) * bit_cast(UNormConstant); + return return_type(seqVal) * bit_cast >(UNormConstant); } }; template struct decode_helper { using scalar_type = typename vector_traits::scalar_type; - using fp_type = typename float_of_size::type; - using uvec_type = vector; + using uvec_type = vector; using sequence_type = QuantizedSequence; using sequence_store_type = typename sequence_type::store_type; using sequence_scalar_type = typename vector_traits::scalar_type; - using return_type = vector; + using return_type = vector; NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = sequence_type::UNormConstant; static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uvec_type scrambleKey) @@ -82,25 +80,26 @@ struct decode_helper uvec_type seqVal; NBL_UNROLL for(uint16_t i = 0; i < D; i++) seqVal[i] = scramble.get(i); - return return_type(seqVal) * bit_cast(UNormConstant); + return return_type(seqVal) * bit_cast >(UNormConstant); } }; + +template +NBL_BOOL_CONCEPT SequenceSpecialization = concepts::UnsignedIntegral::scalar_type> && size_of_v::scalar_type> <= 4; } template -vector::scalar_type)>::type, D> decode(NBL_CONST_REF_ARG(QuantizedSequence) val, const vector::scalar_type, D> scrambleKey) +vector::scalar_type)>, D> decode(NBL_CONST_REF_ARG(QuantizedSequence) val, const vector::scalar_type, D> scrambleKey) { return impl::decode_helper::__call(val, scrambleKey); } 
-#define SEQUENCE_SPECIALIZATION_CONCEPT concepts::UnsignedIntegral::scalar_type> && size_of_v::scalar_type> <= 4 - // all Dim=1 -template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT) -struct QuantizedSequence +template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization) +struct QuantizedSequence) > { using store_type = T; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(store_type)>::value; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; store_type get(const uint16_t idx) { assert(idx > 0 && idx < 1); return data; } void set(const uint16_t idx, const store_type value) { assert(idx > 0 && idx < 1); data = value; } @@ -109,15 +108,15 @@ struct QuantizedSequence NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 1 && Dim > 1 && Dim < 5) -struct QuantizedSequence::Dimension == 1 && Dim > 1 && Dim < 5) > +template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && vector_traits::Dimension == 1 && Dim > 1 && Dim < 5) +struct QuantizedSequence && vector_traits::Dimension == 1 && Dim > 1 && Dim < 5) > { using store_type = T; NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; NBL_CONSTEXPR_STATIC_INLINE store_type Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; store_type get(const uint16_t idx) { @@ -137,12 +136,12 @@ struct QuantizedSequence NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == Dim && Dim > 1 && Dim < 5) -struct QuantizedSequence::Dimension == Dim && Dim > 1 && Dim < 5) > +template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && vector_traits::Dimension == Dim && 
Dim > 1 && Dim < 5) +struct QuantizedSequence && vector_traits::Dimension == Dim && Dim > 1 && Dim < 5) > { using store_type = T; using scalar_type = typename vector_traits::scalar_type; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(scalar_type)>::value; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; scalar_type get(const uint16_t idx) { assert(idx > 0 && idx < Dim); return data[idx]; } void set(const uint16_t idx, const scalar_type value) { assert(idx > 0 && idx < Dim); data[idx] = value; } @@ -151,8 +150,8 @@ struct QuantizedSequence NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 2 && Dim == 3) -struct QuantizedSequence::Dimension == 2 && Dim == 3) > +template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && vector_traits::Dimension == 2 && Dim == 3) +struct QuantizedSequence && vector_traits::Dimension == 2 && Dim == 3) > { using store_type = T; using scalar_type = typename vector_traits::scalar_type; @@ -160,7 +159,7 @@ struct QuantizedSequence) - BitsPerComponent; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; scalar_type get(const uint16_t idx) { @@ -204,8 +203,8 @@ struct QuantizedSequence NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 2 && Dim == 4) -struct QuantizedSequence::Dimension == 2 && Dim == 4) > +template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && vector_traits::Dimension == 2 && Dim == 4) +struct QuantizedSequence && vector_traits::Dimension == 2 && Dim == 4) > { using store_type = T; using scalar_type = typename vector_traits::scalar_type; @@ -213,7 +212,7 @@ struct QuantizedSequence) - BitsPerComponent; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; 
scalar_type get(const uint16_t idx) { @@ -235,13 +234,13 @@ struct QuantizedSequence NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 4 && Dim == 2) -struct QuantizedSequence::Dimension == 4 && Dim == 2) > +template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && vector_traits::Dimension == 4 && Dim == 2) +struct QuantizedSequence && vector_traits::Dimension == 4 && Dim == 2) > { using store_type = T; using scalar_type = typename vector_traits::scalar_type; using base_type = vector; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(scalar_type)>::value; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; base_type get(const uint16_t idx) { @@ -266,8 +265,8 @@ struct QuantizedSequence returns uint16_t2 - 21 bits per component: 16 in x, 5 in y // uint16_t4 --> returns uint32_t2 - 42 bits per component: 32 in x, 10 in y -template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 4 && Dim == 3) -struct QuantizedSequence::Dimension == 4 && Dim == 3) > +template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && vector_traits::Dimension == 4 && Dim == 3) +struct QuantizedSequence && vector_traits::Dimension == 4 && Dim == 3) > { using store_type = T; using scalar_type = typename vector_traits::scalar_type; @@ -277,7 +276,7 @@ struct QuantizedSequence; NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << LeftoverBitsPerComponent) - uint16_t(1u); NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v) - BitsPerComponent; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(scalar_type)>::value; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; base_type get(const uint16_t idx) { @@ -299,8 +298,6 @@ struct QuantizedSequence Date: Fri, 9 Jan 2026 12:20:35 +0700 Subject: [PATCH 379/472] separate pre and post decode scramble into different 
structs, no unormconstant in quantized sequence --- .../hlsl/sampling/quantized_sequence.hlsl | 46 +++++++++---------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index b6f1e0d2e5..f669a9fd3f 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -39,43 +39,37 @@ struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0 template<> struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x2f800004u; }; -template -struct decode_helper; - template -struct decode_helper +struct decode_before_scramble_helper { using scalar_type = typename vector_traits::scalar_type; using uvec_type = vector; using sequence_type = QuantizedSequence; using return_type = vector; - NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant::value; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = unorm_constant::value; static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uvec_type scrambleKey) { uvec_type seqVal; NBL_UNROLL for(uint16_t i = 0; i < D; i++) - seqVal[i] = val.get(i) ^ scrambleKey[i]; + seqVal[i] = val.get(i); + seqVal ^= scrambleKey; return return_type(seqVal) * bit_cast >(UNormConstant); } }; template -struct decode_helper +struct decode_after_scramble_helper { using scalar_type = typename vector_traits::scalar_type; using uvec_type = vector; using sequence_type = QuantizedSequence; - using sequence_store_type = typename sequence_type::store_type; - using sequence_scalar_type = typename vector_traits::scalar_type; using return_type = vector; - NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = sequence_type::UNormConstant; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = unorm_constant::value; - static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uvec_type scrambleKey) + 
static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, NBL_CONST_REF_ARG(sequence_type) scrambleKey) { sequence_type scramble; - NBL_UNROLL for(uint16_t i = 0; i < D; i++) - scramble.set(i, scrambleKey[i]); - scramble.data ^= val.data; + scramble.data = val.data ^ scrambleKey.data; uvec_type seqVal; NBL_UNROLL for(uint16_t i = 0; i < D; i++) @@ -88,10 +82,18 @@ template NBL_BOOL_CONCEPT SequenceSpecialization = concepts::UnsignedIntegral::scalar_type> && size_of_v::scalar_type> <= 4; } -template -vector::scalar_type)>, D> decode(NBL_CONST_REF_ARG(QuantizedSequence) val, const vector::scalar_type, D> scrambleKey) +// post-decode scramble +template +vector decode(NBL_CONST_REF_ARG(QuantizedSequence) val, const vector,D> scrambleKey) +{ + return impl::decode_before_scramble_helper::__call(val, scrambleKey); +} + +// pre-decode scramble +template +vector decode(NBL_CONST_REF_ARG(QuantizedSequence) val, NBL_CONST_REF_ARG(QuantizedSequence) scrambleKey) { - return impl::decode_helper::__call(val, scrambleKey); + return impl::decode_after_scramble_helper::__call(val, scrambleKey); } // all Dim=1 @@ -99,7 +101,7 @@ template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization) struct QuantizedSequence) > { using store_type = T; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; + NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = 8u*size_of_v; store_type get(const uint16_t idx) { assert(idx > 0 && idx < 1); return data; } void set(const uint16_t idx, const store_type value) { assert(idx > 0 && idx < 1); data = value; } @@ -116,7 +118,6 @@ struct QuantizedSequence::value; store_type get(const uint16_t idx) { @@ -141,7 +142,7 @@ struct QuantizedSequence::scalar_type; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; + NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = 8u*size_of_v; scalar_type get(const uint16_t idx) { assert(idx > 0 && idx < Dim); return data[idx]; } void set(const uint16_t 
idx, const scalar_type value) { assert(idx > 0 && idx < Dim); data[idx] = value; } @@ -159,7 +160,6 @@ struct QuantizedSequence) - BitsPerComponent; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; scalar_type get(const uint16_t idx) { @@ -212,7 +212,6 @@ struct QuantizedSequence) - BitsPerComponent; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; scalar_type get(const uint16_t idx) { @@ -240,7 +239,7 @@ struct QuantizedSequence::scalar_type; using base_type = vector; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; + NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = 8u*size_of_v; base_type get(const uint16_t idx) { @@ -276,7 +275,6 @@ struct QuantizedSequence; NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << LeftoverBitsPerComponent) - uint16_t(1u); NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v) - BitsPerComponent; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; base_type get(const uint16_t idx) { From 8894dd10863ead92a9f54271281a03bf7ef8d49a Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 9 Jan 2026 15:28:12 +0700 Subject: [PATCH 380/472] change quantized sequence to set/get with bitfieldInsert/Extract, also order stored values from LSB to MSB --- .../hlsl/sampling/quantized_sequence.hlsl | 176 +++++++++--------- 1 file changed, 88 insertions(+), 88 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index f669a9fd3f..025e506fb4 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -116,21 +116,17 @@ struct QuantizedSequence; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; - NBL_CONSTEXPR_STATIC_INLINE store_type Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); 
- NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; store_type get(const uint16_t idx) { assert(idx > 0 && idx < Dim); - return (data >> (BitsPerComponent * idx)) & Mask; + return glsl::bitfieldExtract(data, BitsPerComponent * idx, BitsPerComponent); } void set(const uint16_t idx, const store_type value) { assert(idx > 0 && idx < Dim); - const uint16_t bits = (BitsPerComponent * idx); - data &= ~(Mask << bits); - data |= ((value >> DiscardBits) & Mask) << bits; + glsl::bitfieldInsert(data, value, BitsPerComponent * idx, BitsPerComponent); } store_type data; @@ -150,150 +146,154 @@ struct QuantizedSequence NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && vector_traits::Dimension == 2 && Dim == 3) -struct QuantizedSequence && vector_traits::Dimension == 2 && Dim == 3) > +// uint32_t2; Dim=3 -- should never use uint16_t2 instead of uint32_t +template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && size_of_v::scalar_type> == 4 && vector_traits::Dimension == 2 && Dim == 3) +struct QuantizedSequence && size_of_v::scalar_type> == 4 && vector_traits::Dimension == 2 && Dim == 3) > { using store_type = T; using scalar_type = typename vector_traits::scalar_type; NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; - NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (scalar_type(1u) << BitsPerComponent) - scalar_type(1u); NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v) - BitsPerComponent; scalar_type get(const uint16_t idx) { assert(idx >= 0 && idx < 3); - if (idx < 2) + if (idx == 0) // x + return glsl::bitfieldExtract(data[0], 0u, BitsPerComponent); + else if (idx == 1) // y { - return data[idx] & Mask; - } - else - { - const scalar_type zbits = scalar_type(DiscardBits); - const scalar_type zmask = (scalar_type(1u) << zbits) - scalar_type(1u); - scalar_type z = (data[0] >> BitsPerComponent) & zmask; - z |= ((data[1] 
>> BitsPerComponent) & zmask) << DiscardBits; - return z; + scalar_type y = glsl::bitfieldExtract(data[0], BitsPerComponent, DiscardBits); + y |= glsl::bitfieldExtract(data[1], 0u, DiscardBits - 1u) << DiscardBits; + return y; } + else // z + return glsl::bitfieldExtract(data[1], DiscardBits - 1u, BitsPerComponent); } void set(const uint16_t idx, const scalar_type value) { assert(idx >= 0 && idx < 3); - if (idx < 2) + if (idx == 0) // x + glsl::bitfieldInsert(data[0], value, 0u, BitsPerComponent); + else if (idx == 1) // y { - const scalar_type trunc_val = value >> DiscardBits; - data[idx] &= ~Mask; - data[idx] |= trunc_val & Mask; - } - else - { - const scalar_type zbits = scalar_type(DiscardBits); - const scalar_type zmask = (scalar_type(1u) << zbits) - scalar_type(1u); - const scalar_type trunc_val = value >> DiscardBits; - data[0] &= Mask; - data[1] &= Mask; - data[0] |= (trunc_val & zmask) << BitsPerComponent; - data[1] |= ((trunc_val >> zbits) & zmask) << BitsPerComponent; + glsl::bitfieldInsert(data[0], value, BitsPerComponent, DiscardBits); + glsl::bitfieldInsert(data[1], value >> DiscardBits, 0u, DiscardBits - 1u); } + else // z + glsl::bitfieldInsert(data[1], value, DiscardBits - 1u, BitsPerComponent); } store_type data; }; -// uint16_t2, uint32_t2; Dim=4 -template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && vector_traits::Dimension == 2 && Dim == 4) -struct QuantizedSequence && vector_traits::Dimension == 2 && Dim == 4) > +// uint16_t2; Dim=4 -- should use uint16_t4 instead of uint32_t2 +template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && size_of_v::scalar_type> == 2 && vector_traits::Dimension == 2 && Dim == 4) +struct QuantizedSequence && size_of_v::scalar_type> == 2 && vector_traits::Dimension == 2 && Dim == 4) > { using store_type = T; using scalar_type = typename vector_traits::scalar_type; NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / 
Dim; - NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); - NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v) - BitsPerComponent; scalar_type get(const uint16_t idx) { assert(idx >= 0 && idx < 4); - const uint16_t i = (idx & uint16_t(2u)) >> uint16_t(1u); - return (data[i] >> (BitsPerComponent * (idx & uint16_t(1u)))) & Mask; + if (idx >= 0 && idx < 2) // x y + { + return glsl::bitfieldExtract(data[0], BitsPerComponent * idx, BitsPerComponent); + } + else // z w + { + return glsl::bitfieldExtract(data[1], BitsPerComponent * (idx & uint16_t(1u)), BitsPerComponent); + } } void set(const uint16_t idx, const scalar_type value) { assert(idx >= 0 && idx < 4); - const uint16_t i = (idx & uint16_t(2u)) >> uint16_t(1u); - const uint16_t odd = idx & uint16_t(1u); - data[i] &= hlsl::mix(~Mask, Mask, bool(odd)); - data[i] |= ((value >> DiscardBits) & Mask) << (BitsPerComponent * odd); + if (idx >= 0 && idx < 2) // x y + { + glsl::bitfieldInsert(data[0], value, BitsPerComponent * idx, BitsPerComponent); + } + else // z w + { + glsl::bitfieldInsert(data[1], value, BitsPerComponent * (idx & uint16_t(1u)), BitsPerComponent); + } } store_type data; }; -// uint16_t4, uint32_t4; Dim=2 -template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && vector_traits::Dimension == 4 && Dim == 2) -struct QuantizedSequence && vector_traits::Dimension == 4 && Dim == 2) > -{ - using store_type = T; - using scalar_type = typename vector_traits::scalar_type; - using base_type = vector; - NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = 8u*size_of_v; +// no uint16_t4, uint32_t4; Dim=2 - base_type get(const uint16_t idx) - { - assert(idx >= 0 && idx < 2); - base_type a; - a[0] = data[uint16_t(2u) * idx]; - a[1] = data[uint16_t(2u) * idx + 1]; - return a; - } - - void set(const uint16_t idx, const base_type value) - { - assert(idx >= 0 && idx < 2); - base_type a; - data[uint16_t(2u) * idx] = value[0]; - 
data[uint16_t(2u) * idx + 1] = value[1]; - } - - store_type data; -}; - -// uint16_t4, uint32_t4; Dim=3 -// uint16_t4 --> returns uint16_t2 - 21 bits per component: 16 in x, 5 in y -// uint16_t4 --> returns uint32_t2 - 42 bits per component: 32 in x, 10 in y -template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && vector_traits::Dimension == 4 && Dim == 3) -struct QuantizedSequence && vector_traits::Dimension == 4 && Dim == 3) > +// uint32_t4; Dim=3 --> returns uint32_t2 - 42 bits per component: 32 in x, 10 in y +// use uint32_t2 instead of uint16_t4 +template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && size_of_v::scalar_type> == 4 && vector_traits::Dimension == 4 && Dim == 3) +struct QuantizedSequence && size_of_v::scalar_type> == 4 && vector_traits::Dimension == 4 && Dim == 3) > { using store_type = T; using scalar_type = typename vector_traits::scalar_type; using base_type = vector; NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; - NBL_CONSTEXPR_STATIC_INLINE uint16_t LeftoverBitsPerComponent = BitsPerComponent - uint16_t(8u) * size_of_v; - NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << LeftoverBitsPerComponent) - uint16_t(1u); - NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v) - BitsPerComponent; base_type get(const uint16_t idx) { assert(idx >= 0 && idx < 3); - base_type a; - a[0] = data[idx]; - a[1] = (data[3] >> (LeftoverBitsPerComponent * idx)) & Mask; - return a; + if (idx == 0) // x + { + base_type x; + x[0] = data[0]; + x[1] = glsl::bitfieldExtract(data[1], 0u, 10u); + return x; + } + else if (idx == 1) // y + { + base_type y; + y[0] = glsl::bitfieldExtract(data[1], 10u, 22u); + y[0] |= glsl::bitfieldExtract(data[2], 0u, 10u) << 22u; + y[1] = glsl::bitfieldExtract(data[2], 10u, 10u); + return y; + } + else // z + { + base_type z; + z[0] = glsl::bitfieldInsert(data[2], 20u, 12u); + z[0] |= 
glsl::bitfieldInsert(data[3], 0u, 20u) << 12u; + z[1] = glsl::bitfieldInsert(data[3], 20u, 10u); + return z; + } } void set(const uint16_t idx, const base_type value) { assert(idx >= 0 && idx < 3); - data[idx] = value[0]; - data[3] &= ~Mask; - data[3] |= ((value[1] >> DiscardBits) & Mask) << (LeftoverBitsPerComponent * idx); + if (idx == 0) // x + { + data[0] = value[0]; + glsl::bitfieldInsert(data[1], value[1], 0u, 10u); + } + else if (idx == 1) // y + { + glsl::bitfieldInsert(data[1], value[0], 10u, 22u); + glsl::bitfieldInsert(data[2], value[0] >> 22u, 0u, 10u); + glsl::bitfieldInsert(data[2], value[1], 10u, 10u); + } + else // z + { + glsl::bitfieldInsert(data[2], value[0], 20u, 12u); + glsl::bitfieldInsert(data[3], value[0] >> 12u, 0u, 20u); + glsl::bitfieldInsert(data[3], value[1], 20u, 10u); + } } store_type data; + // data[0] = | -- x 32 bits -- | + // data[1] = MSB | -- y 22 bits -- | -- x 10 bits -- | LSB + // data[2] = MSB | -- z 12 bits -- | -- y 20 bits -- | LSB + // data[3] = | -- z 30 bits -- | }; } From 639f464a1788117f8c5bb32646f487380098c836 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 9 Jan 2026 15:42:44 +0700 Subject: [PATCH 381/472] added create functions that should not preserve existing bits --- .../hlsl/sampling/quantized_sequence.hlsl | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index 025e506fb4..bd763ad6d7 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -117,6 +117,14 @@ struct QuantizedSequence; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; + static QuantizedSequence create(const vector value) + { + QuantizedSequence seq; + NBL_UNROLL for (uint16_t i = 0; i < Dim; i++) + seq.set(i, value[i]); + return seq; + } + store_type get(const uint16_t idx) { assert(idx > 0 && idx < 
Dim); @@ -156,6 +164,14 @@ struct QuantizedSequence) - BitsPerComponent; + static QuantizedSequence create(const vector value) + { + QuantizedSequence seq; + NBL_UNROLL for (uint16_t i = 0; i < Dim; i++) + seq.set(i, value[i]); + return seq; + } + scalar_type get(const uint16_t idx) { assert(idx >= 0 && idx < 3); @@ -197,6 +213,14 @@ struct QuantizedSequence; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; + static QuantizedSequence create(const vector value) + { + QuantizedSequence seq; + NBL_UNROLL for (uint16_t i = 0; i < Dim; i++) + seq.set(i, value[i]); + return seq; + } + scalar_type get(const uint16_t idx) { assert(idx >= 0 && idx < 4); From 60435145a73b124976e63bfaadb67f58a1cfc33c Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 9 Jan 2026 18:24:58 +0100 Subject: [PATCH 382/472] fix asan --- 3rdparty/CMakeLists.txt | 6 ++-- 3rdparty/dxc/CMakeLists.txt | 6 +++- CMakeLists.txt | 4 +++ cmake/adjust/flags.cmake | 11 ++++++-- cmake/adjust/template/vendor/impl/Clang.cmake | 6 ++-- .../template/vendor/impl/frontend/MSVC.cmake | 16 +++++++---- cmake/common.cmake | 28 +++++++++++++++++++ src/nbl/builtin/utils.cmake | 3 -- 8 files changed, 61 insertions(+), 19 deletions(-) diff --git a/3rdparty/CMakeLists.txt b/3rdparty/CMakeLists.txt index a6228b01de..705158b15a 100755 --- a/3rdparty/CMakeLists.txt +++ b/3rdparty/CMakeLists.txt @@ -149,6 +149,8 @@ if(CMAKE_TOOLCHAIN_FILE) list(APPEND NBL_JPEG_CMAKE_OPTIONS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}") endif() +nbl_append_sanitize_address_cmake_options(NBL_JPEG_CMAKE_OPTIONS) + # TODO: might find an alternative library which supports add_subdirectory, untill then we need to switch to a workaround just like # we do for DXC due to: https://github.com/libjpeg-turbo/libjpeg-turbo/blob/0b742742c873025e2a127918d4969238ace7ae5b/CMakeLists.txt#L69 execute_process(COMMAND "${CMAKE_COMMAND}" -S "${CMAKE_CURRENT_SOURCE_DIR}/libjpeg-turbo" -B "${CMAKE_CURRENT_BINARY_DIR}/libjpeg-turbo" 
-G "${CMAKE_GENERATOR}" ${NBL_JPEG_CMAKE_OPTIONS} @@ -492,10 +494,6 @@ endif() foreach(trgt IN LISTS NBL_3RDPARTY_TARGETS) set_property(TARGET ${trgt} PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>$<$:DLL>") - if(MSVC AND NBL_SANITIZE_ADDRESS) - set_property(TARGET ${trgt} PROPERTY COMPILE_OPTIONS /fsanitize=address) - endif() - get_target_property(NBL_TARGET_TYPE ${trgt} TYPE) if(NOT "${NBL_TARGET_TYPE}" STREQUAL "INTERFACE_LIBRARY") # maybe explicit global mapping would be better, to discuss diff --git a/3rdparty/dxc/CMakeLists.txt b/3rdparty/dxc/CMakeLists.txt index d6ea0d0554..506dd9cc60 100644 --- a/3rdparty/dxc/CMakeLists.txt +++ b/3rdparty/dxc/CMakeLists.txt @@ -43,6 +43,8 @@ list(APPEND NBL_DXC_CMAKE_OPTIONS "-DDXC_SPIRV_TOOLS_DIR=${DXC_SPIRV_TOOLS_DIR}" list(APPEND NBL_DXC_CMAKE_OPTIONS "-DDXC_SPIRV_HEADERS_DIR=${DXC_SPIRV_HEADERS_DIR}") list(APPEND NBL_DXC_CMAKE_OPTIONS "-DDXC_ENABLE_ETW=OFF") +nbl_append_sanitize_address_cmake_options(NBL_DXC_CMAKE_OPTIONS) + if(NOT NBL_IS_MULTI_CONFIG) list(APPEND NBL_DXC_CMAKE_OPTIONS "-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}") endif() @@ -54,7 +56,9 @@ endif() #else() # NBL_EXT_P_APPEND_COMPILE_OPTIONS(NBL_DXC_CMAKE_OPTIONS Release RelWithDebInfo Debug) #endif() -#list(TRANSFORM NBL_DXC_CMAKE_OPTIONS REPLACE "/fp:fast" "/fp:precise") +if(MSVC AND NBL_SANITIZE_ADDRESS) + list(TRANSFORM NBL_DXC_CMAKE_OPTIONS REPLACE "/fp:fast" "/fp:precise") +endif() if(WIN32) if(NOT DEFINED HLSL_AUTOCRLF) diff --git a/CMakeLists.txt b/CMakeLists.txt index 773c9c3563..9bd05a7f8f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,6 +3,10 @@ # For conditions of distribution and use, see copyright notice in nabla.h.in or nabla.h cmake_minimum_required(VERSION 3.31..4.2.0) +if(NOT DEFINED CMAKE_POLICY_VERSION_MINIMUM) + set(CMAKE_POLICY_VERSION_MINIMUM 3.31 CACHE STRING "Minimum policy version for third-party projects") +endif() + # TODO: Yas - once we deploy 4.x we will fire `cmake_policy(VERSION [...])` instead of manually 
picking policies # https://cmake.org/cmake/help/latest/command/cmake_minimum_required.html#policy-version # also we should update deps which throw warnings about < 3.10 compatibility diff --git a/cmake/adjust/flags.cmake b/cmake/adjust/flags.cmake index 1e67914ae0..564f5f7ff7 100644 --- a/cmake/adjust/flags.cmake +++ b/cmake/adjust/flags.cmake @@ -281,6 +281,7 @@ function(nbl_adjust_flags) # global compile options list(APPEND _D_NBL_COMPILE_OPTIONS_ ${NBL_COMPILE_OPTIONS}) + list(APPEND _D_NBL_LINK_OPTIONS_ ${NBL_LINK_OPTIONS}) foreach(CONFIG ${CMAKE_CONFIGURATION_TYPES}) string(TOUPPER "${CONFIG}" CONFIG_U) @@ -316,15 +317,21 @@ function(nbl_adjust_flags) # global compile options list(APPEND _D_NBL_COMPILE_OPTIONS_ ${NBL_COMPILE_OPTIONS}) + list(APPEND _D_NBL_LINK_OPTIONS_ ${NBL_LINK_OPTIONS}) foreach(_NBL_OPTION_IMPL_ ${_NBL_OPTIONS_IMPL_}) string(REPLACE "NBL_MAP_" "" NBL_MAP_CONFIGURATION_FROM "NBL_${_NBL_OPTION_IMPL_}") string(TOUPPER "${NBL_${_NBL_OPTION_IMPL_}}" NBL_MAP_CONFIGURATION_TO) set(NBL_TO_CONFIG_COMPILE_OPTIONS ${NBL_${NBL_MAP_CONFIGURATION_TO}_COMPILE_OPTIONS}) + set(NBL_TO_CONFIG_LINK_OPTIONS ${NBL_${NBL_MAP_CONFIGURATION_TO}_LINK_OPTIONS}) # per configuration compile options with mapping list(APPEND _D_NBL_COMPILE_OPTIONS_ $<$:${NBL_TO_CONFIG_COMPILE_OPTIONS}>) + list(APPEND _D_NBL_LINK_OPTIONS_ $<$:${NBL_TO_CONFIG_LINK_OPTIONS}>) endforeach() - set_directory_properties(PROPERTIES COMPILE_OPTIONS "${_D_NBL_COMPILE_OPTIONS_}") + set_directory_properties(PROPERTIES + COMPILE_OPTIONS "${_D_NBL_COMPILE_OPTIONS_}" + LINK_OPTIONS "${_D_NBL_LINK_OPTIONS_}" + ) endif() -endfunction() \ No newline at end of file +endfunction() diff --git a/cmake/adjust/template/vendor/impl/Clang.cmake b/cmake/adjust/template/vendor/impl/Clang.cmake index 0b00294411..984dc94681 100644 --- a/cmake/adjust/template/vendor/impl/Clang.cmake +++ b/cmake/adjust/template/vendor/impl/Clang.cmake @@ -77,11 +77,11 @@ else() endif() if(NBL_SANITIZE_ADDRESS) - 
NBL_REQUEST_COMPILE_OPTION_SUPPORT(LANG ${LANG} COMPILE_OPTIONS -fsanitize=address) + NBL_REQUEST_COMPILE_OPTION_SUPPORT(LANG ${LANG} COMPILE_OPTIONS -fsanitize=address LINK_OPTIONS -fsanitize=address) endif() if(NBL_SANITIZE_THREAD) - NBL_REQUEST_COMPILE_OPTION_SUPPORT(LANG ${LANG} COMPILE_OPTIONS -fsanitize=thread) + NBL_REQUEST_COMPILE_OPTION_SUPPORT(LANG ${LANG} COMPILE_OPTIONS -fsanitize=thread LINK_OPTIONS -fsanitize=thread) endif() NBL_REQUEST_COMPILE_OPTION_SUPPORT(LANG ${LANG} CONFIG DEBUG COMPILE_OPTIONS @@ -106,4 +106,4 @@ else() -mno-incremental-linker-compatible # https://clang.llvm.org/docs/ClangCommandLineReference.html#cmdoption-clang-mincremental-linker-compatible -DNDEBUG ) -endif() \ No newline at end of file +endif() diff --git a/cmake/adjust/template/vendor/impl/frontend/MSVC.cmake b/cmake/adjust/template/vendor/impl/frontend/MSVC.cmake index 5b5e8c8f50..c5fd0c4038 100644 --- a/cmake/adjust/template/vendor/impl/frontend/MSVC.cmake +++ b/cmake/adjust/template/vendor/impl/frontend/MSVC.cmake @@ -23,20 +23,24 @@ if(NBL_SANITIZE_ADDRESS) NBL_REQUEST_COMPILE_OPTION_SUPPORT(LANG ${LANG} COMPILE_OPTIONS /fsanitize=address # https://learn.microsoft.com/en-us/cpp/build/reference/fsanitize?view=msvc-170 ) - - NBL_REQUEST_COMPILE_OPTION_SUPPORT(LANG ${LANG} CONFIG DEBUG COMPILE_OPTIONS - /RTC1 # https://learn.microsoft.com/en-us/cpp/build/reference/rtc-run-time-error-checks?view=msvc-170 - ) endif() -NBL_REQUEST_COMPILE_OPTION_SUPPORT(LANG ${LANG} CONFIG DEBUG COMPILE_OPTIONS +set(_NBL_MSVC_DEBUG_COMPILE_OPTIONS /Ob0 # https://learn.microsoft.com/en-us/cpp/build/reference/ob-inline-function-expansion?view=msvc-170 /Od # https://learn.microsoft.com/en-us/cpp/build/reference/od-disable-debug?view=msvc-170 /Oy- # https://learn.microsoft.com/en-us/cpp/build/reference/oy-frame-pointer-omission?view=msvc-170 +) +if(NOT NBL_SANITIZE_ADDRESS) + list(APPEND _NBL_MSVC_DEBUG_COMPILE_OPTIONS /RTC1) +endif() + +NBL_REQUEST_COMPILE_OPTION_SUPPORT(LANG ${LANG} CONFIG 
DEBUG COMPILE_OPTIONS + ${_NBL_MSVC_DEBUG_COMPILE_OPTIONS} LINK_OPTIONS /INCREMENTAL # https://learn.microsoft.com/en-us/cpp/build/reference/incremental-link-incrementally?view=msvc-170 ) +unset(_NBL_MSVC_DEBUG_COMPILE_OPTIONS) NBL_REQUEST_COMPILE_OPTION_SUPPORT(LANG ${LANG} CONFIG RELEASE COMPILE_OPTIONS /O2 # https://learn.microsoft.com/en-us/cpp/build/reference/o1-o2-minimize-size-maximize-speed?view=msvc-170 @@ -63,4 +67,4 @@ NBL_REQUEST_COMPILE_OPTION_SUPPORT(LANG ${LANG} CONFIG RELWITHDEBINFO COMPILE_OP LINK_OPTIONS /INCREMENTAL # https://learn.microsoft.com/en-us/cpp/build/reference/incremental-link-incrementally?view=msvc-170 -) \ No newline at end of file +) diff --git a/cmake/common.cmake b/cmake/common.cmake index 2de6dc758f..19bad18182 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -16,6 +16,34 @@ include_guard(GLOBAL) include(ProcessorCount) +# tmp for external projects, to be removed later when I get rid of them (dxc + jpeg currently) +function(nbl_append_sanitize_address_cmake_options out_list) + if(NOT NBL_SANITIZE_ADDRESS) + return() + endif() + + if(MSVC) + set(_NBL_ASAN_FLAG "/fsanitize=address") + else() + set(_NBL_ASAN_FLAG "-fsanitize=address") + set(_NBL_ASAN_LINK_SUFFIX " ${_NBL_ASAN_FLAG}") + endif() + + list(APPEND ${out_list} + "-DCMAKE_C_FLAGS:STRING=${CMAKE_C_FLAGS} ${_NBL_ASAN_FLAG}" + "-DCMAKE_CXX_FLAGS:STRING=${CMAKE_CXX_FLAGS} ${_NBL_ASAN_FLAG}" + ) + if(DEFINED _NBL_ASAN_LINK_SUFFIX) + list(APPEND ${out_list} + "-DCMAKE_EXE_LINKER_FLAGS:STRING=${CMAKE_EXE_LINKER_FLAGS}${_NBL_ASAN_LINK_SUFFIX}" + "-DCMAKE_SHARED_LINKER_FLAGS:STRING=${CMAKE_SHARED_LINKER_FLAGS}${_NBL_ASAN_LINK_SUFFIX}" + ) + endif() + unset(_NBL_ASAN_FLAG) + unset(_NBL_ASAN_LINK_SUFFIX) + set(${out_list} "${${out_list}}" PARENT_SCOPE) +endfunction() + # Macro creating project for an executable # Project and target get its name from directory when this macro gets executed (truncating number in the beginning of the name and making all lower case) # Created 
because of common cmake code for examples and tools diff --git a/src/nbl/builtin/utils.cmake b/src/nbl/builtin/utils.cmake index 6465c2ac6d..f8cdafab77 100644 --- a/src/nbl/builtin/utils.cmake +++ b/src/nbl/builtin/utils.cmake @@ -253,7 +253,4 @@ function(ADD_CUSTOM_BUILTIN_RESOURCES _TARGET_NAME_ _BUNDLE_NAME_ _BUNDLE_SEARCH _ADD_PROPERTY_(BUILTIN_RESOURCES_HEADERS NBL_BUILTIN_RESOURCES_HEADERS) _ADD_PROPERTY_(BUILTIN_RESOURCES_INCLUDE_SEARCH_DIRECTORY _OUTPUT_INCLUDE_SEARCH_DIRECTORY_CONFIG) - if(MSVC AND NBL_SANITIZE_ADDRESS) - set_property(TARGET ${_TARGET_NAME_} PROPERTY COMPILE_OPTIONS /fsanitize=address) - endif() endfunction() From 3b82984f407aac6c5267f732c5446c4a2b93c683 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 12 Jan 2026 12:38:50 +0100 Subject: [PATCH 383/472] NBL_WAVE_STRING_RESOLVER_TU_DEBUG_OPTIMISATION off when NBL_SANITIZE_ADDRESS on --- src/nbl/CMakeLists.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 512633536f..5e59d6fa9c 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -394,6 +394,9 @@ nbl_adjust_flags(TARGET Nabla MAP_RELEASE Release MAP_RELWITHDEBINFO RelWithDebI nbl_adjust_definitions() option(NBL_WAVE_STRING_RESOLVER_TU_DEBUG_OPTIMISATION "Enable to optimise CWaveStringResolver.cpp in Debug configuration, uses RWDI compile options for the TU" ON) +if(NBL_SANITIZE_ADDRESS) + set(NBL_WAVE_STRING_RESOLVER_TU_DEBUG_OPTIMISATION OFF CACHE BOOL "" FORCE) +endif() if(NBL_WAVE_STRING_RESOLVER_TU_DEBUG_OPTIMISATION) set_source_files_properties(asset/utils/CWaveStringResolver.cpp PROPERTIES # just enabling inlining and optimisations will help a lot @@ -868,4 +871,4 @@ source_group(TREE "${NBL_ROOT_PATH}" source_group(TREE "${NBL_ROOT_PATH}" PREFIX "Source Files" FILES ${NABLA_SOURCE_FILES} -) \ No newline at end of file +) From 634260f1cfc5d306dbab171915ace791c44e6708 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 12 
Jan 2026 14:14:45 +0100 Subject: [PATCH 384/472] NBL_DEBUG_RTC_ENABLED default OFF and implies NBL_WAVE_STRING_RESOLVER_TU_DEBUG_OPTIMISATION OFF --- CMakeLists.txt | 1 + cmake/adjust/template/vendor/impl/frontend/MSVC.cmake | 2 +- src/nbl/CMakeLists.txt | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9bd05a7f8f..c21da262c0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,6 +50,7 @@ option(NBL_STATIC_BUILD "" OFF) # ON for static builds, OFF for shared option(NBL_COMPILER_DYNAMIC_RUNTIME "" ON) option(NBL_SANITIZE_ADDRESS OFF) +option(NBL_DEBUG_RTC_ENABLED "Enable Runtime Checks for Debug builds" OFF) set(CMAKE_MSVC_DEBUG_INFORMATION_FORMAT $<$:ProgramDatabase>) # ignored on non xMSVC-ABI targets diff --git a/cmake/adjust/template/vendor/impl/frontend/MSVC.cmake b/cmake/adjust/template/vendor/impl/frontend/MSVC.cmake index c5fd0c4038..f433b553e3 100644 --- a/cmake/adjust/template/vendor/impl/frontend/MSVC.cmake +++ b/cmake/adjust/template/vendor/impl/frontend/MSVC.cmake @@ -30,7 +30,7 @@ set(_NBL_MSVC_DEBUG_COMPILE_OPTIONS /Od # https://learn.microsoft.com/en-us/cpp/build/reference/od-disable-debug?view=msvc-170 /Oy- # https://learn.microsoft.com/en-us/cpp/build/reference/oy-frame-pointer-omission?view=msvc-170 ) -if(NOT NBL_SANITIZE_ADDRESS) +if(NBL_DEBUG_RTC_ENABLED AND NOT NBL_SANITIZE_ADDRESS) list(APPEND _NBL_MSVC_DEBUG_COMPILE_OPTIONS /RTC1) endif() diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 5e59d6fa9c..abadd07912 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -394,7 +394,7 @@ nbl_adjust_flags(TARGET Nabla MAP_RELEASE Release MAP_RELWITHDEBINFO RelWithDebI nbl_adjust_definitions() option(NBL_WAVE_STRING_RESOLVER_TU_DEBUG_OPTIMISATION "Enable to optimise CWaveStringResolver.cpp in Debug configuration, uses RWDI compile options for the TU" ON) -if(NBL_SANITIZE_ADDRESS) +if(NBL_SANITIZE_ADDRESS OR NBL_DEBUG_RTC_ENABLED) 
set(NBL_WAVE_STRING_RESOLVER_TU_DEBUG_OPTIMISATION OFF CACHE BOOL "" FORCE) endif() if(NBL_WAVE_STRING_RESOLVER_TU_DEBUG_OPTIMISATION) From a0acd3512dbf594ea31077d1f9a4b5786b89e08f Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 13 Jan 2026 14:00:40 +0700 Subject: [PATCH 385/472] matrix runtime traits stores uniform scale squared, changed calculations slightly --- .../math/linalg/matrix_runtime_traits.hlsl | 22 ++++++++++++------- .../nbl/builtin/hlsl/math/quaternions.hlsl | 2 +- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl b/include/nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl index fc19b2cb3e..43b05d56ba 100644 --- a/include/nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl +++ b/include/nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl @@ -33,28 +33,34 @@ struct RuntimeTraits { bool orthogonal = true; NBL_UNROLL for (uint16_t i = 0; i < N; i++) - orthogonal = testing::relativeApproxCompare(hlsl::dot(m[i], m[(i+1)%N]), scalar_t(0.0), 1e-4) && orthogonal; + orthogonal = orthogonal && testing::relativeApproxCompare(hlsl::dot(m[i], m[(i+1)%N]), scalar_t(0.0), 1e-4); retval.orthogonal = orthogonal; } { const matrix_t m_T = hlsl::transpose(m); scalar_t dots[N]; NBL_UNROLL for (uint16_t i = 0; i < N; i++) - dots[i] = hlsl::dot(m[i], m[i]); + dots[i] = hlsl::dot(m[i], m_T[i]); - bool uniformScale = true; - NBL_UNROLL for (uint16_t i = 0; i < N-1; i++) - uniformScale = testing::relativeApproxCompare(dots[i], dots[i+1], 1e-4) && uniformScale; + scalar_t uniformScaleSq = hlsl::dot(m[0], m_T[0]); + NBL_UNROLL for (uint16_t i = 1; i < N; i++) + { + if (!testing::relativeApproxCompare(hlsl::dot(m[i], m_T[i]), uniformScaleSq, 1e-4)) + { + uniformScaleSq = bit_cast(numeric_limits::quiet_NaN); + break; + } + } - retval.uniformScale = uniformScale; - retval.orthonormal = uniformScale && retval.orthogonal && testing::relativeApproxCompare(dots[0], scalar_t(1.0), 1e-5); + 
retval.uniformScaleSq = uniformScaleSq; + retval.orthonormal = retval.orthogonal && testing::relativeApproxCompare(uniformScaleSq, scalar_t(1.0), 1e-5); } return retval; } bool invertible; bool orthogonal; - bool uniformScale; + scalar_t uniformScaleSq; bool orthonormal; }; diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index 59f2eea243..966463b5e4 100644 --- a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -102,7 +102,7 @@ struct quaternion { // only orthogonal and uniform scale mats can be converted linalg::RuntimeTraits traits = linalg::RuntimeTraits::create(m); - bool valid = traits.orthogonal && traits.uniformScale; + bool valid = traits.orthogonal && !hlsl::isnan(traits.uniformScaleSq); if (dontAssertValidMatrix) if (!valid) From d1c4a89881934f6123ddfff225e452acc8c312dc Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 13 Jan 2026 14:51:54 +0700 Subject: [PATCH 386/472] added more static_casts and new partial spec for flipIfRHSNegative --- include/nbl/builtin/hlsl/ieee754.hlsl | 36 ++++++++++++--- .../nbl/builtin/hlsl/math/quaternions.hlsl | 45 ++++++++++++------- 2 files changed, 58 insertions(+), 23 deletions(-) diff --git a/include/nbl/builtin/hlsl/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754.hlsl index a3930a362a..af23d6f07d 100644 --- a/include/nbl/builtin/hlsl/ieee754.hlsl +++ b/include/nbl/builtin/hlsl/ieee754.hlsl @@ -204,12 +204,12 @@ struct flipSign_helper +template struct flipSignIfRHSNegative_helper; template NBL_PARTIAL_REQ_TOP(concepts::FloatingPointLikeScalar) -struct flipSignIfRHSNegative_helper) > +struct flipSignIfRHSNegative_helper) > { static FloatingPoint __call(FloatingPoint val, FloatingPoint flip) { @@ -222,7 +222,7 @@ struct flipSignIfRHSNegative_helper NBL_PARTIAL_REQ_TOP(concepts::FloatingPointLikeVectorial) -struct flipSignIfRHSNegative_helper) > +struct flipSignIfRHSNegative_helper) > { static Vectorial 
__call(Vectorial val, Vectorial flip) { @@ -232,7 +232,29 @@ struct flipSignIfRHSNegative_helper::__call(getter_v(val, i), getter_v(flip, i))); + setter(output, i, flipSignIfRHSNegative_helper::__call(getter_v(val, i), getter_v(flip, i))); + + return output; + } +}; + +template +NBL_PARTIAL_REQ_TOP(concepts::FloatingPointLikeVectorial && concepts::FloatingPointLikeScalar) +struct flipSignIfRHSNegative_helper && concepts::FloatingPointLikeScalar) > +{ + static Vectorial __call(Vectorial val, FloatingPoint flip) + { + using traits_v = hlsl::vector_traits; + array_get getter_v; + array_set setter; + + using AsFloat = typename float_of_size::type; + using AsUint = typename unsigned_integer_of_size::type; + const AsUint signBitFlip = ieee754::traits::signMask & ieee754::impl::bitCastToUintType(flip); + + Vectorial output; + for (uint32_t i = 0; i < traits_v::Dimension; ++i) + setter(output, i, bit_cast(ieee754::impl::bitCastToUintType(getter_v(val, i)) ^ signBitFlip)); return output; } @@ -245,10 +267,10 @@ NBL_CONSTEXPR_FUNC T flipSign(T val, U flip) return impl::flipSign_helper::__call(val, flip); } -template -NBL_CONSTEXPR_FUNC T flipSignIfRHSNegative(T val, T flip) +template +NBL_CONSTEXPR_FUNC T flipSignIfRHSNegative(T val, U flip) { - return impl::flipSignIfRHSNegative_helper::__call(val, flip); + return impl::flipSignIfRHSNegative_helper::__call(val, flip); } template ) diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index 966463b5e4..2133490f2d 100644 --- a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -52,16 +52,19 @@ struct quaternion // angle: Rotation angle expressed in radians. // axis: Rotation axis, must be normalized. 
- template && is_same_v) - static this_t create(const U axis, const F angle, const F uniformScale = scalar_type(1.0)) + template && is_same_v::scalar_type,F>) + static this_t create(const U axis, const F angle, const F uniformScale = F(1.0)) { + using scalar_t = typename vector_traits::scalar_type; this_t q; - const scalar_type sinTheta = hlsl::sin(angle * 0.5); - const scalar_type cosTheta = hlsl::cos(angle * 0.5); + const scalar_t halfAngle = angle * scalar_t(0.5); + const scalar_t sinTheta = hlsl::sin(halfAngle); + const scalar_t cosTheta = hlsl::cos(halfAngle); q.data = data_type(axis * sinTheta, cosTheta) * uniformScale; return q; } + // applies rotation equivalent to 3x3 matrix in order of pitch * yaw * roll template NBL_FUNC_REQUIRES(is_same_v,U>) static this_t create(const U halfPitchCosSin, const U halfYawCosSin, const U halfRollCosSin) { @@ -99,10 +102,12 @@ struct quaternion static this_t create(NBL_CONST_REF_ARG(matrix_type) m, const bool dontAssertValidMatrix=false) { + scalar_type uniformScaleSq; { // only orthogonal and uniform scale mats can be converted linalg::RuntimeTraits traits = linalg::RuntimeTraits::create(m); bool valid = traits.orthogonal && !hlsl::isnan(traits.uniformScaleSq); + uniformScaleSq = traits.uniformScaleSq; if (dontAssertValidMatrix) if (!valid) @@ -168,7 +173,7 @@ struct quaternion } } - retval.data = hlsl::normalize(retval.data) / hlsl::sqrt(hlsl::dot(m[0], m[0])); // restore uniform scale + retval.data = hlsl::normalize(retval.data) * hlsl::rsqrt(uniformScaleSq); // restore uniform scale return retval; } @@ -193,10 +198,9 @@ struct quaternion static this_t unnormLerp(const this_t start, const this_t end, const scalar_type fraction, const scalar_type totalPseudoAngle) { - assert(hlsl::length(start.data) == scalar_type(1.0)); - assert(hlsl::length(end.data) == scalar_type(1.0)); - // TODO: benchmark uint sign flip vs just *sign(totalPseudoAngle) - const data_type adjEnd = ieee754::flipSignIfRHSNegative(end.data, 
hlsl::promote(totalPseudoAngle)); + assert(testing::relativeApproxCompare(hlsl::length(start.data), scalar_type(1.0), scalar_type(1e-4))); + assert(testing::relativeApproxCompare(hlsl::length(end.data), scalar_type(1.0), scalar_type(1e-4))); + const data_type adjEnd = ieee754::flipSignIfRHSNegative(end.data, totalPseudoAngle); this_t retval; retval.data = hlsl::mix(start.data, adjEnd, hlsl::promote(fraction)); @@ -225,8 +229,8 @@ struct quaternion static this_t unnormFlerp(const this_t start, const this_t end, const scalar_type fraction) { - assert(hlsl::length(start.data) == scalar_type(1.0)); - assert(hlsl::length(end.data) == scalar_type(1.0)); + assert(testing::relativeApproxCompare(hlsl::length(start.data), scalar_type(1.0), scalar_type(1e-4))); + assert(testing::relativeApproxCompare(hlsl::length(end.data), scalar_type(1.0), scalar_type(1e-4))); const scalar_type pseudoAngle = hlsl::dot(start.data,end.data); const scalar_type interpolantPrecalcTerm = fraction - scalar_type(0.5); @@ -252,7 +256,7 @@ struct quaternion return v / scaleRcp + hlsl::cross(direction, modV * data.w + hlsl::cross(direction, modV)); } - matrix_type constructMatrix() NBL_CONST_MEMBER_FUNC + matrix_type __constructMatrix() NBL_CONST_MEMBER_FUNC { matrix_type mat; mat[0] = data.yzx * data.ywz + data.zxy * data.zyw * vector3_type( 1.0, 1.0,-1.0); @@ -294,7 +298,7 @@ struct quaternion const scalar_type sinAt = hlsl::sin(fraction * hlsl::acos(cosA)); const scalar_type sinAt_over_sinA = sinAt*sinARcp; const scalar_type scale = hlsl::sqrt(scalar_type(1.0)-sinAt*sinAt) - sinAt_over_sinA*cosA; //cosAt-cos(A)sin(tA)/sin(A) = (sin(A)cos(tA)-cos(A)sin(tA))/sin(A) - const data_type adjEnd = ieee754::flipSignIfRHSNegative(end.data, hlsl::promote(totalPseudoAngle)); + const data_type adjEnd = ieee754::flipSignIfRHSNegative(end.data, totalPseudoAngle); retval.data = scale * start.data + sinAt_over_sinA * adjEnd; return retval; @@ -324,8 +328,7 @@ struct normalize_helper > { static inline 
math::truncated_quaternion __call(const math::truncated_quaternion q) { - assert(hlsl::length(q.data) == scalar_type(1.0)); - + assert(testing::relativeApproxCompare(hlsl::length(q.data), scalar_type(1.0), scalar_type(1e-4))); math::truncated_quaternion retval; retval.data = q.data; // should be normalized by definition (dropped component should be 1.0) return retval; @@ -363,6 +366,7 @@ struct static_cast_helper, math::quaternion > { static inline math::truncated_quaternion cast(const math::quaternion q) { + assert(testing::relativeApproxCompare(hlsl::length(q.data), scalar_type(1.0), scalar_type(1e-4))); math::truncated_quaternion t; t.data.x = t.data.x; t.data.y = t.data.y; @@ -376,7 +380,16 @@ struct static_cast_helper, math::quaternion > { static inline matrix cast(const math::quaternion q) { - return q.constructMatrix(); + return q.__constructMatrix(); + } +}; + +template +struct static_cast_helper, matrix > +{ + static inline math::quaternion cast(const matrix m) + { + return math::quaternion::create(m, true); } }; } From e8a6488d5dc36e6cd578416669e1c7c37fd08393 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 13 Jan 2026 15:08:17 +0700 Subject: [PATCH 387/472] account for no scale in transform vector --- include/nbl/builtin/hlsl/math/quaternions.hlsl | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index 2133490f2d..7e10a5c710 100644 --- a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -250,10 +250,17 @@ struct quaternion vector3_type transformVector(const vector3_type v, const bool assumeNoScale=false) NBL_CONST_MEMBER_FUNC { - const scalar_type scaleRcp = scalar_type(1.0) / hlsl::sqrt(hlsl::dot(data, data)); - const vector3_type modV = v * scalar_type(2.0) * scaleRcp; + const scalar_type scaleRcp = hlsl::rsqrt(hlsl::dot(data, data)); + vector3_type retV = v; + 
scalar_type modVScale = scalar_type(2.0); + if (!assumeNoScale) + { + retV /= scaleRcp; + modVScale *= scaleRcp; + } + const vector3_type modV = v * modVScale; const vector3_type direction = data.xyz; - return v / scaleRcp + hlsl::cross(direction, modV * data.w + hlsl::cross(direction, modV)); + return retV + hlsl::cross(direction, modV * data.w + hlsl::cross(direction, modV)); } matrix_type __constructMatrix() NBL_CONST_MEMBER_FUNC @@ -293,6 +300,9 @@ struct quaternion const scalar_type cosA = ieee754::flipSignIfRHSNegative(totalPseudoAngle, totalPseudoAngle); if (cosA <= (scalar_type(1.0) - threshold)) // spherical interpolation { + assert(testing::relativeApproxCompare(hlsl::length(start.data), scalar_type(1.0), scalar_type(1e-4))); + assert(testing::relativeApproxCompare(hlsl::length(end.data), scalar_type(1.0), scalar_type(1e-4))); + this_t retval; const scalar_type sinARcp = scalar_type(1.0) / hlsl::sqrt(scalar_type(1.0) - cosA * cosA); const scalar_type sinAt = hlsl::sin(fraction * hlsl::acos(cosA)); From 6f8b0f0e82173cab4090ab23cb2ab76310b0f5df Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 13 Jan 2026 16:23:24 +0100 Subject: [PATCH 388/472] update examples_tests submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 6df2ffb548..20cdd2aeae 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 6df2ffb548b43a8c69702543cc4a949efe5bc09a +Subproject commit 20cdd2aeae15fbe9ac284775f366770c2aae3cb3 From 2e497cb727f15f52019e2437c7775b776461e0eb Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 13 Jan 2026 21:47:26 +0100 Subject: [PATCH 389/472] polish bounds checks for EOCD, central & local headers, slightly safer AES field parsing to avoid OOB or infinite loops --- examples_tests | 2 +- src/nbl/system/CArchiveLoaderZip.cpp | 77 ++++++++++++++++++++++------ 2 files changed, 61 insertions(+), 18 deletions(-) diff --git a/examples_tests 
b/examples_tests index 20cdd2aeae..55afb97d73 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 20cdd2aeae15fbe9ac284775f366770c2aae3cb3 +Subproject commit 55afb97d734ac99a95c92da526244cd9990cd905 diff --git a/src/nbl/system/CArchiveLoaderZip.cpp b/src/nbl/system/CArchiveLoaderZip.cpp index 853287b25a..c820f12cb6 100644 --- a/src/nbl/system/CArchiveLoaderZip.cpp +++ b/src/nbl/system/CArchiveLoaderZip.cpp @@ -224,26 +224,42 @@ core::smart_refctd_ptr CArchiveLoaderZip::createArchiveFromGZIP(co } core::smart_refctd_ptr CArchiveLoaderZip::createArchiveFromZIP(core::smart_refctd_ptr&& file, const std::string_view& password) const { + const size_t fileSize = file->getSize(); + if (fileSize < sizeof(SZIPFileCentralDirEnd)) + return nullptr; + SZIPFileCentralDirEnd dirEnd; { dirEnd.Sig = 0u; - // First place where the end record could be stored - size_t endOfCentralDirectoryOffset = file->getSize() - sizeof(SZIPFileCentralDirEnd) + 1ull; - while (dirEnd.Sig != SZIPFileCentralDirEnd::ExpectedSig) + constexpr size_t kMaxZipCommentSize = 0xffffu; + size_t endOfCentralDirectoryOffset = fileSize - sizeof(SZIPFileCentralDirEnd); + const size_t minEndOffset = (fileSize > sizeof(SZIPFileCentralDirEnd) + kMaxZipCommentSize) ? 
(fileSize - sizeof(SZIPFileCentralDirEnd) - kMaxZipCommentSize) : 0u; + bool found = false; + while (true) { IFile::success_t success; - file->read(success, &dirEnd, --endOfCentralDirectoryOffset, sizeof(dirEnd)); - if (!success) - return nullptr; + file->read(success, &dirEnd, endOfCentralDirectoryOffset, sizeof(dirEnd)); + if (success && dirEnd.Sig == SZIPFileCentralDirEnd::ExpectedSig) + { + found = true; + break; + } + if (endOfCentralDirectoryOffset == minEndOffset) + break; + --endOfCentralDirectoryOffset; } + if (!found) + return nullptr; } // multiple disks are not supported - if (dirEnd.NumberDisk != 0) + if (dirEnd.NumberDisk != 0 || dirEnd.NumberStart != 0) { assert(false); return nullptr; } + if (dirEnd.Offset > fileSize || dirEnd.Size > fileSize - dirEnd.Offset) + return nullptr; std::shared_ptr> items = std::make_shared>(); core::vector itemsMetadata; @@ -270,12 +286,13 @@ core::smart_refctd_ptr CArchiveLoaderZip::createArchiveFromZIP(cor { SZIPFileCentralDirFileHeader centralDirectoryHeader; { + if (centralDirectoryOffset > fileSize || fileSize - centralDirectoryOffset < sizeof(SZIPFileCentralDirFileHeader)) + return nullptr; IFile::success_t success; file->read(success, ¢ralDirectoryHeader, centralDirectoryOffset, sizeof(SZIPFileCentralDirFileHeader)); if (!success) return nullptr; } - centralDirectoryOffset += centralDirectoryHeader.calcSize(); if (centralDirectoryHeader.Sig != SZIPFileCentralDirFileHeader::ExpectedSignature) { @@ -284,13 +301,30 @@ core::smart_refctd_ptr CArchiveLoaderZip::createArchiveFromZIP(cor return nullptr; } + const size_t centralHeaderSize = centralDirectoryHeader.calcSize(); + if (centralHeaderSize < sizeof(SZIPFileCentralDirFileHeader)) + return nullptr; + if (centralDirectoryOffset + centralHeaderSize > fileSize) + return nullptr; + centralDirectoryOffset += centralHeaderSize; + SZIPFileHeader localFileHeader; { + const size_t localHeaderOffset = centralDirectoryHeader.RelativeOffsetOfLocalHeader; + if 
(localHeaderOffset > fileSize || fileSize - localHeaderOffset < sizeof(SZIPFileHeader)) + return nullptr; IFile::success_t success; - file->read(success, &localFileHeader, centralDirectoryHeader.RelativeOffsetOfLocalHeader, sizeof(SZIPFileHeader)); + file->read(success, &localFileHeader, localHeaderOffset, sizeof(SZIPFileHeader)); if (!success) return nullptr; } + if (localFileHeader.Sig != SZIPFileHeader::ExpectedSignature) + return nullptr; + const size_t localHeaderSize = localFileHeader.calcSize(); + if (localHeaderSize < sizeof(SZIPFileHeader)) + return nullptr; + if (centralDirectoryHeader.RelativeOffsetOfLocalHeader + localHeaderSize > fileSize) + return nullptr; std::string filename; filename.resize(localFileHeader.FilenameLength); @@ -311,29 +345,37 @@ core::smart_refctd_ptr CArchiveLoaderZip::createArchiveFromZIP(cor size_t localOffset = centralDirectoryHeader.RelativeOffsetOfLocalHeader + sizeof(SZIPFileHeader) + localFileHeader.FilenameLength; size_t offset = localOffset + localFileHeader.ExtraFieldLength; - while (true) + while (localOffset + sizeof(extraHeader) <= offset) { { IFile::success_t success; file->read(success, &extraHeader, localOffset, sizeof(extraHeader)); if (!success) break; - localOffset += success.getBytesToProcess(); - if (localOffset > offset) - break; + localOffset += sizeof(extraHeader); } + if (extraHeader.Size < 0) + break; + const size_t extraSize = static_cast(extraHeader.Size); + if (extraSize == 0) + break; + if (localOffset + extraSize > offset) + break; + if (extraHeader.ID != 0x9901u) + { + localOffset += extraSize; continue; + } + if (extraSize < sizeof(SZipFileAESExtraData)) + break; { IFile::success_t success; file->read(success, &data, localOffset, sizeof(data)); if (!success) break; - localOffset += success.getBytesToProcess(); - if (localOffset > offset) - break; } if (data.Vendor[0] == 'A' && data.Vendor[1] == 'E') { @@ -349,6 +391,7 @@ core::smart_refctd_ptr CArchiveLoaderZip::createArchiveFromZIP(cor 
filename.clear(); // no support, can't decrypt #endif } + localOffset += extraSize; } } @@ -641,4 +684,4 @@ namespace void SzFree(void *p, void *address) { p = p; _NBL_ALIGNED_FREE(address); } ISzAlloc lzmaAlloc = { SzAlloc, SzFree }; } -#endif \ No newline at end of file +#endif From f11aca1841dafeaf6e3517c1aba102cfb18f5bc0 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 13 Jan 2026 21:54:21 +0100 Subject: [PATCH 390/472] update openexr submodule with oneliner fix (started to hit some shit with enforeced min policy) --- 3rdparty/openexr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/openexr b/3rdparty/openexr index c8a74d9ac9..aaf5f750d7 160000 --- a/3rdparty/openexr +++ b/3rdparty/openexr @@ -1 +1 @@ -Subproject commit c8a74d9ac97dd579a47a7913f361a87349c0fffd +Subproject commit aaf5f750d7a5fd117d79932d209f0e9816cbff1f From c07365fbe5097dfe40cb5480fdf7209c7aa045bf Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 14 Jan 2026 10:54:19 +0700 Subject: [PATCH 391/472] more comment info for pitch yaw roll, removed normalize truncated quat --- include/nbl/builtin/hlsl/math/quaternions.hlsl | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index 7e10a5c710..9efe359298 100644 --- a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -52,8 +52,8 @@ struct quaternion // angle: Rotation angle expressed in radians. // axis: Rotation axis, must be normalized. 
- template && is_same_v::scalar_type,F>) - static this_t create(const U axis, const F angle, const F uniformScale = F(1.0)) + template) + static this_t create(const U axis, const typename vector_traits::scalar_type angle, const typename vector_traits::scalar_type uniformScale = typename vector_traits::scalar_type(1.0)) { using scalar_t = typename vector_traits::scalar_type; this_t q; @@ -64,7 +64,7 @@ struct quaternion return q; } - // applies rotation equivalent to 3x3 matrix in order of pitch * yaw * roll + // applies rotation equivalent to 3x3 matrix in order of pitch * yaw * roll (X * Y * Z) -- mul(roll,mul(yaw,mul(pitch,v))) template NBL_FUNC_REQUIRES(is_same_v,U>) static this_t create(const U halfPitchCosSin, const U halfYawCosSin, const U halfRollCosSin) { @@ -333,18 +333,6 @@ struct quaternion namespace cpp_compat_intrinsics_impl { -template -struct normalize_helper > -{ - static inline math::truncated_quaternion __call(const math::truncated_quaternion q) - { - assert(testing::relativeApproxCompare(hlsl::length(q.data), scalar_type(1.0), scalar_type(1e-4))); - math::truncated_quaternion retval; - retval.data = q.data; // should be normalized by definition (dropped component should be 1.0) - return retval; - } -}; - template struct normalize_helper > { From 2aa275e6a811aa01961bac2eb5c72374ed652d7c Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 14 Jan 2026 12:35:55 +0700 Subject: [PATCH 392/472] create from matrix restore scale correctly --- include/nbl/builtin/hlsl/math/quaternions.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index 9efe359298..b001052810 100644 --- a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -173,7 +173,7 @@ struct quaternion } } - retval.data = hlsl::normalize(retval.data) * hlsl::rsqrt(uniformScaleSq); // restore uniform scale + retval.data = 
hlsl::normalize(retval.data) * hlsl::sqrt(uniformScaleSq); // restore uniform scale return retval; } From 0e747381b2558e13e6371f5077311a0c29b225e1 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 14 Jan 2026 15:12:21 +0700 Subject: [PATCH 393/472] Fix bug --- include/nbl/asset/utils/COBBGenerator.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/asset/utils/COBBGenerator.h b/include/nbl/asset/utils/COBBGenerator.h index 573ce30232..ccbc7c1426 100644 --- a/include/nbl/asset/utils/COBBGenerator.h +++ b/include/nbl/asset/utils/COBBGenerator.h @@ -123,7 +123,7 @@ class COBBGenerator // Slab 0: dir {1, 0, 0} proj = firstVertex.x; - minProjections[0] = minProjections[0] = proj; + minProjections[0] = maxProjections[0] = proj; minVertices[0] = firstVertex; maxVertices[0] = firstVertex; // Slab 1: dir {0, 1, 0} proj = firstVertex.y; From 153ba1a805b1eee9896c64d25b7c0931eb4c549e Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 14 Jan 2026 15:31:09 +0700 Subject: [PATCH 394/472] Fix bug regarding LargeBaseTriangle computation --- include/nbl/asset/utils/COBBGenerator.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/include/nbl/asset/utils/COBBGenerator.h b/include/nbl/asset/utils/COBBGenerator.h index ccbc7c1426..95311d64b0 100644 --- a/include/nbl/asset/utils/COBBGenerator.h +++ b/include/nbl/asset/utils/COBBGenerator.h @@ -345,14 +345,14 @@ class COBBGenerator { constexpr hlsl::float32_t eps = 0.000001f; - std::array baseTriangleVertices; + std::array baseTriangleVertices = {}; Edges edges; // Find the furthest point pair among the selected min and max point pairs std::tie(baseTriangleVertices[0], baseTriangleVertices[1]) = findFurthestPointPair(extremalVertices); // Degenerate case 1: - // If the found furthest points are located very close, return OBB aligned with the initial AABB + // no need to compute third vertices, since base triangle is invalid if (getSqDist(baseTriangleVertices[0], 
baseTriangleVertices[1]) < eps) { return { @@ -366,9 +366,9 @@ class COBBGenerator // Find a third point furthest away from line given by p0, e0 to define the large base triangle const auto furthestPointRes = findFurthestPointFromInfiniteEdge(vertices[0], edges[0], vertices); + baseTriangleVertices[2] = furthestPointRes.point; // Degenerate case 2: - // If the third point is located very close to the line, return an OBB aligned with the line if (furthestPointRes.sqDist < eps) { return { @@ -491,8 +491,13 @@ class COBBGenerator const auto baseTriangle = findBaseTriangle(extremals.vertices, vertices); + // Degenerate case 1: + // If the found furthest points are located very close, return OBB aligned with the initial AABB if (baseTriangle.flag == LargeBaseTriangle::SECOND_POINT_CLOSE) return hlsl::shapes::OBB<>::createAxisAligned(alMid, alLen); + + // Degenerate case 2: + // If the third point is located very close to the line, return an OBB aligned with the line if (baseTriangle.flag == LargeBaseTriangle::THIRD_POINT_CLOSE) return computeLineAlignedObb(baseTriangle.edges[0], vertices); From cb71a4b48fc90fbd0fd84e56138e4a83e5fec7e7 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 14 Jan 2026 15:31:47 +0700 Subject: [PATCH 395/472] Add some comment regarding references and credit to the original author of the method --- include/nbl/asset/utils/COBBGenerator.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/nbl/asset/utils/COBBGenerator.h b/include/nbl/asset/utils/COBBGenerator.h index 95311d64b0..777965111e 100644 --- a/include/nbl/asset/utils/COBBGenerator.h +++ b/include/nbl/asset/utils/COBBGenerator.h @@ -46,6 +46,9 @@ class COBBGenerator requires (std::same_as, hlsl::float32_t3>) static hlsl::shapes::OBB<> compute(size_t vertexCount, FetchVertexFn&& fetchFn) { + // Algorithm from Game Engine Gems 2, Fast Computation of Tight-Fitting Oriented Bounding Box + // Credit to Thomas Larsson and Linus Källberg + constexpr size_t SAMPLE_DIR_COUNT = 7; // 
Number of sample directions constexpr size_t SAMPLE_COUNT = SAMPLE_DIR_COUNT * 2; From 03cb423afc811b27609dd5d9444c1c0d33f787d9 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 14 Jan 2026 15:38:26 +0700 Subject: [PATCH 396/472] Fix bug regarding finding the furthest point from base triangle --- include/nbl/asset/utils/COBBGenerator.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/nbl/asset/utils/COBBGenerator.h b/include/nbl/asset/utils/COBBGenerator.h index 777965111e..8eca30e8e8 100644 --- a/include/nbl/asset/utils/COBBGenerator.h +++ b/include/nbl/asset/utils/COBBGenerator.h @@ -225,7 +225,8 @@ class COBBGenerator { const auto sqDist = getSqDistPointInfiniteEdge(vertices[i], p0, e0); if (sqDist > maxSqDist) - { maxSqDist = sqDist; + { + maxSqDist = sqDist; maxIndex = i; } } @@ -368,7 +369,7 @@ class COBBGenerator edges[0] = hlsl::normalize(baseTriangleVertices[0] - baseTriangleVertices[1]); // Find a third point furthest away from line given by p0, e0 to define the large base triangle - const auto furthestPointRes = findFurthestPointFromInfiniteEdge(vertices[0], edges[0], vertices); + const auto furthestPointRes = findFurthestPointFromInfiniteEdge(baseTriangleVertices[0], edges[0], vertices); baseTriangleVertices[2] = furthestPointRes.point; // Degenerate case 2: From 0dc95ef5357e7b1dc004d26caf274b04d85eddd5 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 14 Jan 2026 15:39:16 +0700 Subject: [PATCH 397/472] Add some comment --- include/nbl/asset/utils/COBBGenerator.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/asset/utils/COBBGenerator.h b/include/nbl/asset/utils/COBBGenerator.h index 8eca30e8e8..2b384da41d 100644 --- a/include/nbl/asset/utils/COBBGenerator.h +++ b/include/nbl/asset/utils/COBBGenerator.h @@ -349,7 +349,7 @@ class COBBGenerator { constexpr hlsl::float32_t eps = 0.000001f; - std::array baseTriangleVertices = {}; + std::array baseTriangleVertices = {}; // p0, p1, p2 Edges edges; // 
Find the furthest point pair among the selected min and max point pairs From 6d1cfb2a6ed50881d2adaca95ef76397a13e87ae Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 14 Jan 2026 16:19:08 +0700 Subject: [PATCH 398/472] Add epsilon as argument for computeOBB --- include/nbl/asset/utils/COBBGenerator.h | 21 ++++++++----------- .../asset/utils/CPolygonGeometryManipulator.h | 4 ++-- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/include/nbl/asset/utils/COBBGenerator.h b/include/nbl/asset/utils/COBBGenerator.h index 2b384da41d..cf2afcecaa 100644 --- a/include/nbl/asset/utils/COBBGenerator.h +++ b/include/nbl/asset/utils/COBBGenerator.h @@ -44,7 +44,7 @@ class COBBGenerator template requires (std::same_as, hlsl::float32_t3>) - static hlsl::shapes::OBB<> compute(size_t vertexCount, FetchVertexFn&& fetchFn) + static hlsl::shapes::OBB<> compute(size_t vertexCount, FetchVertexFn&& fetchFn, float epsilon) { // Algorithm from Game Engine Gems 2, Fast Computation of Tight-Fitting Oriented Bounding Box // Credit to Thomas Larsson and Linus Källberg @@ -286,17 +286,16 @@ class COBBGenerator return Result{ tMinProj, tMaxProj, tMinVert, tMaxVert }; }; - static auto findUpperLowerTetraPoints = []( + static auto findUpperLowerTetraPoints = [epsilon]( const hlsl::float32_t3& n, const VertexCollection& vertices, const hlsl::float32_t3& p0) { - const auto eps = 0.000001f; const auto extremalPoints = findExtremalPoints_OneDir(n, vertices); const auto triProj = hlsl::dot(p0, n); - const auto maxVert = extremalPoints.maxProj - eps > triProj ? std::optional(extremalPoints.maxVert) : std::nullopt; - const auto minVert = extremalPoints.minProj + eps < triProj ? std::optional(extremalPoints.minVert) : std::nullopt; + const auto maxVert = extremalPoints.maxProj - epsilon > triProj ? std::optional(extremalPoints.maxVert) : std::nullopt; + const auto minVert = extremalPoints.minProj + epsilon < triProj ? 
std::optional(extremalPoints.minVert) : std::nullopt; struct Result { @@ -345,10 +344,8 @@ class COBBGenerator }; - static auto findBaseTriangle = [](const ExtremalVertices& extremalVertices, const VertexCollection& vertices)-> LargeBaseTriangle + static auto findBaseTriangle = [epsilon](const ExtremalVertices& extremalVertices, const VertexCollection& vertices)-> LargeBaseTriangle { - constexpr hlsl::float32_t eps = 0.000001f; - std::array baseTriangleVertices = {}; // p0, p1, p2 Edges edges; @@ -357,7 +354,7 @@ class COBBGenerator // Degenerate case 1: // no need to compute third vertices, since base triangle is invalid - if (getSqDist(baseTriangleVertices[0], baseTriangleVertices[1]) < eps) + if (getSqDist(baseTriangleVertices[0], baseTriangleVertices[1]) < epsilon) { return { .vertices = baseTriangleVertices, @@ -373,7 +370,7 @@ class COBBGenerator baseTriangleVertices[2] = furthestPointRes.point; // Degenerate case 2: - if (furthestPointRes.sqDist < eps) + if (furthestPointRes.sqDist < epsilon) { return { .vertices = baseTriangleVertices, @@ -456,7 +453,7 @@ class COBBGenerator return buildObbFromAxesAndLocalMinMax(axes, localMin, localMax); }; - static auto computeLineAlignedObb = [](const hlsl::float32_t3& u, const VertexCollection& vertices) + static auto computeLineAlignedObb = [epsilon](const hlsl::float32_t3& u, const VertexCollection& vertices) { // Given u, build any orthonormal base u, v, w @@ -467,7 +464,7 @@ class COBBGenerator else { r.z = 0; } const auto sqLen = hlsl::dot(r, r); - if (sqLen < FLT_EPSILON) { r.x = r.y = r.z = 1; } + if (sqLen < epsilon) { r.x = r.y = r.z = 1; } const auto v = normalize(cross(u, r)); const auto w = normalize(cross(u, v)); diff --git a/include/nbl/asset/utils/CPolygonGeometryManipulator.h b/include/nbl/asset/utils/CPolygonGeometryManipulator.h index 71aecc3356..59a82a04e3 100644 --- a/include/nbl/asset/utils/CPolygonGeometryManipulator.h +++ b/include/nbl/asset/utils/CPolygonGeometryManipulator.h @@ -235,9 +235,9 @@ 
class NBL_API2 CPolygonGeometryManipulator template requires (std::same_as, hlsl::float32_t3>) - static inline hlsl::shapes::OBB<3, hlsl::float32_t> calculateOBB(size_t vertexCount, FetchVertexFn&& fetchFn) + static inline hlsl::shapes::OBB<3, hlsl::float32_t> calculateOBB(size_t vertexCount, FetchVertexFn&& fetchFn, float epsilon = 1.525e-5f) { - return COBBGenerator::compute(vertexCount, std::forward(fetchFn)); + return COBBGenerator::compute(vertexCount, std::forward(fetchFn), epsilon); } static core::smart_refctd_ptr createUnweldedList(const ICPUPolygonGeometry* inGeo); From b5daf1965e9c57022027689c525fc3daf618d5b1 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 14 Jan 2026 20:59:02 +0700 Subject: [PATCH 399/472] Remove old commented code regarding obb calculation --- .../asset/utils/CPolygonGeometryManipulator.h | 1 - .../utils/CPolygonGeometryManipulator.cpp | 198 ------------------ 2 files changed, 199 deletions(-) diff --git a/include/nbl/asset/utils/CPolygonGeometryManipulator.h b/include/nbl/asset/utils/CPolygonGeometryManipulator.h index 59a82a04e3..4a31bd6a95 100644 --- a/include/nbl/asset/utils/CPolygonGeometryManipulator.h +++ b/include/nbl/asset/utils/CPolygonGeometryManipulator.h @@ -507,7 +507,6 @@ class NBL_API2 CPolygonGeometryManipulator static float DistanceToLine(core::vectorSIMDf P0, core::vectorSIMDf P1, core::vectorSIMDf InPoint); static float DistanceToPlane(core::vectorSIMDf InPoint, core::vectorSIMDf PlanePoint, core::vectorSIMDf PlaneNormal); - static core::matrix3x4SIMD calculateOBB(const nbl::asset::ICPUMeshBuffer* meshbuffer); //! 
Calculates bounding box of the meshbuffer static inline core::aabbox3df calculateBoundingBox( diff --git a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp index 0e3c425e78..b4f2f2ef06 100644 --- a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp +++ b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp @@ -1431,203 +1431,5 @@ float IMeshManipulator::DistanceToPlane(core::vectorSIMDf InPoint, core::vectorS return (core::dot(PointToPlane, PlaneNormal).x >= 0) ? core::abs(core::dot(PointToPlane, PlaneNormal).x) : 0; } -core::matrix3x4SIMD IMeshManipulator::calculateOBB(const nbl::asset::ICPUMeshBuffer* meshbuffer) -{ - auto FindMinMaxProj = [&](const core::vectorSIMDf& Dir, const core::vectorSIMDf Extrema[]) -> core::vectorSIMDf - { - float MinPoint, MaxPoint; - MinPoint = MaxPoint = core::dot(Dir, Extrema[0]).x; - - for (int i = 1; i < 12; i++) { - float Proj = core::dot(Dir, Extrema[i]).x; - if (MinPoint > Proj) MinPoint = Proj; - if (MaxPoint < Proj) MaxPoint = Proj; - } - - return core::vectorSIMDf(MaxPoint, MinPoint, 0); - }; - - auto ComputeAxis = [&](const core::vectorSIMDf& P0, const core::vectorSIMDf& P1, const core::vectorSIMDf& P2, core::vectorSIMDf* AxesEdge, float& PrevQuality, const core::vectorSIMDf Extrema[]) -> void - { - core::vectorSIMDf e0 = P1 - P0; - core::vectorSIMDf Edges[3]; - Edges[0] = e0 / core::length(e0); - Edges[1] = core::cross(P2 - P1, P1 - P0); - Edges[1] = Edges[1] / core::length(Edges[1]); - Edges[2] = core::cross(Edges[0], Edges[1]); - - core::vectorSIMDf Edge10Proj = FindMinMaxProj(Edges[0], Extrema); - core::vectorSIMDf Edge20Proj = FindMinMaxProj(Edges[1], Extrema); - core::vectorSIMDf Edge30Proj = FindMinMaxProj(Edges[2], Extrema); - core::vectorSIMDf Max2 = core::vectorSIMDf(Edge10Proj.x, Edge20Proj.x, Edge30Proj.x); - core::vectorSIMDf Min2 = core::vectorSIMDf(Edge10Proj.y, Edge20Proj.y, Edge30Proj.y); - core::vectorSIMDf Diff = Max2 - Min2; - float Quality = Diff.x 
* Diff.y + Diff.x * Diff.z + Diff.y * Diff.z; - - if (Quality < PrevQuality) { - PrevQuality = Quality; - for (int i = 0; i < 3; i++) { - AxesEdge[i] = Edges[i]; - } - } - }; - - core::vectorSIMDf Extrema[12]; - float A = (core::sqrt(5.0f) - 1.0f) / 2.0f; - core::vectorSIMDf N[6]; - N[0] = core::vectorSIMDf(0, 1, A); - N[1] = core::vectorSIMDf(0, 1, -A); - N[2] = core::vectorSIMDf(1, A, 0); - N[3] = core::vectorSIMDf(1, -A, 0); - N[4] = core::vectorSIMDf(A, 0, 1); - N[5] = core::vectorSIMDf(A, 0, -1); - float Bs[12]; - float B; - int indexcount = meshbuffer->getIndexCount(); - core::vectorSIMDf CachedVertex = meshbuffer->getPosition(meshbuffer->getIndexValue(0)); - core::vectorSIMDf AABBMax = CachedVertex; - core::vectorSIMDf AABBMin = CachedVertex; - for (int k = 0; k < 12; k += 2) { - B = core::dot(N[k / 2], CachedVertex).x; - Extrema[k] = core::vectorSIMDf(CachedVertex.x, CachedVertex.y, CachedVertex.z); Bs[k] = B; - Extrema[k + 1] = core::vectorSIMDf(CachedVertex.x, CachedVertex.y, CachedVertex.z); Bs[k + 1] = B; - } - for (uint32_t j = 1u; j < indexcount; j += 1u) { - CachedVertex = meshbuffer->getPosition(meshbuffer->getIndexValue(j)); - for (int k = 0; k < 12; k += 2) { - B = core::dot(N[k / 2], CachedVertex).x; - if (B > Bs[k] || j == 0) { Extrema[k] = core::vectorSIMDf(CachedVertex.x, CachedVertex.y, CachedVertex.z); Bs[k] = B; } - if (B < Bs[k + 1] || j == 0) { Extrema[k + 1] = core::vectorSIMDf(CachedVertex.x, CachedVertex.y, CachedVertex.z); Bs[k + 1] = B; } - } - AABBMax = core::max(AABBMax, CachedVertex); - AABBMin = core::min(AABBMin, CachedVertex); - } - - int LBTE1 = -1; - float MaxDiff = 0; - for (int i = 0; i < 12; i += 2) { - core::vectorSIMDf C = (Extrema[i]) - (Extrema[i + 1]); float TempDiff = core::dot(C, C).x; if (TempDiff > MaxDiff) { MaxDiff = TempDiff; LBTE1 = i; } - } - assert(LBTE1 != -1); - - core::vectorSIMDf P0 = Extrema[LBTE1]; - core::vectorSIMDf P1 = Extrema[LBTE1 + 1]; - - int LBTE3 = 0; - float MaxDist = 0; - int RemoveAt = 0; 
- - for (int i = 0; i < 10; i++) { - int index = i; - if (index >= LBTE1) index += 2; - float TempDist = DistanceToLine(P0, P1, core::vectorSIMDf(Extrema[index].x, Extrema[index].y, Extrema[index].z)); - if (TempDist > MaxDist || i == 0) { - MaxDist = TempDist; - LBTE3 = index; - RemoveAt = i; - } - } - - core::vectorSIMDf P2 = Extrema[LBTE3]; - core::vectorSIMDf ExtremaRemainingTemp[9]; - for (int i = 0; i < 9; i++) { - int index = i; - if (index >= RemoveAt) index += 1; - if (index >= LBTE1) index += 2; - ExtremaRemainingTemp[i] = core::vectorSIMDf(Extrema[index].x, Extrema[index].y, Extrema[index].z, index); - } - - float MaxDistPlane = -9999999.0f; - float MinDistPlane = -9999999.0f; - float TempDistPlane = 0; - core::vectorSIMDf Q0 = core::vectorSIMDf(0, 0, 0); - core::vectorSIMDf Q1 = core::vectorSIMDf(0, 0, 0); - core::vectorSIMDf Norm = core::cross(P2 - P1, P2 - P0); - Norm /= core::length(Norm); - for (int i = 0; i < 9; i++) { - TempDistPlane = DistanceToPlane(core::vectorSIMDf(ExtremaRemainingTemp[i].x, ExtremaRemainingTemp[i].y, ExtremaRemainingTemp[i].z), P0, Norm); - if (TempDistPlane > MaxDistPlane || i == 0) { - MaxDistPlane = TempDistPlane; - Q0 = Extrema[(int)ExtremaRemainingTemp[i].w]; - } - TempDistPlane = DistanceToPlane(core::vectorSIMDf(ExtremaRemainingTemp[i].x, ExtremaRemainingTemp[i].y, ExtremaRemainingTemp[i].z), P0, -Norm); - if (TempDistPlane > MinDistPlane || i == 0) { - MinDistPlane = TempDistPlane; - Q1 = Extrema[(int)ExtremaRemainingTemp[i].w]; - } - } - - float BestQuality = 99999999999999.0f; - core::vectorSIMDf BestAxis[3]; - ComputeAxis(P0, P1, P2, BestAxis, BestQuality, Extrema); - ComputeAxis(P2, P0, P1, BestAxis, BestQuality, Extrema); - ComputeAxis(P1, P2, P0, BestAxis, BestQuality, Extrema); - - ComputeAxis(P1, Q0, P0, BestAxis, BestQuality, Extrema); - ComputeAxis(P0, P1, Q0, BestAxis, BestQuality, Extrema); - ComputeAxis(Q0, P0, P1, BestAxis, BestQuality, Extrema); - - ComputeAxis(P2, Q0, P0, BestAxis, BestQuality, 
Extrema); - ComputeAxis(P0, P2, Q0, BestAxis, BestQuality, Extrema); - ComputeAxis(Q0, P0, P2, BestAxis, BestQuality, Extrema); - - ComputeAxis(P1, Q0, P2, BestAxis, BestQuality, Extrema); - ComputeAxis(P2, P1, Q0, BestAxis, BestQuality, Extrema); - ComputeAxis(Q0, P2, P1, BestAxis, BestQuality, Extrema); - - ComputeAxis(P1, Q1, P0, BestAxis, BestQuality, Extrema); - ComputeAxis(P0, P1, Q1, BestAxis, BestQuality, Extrema); - ComputeAxis(Q1, P0, P1, BestAxis, BestQuality, Extrema); - - ComputeAxis(P2, Q1, P0, BestAxis, BestQuality, Extrema); - ComputeAxis(P0, P2, Q1, BestAxis, BestQuality, Extrema); - ComputeAxis(Q1, P0, P2, BestAxis, BestQuality, Extrema); - - ComputeAxis(P1, Q1, P2, BestAxis, BestQuality, Extrema); - ComputeAxis(P2, P1, Q1, BestAxis, BestQuality, Extrema); - ComputeAxis(Q1, P2, P1, BestAxis, BestQuality, Extrema); - - core::matrix3x4SIMD TransMat = core::matrix3x4SIMD( - BestAxis[0].x, BestAxis[1].x, BestAxis[2].x, 0, - BestAxis[0].y, BestAxis[1].y, BestAxis[2].y, 0, - BestAxis[0].z, BestAxis[1].z, BestAxis[2].z, 0); - - core::vectorSIMDf MinPoint; - core::vectorSIMDf MaxPoint; - CachedVertex = meshbuffer->getPosition(meshbuffer->getIndexValue(0)); - MinPoint = core::vectorSIMDf(core::dot(BestAxis[0], CachedVertex).x, core::dot(BestAxis[1], CachedVertex).x, core::dot(BestAxis[2], CachedVertex).x); - MaxPoint = MinPoint; - for (uint32_t j = 1u; j < indexcount; j += 1u) - { - CachedVertex = meshbuffer->getPosition(meshbuffer->getIndexValue(j)); - core::vectorSIMDf Proj = core::vectorSIMDf(core::dot(BestAxis[0], CachedVertex).x, core::dot(BestAxis[1], CachedVertex).x, core::dot(BestAxis[2], CachedVertex).x); - MinPoint = core::min(MinPoint, Proj); - MaxPoint = core::max(MaxPoint, Proj); - } - - core::vectorSIMDf OBBDiff = MaxPoint - MinPoint; - float OBBQuality = OBBDiff.x * OBBDiff.y + OBBDiff.y * OBBDiff.z + OBBDiff.z * OBBDiff.x; - - core::vectorSIMDf ABBDiff = AABBMax - AABBMin; - float ABBQuality = ABBDiff.x * ABBDiff.y + ABBDiff.y * ABBDiff.z + 
ABBDiff.z * ABBDiff.x; - core::matrix3x4SIMD scaleMat; - core::matrix3x4SIMD translationMat; - translationMat.setTranslation(-(MinPoint) / OBBDiff); - scaleMat.setScale(OBBDiff); - TransMat = core::concatenateBFollowedByA(TransMat, scaleMat); - TransMat = core::concatenateBFollowedByA(TransMat, translationMat); - if (ABBQuality < OBBQuality) { - translationMat.setTranslation(-(AABBMin) / ABBDiff); - scaleMat.setScale(ABBDiff); - TransMat = core::matrix3x4SIMD( - 1, 0, 0, 0, - 0, 1, 0, 0, - 0, 0, 1, 0); - TransMat = core::concatenateBFollowedByA(TransMat, scaleMat); - TransMat = core::concatenateBFollowedByA(TransMat, translationMat); - } - - return TransMat; -} #endif } // end namespace nbl::asset From ccdf991bd10765ce789b328fcdad075456061f0d Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 14 Jan 2026 17:29:41 +0100 Subject: [PATCH 400/472] update examples_tests submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 55afb97d73..a2b692ec62 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 55afb97d734ac99a95c92da526244cd9990cd905 +Subproject commit a2b692ec62606fd60e59dccf19b381d8400e83d3 From b589759b107ec35e2a041ea728a968b12a7d3cfd Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Wed, 14 Jan 2026 19:36:43 +0100 Subject: [PATCH 401/472] update examples_tests submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index a2b692ec62..9795b5d3a3 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit a2b692ec62606fd60e59dccf19b381d8400e83d3 +Subproject commit 9795b5d3a35621126247076333f9ab17406e3ff3 From 3869cb105e554dfb7b036094941f96d3f583879d Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 15 Jan 2026 11:31:51 +0700 Subject: [PATCH 402/472] minor bug fixes to quaternions --- .../builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl | 8 ++------ 
include/nbl/builtin/hlsl/math/quaternions.hlsl | 9 ++++----- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl b/include/nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl index 43b05d56ba..dc74c45ddd 100644 --- a/include/nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl +++ b/include/nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl @@ -38,14 +38,10 @@ struct RuntimeTraits } { const matrix_t m_T = hlsl::transpose(m); - scalar_t dots[N]; - NBL_UNROLL for (uint16_t i = 0; i < N; i++) - dots[i] = hlsl::dot(m[i], m_T[i]); - - scalar_t uniformScaleSq = hlsl::dot(m[0], m_T[0]); + scalar_t uniformScaleSq = hlsl::dot(m_T[0], m_T[0]); NBL_UNROLL for (uint16_t i = 1; i < N; i++) { - if (!testing::relativeApproxCompare(hlsl::dot(m[i], m_T[i]), uniformScaleSq, 1e-4)) + if (!testing::relativeApproxCompare(hlsl::dot(m_T[i], m_T[i]), uniformScaleSq, 1e-4)) { uniformScaleSq = bit_cast(numeric_limits::quiet_NaN); break; diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index b001052810..31fb97a51a 100644 --- a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -128,7 +128,6 @@ struct quaternion const data_type Qy = data_type(m11, neg_m11, m11, neg_m11); const data_type Qz = data_type(m22, neg_m22, neg_m22, m22); - // const data_type tmp = hlsl::promote(1.0) + Qx + Qy + Qz; const data_type tmp = Qx + Qy + Qz; // TODO: speed this up @@ -364,11 +363,11 @@ struct static_cast_helper, math::quaternion > { static inline math::truncated_quaternion cast(const math::quaternion q) { - assert(testing::relativeApproxCompare(hlsl::length(q.data), scalar_type(1.0), scalar_type(1e-4))); + assert(testing::relativeApproxCompare(hlsl::length(q.data), T(1.0), T(1e-4))); math::truncated_quaternion t; - t.data.x = t.data.x; - t.data.y = t.data.y; - t.data.z = t.data.z; + t.data.x = q.data.x; + 
t.data.y = q.data.y; + t.data.z = q.data.z; return t; } }; From d3958e59f15e520b09df7e9984882c0993f9652a Mon Sep 17 00:00:00 2001 From: devsh Date: Thu, 15 Jan 2026 08:06:02 +0100 Subject: [PATCH 403/472] add some operators to hlsl::matrix in C++ --- include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl b/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl index 712ce5e979..c0b5023990 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl @@ -28,8 +28,15 @@ struct matrix final : private glm::mat return *this; } - friend matrix operator+(matrix const& lhs, matrix const& rhs){ return matrix(reinterpret_cast(lhs) + reinterpret_cast(rhs)); } - friend matrix operator-(matrix const& lhs, matrix const& rhs){ return matrix(reinterpret_cast(lhs) - reinterpret_cast(rhs)); } + // not sure how to forward this + //inline friend matrix operator*(matrix const& lhs, T rhs) {return matrix(reinterpret_cast(lhs)*rhs);} + + // scalar compound assignment multiply and divide + inline matrix& operator*=(const T rhs) {return reinterpret_cast(Base::template operator*=(rhs));} + inline matrix& operator/=(const T rhs) {return reinterpret_cast(Base::template operator/=(rhs));} + + inline friend matrix operator+(matrix const& lhs, matrix const& rhs){ return matrix(reinterpret_cast(lhs) + reinterpret_cast(rhs)); } + inline friend matrix operator-(matrix const& lhs, matrix const& rhs){ return matrix(reinterpret_cast(lhs) - reinterpret_cast(rhs)); } template inline friend matrix mul(matrix const& lhs, matrix const& rhs) From a602fba54c90d1473c5d2267a54e1a25d03e5ae6 Mon Sep 17 00:00:00 2001 From: devsh Date: Thu, 15 Jan 2026 08:06:02 +0100 Subject: [PATCH 404/472] add some operators to hlsl::matrix in C++ --- include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff 
--git a/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl b/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl index 712ce5e979..c0b5023990 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl @@ -28,8 +28,15 @@ struct matrix final : private glm::mat return *this; } - friend matrix operator+(matrix const& lhs, matrix const& rhs){ return matrix(reinterpret_cast(lhs) + reinterpret_cast(rhs)); } - friend matrix operator-(matrix const& lhs, matrix const& rhs){ return matrix(reinterpret_cast(lhs) - reinterpret_cast(rhs)); } + // not sure how to forward this + //inline friend matrix operator*(matrix const& lhs, T rhs) {return matrix(reinterpret_cast(lhs)*rhs);} + + // scalar compound assignment multiply and divide + inline matrix& operator*=(const T rhs) {return reinterpret_cast(Base::template operator*=(rhs));} + inline matrix& operator/=(const T rhs) {return reinterpret_cast(Base::template operator/=(rhs));} + + inline friend matrix operator+(matrix const& lhs, matrix const& rhs){ return matrix(reinterpret_cast(lhs) + reinterpret_cast(rhs)); } + inline friend matrix operator-(matrix const& lhs, matrix const& rhs){ return matrix(reinterpret_cast(lhs) - reinterpret_cast(rhs)); } template inline friend matrix mul(matrix const& lhs, matrix const& rhs) From 4aa236838791995f0dc309ca3b1f27e3c44cf968 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 15 Jan 2026 14:35:09 +0700 Subject: [PATCH 405/472] factor out uniform scale from mat before convert --- include/nbl/builtin/hlsl/math/quaternions.hlsl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index 31fb97a51a..00200d903a 100644 --- a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -100,12 +100,12 @@ struct quaternion ); } - static this_t create(NBL_CONST_REF_ARG(matrix_type) m, const bool 
dontAssertValidMatrix=false) + static this_t create(NBL_CONST_REF_ARG(matrix_type) _m, const bool dontAssertValidMatrix=false) { scalar_type uniformScaleSq; { // only orthogonal and uniform scale mats can be converted - linalg::RuntimeTraits traits = linalg::RuntimeTraits::create(m); + linalg::RuntimeTraits traits = linalg::RuntimeTraits::create(_m); bool valid = traits.orthogonal && !hlsl::isnan(traits.uniformScaleSq); uniformScaleSq = traits.uniformScaleSq; @@ -120,6 +120,10 @@ struct quaternion assert(valid); } + const scalar_type uniformScale = hlsl::sqrt(uniformScaleSq); + matrix_type m = _m; + m /= uniformScale; + const scalar_type m00 = m[0][0], m11 = m[1][1], m22 = m[2][2]; const scalar_type neg_m00 = -m00; const scalar_type neg_m11 = -m11; @@ -172,7 +176,7 @@ struct quaternion } } - retval.data = hlsl::normalize(retval.data) * hlsl::sqrt(uniformScaleSq); // restore uniform scale + retval.data = hlsl::normalize(retval.data) * uniformScale; // restore uniform scale return retval; } From 5f02325d67e768e7f724b19fe9e0ec0a2ceaf312 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 15 Jan 2026 17:25:56 +0700 Subject: [PATCH 406/472] new vector comparison by orientation --- .../hlsl/testing/orientation_compare.hlsl | 44 +++++++++++++++++++ src/nbl/builtin/CMakeLists.txt | 1 + 2 files changed, 45 insertions(+) create mode 100644 include/nbl/builtin/hlsl/testing/orientation_compare.hlsl diff --git a/include/nbl/builtin/hlsl/testing/orientation_compare.hlsl b/include/nbl/builtin/hlsl/testing/orientation_compare.hlsl new file mode 100644 index 0000000000..7884cf1b21 --- /dev/null +++ b/include/nbl/builtin/hlsl/testing/orientation_compare.hlsl @@ -0,0 +1,44 @@ +#ifndef _NBL_BUILTIN_HLSL_TESTING_ORIENTATION_COMPARE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_TESTING_ORIENTATION_COMPARE_INCLUDED_ + +#include + +namespace nbl +{ +namespace hlsl +{ +namespace testing +{ +namespace impl +{ + +template) +struct OrientationCompareHelper +{ + static bool 
__call(NBL_CONST_REF_ARG(FloatingPointVector) lhs, NBL_CONST_REF_ARG(FloatingPointVector) rhs, const float64_t maxAllowedDifference) + { + using traits = nbl::hlsl::vector_traits; + using scalar_t = typename traits::scalar_type; + + const scalar_t dotLR = hlsl::dot(lhs, rhs); + if (dotLR < scalar_t(0.0)) + return false; + + const scalar_t scale = hlsl::sqrt(hlsl::dot(lhs,lhs) * hlsl::dot(rhs,rhs)); + return relativeApproxCompare(dotLR, scale, maxAllowedDifference); + } +}; + +} + +template +bool orientationCompare(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs, const float64_t maxAllowedDifference) +{ + return impl::OrientationCompareHelper::__call(lhs, rhs, maxAllowedDifference); +} + +} +} +} + +#endif \ No newline at end of file diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index e44f41b29e..86a0ddf9b9 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -377,5 +377,6 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/rwmc/ResolveParameters.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/morton.hlsl") #testing LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/testing/relative_approx_compare.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/testing/orientation_compare.hlsl") ADD_CUSTOM_BUILTIN_RESOURCES(nblBuiltinResourceData NBL_RESOURCES_TO_EMBED "${NBL_ROOT_PATH}/include" "nbl/builtin" "nbl::builtin" "${NBL_ROOT_PATH_BINARY}/include" "${NBL_ROOT_PATH_BINARY}/src" "STATIC" "INTERNAL") From 11f7f2eb72fa4286b9a6cab5fa696b83429720a1 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 15 Jan 2026 18:14:44 +0700 Subject: [PATCH 407/472] return nan quaternion if uniform scale is 0 --- include/nbl/builtin/hlsl/math/quaternions.hlsl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index 00200d903a..ed0f796fe8 100644 --- a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ 
b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -119,6 +119,12 @@ struct quaternion else assert(valid); } + if (uniformScaleSq < numeric_limits::min) + { + this_t retval; + retval.data = hlsl::promote(bit_cast(numeric_limits::quiet_NaN)); + return retval; + } const scalar_type uniformScale = hlsl::sqrt(uniformScaleSq); matrix_type m = _m; From 266cd710e7fae965def0f6c79a3488a28db8ba53 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 16 Jan 2026 10:25:08 +0700 Subject: [PATCH 408/472] account for negative orientation, added check for 0 length vectors --- include/nbl/builtin/hlsl/testing/orientation_compare.hlsl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/testing/orientation_compare.hlsl b/include/nbl/builtin/hlsl/testing/orientation_compare.hlsl index 7884cf1b21..3228872b4d 100644 --- a/include/nbl/builtin/hlsl/testing/orientation_compare.hlsl +++ b/include/nbl/builtin/hlsl/testing/orientation_compare.hlsl @@ -20,11 +20,13 @@ struct OrientationCompareHelper using traits = nbl::hlsl::vector_traits; using scalar_t = typename traits::scalar_type; - const scalar_t dotLR = hlsl::dot(lhs, rhs); - if (dotLR < scalar_t(0.0)) + const scalar_t dotLR = hlsl::abs(hlsl::dot(lhs, rhs)); + const scalar_t dotLL = hlsl::dot(lhs,lhs); + const scalar_t dotRR = hlsl::dot(rhs,rhs); + if (dotLL < numeric_limits::min || dotRR < numeric_limits::min) return false; - const scalar_t scale = hlsl::sqrt(hlsl::dot(lhs,lhs) * hlsl::dot(rhs,rhs)); + const scalar_t scale = hlsl::sqrt(dotLL * dotRR); return relativeApproxCompare(dotLR, scale, maxAllowedDifference); } }; From ce371c732aba74e6038075b3ba5f96dc12303daf Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 16 Jan 2026 15:03:17 +0700 Subject: [PATCH 409/472] fixes create from matrix by using correct row-column indexing --- .../nbl/builtin/hlsl/math/quaternions.hlsl | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git 
a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index ed0f796fe8..b56c6024cd 100644 --- a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -146,9 +146,9 @@ struct quaternion { const scalar_type scales = hlsl::sqrt(tmp.x + scalar_type(1.0)); const scalar_type invscales = scalar_type(0.5) / scales; - retval.data.x = (m[2][1] - m[1][2]) * invscales; - retval.data.y = (m[0][2] - m[2][0]) * invscales; - retval.data.z = (m[1][0] - m[0][1]) * invscales; + retval.data.x = (m[1][2] - m[2][1]) * invscales; + retval.data.y = (m[2][0] - m[0][2]) * invscales; + retval.data.z = (m[0][1] - m[1][0]) * invscales; retval.data.w = scales * scalar_type(0.5); } else @@ -158,31 +158,31 @@ struct quaternion const scalar_type scales = hlsl::sqrt(tmp.y + scalar_type(1.0)); const scalar_type invscales = scalar_type(0.5) / scales; retval.data.x = scales * scalar_type(0.5); - retval.data.y = (m[0][1] + m[1][0]) * invscales; - retval.data.z = (m[2][0] + m[0][2]) * invscales; - retval.data.w = (m[2][1] - m[1][2]) * invscales; + retval.data.y = (m[1][0] + m[0][1]) * invscales; + retval.data.z = (m[0][2] + m[2][0]) * invscales; + retval.data.w = (m[1][2] - m[2][1]) * invscales; } else if (tmp.z > scalar_type(0.0)) { const scalar_type scales = hlsl::sqrt(tmp.z + scalar_type(1.0)); const scalar_type invscales = scalar_type(0.5) / scales; - retval.data.x = (m[0][1] + m[1][0]) * invscales; + retval.data.x = (m[1][0] + m[0][1]) * invscales; retval.data.y = scales * scalar_type(0.5); - retval.data.z = (m[1][2] + m[2][1]) * invscales; - retval.data.w = (m[0][2] - m[2][0]) * invscales; + retval.data.z = (m[2][1] + m[1][2]) * invscales; + retval.data.w = (m[2][0] - m[0][2]) * invscales; } else { const scalar_type scales = hlsl::sqrt(tmp.w + scalar_type(1.0)); const scalar_type invscales = scalar_type(0.5) / scales; - retval.data.x = (m[0][2] + m[2][0]) * invscales; - retval.data.y = (m[1][2] + m[2][1]) 
* invscales; + retval.data.x = (m[2][0] + m[0][2]) * invscales; + retval.data.y = (m[2][1] + m[1][2]) * invscales; retval.data.z = scales * scalar_type(0.5); - retval.data.w = (m[1][0] - m[0][1]) * invscales; + retval.data.w = (m[0][1] - m[1][0]) * invscales; } } - retval.data = hlsl::normalize(retval.data) * uniformScale; // restore uniform scale + retval.data = retval.data * uniformScale; // restore uniform scale return retval; } @@ -326,7 +326,7 @@ struct quaternion return unnormLerp(start, end, fraction, totalPseudoAngle); } - this_t inverse() NBL_CONST_MEMBER_FUNC + this_t operator-() NBL_CONST_MEMBER_FUNC { this_t retval; retval.data.xyz = -data.xyz; From c8df31adf40358139f49e6d9caa52234993a8727 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 16 Jan 2026 15:09:33 +0700 Subject: [PATCH 410/472] fix glm not liking unary - on swizzle --- include/nbl/builtin/hlsl/math/quaternions.hlsl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index b56c6024cd..9208bc2256 100644 --- a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -329,7 +329,9 @@ struct quaternion this_t operator-() NBL_CONST_MEMBER_FUNC { this_t retval; - retval.data.xyz = -data.xyz; + retval.data.x = -data.x; + retval.data.y = -data.y; + retval.data.z = -data.z; retval.data.w = data.w; return retval; } From f4b0b43d8aa65d83bc2559da24c694920cfa141a Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 16 Jan 2026 15:29:13 +0700 Subject: [PATCH 411/472] do quaternion inverse not as member function --- .../nbl/builtin/hlsl/math/quaternions.hlsl | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index 9208bc2256..2d294cd4be 100644 --- a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ 
b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -326,16 +326,6 @@ struct quaternion return unnormLerp(start, end, fraction, totalPseudoAngle); } - this_t operator-() NBL_CONST_MEMBER_FUNC - { - this_t retval; - retval.data.x = -data.x; - retval.data.y = -data.y; - retval.data.z = -data.z; - retval.data.w = data.w; - return retval; - } - data_type data; }; @@ -403,6 +393,18 @@ struct static_cast_helper, matrix > }; } +template +math::quaternion inverse(const math::quaternion q) +{ + math::quaternion retval; + retval.data.x = -q.data.x; + retval.data.y = -q.data.y; + retval.data.z = -q.data.z; + retval.data.w = q.data.w; + retval.data /= hlsl::dot(q.data,q.data); + return retval; +} + } } From 5c53ae58d707341401f161fc1969400d53bf9ab1 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 16 Jan 2026 20:30:15 +0700 Subject: [PATCH 412/472] added a vector length compare --- .../hlsl/testing/vector_length_compare.hlsl | 45 +++++++++++++++++++ src/nbl/builtin/CMakeLists.txt | 1 + 2 files changed, 46 insertions(+) create mode 100644 include/nbl/builtin/hlsl/testing/vector_length_compare.hlsl diff --git a/include/nbl/builtin/hlsl/testing/vector_length_compare.hlsl b/include/nbl/builtin/hlsl/testing/vector_length_compare.hlsl new file mode 100644 index 0000000000..03bf72b006 --- /dev/null +++ b/include/nbl/builtin/hlsl/testing/vector_length_compare.hlsl @@ -0,0 +1,45 @@ +#ifndef _NBL_BUILTIN_HLSL_TESTING_VECTOR_LENGTH_COMPARE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_TESTING_VECTOR_LENGTH_COMPARE_INCLUDED_ + +#include +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace testing +{ +namespace impl +{ + +template) +struct LengthCompareHelper +{ + static bool __call(NBL_CONST_REF_ARG(FloatingPointVector) lhs, NBL_CONST_REF_ARG(FloatingPointVector) rhs, const float64_t maxAbsoluteDifference, const float64_t maxRelativeDifference) + { + using traits = nbl::hlsl::vector_traits; + using scalar_t = typename traits::scalar_type; + + const scalar_t dotLL = 
hlsl::dot(lhs,lhs); + const scalar_t dotRR = hlsl::dot(rhs,rhs); + const scalar_t diff = hlsl::abs(dotLL-dotRR); + const scalar_t sc = hlsl::max(dotLL,dotRR); + return diff <= maxAbsoluteDifference || diff <= maxRelativeDifference*sc; + } +}; + +} + +template +bool vectorLengthCompare(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs, const float64_t maxAbsoluteDifference, const float64_t maxRelativeDifference) +{ + return impl::LengthCompareHelper::__call(lhs, rhs, maxAbsoluteDifference, maxRelativeDifference); +} + +} +} +} + +#endif \ No newline at end of file diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 86a0ddf9b9..7a2a2e27c2 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -378,5 +378,6 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/morton.hlsl") #testing LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/testing/relative_approx_compare.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/testing/orientation_compare.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/testing/vector_length_compare.hlsl") ADD_CUSTOM_BUILTIN_RESOURCES(nblBuiltinResourceData NBL_RESOURCES_TO_EMBED "${NBL_ROOT_PATH}/include" "nbl/builtin" "nbl::builtin" "${NBL_ROOT_PATH_BINARY}/include" "${NBL_ROOT_PATH_BINARY}/src" "STATIC" "INTERNAL") From 3dbe301b39c2ac4eb73ed3132b0828451ce1f137 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 16 Jan 2026 15:55:52 +0100 Subject: [PATCH 413/472] update examples_tests submodule post merge --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 9795b5d3a3..2fee54acd4 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 9795b5d3a35621126247076333f9ab17406e3ff3 +Subproject commit 2fee54acd4c69579e96b1fa66c22fcbb8d359432 From f2a4c48e70bcaba2aebc6053d22a035d1fa0e9a4 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Sat, 17 Jan 2026 16:14:34 +0100 Subject: 
[PATCH 414/472] Fixed quaternion to matrix cast --- examples_tests | 2 +- include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl | 2 +- include/nbl/builtin/hlsl/math/quaternions.hlsl | 7 +++---- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/examples_tests b/examples_tests index 726e7393d6..4b65e7086e 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 726e7393d6f4d6cd21d59ba3eaa9b9cd2d9cc91e +Subproject commit 4b65e7086e3e2f532c6d5fc258fc1c21eb983a7f diff --git a/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl b/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl index 97d2976c69..3ec7afbbc4 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl @@ -60,7 +60,7 @@ struct matrix final : private glm::mat inline friend matrix mul(matrix const& lhs, const ScalarT rhs) { return matrix(glm::operator*(reinterpret_cast(lhs), rhs)); - + } inline friend bool operator==(matrix const& lhs, matrix const& rhs) { return glm::operator==(reinterpret_cast(lhs), reinterpret_cast(rhs)); diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index 2d294cd4be..5be9f6d36f 100644 --- a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -281,10 +281,9 @@ struct quaternion mat[0][0] = scalar_type(0.5) - mat[0][0]; mat[1][1] = scalar_type(0.5) - mat[1][1]; mat[2][2] = scalar_type(0.5) - mat[2][2]; - mat[0] = mat[0] * scalar_type(2.0); - mat[1] = mat[1] * scalar_type(2.0); - mat[2] = mat[2] * scalar_type(2.0); - return mat; + mat = mul(mat, scalar_type(2.0)); + + return transpose(mat); } static vector3_type slerp_delta(const vector3_type start, const vector3_type preScaledWaypoint, scalar_type cosAngleFromStart) From eb77184b633eddf741350b6b8c80ae94d2dc26a2 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Sat, 17 Jan 2026 20:17:57 +0100 Subject: [PATCH 415/472] Updated examples --- examples_tests | 2 
+- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 4b65e7086e..6ec25cc953 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 4b65e7086e3e2f532c6d5fc258fc1c21eb983a7f +Subproject commit 6ec25cc9536672cc62d7befb2041c200fd238264 From 1e13e63bbc84d1d24f7fba1ec238a421a3a67c35 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 19 Jan 2026 10:35:15 +0700 Subject: [PATCH 416/472] fix create from matrix indexing, construct matrix transpose --- .../nbl/builtin/hlsl/math/quaternions.hlsl | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index 2d294cd4be..566f30f22f 100644 --- a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -146,9 +146,9 @@ struct quaternion { const scalar_type scales = hlsl::sqrt(tmp.x + scalar_type(1.0)); const scalar_type invscales = scalar_type(0.5) / scales; - retval.data.x = (m[1][2] - m[2][1]) * invscales; - retval.data.y = (m[2][0] - m[0][2]) * invscales; - retval.data.z = (m[0][1] - m[1][0]) * invscales; + retval.data.x = (m[2][1] - m[1][2]) * invscales; + retval.data.y = (m[0][2] - m[2][0]) * invscales; + retval.data.z = (m[1][0] - m[0][1]) * invscales; retval.data.w = scales * scalar_type(0.5); } else @@ -158,27 +158,27 @@ struct quaternion const scalar_type scales = hlsl::sqrt(tmp.y + scalar_type(1.0)); const scalar_type invscales = scalar_type(0.5) / scales; retval.data.x = scales * scalar_type(0.5); - retval.data.y = (m[1][0] + m[0][1]) * invscales; - retval.data.z = (m[0][2] + m[2][0]) * invscales; - retval.data.w = (m[1][2] - m[2][1]) * invscales; + retval.data.y = (m[0][1] + m[1][0]) * invscales; + retval.data.z = (m[2][0] + m[0][2]) * invscales; + retval.data.w = (m[2][1] - m[1][2]) * invscales; } else if (tmp.z > scalar_type(0.0)) { const scalar_type scales = hlsl::sqrt(tmp.z 
+ scalar_type(1.0)); const scalar_type invscales = scalar_type(0.5) / scales; - retval.data.x = (m[1][0] + m[0][1]) * invscales; + retval.data.x = (m[0][1] + m[1][0]) * invscales; retval.data.y = scales * scalar_type(0.5); - retval.data.z = (m[2][1] + m[1][2]) * invscales; - retval.data.w = (m[2][0] - m[0][2]) * invscales; + retval.data.z = (m[1][2] + m[2][1]) * invscales; + retval.data.w = (m[0][2] - m[2][0]) * invscales; } else { const scalar_type scales = hlsl::sqrt(tmp.w + scalar_type(1.0)); const scalar_type invscales = scalar_type(0.5) / scales; - retval.data.x = (m[2][0] + m[0][2]) * invscales; - retval.data.y = (m[2][1] + m[1][2]) * invscales; + retval.data.x = (m[0][2] + m[2][0]) * invscales; + retval.data.y = (m[1][2] + m[2][1]) * invscales; retval.data.z = scales * scalar_type(0.5); - retval.data.w = (m[0][1] - m[1][0]) * invscales; + retval.data.w = (m[1][0] - m[0][1]) * invscales; } } @@ -284,7 +284,7 @@ struct quaternion mat[0] = mat[0] * scalar_type(2.0); mat[1] = mat[1] * scalar_type(2.0); mat[2] = mat[2] * scalar_type(2.0); - return mat; + return hlsl::transpose(mat); } static vector3_type slerp_delta(const vector3_type start, const vector3_type preScaledWaypoint, scalar_type cosAngleFromStart) From e014eee94c32841bae5cb187efd89373409b3f43 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 19 Jan 2026 14:34:43 +0700 Subject: [PATCH 417/472] added braces to silence warning --- include/nbl/builtin/hlsl/math/quaternions.hlsl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index 566f30f22f..44624fb2d5 100644 --- a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -117,7 +117,9 @@ struct quaternion return retval; } else + { assert(valid); + } } if (uniformScaleSq < numeric_limits::min) { From 85d169e19168d21807f9d7f74152a3c19c1ddafa Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 19 Jan 2026 
15:25:48 +0700 Subject: [PATCH 418/472] make decode a quantized sequence member instead --- .../hlsl/sampling/quantized_sequence.hlsl | 156 ++++++++++++++---- 1 file changed, 123 insertions(+), 33 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index bd763ad6d7..2769d6e8dc 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -39,61 +39,53 @@ struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0 template<> struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x2f800004u; }; -template +template struct decode_before_scramble_helper { - using scalar_type = typename vector_traits::scalar_type; - using uvec_type = vector; - using sequence_type = QuantizedSequence; - using return_type = vector; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = unorm_constant::value; - - static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uvec_type scrambleKey) + using scalar_type = typename Q::scalar_type; + NBL_CONSTEXPR_STATIC_INLINE uint16_t Dim = Q::Dimension; + using uvec_type = vector; + using sequence_type = Q; + using return_type = vector; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = unorm_constant::value; + + return_type operator()(const uvec_type scrambleKey) { uvec_type seqVal; - NBL_UNROLL for(uint16_t i = 0; i < D; i++) + NBL_UNROLL for(uint16_t i = 0; i < Dim; i++) seqVal[i] = val.get(i); seqVal ^= scrambleKey; return return_type(seqVal) * bit_cast >(UNormConstant); } + + sequence_type val; }; -template +template struct decode_after_scramble_helper { - using scalar_type = typename vector_traits::scalar_type; - using uvec_type = vector; - using sequence_type = QuantizedSequence; - using return_type = vector; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = unorm_constant::value; - - static return_type 
__call(NBL_CONST_REF_ARG(sequence_type) val, NBL_CONST_REF_ARG(sequence_type) scrambleKey) + using scalar_type = typename Q::scalar_type; + NBL_CONSTEXPR_STATIC_INLINE uint16_t Dim = Q::Dimension; + using uvec_type = vector; + using sequence_type = Q; + using return_type = vector; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = unorm_constant::value; + + return_type operator()(NBL_CONST_REF_ARG(sequence_type) scrambleKey) { sequence_type scramble; scramble.data = val.data ^ scrambleKey.data; uvec_type seqVal; - NBL_UNROLL for(uint16_t i = 0; i < D; i++) + NBL_UNROLL for(uint16_t i = 0; i < Dim; i++) seqVal[i] = scramble.get(i); return return_type(seqVal) * bit_cast >(UNormConstant); } + + sequence_type val; }; template -NBL_BOOL_CONCEPT SequenceSpecialization = concepts::UnsignedIntegral::scalar_type> && size_of_v::scalar_type> <= 4; -} - -// post-decode scramble -template -vector decode(NBL_CONST_REF_ARG(QuantizedSequence) val, const vector,D> scrambleKey) -{ - return impl::decode_before_scramble_helper::__call(val, scrambleKey); -} - -// pre-decode scramble -template -vector decode(NBL_CONST_REF_ARG(QuantizedSequence) val, NBL_CONST_REF_ARG(QuantizedSequence) scrambleKey) -{ - return impl::decode_after_scramble_helper::__call(val, scrambleKey); +NBL_BOOL_CONCEPT SequenceSpecialization = concepts::IntVector && size_of_v::scalar_type> <= 4; } // all Dim=1 @@ -101,11 +93,28 @@ template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization) struct QuantizedSequence) > { using store_type = T; + using scalar_type = typename vector_traits::scalar_type; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = 8u*size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = uint16_t(1u); store_type get(const uint16_t idx) { assert(idx > 0 && idx < 1); return data; } void set(const uint16_t idx, const store_type value) { assert(idx > 0 && idx < 1); data = value; } + template + vector decode(const vector,Dimension> scrambleKey) + { + impl::decode_before_scramble_helper,F> 
helper; + helper.val.data = data; + return helper(scrambleKey); + } + template + vector decode(NBL_CONST_REF_ARG(QuantizedSequence) scrambleKey) + { + impl::decode_after_scramble_helper,F> helper; + helper.val.data = data; + return helper(scrambleKey); + } + store_type data; }; @@ -114,8 +123,10 @@ template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecializat struct QuantizedSequence && vector_traits::Dimension == 1 && Dim > 1 && Dim < 5) > { using store_type = T; + using scalar_type = typename vector_traits::scalar_type; NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; + NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = Dim; static QuantizedSequence create(const vector value) { @@ -137,6 +148,21 @@ struct QuantizedSequence + vector decode(const vector,Dimension> scrambleKey) + { + impl::decode_before_scramble_helper,F> helper; + helper.val.data = data; + return helper(scrambleKey); + } + template + vector decode(NBL_CONST_REF_ARG(QuantizedSequence) scrambleKey) + { + impl::decode_after_scramble_helper,F> helper; + helper.val.data = data; + return helper(scrambleKey); + } + store_type data; }; @@ -147,10 +173,26 @@ struct QuantizedSequence::scalar_type; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = 8u*size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = Dim; scalar_type get(const uint16_t idx) { assert(idx > 0 && idx < Dim); return data[idx]; } void set(const uint16_t idx, const scalar_type value) { assert(idx > 0 && idx < Dim); data[idx] = value; } + template + vector decode(const vector,Dimension> scrambleKey) + { + impl::decode_before_scramble_helper,F> helper; + helper.val.data = data; + return helper(scrambleKey); + } + template + vector decode(NBL_CONST_REF_ARG(QuantizedSequence) scrambleKey) + { + impl::decode_after_scramble_helper,F> helper; + helper.val.data = data; + return helper(scrambleKey); + } + store_type data; }; @@ -163,6 +205,7 @@ struct 
QuantizedSequence; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v) - BitsPerComponent; + NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = Dim; static QuantizedSequence create(const vector value) { @@ -201,6 +244,21 @@ struct QuantizedSequence + vector decode(const vector,Dimension> scrambleKey) + { + impl::decode_before_scramble_helper,F> helper; + helper.val.data = data; + return helper(scrambleKey); + } + template + vector decode(NBL_CONST_REF_ARG(QuantizedSequence) scrambleKey) + { + impl::decode_after_scramble_helper,F> helper; + helper.val.data = data; + return helper(scrambleKey); + } + store_type data; }; @@ -212,6 +270,7 @@ struct QuantizedSequence::scalar_type; NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; + NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = Dim; static QuantizedSequence create(const vector value) { @@ -247,6 +306,21 @@ struct QuantizedSequence + vector decode(const vector,Dimension> scrambleKey) + { + impl::decode_before_scramble_helper,F> helper; + helper.val.data = data; + return helper(scrambleKey); + } + template + vector decode(NBL_CONST_REF_ARG(QuantizedSequence) scrambleKey) + { + impl::decode_after_scramble_helper,F> helper; + helper.val.data = data; + return helper(scrambleKey); + } + store_type data; }; @@ -262,6 +336,7 @@ struct QuantizedSequence; NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; + NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = Dim; base_type get(const uint16_t idx) { @@ -313,6 +388,21 @@ struct QuantizedSequence + vector decode(const vector,Dimension> scrambleKey) + { + impl::decode_before_scramble_helper,F> helper; + helper.val.data = data; + return helper(scrambleKey); + } + template + vector 
decode(NBL_CONST_REF_ARG(QuantizedSequence) scrambleKey) + { + impl::decode_after_scramble_helper,F> helper; + helper.val.data = data; + return helper(scrambleKey); + } + store_type data; // data[0] = | -- x 32 bits -- | // data[1] = MSB | -- y 22 bits -- | -- x 10 bits -- | LSB From 64fb4d6f457f68cf08eb8731d38c442a898f9ed7 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 19 Jan 2026 17:03:44 +0700 Subject: [PATCH 419/472] added create factory for all quantized sequences, added encode method for taking unorm input --- .../hlsl/sampling/quantized_sequence.hlsl | 140 +++++++++++++----- 1 file changed, 106 insertions(+), 34 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index 2769d6e8dc..dfc22634f6 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -39,15 +39,34 @@ struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0 template<> struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x2f800004u; }; +template +struct encode_helper +{ + NBL_CONSTEXPR_STATIC_INLINE uint16_t Dim = Q::Dimension; + using sequence_type = Q; + using unorm_vec_type = vector; + using unsigned_scalar_type = unsigned_integer_of_size_t; + using uvec_type = vector; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormMultiplier = (1u << (8u * size_of_v - 1u)) - 1u; + + static sequence_type __call(const unorm_vec_type unormvec) + { + uvec_type asuint; + NBL_UNROLL for(uint16_t i = 0; i < Dim; i++) + asuint[i] = unsigned_scalar_type(unormvec[i] * UNormMultiplier); + return sequence_type::create(asuint); + } +}; + template struct decode_before_scramble_helper { - using scalar_type = typename Q::scalar_type; + using unsigned_scalar_type = typename Q::scalar_type; NBL_CONSTEXPR_STATIC_INLINE uint16_t Dim = Q::Dimension; using uvec_type = vector; using sequence_type = Q; using return_type = vector; 
- NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = unorm_constant::value; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = unorm_constant::value; return_type operator()(const uvec_type scrambleKey) { @@ -55,7 +74,7 @@ struct decode_before_scramble_helper NBL_UNROLL for(uint16_t i = 0; i < Dim; i++) seqVal[i] = val.get(i); seqVal ^= scrambleKey; - return return_type(seqVal) * bit_cast >(UNormConstant); + return return_type(seqVal) * bit_cast >(UNormConstant); } sequence_type val; @@ -63,7 +82,7 @@ struct decode_before_scramble_helper template struct decode_after_scramble_helper { - using scalar_type = typename Q::scalar_type; + using unsigned_scalar_type = typename Q::scalar_type; NBL_CONSTEXPR_STATIC_INLINE uint16_t Dim = Q::Dimension; using uvec_type = vector; using sequence_type = Q; @@ -78,39 +97,53 @@ struct decode_after_scramble_helper uvec_type seqVal; NBL_UNROLL for(uint16_t i = 0; i < Dim; i++) seqVal[i] = scramble.get(i); - return return_type(seqVal) * bit_cast >(UNormConstant); + return return_type(seqVal) * bit_cast >(UNormConstant); } sequence_type val; }; template -NBL_BOOL_CONCEPT SequenceSpecialization = concepts::IntVector && size_of_v::scalar_type> <= 4; +NBL_BOOL_CONCEPT SequenceSpecialization = concepts::UnsignedIntegral::scalar_type> && size_of_v::scalar_type> <= 4; } // all Dim=1 template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization) struct QuantizedSequence) > { + using this_t = QuantizedSequence; using store_type = T; using scalar_type = typename vector_traits::scalar_type; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = 8u*size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = uint16_t(1u); + static this_t create(const store_type value) + { + this_t seq; + seq.data = value; + return seq; + } + store_type get(const uint16_t idx) { assert(idx > 0 && idx < 1); return data; } void set(const uint16_t idx, const store_type value) { assert(idx > 0 && idx < 1); data = value; } + template + static this_t encode(const vector 
value) + { + return impl::encode_helper::__call(value); + } + template vector decode(const vector,Dimension> scrambleKey) { - impl::decode_before_scramble_helper,F> helper; + impl::decode_before_scramble_helper helper; helper.val.data = data; return helper(scrambleKey); } template - vector decode(NBL_CONST_REF_ARG(QuantizedSequence) scrambleKey) + vector decode(NBL_CONST_REF_ARG(this_t) scrambleKey) { - impl::decode_after_scramble_helper,F> helper; + impl::decode_after_scramble_helper helper; helper.val.data = data; return helper(scrambleKey); } @@ -122,16 +155,18 @@ struct QuantizedSequence NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && vector_traits::Dimension == 1 && Dim > 1 && Dim < 5) struct QuantizedSequence && vector_traits::Dimension == 1 && Dim > 1 && Dim < 5) > { + using this_t = QuantizedSequence; using store_type = T; - using scalar_type = typename vector_traits::scalar_type; + using scalar_type = store_type; NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = Dim; - static QuantizedSequence create(const vector value) + static this_t create(const vector value) { - QuantizedSequence seq; - NBL_UNROLL for (uint16_t i = 0; i < Dim; i++) + this_t seq; + seq.data = store_type(0u); + NBL_UNROLL for (uint16_t i = 0; i < Dimension; i++) seq.set(i, value[i]); return seq; } @@ -148,17 +183,23 @@ struct QuantizedSequence + static this_t encode(const vector value) + { + return impl::encode_helper::__call(value); + } + template vector decode(const vector,Dimension> scrambleKey) { - impl::decode_before_scramble_helper,F> helper; + impl::decode_before_scramble_helper helper; helper.val.data = data; return helper(scrambleKey); } template - vector decode(NBL_CONST_REF_ARG(QuantizedSequence) scrambleKey) + vector decode(NBL_CONST_REF_ARG(this_t) scrambleKey) { - impl::decode_after_scramble_helper,F> helper; + 
impl::decode_after_scramble_helper helper; helper.val.data = data; return helper(scrambleKey); } @@ -170,25 +211,39 @@ struct QuantizedSequence NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && vector_traits::Dimension == Dim && Dim > 1 && Dim < 5) struct QuantizedSequence && vector_traits::Dimension == Dim && Dim > 1 && Dim < 5) > { + using this_t = QuantizedSequence; using store_type = T; using scalar_type = typename vector_traits::scalar_type; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = 8u*size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = Dim; + static this_t create(const store_type value) + { + this_t seq; + seq.data = value; + return seq; + } + scalar_type get(const uint16_t idx) { assert(idx > 0 && idx < Dim); return data[idx]; } void set(const uint16_t idx, const scalar_type value) { assert(idx > 0 && idx < Dim); data[idx] = value; } + template + static this_t encode(const vector value) + { + return impl::encode_helper::__call(value); + } + template vector decode(const vector,Dimension> scrambleKey) { - impl::decode_before_scramble_helper,F> helper; + impl::decode_before_scramble_helper helper; helper.val.data = data; return helper(scrambleKey); } template - vector decode(NBL_CONST_REF_ARG(QuantizedSequence) scrambleKey) + vector decode(NBL_CONST_REF_ARG(this_t) scrambleKey) { - impl::decode_after_scramble_helper,F> helper; + impl::decode_after_scramble_helper helper; helper.val.data = data; return helper(scrambleKey); } @@ -200,6 +255,7 @@ struct QuantizedSequence NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && size_of_v::scalar_type> == 4 && vector_traits::Dimension == 2 && Dim == 3) struct QuantizedSequence && size_of_v::scalar_type> == 4 && vector_traits::Dimension == 2 && Dim == 3) > { + using this_t = QuantizedSequence; using store_type = T; using scalar_type = typename vector_traits::scalar_type; NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; @@ -207,10 +263,11 @@ struct QuantizedSequence) - 
BitsPerComponent; NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = Dim; - static QuantizedSequence create(const vector value) + static this_t create(const vector value) { - QuantizedSequence seq; - NBL_UNROLL for (uint16_t i = 0; i < Dim; i++) + this_t seq; + seq.data = hlsl::promote(0u); + NBL_UNROLL for (uint16_t i = 0; i < Dimension; i++) seq.set(i, value[i]); return seq; } @@ -244,17 +301,23 @@ struct QuantizedSequence + static this_t encode(const vector value) + { + return impl::encode_helper::__call(value); + } + template vector decode(const vector,Dimension> scrambleKey) { - impl::decode_before_scramble_helper,F> helper; + impl::decode_before_scramble_helper helper; helper.val.data = data; return helper(scrambleKey); } template - vector decode(NBL_CONST_REF_ARG(QuantizedSequence) scrambleKey) + vector decode(NBL_CONST_REF_ARG(this_t) scrambleKey) { - impl::decode_after_scramble_helper,F> helper; + impl::decode_after_scramble_helper helper; helper.val.data = data; return helper(scrambleKey); } @@ -266,16 +329,18 @@ struct QuantizedSequence NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && size_of_v::scalar_type> == 2 && vector_traits::Dimension == 2 && Dim == 4) struct QuantizedSequence && size_of_v::scalar_type> == 2 && vector_traits::Dimension == 2 && Dim == 4) > { + using this_t = QuantizedSequence; using store_type = T; using scalar_type = typename vector_traits::scalar_type; NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = Dim; - static QuantizedSequence create(const vector value) + static this_t create(const vector value) { - QuantizedSequence seq; - NBL_UNROLL for (uint16_t i = 0; i < Dim; i++) + this_t seq; + seq.data = hlsl::promote(0u); + NBL_UNROLL for (uint16_t i = 0; i < Dimension; i++) seq.set(i, value[i]); return seq; } @@ -306,17 +371,23 @@ struct QuantizedSequence + static this_t encode(const 
vector value) + { + return impl::encode_helper::__call(value); + } + template vector decode(const vector,Dimension> scrambleKey) { - impl::decode_before_scramble_helper,F> helper; + impl::decode_before_scramble_helper helper; helper.val.data = data; return helper(scrambleKey); } template - vector decode(NBL_CONST_REF_ARG(QuantizedSequence) scrambleKey) + vector decode(NBL_CONST_REF_ARG(this_t) scrambleKey) { - impl::decode_after_scramble_helper,F> helper; + impl::decode_after_scramble_helper helper; helper.val.data = data; return helper(scrambleKey); } @@ -331,6 +402,7 @@ struct QuantizedSequence NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && size_of_v::scalar_type> == 4 && vector_traits::Dimension == 4 && Dim == 3) struct QuantizedSequence && size_of_v::scalar_type> == 4 && vector_traits::Dimension == 4 && Dim == 3) > { + using this_t = QuantizedSequence; using store_type = T; using scalar_type = typename vector_traits::scalar_type; using base_type = vector; @@ -391,14 +463,14 @@ struct QuantizedSequence vector decode(const vector,Dimension> scrambleKey) { - impl::decode_before_scramble_helper,F> helper; + impl::decode_before_scramble_helper helper; helper.val.data = data; return helper(scrambleKey); } template - vector decode(NBL_CONST_REF_ARG(QuantizedSequence) scrambleKey) + vector decode(NBL_CONST_REF_ARG(this_t) scrambleKey) { - impl::decode_after_scramble_helper,F> helper; + impl::decode_after_scramble_helper helper; helper.val.data = data; return helper(scrambleKey); } From a98bcf5c20f4cc475717decf67e23d183af18f09 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 19 Jan 2026 12:08:53 +0100 Subject: [PATCH 420/472] Removed scalar matrix multiplication, too much hustle to make it work --- examples_tests | 2 +- include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl | 10 ---------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/examples_tests b/examples_tests index 6ec25cc953..4dfd279b57 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ 
-Subproject commit 6ec25cc9536672cc62d7befb2041c200fd238264 +Subproject commit 4dfd279b573227ebed8d544b00a4c4ba951fa22a diff --git a/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl b/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl index 3ec7afbbc4..5be66ebae7 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl @@ -51,16 +51,6 @@ struct matrix final : private glm::mat { return glm::operator*(reinterpret_cast(rhs), lhs); } - template - inline friend matrix mul(const ScalarT lhs, matrix const& rhs) - { - return matrix(glm::operator*(lhs, reinterpret_cast(rhs))); - } - template - inline friend matrix mul(matrix const& lhs, const ScalarT rhs) - { - return matrix(glm::operator*(reinterpret_cast(lhs), rhs)); - } inline friend bool operator==(matrix const& lhs, matrix const& rhs) { return glm::operator==(reinterpret_cast(lhs), reinterpret_cast(rhs)); From ed47eb85a6cf85067e8b3fa906d7332c43966d66 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 19 Jan 2026 12:21:51 +0100 Subject: [PATCH 421/472] Fixed quaternions --- include/nbl/builtin/hlsl/math/quaternions.hlsl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index 85069634fb..f838156c86 100644 --- a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -283,7 +283,9 @@ struct quaternion mat[0][0] = scalar_type(0.5) - mat[0][0]; mat[1][1] = scalar_type(0.5) - mat[1][1]; mat[2][2] = scalar_type(0.5) - mat[2][2]; - mat = hlsl::mul(mat, scalar_type(2.0)); + mat[0] = mat[0] * scalar_type(2.0); + mat[1] = mat[1] * scalar_type(2.0); + mat[2] = mat[2] * scalar_type(2.0); return hlsl::transpose(mat); } From 0ed92412c6ad66b01c6e22684c93b0260c5b8615 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 19 Jan 2026 13:39:51 +0100 Subject: [PATCH 422/472] Updated examples --- examples_tests | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 4dfd279b57..1ca5358d55 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 4dfd279b573227ebed8d544b00a4c4ba951fa22a +Subproject commit 1ca5358d553484fad0892b28542064678c053f49 From 9d645cd9016eea2bbd545d87d94b319512ae3b66 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 19 Jan 2026 16:54:05 +0100 Subject: [PATCH 423/472] Fixed a weird bug --- src/nbl/ext/MitsubaLoader/CElementEmitter.cpp | 2 +- src/nbl/ext/MitsubaLoader/PropertyElement.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nbl/ext/MitsubaLoader/CElementEmitter.cpp b/src/nbl/ext/MitsubaLoader/CElementEmitter.cpp index 831de4506e..00cf848067 100644 --- a/src/nbl/ext/MitsubaLoader/CElementEmitter.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementEmitter.cpp @@ -59,7 +59,7 @@ auto CElementEmitter::compAddPropertyMap() -> AddPropertyMap const auto lookAtGLM = reinterpret_cast(glm::lookAtRH({},target,up)); const auto lookAt = hlsl::transpose(lookAtGLM); // turn lookat into a rotation matrix - const auto rotation = hlsl::inverse(hlsl::float32_t3x3(lookAt)); + const auto rotation = hlsl::inverse(hlsl::float32_t3x3(lookAt)); //_NBL_DEBUG_BREAK_IF(true); // no idea if matrix is correct, looks okay for (auto r=0; r<3; r++) _this->transform.matrix[r].xyz = rotation[r]; diff --git a/src/nbl/ext/MitsubaLoader/PropertyElement.cpp b/src/nbl/ext/MitsubaLoader/PropertyElement.cpp index 1beff22eb7..6283076825 100644 --- a/src/nbl/ext/MitsubaLoader/PropertyElement.cpp +++ b/src/nbl/ext/MitsubaLoader/PropertyElement.cpp @@ -260,7 +260,7 @@ std::optional CPropertyElementManager::createPropertyData const auto lookAtGLM = reinterpret_cast(glm::lookAtLH(origin,target,up)); const auto lookAt = hlsl::transpose(lookAtGLM); // mitsuba understands look-at and right-handed camera little bit differently than I do - const auto rotation = hlsl::inverse(hlsl::float32_t3x3(lookAt)); + const auto 
rotation = hlsl::inverse(hlsl::float32_t3x3(lookAt)); // set the origin to avoid numerical issues for (auto r=0; r<3; r++) { From dee183bc7dd81f72aa61c7c8a9efa4eaffa82a86 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 19 Jan 2026 18:10:03 +0100 Subject: [PATCH 424/472] Updated examples --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 1ca5358d55..587cbff28b 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 1ca5358d553484fad0892b28542064678c053f49 +Subproject commit 587cbff28b1d0b42f2f704c3ba9b247ad0276590 From 1223e88db2f17ac059e6d28001ac77ba3028c816 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 20 Jan 2026 11:06:54 +0700 Subject: [PATCH 425/472] fixes to removal of core::matrix --- .../nbl/builtin/hlsl/math/quaternions.hlsl | 4 +-- .../transformation_matrix_utils.hlsl | 29 +++++-------------- 2 files changed, 10 insertions(+), 23 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index f838156c86..25fa61162d 100644 --- a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -110,16 +110,16 @@ struct quaternion uniformScaleSq = traits.uniformScaleSq; if (dontAssertValidMatrix) + { if (!valid) { this_t retval; retval.data = hlsl::promote(bit_cast(numeric_limits::quiet_NaN)); return retval; } + } else - { assert(valid); - } } if (uniformScaleSq < numeric_limits::min) { diff --git a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl index 1ad16dc28d..df56d46549 100644 --- a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl +++ b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl @@ -2,6 +2,7 @@ #define _NBL_BUILTIN_HLSL_TRANSFORMATION_MATRIX_UTILS_INCLUDED_ #include +#include namespace nbl { @@ -125,30 
+126,16 @@ inline matrix buildCameraLookAtMatrixRH( //! Replaces curent rocation and scale by rotation represented by quaternion `quat`, leaves 4th row and 4th colum unchanged template -inline void setRotation(matrix& outMat, NBL_CONST_REF_ARG(core::quaternion) quat) +inline void setRotation(matrix& outMat, NBL_CONST_REF_ARG(math::quaternion) quat) { static_assert(N == 3 || N == 4); + matrix mat = _static_cast>(quat); - outMat[0] = vector( - 1 - 2 * (quat.y * quat.y + quat.z * quat.z), - 2 * (quat.x * quat.y - quat.z * quat.w), - 2 * (quat.x * quat.z + quat.y * quat.w), - outMat[0][3] - ); - - outMat[1] = vector( - 2 * (quat.x * quat.y + quat.z * quat.w), - 1 - 2 * (quat.x * quat.x + quat.z * quat.z), - 2 * (quat.y * quat.z - quat.x * quat.w), - outMat[1][3] - ); - - outMat[2] = vector( - 2 * (quat.x * quat.z - quat.y * quat.w), - 2 * (quat.y * quat.z + quat.x * quat.w), - 1 - 2 * (quat.x * quat.x + quat.y * quat.y), - outMat[2][3] - ); + outMat[0] = mat[0]; + + outMat[1] = mat[1]; + + outMat[2] = mat[2]; } template From cd67d94fed332d3c13a3bf95f66f9c334763f039 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 20 Jan 2026 11:08:35 +0700 Subject: [PATCH 426/472] removed sequence partial spec data too big, minor fixes --- .../hlsl/sampling/quantized_sequence.hlsl | 97 ++----------------- 1 file changed, 6 insertions(+), 91 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index dfc22634f6..70ca28b70d 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -252,8 +252,8 @@ struct QuantizedSequence NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && size_of_v::scalar_type> == 4 && vector_traits::Dimension == 2 && Dim == 3) -struct QuantizedSequence && size_of_v::scalar_type> == 4 && vector_traits::Dimension == 2 && Dim == 3) > +template NBL_PARTIAL_REQ_TOP(is_same_v && Dim == 3) +struct 
QuantizedSequence && Dim == 3) > { using this_t = QuantizedSequence; using store_type = T; @@ -326,8 +326,8 @@ struct QuantizedSequence NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && size_of_v::scalar_type> == 2 && vector_traits::Dimension == 2 && Dim == 4) -struct QuantizedSequence && size_of_v::scalar_type> == 2 && vector_traits::Dimension == 2 && Dim == 4) > +template NBL_PARTIAL_REQ_TOP(is_same_v && Dim == 4) +struct QuantizedSequence && Dim == 4) > { using this_t = QuantizedSequence; using store_type = T; @@ -348,7 +348,7 @@ struct QuantizedSequence= 0 && idx < 4); - if (idx >= 0 && idx < 2) // x y + if (idx < 2) // x y { return glsl::bitfieldExtract(data[0], BitsPerComponent * idx, BitsPerComponent); } @@ -361,7 +361,7 @@ struct QuantizedSequence= 0 && idx < 4); - if (idx >= 0 && idx < 2) // x y + if (idx < 2) // x y { glsl::bitfieldInsert(data[0], value, BitsPerComponent * idx, BitsPerComponent); } @@ -397,91 +397,6 @@ struct QuantizedSequence returns uint32_t2 - 42 bits per component: 32 in x, 10 in y -// use uint32_t2 instead of uint16_t4 -template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && size_of_v::scalar_type> == 4 && vector_traits::Dimension == 4 && Dim == 3) -struct QuantizedSequence && size_of_v::scalar_type> == 4 && vector_traits::Dimension == 4 && Dim == 3) > -{ - using this_t = QuantizedSequence; - using store_type = T; - using scalar_type = typename vector_traits::scalar_type; - using base_type = vector; - NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; - NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; - NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = Dim; - - base_type get(const uint16_t idx) - { - assert(idx >= 0 && idx < 3); - if (idx == 0) // x - { - base_type x; - x[0] = data[0]; - x[1] = glsl::bitfieldExtract(data[1], 0u, 10u); - return x; - } - else if (idx == 1) // y - { - base_type y; - y[0] = glsl::bitfieldExtract(data[1], 10u, 22u); - y[0] |= 
glsl::bitfieldExtract(data[2], 0u, 10u) << 22u; - y[1] = glsl::bitfieldExtract(data[2], 10u, 10u); - return y; - } - else // z - { - base_type z; - z[0] = glsl::bitfieldInsert(data[2], 20u, 12u); - z[0] |= glsl::bitfieldInsert(data[3], 0u, 20u) << 12u; - z[1] = glsl::bitfieldInsert(data[3], 20u, 10u); - return z; - } - } - - void set(const uint16_t idx, const base_type value) - { - assert(idx >= 0 && idx < 3); - if (idx == 0) // x - { - data[0] = value[0]; - glsl::bitfieldInsert(data[1], value[1], 0u, 10u); - } - else if (idx == 1) // y - { - glsl::bitfieldInsert(data[1], value[0], 10u, 22u); - glsl::bitfieldInsert(data[2], value[0] >> 22u, 0u, 10u); - glsl::bitfieldInsert(data[2], value[1], 10u, 10u); - } - else // z - { - glsl::bitfieldInsert(data[2], value[0], 20u, 12u); - glsl::bitfieldInsert(data[3], value[0] >> 12u, 0u, 20u); - glsl::bitfieldInsert(data[3], value[1], 20u, 10u); - } - } - - template - vector decode(const vector,Dimension> scrambleKey) - { - impl::decode_before_scramble_helper helper; - helper.val.data = data; - return helper(scrambleKey); - } - template - vector decode(NBL_CONST_REF_ARG(this_t) scrambleKey) - { - impl::decode_after_scramble_helper helper; - helper.val.data = data; - return helper(scrambleKey); - } - - store_type data; - // data[0] = | -- x 32 bits -- | - // data[1] = MSB | -- y 22 bits -- | -- x 10 bits -- | LSB - // data[2] = MSB | -- z 12 bits -- | -- y 20 bits -- | LSB - // data[3] = | -- z 30 bits -- | -}; - } } From e3adac0d28dda099617327d1a8f01e5af9d72c62 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Wed, 21 Jan 2026 04:56:01 +0300 Subject: [PATCH 427/472] update examples_tests --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 15e4d5d044..3e39f036cd 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 15e4d5d044d0b682279fcce5486a841e1f3d3541 +Subproject commit 3e39f036cda70bc7a8e4dccdfe99d59a60b0a263 From 
9ddc95482de9cc6ec719f840951f630fe6d36730 Mon Sep 17 00:00:00 2001 From: devsh Date: Fri, 23 Jan 2026 15:01:49 +0100 Subject: [PATCH 428/472] minor improvements and bugfixes --- include/nbl/ext/MitsubaLoader/CElementSensor.h | 1 + .../video/utilities/CDefaultSwapchainFramebuffers.h | 11 +++++++++-- include/nbl/video/utilities/CSimpleResizeSurface.h | 1 + src/nbl/video/CVulkanLogicalDevice.cpp | 2 +- 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/include/nbl/ext/MitsubaLoader/CElementSensor.h b/include/nbl/ext/MitsubaLoader/CElementSensor.h index fecd248ad0..d240830902 100644 --- a/include/nbl/ext/MitsubaLoader/CElementSensor.h +++ b/include/nbl/ext/MitsubaLoader/CElementSensor.h @@ -263,6 +263,7 @@ class CElementSensor final : public IElement };*/ union { + CameraBase base; PerspectivePinhole perspective; PerspectiveThinLens thinlens; Orthographic orthographic; diff --git a/include/nbl/video/utilities/CDefaultSwapchainFramebuffers.h b/include/nbl/video/utilities/CDefaultSwapchainFramebuffers.h index 190fa81e70..285d7f46a7 100644 --- a/include/nbl/video/utilities/CDefaultSwapchainFramebuffers.h +++ b/include/nbl/video/utilities/CDefaultSwapchainFramebuffers.h @@ -17,7 +17,10 @@ namespace nbl::video class CDefaultSwapchainFramebuffers : public ISimpleManagedSurface::ISwapchainResources { public: - inline CDefaultSwapchainFramebuffers(ILogicalDevice* device, const asset::E_FORMAT format, const IGPURenderpass::SCreationParams::SSubpassDependency* dependencies) : m_device(device) + inline CDefaultSwapchainFramebuffers( + ILogicalDevice* device, const asset::E_FORMAT format, const IGPURenderpass::SCreationParams::SSubpassDependency* dependencies, + IGPURenderpass::LOAD_OP loadOp = IGPURenderpass::LOAD_OP::CLEAR + ) : m_device(device) { // If we create the framebuffers by default, we also need to default the renderpass (except dependencies) static const IGPURenderpass::SCreationParams::SColorAttachmentDescription colorAttachments[] = { @@ -27,7 +30,7 @@ 
class CDefaultSwapchainFramebuffers : public ISimpleManagedSurface::ISwapchainRe .samples = IGPUImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT, .mayAlias = false }, - /*.loadOp = */IGPURenderpass::LOAD_OP::CLEAR, + /*.loadOp = */loadOp, /*.storeOp = */IGPURenderpass::STORE_OP::STORE, /*.initialLayout = */IGPUImage::LAYOUT::UNDEFINED, // because we clear we don't care about contents /*.finalLayout = */ IGPUImage::LAYOUT::PRESENT_SRC // transition to presentation right away so we can skip a barrier @@ -51,6 +54,10 @@ class CDefaultSwapchainFramebuffers : public ISimpleManagedSurface::ISwapchainRe m_renderpass = m_device->createRenderpass(m_params); return m_renderpass.get(); } + inline const IGPURenderpass* getRenderpass() const + { + return m_renderpass.get(); + } inline IGPUFramebuffer* getFramebuffer(const uint8_t imageIx) { diff --git a/include/nbl/video/utilities/CSimpleResizeSurface.h b/include/nbl/video/utilities/CSimpleResizeSurface.h index 3e9abc6e25..126c9d179e 100644 --- a/include/nbl/video/utilities/CSimpleResizeSurface.h +++ b/include/nbl/video/utilities/CSimpleResizeSurface.h @@ -57,6 +57,7 @@ class CSimpleResizeSurface final : public ISimpleManagedSurface // Can be public because we don't need to worry about mutexes unlike the Smooth Resize class inline ISwapchainResources* getSwapchainResources() override {return m_swapchainResources.get();} + inline const ISwapchainResources* getSwapchainResources() const {return m_swapchainResources.get();} // need to see if the swapchain is invalidated (e.g. 
because we're starting from 0-area old Swapchain) and try to recreate the swapchain inline SAcquireResult acquireNextImage() diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp index 5390b4c3fa..46b79a7094 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -1551,7 +1551,6 @@ void CVulkanLogicalDevice::createRayTracingPipelines_impl( for (const auto& info : createInfos) { - core::unordered_map shaderIndexes; auto getVkShaderIndex = [&](const IGPUPipelineBase::SShaderSpecInfo& spec) { @@ -1633,6 +1632,7 @@ void CVulkanLogicalDevice::createRayTracingPipelines_impl( { outCreateInfo->pDynamicState = &vk_dynamicStateCreateInfo; } + outCreateInfo++; } auto vk_pipelines = reinterpret_cast(output); From 94696a0e28e8e5de95424b91aa1e290eb3ead76f Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 26 Jan 2026 22:45:58 +0100 Subject: [PATCH 429/472] fix minor bugs --- include/nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl | 2 +- include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl b/include/nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl index 7509eac493..6ef687c506 100644 --- a/include/nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl @@ -21,7 +21,7 @@ struct ResolveParameters float NOverKappa; }; -ResolveParameters computeResolveParameters(float base, uint32_t sampleCount, float minReliableLuma, float kappa, uint32_t cascadeSize) +inline ResolveParameters computeResolveParameters(float base, uint32_t sampleCount, float minReliableLuma, float kappa, uint32_t cascadeSize) { ResolveParameters retval; retval.lastCascadeIndex = cascadeSize - 1u; diff --git a/include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h b/include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h index 1abebf23ea..839e7e07e4 100644 --- 
a/include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h +++ b/include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h @@ -42,7 +42,7 @@ struct ProtoPipeline final inline core::smart_refctd_ptr createPipeline( const video::IGPUPipelineBase::SShaderSpecInfo& fragShader, video::IGPUPipelineLayout* layout, - video::IGPURenderpass* renderpass, + const video::IGPURenderpass* renderpass, const uint32_t subpassIx=0, asset::SBlendParams blendParams = {}, const hlsl::SurfaceTransform::FLAG_BITS swapchainTransform=hlsl::SurfaceTransform::FLAG_BITS::IDENTITY_BIT From 04e663847bfc8789a8c3c577c0281af1ea10ad74 Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 26 Jan 2026 22:46:36 +0100 Subject: [PATCH 430/472] fix commandpool reset handling (being false failed) --- src/nbl/video/IGPUCommandBuffer.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index 1f619666ab..799dcccf20 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -50,14 +50,6 @@ bool IGPUCommandBuffer::checkStateBeforeRecording(const core::bitflag flags, const SInheritanceInfo* inheritanceInfo) { - // Using Vulkan 1.2 VUIDs here because we don't want to confuse ourselves with Dynamic Rendering being core - // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-vkBeginCommandBuffer-commandBuffer-00049 - if (m_state == STATE::RECORDING || m_state == STATE::PENDING) - { - NBL_LOG_ERROR("command buffer must not be in RECORDING or PENDING state!"); - return false; - } - const bool whollyInsideRenderpass = flags.hasFlags(USAGE::RENDER_PASS_CONTINUE_BIT); const auto physDev = getOriginDevice()->getPhysicalDevice(); if (m_level==IGPUCommandPool::BUFFER_LEVEL::PRIMARY) @@ -126,6 +118,14 @@ bool IGPUCommandBuffer::begin(const core::bitflag flags, const SInheritan } checkForParentPoolReset(); + + // Using Vulkan 1.2 VUIDs here because we don't want to 
confuse ourselves with Dynamic Rendering being core + // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-vkBeginCommandBuffer-commandBuffer-00049 + if (m_state == STATE::RECORDING || m_state == STATE::PENDING) + { + NBL_LOG_ERROR("command buffer must not be in RECORDING or PENDING state!"); + return false; + } // still not initial and pool wasn't reset if (m_state!=STATE::INITIAL) From 4320efc4c3810c557f65a0199e36851f3851cdcd Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 26 Jan 2026 23:26:14 +0100 Subject: [PATCH 431/472] point at working example 40 --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 5df217517f..11b3b46dc2 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 5df217517fd5af0964b6d170afb68d5194daf60d +Subproject commit 11b3b46dc2d681abf3d724a26e53c0f60e42c6e0 From 7753681fc86b4f222bc5d49ef6ab83e17b836d0e Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 27 Jan 2026 10:14:34 +0100 Subject: [PATCH 432/472] improve the SBT api for RT pipeline --- examples_tests | 2 +- include/nbl/asset/IBuffer.h | 9 ++ include/nbl/asset/ICPURayTracingPipeline.h | 4 +- include/nbl/asset/IPipelineLayout.h | 1 + include/nbl/asset/IRayTracingPipeline.h | 125 +++++++++++++----- .../nbl/builtin/hlsl/indirect_commands.hlsl | 6 +- include/nbl/video/CVulkanCommon.h | 7 +- include/nbl/video/IGPUCommandBuffer.h | 26 +--- include/nbl/video/IGPURayTracingPipeline.h | 7 +- .../video/utilities/ISimpleManagedSurface.h | 14 +- src/nbl/video/CVulkanCommandBuffer.cpp | 17 +-- src/nbl/video/CVulkanCommandBuffer.h | 7 +- src/nbl/video/IGPUCommandBuffer.cpp | 88 ++++-------- 13 files changed, 155 insertions(+), 158 deletions(-) diff --git a/examples_tests b/examples_tests index 49d1a6e65d..4d917ddeb3 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 49d1a6e65db98bbad7f11ea7375194581f9567ff +Subproject commit 
4d917ddeb3de1109ccf7250a591b0550235979aa diff --git a/include/nbl/asset/IBuffer.h b/include/nbl/asset/IBuffer.h index 6f8c1bb35b..3a7cbb5983 100644 --- a/include/nbl/asset/IBuffer.h +++ b/include/nbl/asset/IBuffer.h @@ -120,6 +120,15 @@ struct SBufferRange inline bool operator!=(const SBufferRange& rhs) const { return !operator==(rhs); } }; +template +struct SStridedRange +{ + inline operator bool() const {return range.isValid();} + + SBufferRange range = {}; + uint32_t stride = 0; +}; + } namespace std diff --git a/include/nbl/asset/ICPURayTracingPipeline.h b/include/nbl/asset/ICPURayTracingPipeline.h index 01cf4a3f28..e882b4b4cb 100644 --- a/include/nbl/asset/ICPURayTracingPipeline.h +++ b/include/nbl/asset/ICPURayTracingPipeline.h @@ -13,9 +13,9 @@ namespace nbl::asset { //! CPU Version of RayTracing Pipeline -class ICPURayTracingPipeline final : public ICPUPipeline> +class ICPURayTracingPipeline final : public ICPUPipeline> { - using pipeline_base_t = IRayTracingPipeline; + using pipeline_base_t = IRayTracingPipeline; using base_t = ICPUPipeline; public: diff --git a/include/nbl/asset/IPipelineLayout.h b/include/nbl/asset/IPipelineLayout.h index 430c812dcb..0244aaaa06 100644 --- a/include/nbl/asset/IPipelineLayout.h +++ b/include/nbl/asset/IPipelineLayout.h @@ -89,6 +89,7 @@ template class IPipelineLayout { public: + using desc_layout_t = DescLayoutType; static inline constexpr uint32_t DESCRIPTOR_SET_COUNT = 4u; std::span getDescriptorSetLayouts() const diff --git a/include/nbl/asset/IRayTracingPipeline.h b/include/nbl/asset/IRayTracingPipeline.h index fe318d271a..23d2fac81f 100644 --- a/include/nbl/asset/IRayTracingPipeline.h +++ b/include/nbl/asset/IRayTracingPipeline.h @@ -13,49 +13,102 @@ namespace nbl::asset class IRayTracingPipelineBase : public virtual core::IReferenceCounted { - public: - #define base_flag(F) static_cast(IPipelineBase::FLAGS::F) - enum class CreationFlags : uint64_t - { - NONE = base_flag(NONE), - // there's a bit of a problem, as the 
ICPUCompute and Graphics pipelines don't care about flags, because the following 4 flags - DISABLE_OPTIMIZATIONS = base_flag(DISABLE_OPTIMIZATIONS), - ALLOW_DERIVATIVES = base_flag(ALLOW_DERIVATIVES), - FAIL_ON_PIPELINE_COMPILE_REQUIRED = base_flag(FAIL_ON_PIPELINE_COMPILE_REQUIRED), - EARLY_RETURN_ON_FAILURE = base_flag(EARLY_RETURN_ON_FAILURE), - // don't matter for ICPU Pipelines, we'd really need to have these separate from `base_flag` and use the `IRayTracingPipelineBase::CreationFlags` for the ICPU creation params only - SKIP_BUILT_IN_PRIMITIVES = 1<<12, - SKIP_AABBS = 1<<13, - NO_NULL_ANY_HIT_SHADERS = 1<<14, - NO_NULL_CLOSEST_HIT_SHADERS = 1<<15, - NO_NULL_MISS_SHADERS = 1<<16, - NO_NULL_INTERSECTION_SHADERS = 1<<17, - ALLOW_MOTION = 1<<20, - }; - #undef base_flag - - struct SCachedCreationParams final - { - core::bitflag flags = CreationFlags::NONE; - uint32_t maxRecursionDepth : 6 = 0; - uint32_t dynamicStackSize : 1 = false; - }; + public: + #define base_flag(F) static_cast(IPipelineBase::FLAGS::F) + enum class CreationFlags : uint64_t + { + NONE = base_flag(NONE), + // there's a bit of a problem, as the ICPUCompute and Graphics pipelines don't care about flags, because the following 4 flags + DISABLE_OPTIMIZATIONS = base_flag(DISABLE_OPTIMIZATIONS), + ALLOW_DERIVATIVES = base_flag(ALLOW_DERIVATIVES), + FAIL_ON_PIPELINE_COMPILE_REQUIRED = base_flag(FAIL_ON_PIPELINE_COMPILE_REQUIRED), + EARLY_RETURN_ON_FAILURE = base_flag(EARLY_RETURN_ON_FAILURE), + // don't matter for ICPU Pipelines, we'd really need to have these separate from `base_flag` and use the `IRayTracingPipelineBase::CreationFlags` for the ICPU creation params only + SKIP_BUILT_IN_PRIMITIVES = 1<<12, + SKIP_AABBS = 1<<13, + NO_NULL_ANY_HIT_SHADERS = 1<<14, + NO_NULL_CLOSEST_HIT_SHADERS = 1<<15, + NO_NULL_MISS_SHADERS = 1<<16, + NO_NULL_INTERSECTION_SHADERS = 1<<17, + ALLOW_MOTION = 1<<20, + }; + #undef base_flag + + struct SCachedCreationParams final + { + core::bitflag flags = 
CreationFlags::NONE; + uint32_t maxRecursionDepth : 6 = 0; + uint32_t dynamicStackSize : 1 = false; + }; }; -template +template class IRayTracingPipeline : public IPipeline, public IRayTracingPipelineBase { - public: + public: + struct SShaderBindingTable + { + inline bool valid(const core::bitflag flags) const + { + return valid(flags,[](const std::string_view, auto... args)->void{}); + } + template + inline bool valid(const core::bitflag flags, Callback&& cb) const + { + using create_flag_e = IRayTracingPipelineBase::CreationFlags; + // https://registry.khronos.org/vulkan/specs/latest/man/html/vkCmdTraceRaysKHR.html#VUID-vkCmdTraceRaysKHR-flags-03696 + // https://registry.khronos.org/vulkan/specs/latest/man/html/vkCmdTraceRaysKHR.html#VUID-vkCmdTraceRaysKHR-flags-03697 + // https://registry.khronos.org/vulkan/specs/latest/man/html/vkCmdTraceRaysKHR.html#VUID-vkCmdTraceRaysKHR-flags-03512 + const auto shouldHaveHitGroup = flags & (core::bitflag(create_flag_e::NO_NULL_ANY_HIT_SHADERS) | create_flag_e::NO_NULL_CLOSEST_HIT_SHADERS | create_flag_e::NO_NULL_INTERSECTION_SHADERS); + if (shouldHaveHitGroup && !hit.range.buffer) + { + cb("bound pipeline indicates that traceRays command should have hit group, but SRayTracingSBT::hit::range::buffer is null!"); + return false; + } + + // https://registry.khronos.org/vulkan/specs/latest/man/html/vkCmdTraceRaysKHR.html#VUID-vkCmdTraceRaysKHR-flags-03511 + const auto shouldHaveMissGroup = flags & create_flag_e::NO_NULL_MISS_SHADERS; + if (shouldHaveMissGroup && !miss.range.buffer) + { + cb("bound pipeline indicates that traceRays command should have miss group, but SRayTracingSBT::hit::range::buffer is null!"); + return false; + } + + auto invalidBufferRegion = [&cb](const SStridedRange& stRange, const char* groupName) -> bool + { + const auto& range = stRange.range; + const auto* const buffer = range.buffer.get(); + if (!buffer) + return false; + + if (!range.isValid()) + { + cb("%s buffer range is not valid!",groupName); + 
return false; + } + + return false; + }; + + if (invalidBufferRegion({.range=raygen},"Raygen Group")) return false; + if (invalidBufferRegion(miss,"Miss groups")) return false; + if (invalidBufferRegion(hit,"Hit groups")) return false; + if (invalidBufferRegion(callable,"Callable groups")) return false; + + return true; + } + + asset::SBufferRange raygen = {}; + asset::SStridedRange miss = {}, hit = {}, callable = {}; + }; - inline const SCachedCreationParams& getCachedCreationParams() const { return m_params; } + inline const SCachedCreationParams& getCachedCreationParams() const { return m_params; } - protected: - explicit IRayTracingPipeline(PipelineLayoutType* layout, const SCachedCreationParams& cachedParams) : - IPipeline(core::smart_refctd_ptr(layout)), - m_params(cachedParams) - {} + protected: + explicit inline IRayTracingPipeline(PipelineLayoutType* layout, const SCachedCreationParams& cachedParams) : + IPipeline(core::smart_refctd_ptr(layout)), m_params(cachedParams) {} - SCachedCreationParams m_params; + SCachedCreationParams m_params; }; diff --git a/include/nbl/builtin/hlsl/indirect_commands.hlsl b/include/nbl/builtin/hlsl/indirect_commands.hlsl index ca8418bde7..db8c7cb1af 100644 --- a/include/nbl/builtin/hlsl/indirect_commands.hlsl +++ b/include/nbl/builtin/hlsl/indirect_commands.hlsl @@ -50,9 +50,9 @@ struct TraceRaysIndirectCommand_t uint64_t callableShaderBindingTableAddress; uint64_t callableShaderBindingTableSize; uint64_t callableShaderBindingTableStride; - uint32_t width; - uint32_t height; - uint32_t depth; + uint32_t width; + uint32_t height; + uint32_t depth; }; } diff --git a/include/nbl/video/CVulkanCommon.h b/include/nbl/video/CVulkanCommon.h index 4232860baa..e4dfb7e3e9 100644 --- a/include/nbl/video/CVulkanCommon.h +++ b/include/nbl/video/CVulkanCommon.h @@ -1098,14 +1098,15 @@ inline VkPipelineBindPoint getVkPipelineBindPointFrom(asset::E_PIPELINE_BIND_POI } } -inline VkStridedDeviceAddressRegionKHR 
getVkStridedDeviceAddressRegion(const asset::SBufferRange& range, uint32_t stride) +inline VkStridedDeviceAddressRegionKHR getVkStridedDeviceAddressRegion(const asset::SStridedRange& stRange) { - if (range.buffer.get() == nullptr) + const auto& range = stRange.range; + if (range.buffer.get()==nullptr) return {}; return { .deviceAddress = range.buffer->getDeviceAddress() + range.offset, - .stride = stride, + .stride = stRange.stride, .size = range.size, }; } diff --git a/include/nbl/video/IGPUCommandBuffer.h b/include/nbl/video/IGPUCommandBuffer.h index bb6460754a..6b3bfef18c 100644 --- a/include/nbl/video/IGPUCommandBuffer.h +++ b/include/nbl/video/IGPUCommandBuffer.h @@ -539,12 +539,8 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject bool resolveImage(const IGPUImage* const srcImage, const IGPUImage::LAYOUT srcImageLayout, IGPUImage* const dstImage, const IGPUImage::LAYOUT dstImageLayout, const uint32_t regionCount, const SImageResolve* const pRegions); bool setRayTracingPipelineStackSize(uint32_t pipelineStackSize); - bool traceRays( - const asset::SBufferRange& raygenGroupRange, - const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, - const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, - const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, - uint32_t width, uint32_t height, uint32_t depth); + + bool traceRays(const IGPURayTracingPipeline::SShaderBindingTable& sbt, const uint32_t width, const uint32_t height, const uint32_t depth); bool traceRaysIndirect(const asset::SBufferBinding& indirectBinding); //! 
Secondary CommandBuffer execute @@ -719,14 +715,8 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject virtual bool resolveImage_impl(const IGPUImage* const srcImage, const IGPUImage::LAYOUT srcImageLayout, IGPUImage* const dstImage, const IGPUImage::LAYOUT dstImageLayout, const uint32_t regionCount, const SImageResolve* pRegions) = 0; virtual bool setRayTracingPipelineStackSize_impl(uint32_t pipelineStackSize) = 0; - virtual bool traceRays_impl( - const asset::SBufferRange& raygenGroupRange, - const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, - const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, - const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, - uint32_t width, uint32_t height, uint32_t depth) = 0; - virtual bool traceRaysIndirect_impl( - const asset::SBufferBinding& indirectBinding) = 0; + virtual bool traceRays_impl(const IGPURayTracingPipeline::SShaderBindingTable& sbt, const uint32_t width, const uint32_t height, const uint32_t depth) = 0; + virtual bool traceRaysIndirect_impl(const asset::SBufferBinding& indirectBinding) = 0; virtual bool executeCommands_impl(const uint32_t count, IGPUCommandBuffer* const* const cmdbufs) = 0; @@ -881,14 +871,8 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject } return invalidImage(image,IGPUImage::EUF_TRANSFER_SRC_BIT); } - - bool invalidShaderGroups( - const asset::SBufferRange& raygenGroupRange, - const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, - const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, - const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, - core::bitflag flags) const; + bool invalidShaderGroups(const IGPURayTracingPipeline::SShaderBindingTable& sbt, const core::bitflag flags) const; // returns total number of Geometries across all AS build infos template uint32_t buildAccelerationStructures_common(const std::span infos, BuildRangeInfos ranges, const IGPUBuffer* const 
indirectBuffer=nullptr); diff --git a/include/nbl/video/IGPURayTracingPipeline.h b/include/nbl/video/IGPURayTracingPipeline.h index 816cc68243..7b81ee43e7 100644 --- a/include/nbl/video/IGPURayTracingPipeline.h +++ b/include/nbl/video/IGPURayTracingPipeline.h @@ -10,9 +10,9 @@ namespace nbl::video { -class IGPURayTracingPipeline : public IGPUPipeline> +class IGPURayTracingPipeline : public IGPUPipeline> { - using pipeline_t = asset::IRayTracingPipeline; + using pipeline_t = asset::IRayTracingPipeline; public: struct SHitGroup @@ -172,8 +172,7 @@ class IGPURayTracingPipeline : public IGPUPipeline(params.layout->getOriginDevice()), params.layout, params.cached) - {} + IGPURayTracingPipeline(const SCreationParams& params) : IGPUPipeline(core::smart_refctd_ptr(params.layout->getOriginDevice()), params.layout, params.cached) {} virtual ~IGPURayTracingPipeline() = default; diff --git a/include/nbl/video/utilities/ISimpleManagedSurface.h b/include/nbl/video/utilities/ISimpleManagedSurface.h index dbde2d5f53..f60aa022dd 100644 --- a/include/nbl/video/utilities/ISimpleManagedSurface.h +++ b/include/nbl/video/utilities/ISimpleManagedSurface.h @@ -186,6 +186,13 @@ class NBL_API2 ISimpleManagedSurface : public core::IReferenceCounted // inline bool irrecoverable() const {return !const_cast(this)->getSwapchainResources();} + // to trigger `becomeIrrecoverable` if window got closwd + inline bool isWindowOpen() + { + if (!m_cb) return true; // native hwnd has no callbacks set -> user's responsibility to not acquire on window close corresponding to the Surface HWND + return m_cb->isWindowOpen(); + } + // inline CThreadSafeQueueAdapter* getAssignedQueue() const {return m_queue;} @@ -339,13 +346,6 @@ class NBL_API2 ISimpleManagedSurface : public core::IReferenceCounted // virtual void deinit_impl() = 0; - // to trigger `becomeIrrecoverable` if window got closwd - inline bool isWindowOpen() - { - if (!m_cb) return true; // native hwnd has no callbacks set -> user's responsibility to 
not acquire on window close corresponding to the Surface HWND - return m_cb->isWindowOpen(); - } - // ICallback* const m_cb = nullptr; diff --git a/src/nbl/video/CVulkanCommandBuffer.cpp b/src/nbl/video/CVulkanCommandBuffer.cpp index a55c3a1e7b..a04b5940ce 100644 --- a/src/nbl/video/CVulkanCommandBuffer.cpp +++ b/src/nbl/video/CVulkanCommandBuffer.cpp @@ -844,17 +844,12 @@ bool CVulkanCommandBuffer::setRayTracingPipelineStackSize_impl(uint32_t pipeline return true; } -bool CVulkanCommandBuffer::traceRays_impl( - const asset::SBufferRange& raygenGroupRange, - const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, - const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, - const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, - uint32_t width, uint32_t height, uint32_t depth) -{ - const auto vk_raygenGroupRegion = getVkStridedDeviceAddressRegion(raygenGroupRange, raygenGroupRange.size); - const auto vk_missGroupsRegion = getVkStridedDeviceAddressRegion(missGroupsRange, missGroupStride); - const auto vk_hitGroupsRegion = getVkStridedDeviceAddressRegion(hitGroupsRange, hitGroupStride); - const auto vk_callableGroupsRegion = getVkStridedDeviceAddressRegion(callableGroupsRange, callableGroupStride); +bool CVulkanCommandBuffer::traceRays_impl(const IGPURayTracingPipeline::SShaderBindingTable& sbt, const uint32_t width, const uint32_t height, const uint32_t depth) +{ + const auto vk_raygenGroupRegion = getVkStridedDeviceAddressRegion({.range=sbt.raygen,.stride=uint32_t(sbt.raygen.size)}); + const auto vk_missGroupsRegion = getVkStridedDeviceAddressRegion(sbt.miss); + const auto vk_hitGroupsRegion = getVkStridedDeviceAddressRegion(sbt.hit); + const auto vk_callableGroupsRegion = getVkStridedDeviceAddressRegion(sbt.callable); getFunctionTable().vkCmdTraceRaysKHR(m_cmdbuf, &vk_raygenGroupRegion, diff --git a/src/nbl/video/CVulkanCommandBuffer.h b/src/nbl/video/CVulkanCommandBuffer.h index 9383585b23..48d7e9e85c 100644 --- 
a/src/nbl/video/CVulkanCommandBuffer.h +++ b/src/nbl/video/CVulkanCommandBuffer.h @@ -226,12 +226,7 @@ class CVulkanCommandBuffer final : public IGPUCommandBuffer bool resolveImage_impl(const IGPUImage* const srcImage, const IGPUImage::LAYOUT srcImageLayout, IGPUImage* const dstImage, const IGPUImage::LAYOUT dstImageLayout, const uint32_t regionCount, const SImageResolve* pRegions) override; bool setRayTracingPipelineStackSize_impl(uint32_t pipelineStackSize) override; - bool traceRays_impl( - const asset::SBufferRange& raygenGroupRange, - const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, - const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, - const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, - uint32_t width, uint32_t height, uint32_t depth) override; + bool traceRays_impl(const IGPURayTracingPipeline::SShaderBindingTable& sbt, const uint32_t width, const uint32_t height, const uint32_t depth) override; bool traceRaysIndirect_impl(const asset::SBufferBinding& indirectBinding) override; bool executeCommands_impl(const uint32_t count, IGPUCommandBuffer* const* const cmdbufs) override; diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index 799dcccf20..f47428aae8 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -684,50 +684,21 @@ bool IGPUCommandBuffer::copyImage(const IGPUImage* const srcImage, const IGPUIma return copyImage_impl(srcImage, srcImageLayout, dstImage, dstImageLayout, regionCount, pRegions); } -bool IGPUCommandBuffer::invalidShaderGroups( - const asset::SBufferRange& raygenGroupRange, - const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, - const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, - const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, - core::bitflag flags) const +bool IGPUCommandBuffer::invalidShaderGroups(const 
IGPURayTracingPipeline::SShaderBindingTable& sbt, const core::bitflag flags) const { + if (!sbt.valid(flags)) + return true; using PipelineFlag = IGPURayTracingPipeline::SCreationParams::FLAGS; using PipelineFlags = core::bitflag; - // https://registry.khronos.org/vulkan/specs/latest/man/html/vkCmdTraceRaysKHR.html#VUID-vkCmdTraceRaysKHR-flags-03696 - // https://registry.khronos.org/vulkan/specs/latest/man/html/vkCmdTraceRaysKHR.html#VUID-vkCmdTraceRaysKHR-flags-03697 - // https://registry.khronos.org/vulkan/specs/latest/man/html/vkCmdTraceRaysKHR.html#VUID-vkCmdTraceRaysKHR-flags-03512 - const auto shouldHaveHitGroup = flags & - (PipelineFlags(PipelineFlag::NO_NULL_ANY_HIT_SHADERS) | - PipelineFlag::NO_NULL_CLOSEST_HIT_SHADERS | - PipelineFlag::NO_NULL_INTERSECTION_SHADERS); - if (shouldHaveHitGroup && !hitGroupsRange.buffer) - { - NBL_LOG_ERROR("bound pipeline indicates that traceRays command should have hit group, but hitGroupsRange.buffer is null!"); - return true; - } - - // https://registry.khronos.org/vulkan/specs/latest/man/html/vkCmdTraceRaysKHR.html#VUID-vkCmdTraceRaysKHR-flags-03511 - const auto shouldHaveMissGroup = flags & PipelineFlag::NO_NULL_MISS_SHADERS; - if (shouldHaveMissGroup && !missGroupsRange.buffer) - { - NBL_LOG_ERROR("bound pipeline indicates that traceRays command should have hit group, but hitGroupsRange.buffer is null!"); - return true; - } - const auto& limits = getOriginDevice()->getPhysicalDevice()->getLimits(); - auto invalidBufferRegion = [this, &limits](const asset::SBufferRange& range, uint32_t stride, const char* groupName) -> bool + auto invalidBufferRegion = [this, &limits](const asset::SStridedRange& stRange, const char* groupName) -> bool { + const auto& range = stRange.range; const IGPUBuffer* const buffer = range.buffer.get(); - - if (!buffer) return false; - - if (!range.isValid()) - { - NBL_LOG_ERROR("%s buffer range is not valid!", groupName); - return true; - } + if (!buffer) + return false; if 
(!(buffer->getCreationParams().usage & IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT)) { @@ -743,13 +714,13 @@ bool IGPUCommandBuffer::invalidShaderGroups( } // https://registry.khronos.org/vulkan/specs/latest/man/html/vkCmdTraceRaysKHR.html#VUID-vkCmdTraceRaysKHR-pHitShaderBindingTable-03690 - if (stride % limits.shaderGroupHandleAlignment) + if (stRange.stride % limits.shaderGroupHandleAlignment) { NBL_LOG_ERROR("%s buffer offset must be multiple of %u!", groupName, limits.shaderGroupHandleAlignment); return true; } - if (stride > limits.maxShaderGroupStride) + if (stRange.stride > limits.maxShaderGroupStride) { NBL_LOG_ERROR("%s buffer stride must not exceed %u!", groupName, limits.shaderGroupHandleAlignment); return true; @@ -765,10 +736,11 @@ bool IGPUCommandBuffer::invalidShaderGroups( return false; }; - if (invalidBufferRegion(raygenGroupRange, raygenGroupRange.size, "Raygen Group")) return true; - if (invalidBufferRegion(missGroupsRange, missGroupStride, "Miss groups")) return true; - if (invalidBufferRegion(hitGroupsRange, hitGroupStride, "Hit groups")) return true; - if (invalidBufferRegion(callableGroupsRange, callableGroupStride, "Callable groups")) return true; + if (invalidBufferRegion({.range=sbt.raygen,.stride=limits.shaderGroupHandleAlignment},"Raygen Group")) return true; + if (invalidBufferRegion(sbt.miss,"Miss groups")) return true; + if (invalidBufferRegion(sbt.hit,"Hit groups")) return true; + if (invalidBufferRegion(sbt.callable,"Callable groups")) return true; + return false; } @@ -1945,12 +1917,7 @@ bool IGPUCommandBuffer::setRayTracingPipelineStackSize(uint32_t pipelineStackSiz return setRayTracingPipelineStackSize_impl(pipelineStackSize); } -bool IGPUCommandBuffer::traceRays( - const asset::SBufferRange& raygenGroupRange, - const asset::SBufferRange& missGroupsRange, uint32_t missGroupStride, - const asset::SBufferRange& hitGroupsRange, uint32_t hitGroupStride, - const asset::SBufferRange& callableGroupsRange, uint32_t callableGroupStride, - 
uint32_t width, uint32_t height, uint32_t depth) +bool IGPUCommandBuffer::traceRays(const IGPURayTracingPipeline::SShaderBindingTable& sbt, const uint32_t width, const uint32_t height, const uint32_t depth) { if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT,RENDERPASS_SCOPE::OUTSIDE)) return false; @@ -1983,11 +1950,7 @@ bool IGPUCommandBuffer::traceRays( } const auto flags = m_boundRayTracingPipeline->getCreationFlags(); - if (invalidShaderGroups(raygenGroupRange, - missGroupsRange, missGroupStride, - hitGroupsRange, hitGroupStride, - callableGroupsRange, callableGroupStride, - flags)) + if (invalidShaderGroups(sbt,flags)) { NBL_LOG_ERROR("invalid shader groups for traceRays command!"); return false; @@ -2000,11 +1963,13 @@ bool IGPUCommandBuffer::traceRays( return false; } - if (!m_cmdpool->m_commandListPool.emplace(m_commandList, - core::smart_refctd_ptr(raygenGroupRange.buffer), - core::smart_refctd_ptr(missGroupsRange.buffer), - core::smart_refctd_ptr(hitGroupsRange.buffer), - core::smart_refctd_ptr(callableGroupsRange.buffer))) + if (!m_cmdpool->m_commandListPool.emplace( + m_commandList, + core::smart_refctd_ptr(sbt.raygen.buffer), + core::smart_refctd_ptr(sbt.miss.range.buffer), + core::smart_refctd_ptr(sbt.hit.range.buffer), + core::smart_refctd_ptr(sbt.callable.range.buffer) + )) { NBL_LOG_ERROR("out of host memory!"); return false; @@ -2012,12 +1977,7 @@ bool IGPUCommandBuffer::traceRays( m_noCommands = false; - return traceRays_impl( - raygenGroupRange, - missGroupsRange, missGroupStride, - hitGroupsRange, hitGroupStride, - callableGroupsRange, callableGroupStride, - width, height, depth); + return traceRays_impl(sbt, width, height, depth); } bool IGPUCommandBuffer::traceRaysIndirect(const asset::SBufferBinding& indirectBinding) From 2685e2120f3d153fdf559a77d58a53cc5e18ab1b Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 27 Jan 2026 11:25:45 +0100 Subject: [PATCH 433/472] precompile fri ext --- 
.../hlsl/ext/FullScreenTriangle/default.vert.hlsl | 8 ++++++-- include/nbl/builtin/hlsl/surface_transform.h | 4 ++-- .../nbl/ext/FullScreenTriangle/FullScreenTriangle.h | 11 +++++++++-- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/include/nbl/builtin/hlsl/ext/FullScreenTriangle/default.vert.hlsl b/include/nbl/builtin/hlsl/ext/FullScreenTriangle/default.vert.hlsl index a48d9b4623..e55e4ad096 100644 --- a/include/nbl/builtin/hlsl/ext/FullScreenTriangle/default.vert.hlsl +++ b/include/nbl/builtin/hlsl/ext/FullScreenTriangle/default.vert.hlsl @@ -21,8 +21,12 @@ const static float32_t2 tc[3] = { [[vk::constant_id(0)]] const uint32_t SwapchainTransform = 0; +#ifndef NBL_EXT_FULLSCREEN_TRIANGLE_VS_ENTRYPOINT +#define NBL_EXT_FULLSCREEN_TRIANGLE_VS_ENTRYPOINT main +#endif + [shader("vertex")] -SVertexAttributes main() +SVertexAttributes NBL_EXT_FULLSCREEN_TRIANGLE_VS_ENTRYPOINT() { using namespace ::nbl::hlsl::glsl; @@ -33,4 +37,4 @@ SVertexAttributes main() SVertexAttributes retval; retval.uv = tc[gl_VertexIndex()]; return retval; -} \ No newline at end of file +} diff --git a/include/nbl/builtin/hlsl/surface_transform.h b/include/nbl/builtin/hlsl/surface_transform.h index a681ecf0bb..12e74a098a 100644 --- a/include/nbl/builtin/hlsl/surface_transform.h +++ b/include/nbl/builtin/hlsl/surface_transform.h @@ -4,6 +4,7 @@ #ifndef _NBL_BUILTIN_HLSL_SURFACE_TRANSFORM_INCLUDED_ #define _NBL_BUILTIN_HLSL_SURFACE_TRANSFORM_INCLUDED_ #include +#include #include namespace nbl @@ -174,8 +175,7 @@ inline float32_t2 applyToNDC(const FLAG_BITS transform, const float32_t2 ndc) template TwoColumns applyToDerivatives(const FLAG_BITS transform, TwoColumns dDx_dDy) { - using namespace glsl; // IN HLSL mode, C++ doens't need this to access `inverse` - return mul(inverse(transformMatrix(transform)),dDx_dDy); + return mul(::nbl::hlsl::inverse(transformMatrix(transform)),dDx_dDy); } } diff --git a/include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h 
b/include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h index 839e7e07e4..465ffcedec 100644 --- a/include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h +++ b/include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h @@ -35,6 +35,12 @@ struct ProtoPipeline final inline ProtoPipeline(asset::IAssetManager* assMan, video::ILogicalDevice* device, system::ILogger* logger=nullptr) { m_vxShader = createDefaultVertexShader(assMan,device,logger); + m_vxEntryPoint = "main"; + } + + inline ProtoPipeline(core::smart_refctd_ptr vertexShader, const char* vertexEntryPoint="main") : m_vxShader(std::move(vertexShader)) + { + m_vxEntryPoint = vertexEntryPoint ? vertexEntryPoint : "main"; } inline operator bool() const {return m_vxShader.get();} @@ -63,7 +69,7 @@ struct ProtoPipeline final IGPUGraphicsPipeline::SCreationParams params[1]; params[0].layout = layout; - params[0].vertexShader = { .shader = m_vxShader.get(), .entryPoint = "main", .entries = &specConstants }; + params[0].vertexShader = { .shader = m_vxShader.get(), .entryPoint = m_vxEntryPoint, .entries = &specConstants }; params[0].fragmentShader = fragShader; params[0].cached = { .vertexInput = {}, // The Full Screen Triangle doesn't use any HW vertex input state @@ -82,6 +88,7 @@ struct ProtoPipeline final core::smart_refctd_ptr m_vxShader; + std::string m_vxEntryPoint = "main"; // The default is correct for us constexpr static inline asset::SRasterizationParams DefaultRasterParams = { .faceCullingMode = asset::EFCM_NONE, @@ -103,4 +110,4 @@ static inline bool recordDrawCall(video::IGPUCommandBuffer* commandBuffer) return commandBuffer->draw(VERTEX_COUNT,INSTANCE_COUNT,0,0); } } -#endif \ No newline at end of file +#endif From 3a5d058e499bcd4857b29022eba8a44f167d5beb Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 27 Jan 2026 11:26:58 +0100 Subject: [PATCH 434/472] update examples_tests submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests 
b/examples_tests index 4d917ddeb3..acb025fc63 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 4d917ddeb3de1109ccf7250a591b0550235979aa +Subproject commit acb025fc63c606ad13b2c798f816ba0e7c9aea6a From 6d8b42d63fabab8c68bca646f6a30f8d74f2ef82 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 27 Jan 2026 12:33:23 +0100 Subject: [PATCH 435/472] update examples_tests submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index acb025fc63..5a6727dfb7 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit acb025fc63c606ad13b2c798f816ba0e7c9aea6a +Subproject commit 5a6727dfb7776ec51725e4bede1c4771c9c038f2 From 37be95beb4111fd562c10b8bb0a99a733625b5a4 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 27 Jan 2026 15:03:02 +0100 Subject: [PATCH 436/472] pick important encoding fix from IES branch fixing (I hope) CI HLSL build --- src/nbl/asset/utils/CHLSLCompiler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nbl/asset/utils/CHLSLCompiler.cpp b/src/nbl/asset/utils/CHLSLCompiler.cpp index 1020fa9446..62166a885c 100644 --- a/src/nbl/asset/utils/CHLSLCompiler.cpp +++ b/src/nbl/asset/utils/CHLSLCompiler.cpp @@ -313,7 +313,7 @@ static DxcCompilationResult dxcCompile(const CHLSLCompiler* compiler, nbl::asset DxcBuffer sourceBuffer; sourceBuffer.Ptr = src->GetBufferPointer(); sourceBuffer.Size = src->GetBufferSize(); - sourceBuffer.Encoding = 0; + sourceBuffer.Encoding = CP_UTF8; ComPtr compileResult; res = dxc->m_dxcCompiler->Compile(&sourceBuffer, args, argCount, nullptr, IID_PPV_ARGS(compileResult.GetAddressOf())); From 7fc9ec3dfd455aaf4d99db07409f673bb8abec4e Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 27 Jan 2026 17:20:03 +0100 Subject: [PATCH 437/472] fix a super silly bug --- include/nbl/asset/ICPUBuffer.h | 50 ++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 24 deletions(-) 
diff --git a/include/nbl/asset/ICPUBuffer.h b/include/nbl/asset/ICPUBuffer.h index 2d10c2907b..26f45d4ced 100644 --- a/include/nbl/asset/ICPUBuffer.h +++ b/include/nbl/asset/ICPUBuffer.h @@ -119,30 +119,32 @@ class ICPUBuffer final : public asset::IBuffer, public IPreHashed return (m_alignment > 0 && !(m_alignment & (m_alignment - 1))); } -protected: - inline void discardContent_impl() override - { - if (m_data) - m_mem_resource->deallocate(m_data, m_creationParams.size, m_alignment); - m_data = nullptr; - m_mem_resource = nullptr; - m_creationParams.size = 0ull; - } - -private: - ICPUBuffer(SCreationParams&& params) : - asset::IBuffer({ params.size, EUF_TRANSFER_DST_BIT }), m_data(params.data), - m_mem_resource(params.memoryResource), m_alignment(params.alignment) {} - - ~ICPUBuffer() override { - discardContent_impl(); - } - - inline void visitDependents_impl(std::function visit) const override {} - - void* m_data; - core::smart_refctd_ptr m_mem_resource; - size_t m_alignment; + protected: + inline void discardContent_impl() override + { + if (m_data) + m_mem_resource->deallocate(m_data, m_creationParams.size, m_alignment); + m_data = nullptr; + m_mem_resource = nullptr; + m_creationParams.size = 0ull; + } + + private: + // TODO: we should remove the addition of TRANSFER_DST_BIT because its the asset converter patcher that handles that + // But we need LLVM-pipe CI first so I don't have to test 70 examples by hand + inline ICPUBuffer(SCreationParams&& params) : asset::IBuffer({params.size,params.usage|EUF_TRANSFER_DST_BIT}), + m_data(params.data), m_mem_resource(params.memoryResource), m_alignment(params.alignment) {} + + inline ~ICPUBuffer() override + { + discardContent_impl(); + } + + inline void visitDependents_impl(std::function visit) const override {} + + void* m_data; + core::smart_refctd_ptr m_mem_resource; + size_t m_alignment; }; } // end namespace nbl::asset From 74f56a2e7f20616f654f15087e55eb2974d159f2 Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 
27 Jan 2026 20:25:44 +0100 Subject: [PATCH 438/472] fix a bug in creating multiple raytracing pipelines in one --- src/nbl/video/ILogicalDevice.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/nbl/video/ILogicalDevice.cpp b/src/nbl/video/ILogicalDevice.cpp index 7c3f5dbb81..d5b38f9b69 100644 --- a/src/nbl/video/ILogicalDevice.cpp +++ b/src/nbl/video/ILogicalDevice.cpp @@ -1110,14 +1110,14 @@ bool ILogicalDevice::createRayTracingPipelines(IGPUPipelineCache* const pipeline newParams[ix] = param; newParams[ix].shaderGroups.raygen = trimTask.trim(param.shaderGroups.raygen, trimmedShaders); - newParams[ix].shaderGroups.misses = trimmedMissSpecs; + newParams[ix].shaderGroups.misses = {trimmedMissSpecData,param.shaderGroups.misses.size()}; for (const auto& miss: param.shaderGroups.misses) { *trimmedMissSpecData = trimTask.trim(miss, trimmedShaders); trimmedMissSpecData++; } - newParams[ix].shaderGroups.hits = trimmedHitSpecs; + newParams[ix].shaderGroups.hits = {trimmedHitSpecData,param.shaderGroups.hits.size()}; for (const auto& hit: param.shaderGroups.hits) { *trimmedHitSpecData = { @@ -1128,7 +1128,7 @@ bool ILogicalDevice::createRayTracingPipelines(IGPUPipelineCache* const pipeline trimmedHitSpecData++; } - newParams[ix].shaderGroups.callables = trimmedCallableSpecs; + newParams[ix].shaderGroups.callables = {trimmedCallableSpecData,param.shaderGroups.callables.size()}; for (const auto& callable: param.shaderGroups.callables) { *trimmedCallableSpecData = trimTask.trim(callable, trimmedShaders); From e8c18fb9c500d5d7e67bb142e6f9c7229f9b4ad0 Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 27 Jan 2026 20:30:59 +0100 Subject: [PATCH 439/472] fix the bug Erfan spotted --- include/nbl/video/IGPUCommandPool.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/video/IGPUCommandPool.h b/include/nbl/video/IGPUCommandPool.h index 0424ad83bd..ddc4fcfd5c 100644 --- a/include/nbl/video/IGPUCommandPool.h +++ 
b/include/nbl/video/IGPUCommandPool.h @@ -71,7 +71,7 @@ class IGPUCommandPool : public IBackendObject m_commandListPool.clear(); return reset_impl(); } - inline uint32_t getResetCounter() { return m_resetCount.load(); } + inline uint64_t getResetCounter() { return m_resetCount.load(); } // recycles unused memory from the command pool back to the system virtual void trim() = 0; // no extra stuff needed for `CCommandSegmentListPool` because it trims unused blocks at runtime From d2275465afa3ad5f6950c07e7fe20a0715c43275 Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 27 Jan 2026 20:31:49 +0100 Subject: [PATCH 440/472] update examples_tests submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 4d917ddeb3..cb7b0e75b8 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 4d917ddeb3de1109ccf7250a591b0550235979aa +Subproject commit cb7b0e75b8d5349b4a915182a5ccf65571af99eb From 5b833280d4754c1c28132fc797fe48c36694c66b Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 28 Jan 2026 10:42:32 +0700 Subject: [PATCH 441/472] fix bitfieldInsert usage, truncate data --- .../hlsl/sampling/quantized_sequence.hlsl | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index 70ca28b70d..a681de07de 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -5,9 +5,11 @@ #ifndef _NBL_BUILTIN_HLSL_SAMPLING_QUANTIZED_SEQUENCE_INCLUDED_ #define _NBL_BUILTIN_HLSL_SAMPLING_QUANTIZED_SEQUENCE_INCLUDED_ +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/bit.hlsl" +#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" #include "nbl/builtin/hlsl/concepts/vector.hlsl" #include "nbl/builtin/hlsl/vector_utils/vector_traits.hlsl" -#include 
"nbl/builtin/hlsl/random/pcg.hlsl" namespace nbl { @@ -160,6 +162,7 @@ struct QuantizedSequence; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; + NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v) - BitsPerComponent; NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = Dim; static this_t create(const vector value) @@ -180,7 +183,7 @@ struct QuantizedSequence 0 && idx < Dim); - glsl::bitfieldInsert(data, value, BitsPerComponent * idx, BitsPerComponent); + data = glsl::bitfieldInsert(data, value >> DiscardBits, BitsPerComponent * idx, BitsPerComponent); } template @@ -290,15 +293,16 @@ struct QuantizedSequence && Di void set(const uint16_t idx, const scalar_type value) { assert(idx >= 0 && idx < 3); + const scalar_type trunc_val = value >> DiscardBits; if (idx == 0) // x - glsl::bitfieldInsert(data[0], value, 0u, BitsPerComponent); + data[0] = glsl::bitfieldInsert(data[0], trunc_val, 0u, BitsPerComponent); else if (idx == 1) // y { - glsl::bitfieldInsert(data[0], value, BitsPerComponent, DiscardBits); - glsl::bitfieldInsert(data[1], value >> DiscardBits, 0u, DiscardBits - 1u); + data[0] = glsl::bitfieldInsert(data[0], trunc_val, BitsPerComponent, DiscardBits); + data[1] = glsl::bitfieldInsert(data[1], trunc_val >> DiscardBits, 0u, DiscardBits - 1u); } else // z - glsl::bitfieldInsert(data[1], value, DiscardBits - 1u, BitsPerComponent); + data[1] = glsl::bitfieldInsert(data[1], trunc_val, DiscardBits - 1u, BitsPerComponent); } template @@ -334,6 +338,7 @@ struct QuantizedSequence && Di using scalar_type = typename vector_traits::scalar_type; NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; + NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v) - BitsPerComponent; NBL_CONSTEXPR_STATIC_INLINE uint16_t Dimension = Dim; static this_t create(const vector value) @@ -361,13 +366,14 @@ struct 
QuantizedSequence && Di void set(const uint16_t idx, const scalar_type value) { assert(idx >= 0 && idx < 4); + const scalar_type trunc_val = value >> DiscardBits; if (idx < 2) // x y { - glsl::bitfieldInsert(data[0], value, BitsPerComponent * idx, BitsPerComponent); + data[0] = glsl::bitfieldInsert(data[0], trunc_val, BitsPerComponent * idx, BitsPerComponent); } else // z w { - glsl::bitfieldInsert(data[1], value, BitsPerComponent * (idx & uint16_t(1u)), BitsPerComponent); + data[1] = glsl::bitfieldInsert(data[1], trunc_val, BitsPerComponent * (idx & uint16_t(1u)), BitsPerComponent); } } From 57b1b3d80c3e8518262bd99e84fccfd3b8cb22a0 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 28 Jan 2026 10:56:30 +0700 Subject: [PATCH 442/472] change some type alias names to make more sense --- examples_tests | 2 +- .../hlsl/sampling/quantized_sequence.hlsl | 36 +++++++++---------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/examples_tests b/examples_tests index 587cbff28b..6e96a2f68f 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 587cbff28b1d0b42f2f704c3ba9b247ad0276590 +Subproject commit 6e96a2f68fcc23eeb8524a35839e5c93454b98ec diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index a681de07de..a3c6cb4685 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -46,16 +46,16 @@ struct encode_helper { NBL_CONSTEXPR_STATIC_INLINE uint16_t Dim = Q::Dimension; using sequence_type = Q; - using unorm_vec_type = vector; - using unsigned_scalar_type = unsigned_integer_of_size_t; - using uvec_type = vector; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormMultiplier = (1u << (8u * size_of_v - 1u)) - 1u; + using input_type = vector; + using uniform_storage_scalar_type = unsigned_integer_of_size_t; + using uniform_storage_type = vector; // type that holds uint bit 
representation of a unorm that can have 1s in MSB (normalized w.r.t whole scalar) + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormMultiplier = (1u << (8u * size_of_v - 1u)) - 1u; - static sequence_type __call(const unorm_vec_type unormvec) + static sequence_type __call(const input_type unormvec) { - uvec_type asuint; + uniform_storage_type asuint; NBL_UNROLL for(uint16_t i = 0; i < Dim; i++) - asuint[i] = unsigned_scalar_type(unormvec[i] * UNormMultiplier); + asuint[i] = uniform_storage_scalar_type(unormvec[i] * UNormMultiplier); return sequence_type::create(asuint); } }; @@ -63,12 +63,12 @@ struct encode_helper template struct decode_before_scramble_helper { - using unsigned_scalar_type = typename Q::scalar_type; + using storage_scalar_type = typename Q::scalar_type; NBL_CONSTEXPR_STATIC_INLINE uint16_t Dim = Q::Dimension; using uvec_type = vector; using sequence_type = Q; using return_type = vector; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = unorm_constant::value; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = unorm_constant::value; return_type operator()(const uvec_type scrambleKey) { @@ -76,7 +76,7 @@ struct decode_before_scramble_helper NBL_UNROLL for(uint16_t i = 0; i < Dim; i++) seqVal[i] = val.get(i); seqVal ^= scrambleKey; - return return_type(seqVal) * bit_cast >(UNormConstant); + return return_type(seqVal) * bit_cast >(UNormConstant); } sequence_type val; @@ -84,7 +84,7 @@ struct decode_before_scramble_helper template struct decode_after_scramble_helper { - using unsigned_scalar_type = typename Q::scalar_type; + using storage_scalar_type = typename Q::scalar_type; NBL_CONSTEXPR_STATIC_INLINE uint16_t Dim = Q::Dimension; using uvec_type = vector; using sequence_type = Q; @@ -99,7 +99,7 @@ struct decode_after_scramble_helper uvec_type seqVal; NBL_UNROLL for(uint16_t i = 0; i < Dim; i++) seqVal[i] = scramble.get(i); - return return_type(seqVal) * bit_cast >(UNormConstant); + return return_type(seqVal) * bit_cast >(UNormConstant); } 
sequence_type val; @@ -126,8 +126,8 @@ struct QuantizedSequence 0 && idx < 1); return data; } - void set(const uint16_t idx, const store_type value) { assert(idx > 0 && idx < 1); data = value; } + store_type get(const uint16_t idx) { assert(idx >= 0 && idx < 1); return data; } + void set(const uint16_t idx, const store_type value) { assert(idx >= 0 && idx < 1); data = value; } template static this_t encode(const vector value) @@ -176,13 +176,13 @@ struct QuantizedSequence 0 && idx < Dim); + assert(idx >= 0 && idx < Dim); return glsl::bitfieldExtract(data, BitsPerComponent * idx, BitsPerComponent); } void set(const uint16_t idx, const store_type value) { - assert(idx > 0 && idx < Dim); + assert(idx >= 0 && idx < Dim); data = glsl::bitfieldInsert(data, value >> DiscardBits, BitsPerComponent * idx, BitsPerComponent); } @@ -227,8 +227,8 @@ struct QuantizedSequence 0 && idx < Dim); return data[idx]; } - void set(const uint16_t idx, const scalar_type value) { assert(idx > 0 && idx < Dim); data[idx] = value; } + scalar_type get(const uint16_t idx) { assert(idx >= 0 && idx < Dim); return data[idx]; } + void set(const uint16_t idx, const scalar_type value) { assert(idx >= 0 && idx < Dim); data[idx] = value; } template static this_t encode(const vector value) From c0350634f1ea285c0f5f94b3ab1c2f70511ac655 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 28 Jan 2026 12:01:46 +0700 Subject: [PATCH 443/472] added option for encode fullwidth or not --- .../hlsl/sampling/quantized_sequence.hlsl | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index a3c6cb4685..357ea7843b 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -41,7 +41,8 @@ struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0 template<> struct unorm_constant 
{ NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x2f800004u; }; -template +// FullWidth if intend to decode before scramble, not if decode after scramble +template struct encode_helper { NBL_CONSTEXPR_STATIC_INLINE uint16_t Dim = Q::Dimension; @@ -49,7 +50,8 @@ struct encode_helper using input_type = vector; using uniform_storage_scalar_type = unsigned_integer_of_size_t; using uniform_storage_type = vector; // type that holds uint bit representation of a unorm that can have 1s in MSB (normalized w.r.t whole scalar) - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormMultiplier = (1u << (8u * size_of_v - 1u)) - 1u; + NBL_CONSTEXPR_STATIC_INLINE uint16_t Bits = FullWidth ? (8u * size_of_v - 1u) : sequence_type::BitsPerComponent; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormMultiplier = (1u << Bits) - 1u; static sequence_type __call(const input_type unormvec) { @@ -129,10 +131,10 @@ struct QuantizedSequence= 0 && idx < 1); return data; } void set(const uint16_t idx, const store_type value) { assert(idx >= 0 && idx < 1); data = value; } - template + template static this_t encode(const vector value) { - return impl::encode_helper::__call(value); + return impl::encode_helper::__call(value); } template @@ -186,10 +188,10 @@ struct QuantizedSequence> DiscardBits, BitsPerComponent * idx, BitsPerComponent); } - template + template static this_t encode(const vector value) { - return impl::encode_helper::__call(value); + return impl::encode_helper::__call(value); } template @@ -230,10 +232,10 @@ struct QuantizedSequence= 0 && idx < Dim); return data[idx]; } void set(const uint16_t idx, const scalar_type value) { assert(idx >= 0 && idx < Dim); data[idx] = value; } - template + template static this_t encode(const vector value) { - return impl::encode_helper::__call(value); + return impl::encode_helper::__call(value); } template @@ -305,10 +307,10 @@ struct QuantizedSequence && Di data[1] = glsl::bitfieldInsert(data[1], trunc_val, DiscardBits - 1u, BitsPerComponent); } - template + 
template static this_t encode(const vector value) { - return impl::encode_helper::__call(value); + return impl::encode_helper::__call(value); } template @@ -377,10 +379,10 @@ struct QuantizedSequence && Di } } - template + template static this_t encode(const vector value) { - return impl::encode_helper::__call(value); + return impl::encode_helper::__call(value); } template From 09846179fc1a9c2d90ab611b9a8f91c186598ea4 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 28 Jan 2026 16:58:54 +0700 Subject: [PATCH 444/472] update examples to include tests --- examples_tests | 2 +- include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples_tests b/examples_tests index 6e96a2f68f..533a90e027 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 6e96a2f68fcc23eeb8524a35839e5c93454b98ec +Subproject commit 533a90e027fc263b6091fbc00eecd29a37f1d254 diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index 357ea7843b..b1af365c86 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -48,7 +48,7 @@ struct encode_helper NBL_CONSTEXPR_STATIC_INLINE uint16_t Dim = Q::Dimension; using sequence_type = Q; using input_type = vector; - using uniform_storage_scalar_type = unsigned_integer_of_size_t; + using uniform_storage_scalar_type = unsigned_integer_of_size_t; using uniform_storage_type = vector; // type that holds uint bit representation of a unorm that can have 1s in MSB (normalized w.r.t whole scalar) NBL_CONSTEXPR_STATIC_INLINE uint16_t Bits = FullWidth ? 
(8u * size_of_v - 1u) : sequence_type::BitsPerComponent; NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormMultiplier = (1u << Bits) - 1u; From 5b73aae3d608a5d6d5b2c57ed2b372c1fba7d708 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 29 Jan 2026 12:21:44 +0700 Subject: [PATCH 445/472] partial specs for bitfield insert and extract for 16 bit types --- .../nbl/builtin/hlsl/glsl_compat/core.hlsl | 92 ++++++++++++++----- 1 file changed, 69 insertions(+), 23 deletions(-) diff --git a/include/nbl/builtin/hlsl/glsl_compat/core.hlsl b/include/nbl/builtin/hlsl/glsl_compat/core.hlsl index 7e92cbf282..6d971abca3 100644 --- a/include/nbl/builtin/hlsl/glsl_compat/core.hlsl +++ b/include/nbl/builtin/hlsl/glsl_compat/core.hlsl @@ -22,16 +22,33 @@ namespace glsl #ifndef __HLSL_VERSION // GLM Aliases -template -genIUType bitfieldExtract(genIUType Value, int Offset, int Bits) +namespace impl { - return glm::bitfieldExtract(Value, Offset, Bits); -} +template +struct bitfieldInsert; -template -genIUType bitfieldInsert(genIUType const& Base, genIUType const& Insert, int Offset, int Bits) +template +NBL_PARTIAL_REQ_TOP(concepts::Integral::scalar_type> && size_of_v::scalar_type> >= 4) +struct bitfieldInsert::scalar_type> && size_of_v::scalar_type> == 4) > { - return glm::bitfieldInsert(Base, Insert, Offset, Bits); + static T __call( T base, T insert, uint32_t offset, uint32_t bits ) + { + return glm::bitfieldInsert(base, insert, offset, bits); + } +}; + +template +struct bitfieldExtract; + +template +NBL_PARTIAL_REQ_TOP(concepts::Integral::scalar_type> && size_of_v::scalar_type> >= 4) +struct bitfieldExtract::scalar_type> && size_of_v::scalar_type> == 4) > +{ + static T __call( T val, uint32_t offsetBits, uint32_t numBits ) + { + return glm::bitfieldExtract(val, offsetBits, numBits); + } +}; } template @@ -184,21 +201,25 @@ void memoryBarrierShared() { namespace impl { -template -struct bitfieldExtract {}; +template +struct bitfieldInsert; -template -struct bitfieldExtract +template 
+NBL_PARTIAL_REQ_TOP(concepts::Integral::scalar_type> && size_of_v::scalar_type> >= 4) +struct bitfieldInsert::scalar_type> && size_of_v::scalar_type> == 4) > { - static T __call( T val, uint32_t offsetBits, uint32_t numBits ) + static T __call( T base, T insert, uint32_t offset, uint32_t bits ) { - static_assert( is_integral::value, "T is not an integral type!" ); - return val; + return spirv::bitFieldInsert(base, insert, offset, bits); } }; +template +struct bitfieldExtract; + template -struct bitfieldExtract +NBL_PARTIAL_REQ_TOP(concepts::SignedIntegral::scalar_type> && size_of_v::scalar_type> >= 4) +struct bitfieldExtract::scalar_type> && size_of_v::scalar_type> == 4) > { static T __call( T val, uint32_t offsetBits, uint32_t numBits ) { @@ -207,7 +228,8 @@ struct bitfieldExtract }; template -struct bitfieldExtract +NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegral::scalar_type> && size_of_v::scalar_type> == 4) +struct bitfieldExtract::scalar_type> && size_of_v::scalar_type> == 4) > { static T __call( T val, uint32_t offsetBits, uint32_t numBits ) { @@ -218,24 +240,48 @@ struct bitfieldExtract } //namespace impl template -T bitfieldExtract( T val, uint32_t offsetBits, uint32_t numBits ) +T bitfieldReverse(T value) { - return impl::bitfieldExtract::value, is_integral::value>::__call(val,offsetBits,numBits); + return spirv::bitReverse(value); } +#endif + +namespace impl +{ template -T bitfieldInsert(T base, T insert, uint32_t offset, uint32_t bits) +NBL_PARTIAL_REQ_TOP(concepts::Integral::scalar_type> && size_of_v::scalar_type> == 2) +struct bitfieldInsert::scalar_type> && size_of_v::scalar_type> == 2) > +{ + static T __call( T base, T insert, uint32_t offset, uint32_t bits ) + { + const T mask = (T(1u) << T(bits) - T(1u)) << offset; + return (base & ~mask) | ((insert << T(offset)) & mask); + } +}; + +template +NBL_PARTIAL_REQ_TOP(concepts::Integral::scalar_type> && size_of_v::scalar_type> == 2) +struct bitfieldExtract::scalar_type> && size_of_v::scalar_type> == 2) > 
{ - return spirv::bitFieldInsert(base, insert, offset, bits); + static T __call( T val, uint32_t offsetBits, uint32_t numBits ) + { + return (val >> T(offsetBits)) & T(T(1u) << T(numBits) - T(1u)); + } +}; } template -T bitfieldReverse(T value) +T bitfieldExtract( T val, uint32_t offsetBits, uint32_t numBits ) { - return spirv::bitReverse(value); + return impl::bitfieldExtract::__call(val, offsetBits, numBits); } -#endif +template +T bitfieldInsert(T base, T insert, uint32_t offset, uint32_t bits) +{ + return impl::bitfieldInsert::__call(base, insert, offset, bits); +} namespace impl { From b63fa26dc46312f640c1fc19e4be28f87002d8cb Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 29 Jan 2026 14:37:16 +0700 Subject: [PATCH 446/472] fixes bitfieldInsert, account for sign in bitfieldExtract --- .../nbl/builtin/hlsl/glsl_compat/core.hlsl | 30 ++++++++++++++----- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/include/nbl/builtin/hlsl/glsl_compat/core.hlsl b/include/nbl/builtin/hlsl/glsl_compat/core.hlsl index 6d971abca3..b112762568 100644 --- a/include/nbl/builtin/hlsl/glsl_compat/core.hlsl +++ b/include/nbl/builtin/hlsl/glsl_compat/core.hlsl @@ -255,26 +255,34 @@ struct bitfieldInsert -NBL_PARTIAL_REQ_TOP(concepts::Integral::scalar_type> && size_of_v::scalar_type> == 2) -struct bitfieldExtract::scalar_type> && size_of_v::scalar_type> == 2) > +NBL_PARTIAL_REQ_TOP(concepts::SignedIntegral::scalar_type> && size_of_v::scalar_type> == 2) +struct bitfieldExtract::scalar_type> && size_of_v::scalar_type> == 2) > { static T __call( T val, uint32_t offsetBits, uint32_t numBits ) { - return (val >> T(offsetBits)) & T(T(1u) << T(numBits) - T(1u)); + const T ret = (val >> T(offsetBits)) & T((T(1u) << numBits) - T(1u)); + if (ret & (T(1u) << (numBits-1u))) + ret |= T(~0ull) << numBits; + return ret; } }; -} template -T bitfieldExtract( T val, uint32_t offsetBits, uint32_t numBits ) +NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegral::scalar_type> && 
size_of_v::scalar_type> == 2) +struct bitfieldExtract::scalar_type> && size_of_v::scalar_type> == 2) > { - return impl::bitfieldExtract::__call(val, offsetBits, numBits); + static T __call( T val, uint32_t offsetBits, uint32_t numBits ) + { + return (val >> T(offsetBits)) & T((T(1u) << numBits) - T(1u)); + } +}; } template @@ -283,6 +291,12 @@ T bitfieldInsert(T base, T insert, uint32_t offset, uint32_t bits) return impl::bitfieldInsert::__call(base, insert, offset, bits); } +template +T bitfieldExtract( T val, uint32_t offsetBits, uint32_t numBits ) +{ + return impl::bitfieldExtract::__call(val, offsetBits, numBits); +} + namespace impl { template From e1de22554fd884f4800d8766fb1759ac058aa2bf Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 29 Jan 2026 15:36:49 +0700 Subject: [PATCH 447/472] minor fixes to uint16_t2 dim 4 --- include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index b1af365c86..d3313d2875 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -185,7 +185,7 @@ struct QuantizedSequence= 0 && idx < Dim); - data = glsl::bitfieldInsert(data, value >> DiscardBits, BitsPerComponent * idx, BitsPerComponent); + data = glsl::bitfieldInsert(data, scalar_type(value >> DiscardBits), BitsPerComponent * idx, BitsPerComponent); } template @@ -346,7 +346,7 @@ struct QuantizedSequence && Di static this_t create(const vector value) { this_t seq; - seq.data = hlsl::promote(0u); + seq.data = store_type(0u,0u); NBL_UNROLL for (uint16_t i = 0; i < Dimension; i++) seq.set(i, value[i]); return seq; @@ -361,7 +361,7 @@ struct QuantizedSequence && Di } else // z w { - return glsl::bitfieldExtract(data[1], BitsPerComponent * (idx & uint16_t(1u)), BitsPerComponent); + return 
glsl::bitfieldExtract(data[1], BitsPerComponent * (idx - uint16_t(2u)), BitsPerComponent); } } @@ -375,7 +375,7 @@ struct QuantizedSequence && Di } else // z w { - data[1] = glsl::bitfieldInsert(data[1], trunc_val, BitsPerComponent * (idx & uint16_t(1u)), BitsPerComponent); + data[1] = glsl::bitfieldInsert(data[1], trunc_val, BitsPerComponent * (idx - uint16_t(2u)), BitsPerComponent); } } From 2b08a15064de806b024a67ecb3d743a38a809ee2 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 29 Jan 2026 09:43:59 +0100 Subject: [PATCH 448/472] refactor fri ext, lib + archive based with separate precompiled .spv, update examples_tests submodule --- examples_tests | 2 +- .../ext/FullScreenTriangle/default.vert.hlsl | 6 +- .../FullScreenTriangle/FullScreenTriangle.h | 93 ++---------- src/nbl/ext/CMakeLists.txt | 12 +- src/nbl/ext/DebugDraw/CMakeLists.txt | 4 +- .../CFullScreenTriangle.cpp | 134 ++++++++++++++++++ src/nbl/ext/FullScreenTriangle/CMakeLists.txt | 59 ++++++++ 7 files changed, 215 insertions(+), 95 deletions(-) create mode 100644 src/nbl/ext/FullScreenTriangle/CFullScreenTriangle.cpp create mode 100644 src/nbl/ext/FullScreenTriangle/CMakeLists.txt diff --git a/examples_tests b/examples_tests index 5a6727dfb7..fd49d0a66f 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 5a6727dfb7776ec51725e4bede1c4771c9c038f2 +Subproject commit fd49d0a66fc43c6c1e282b1fd3a8943e2c584af4 diff --git a/include/nbl/builtin/hlsl/ext/FullScreenTriangle/default.vert.hlsl b/include/nbl/builtin/hlsl/ext/FullScreenTriangle/default.vert.hlsl index e55e4ad096..abf65296a6 100644 --- a/include/nbl/builtin/hlsl/ext/FullScreenTriangle/default.vert.hlsl +++ b/include/nbl/builtin/hlsl/ext/FullScreenTriangle/default.vert.hlsl @@ -21,12 +21,8 @@ const static float32_t2 tc[3] = { [[vk::constant_id(0)]] const uint32_t SwapchainTransform = 0; -#ifndef NBL_EXT_FULLSCREEN_TRIANGLE_VS_ENTRYPOINT -#define NBL_EXT_FULLSCREEN_TRIANGLE_VS_ENTRYPOINT main -#endif - 
[shader("vertex")] -SVertexAttributes NBL_EXT_FULLSCREEN_TRIANGLE_VS_ENTRYPOINT() +SVertexAttributes __nbl__hlsl__ext__FullScreenTriangle__vertex_main() { using namespace ::nbl::hlsl::glsl; diff --git a/include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h b/include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h index 465ffcedec..1779c6126e 100644 --- a/include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h +++ b/include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h @@ -10,104 +10,27 @@ namespace nbl::ext::FullScreenTriangle { struct ProtoPipeline final { - inline core::smart_refctd_ptr createDefaultVertexShader(asset::IAssetManager* assMan, video::ILogicalDevice* device, system::ILogger* logger=nullptr) - { - if (!assMan || !device) - return nullptr; - - using namespace ::nbl::asset; - IAssetLoader::SAssetLoadParams lp = {}; - lp.logger = logger; - lp.workingDirectory = ""; // virtual root - auto assetBundle = assMan->getAsset("nbl/builtin/hlsl/ext/FullScreenTriangle/default.vert.hlsl",lp); - const auto assets = assetBundle.getContents(); - if (assets.empty()) - return nullptr; - - auto source = IAsset::castDown(assets[0]); - if (!source) - return nullptr; - - return device->compileShader({ .source = source.get(), .stage = hlsl::ESS_VERTEX }); - } - public: - inline ProtoPipeline(asset::IAssetManager* assMan, video::ILogicalDevice* device, system::ILogger* logger=nullptr) - { - m_vxShader = createDefaultVertexShader(assMan,device,logger); - m_vxEntryPoint = "main"; - } + static core::smart_refctd_ptr createDefaultVertexShader(asset::IAssetManager* assMan, video::ILogicalDevice* device, system::ILogger* logger=nullptr); + static core::smart_refctd_ptr mount(core::smart_refctd_ptr logger, system::ISystem* system, video::ILogicalDevice* device, const std::string_view archiveAlias = "nbl/ext/FullScreenTriangle"); - inline ProtoPipeline(core::smart_refctd_ptr vertexShader, const char* vertexEntryPoint="main") : m_vxShader(std::move(vertexShader)) - { - 
m_vxEntryPoint = vertexEntryPoint ? vertexEntryPoint : "main"; - } + ProtoPipeline(asset::IAssetManager* assMan, video::ILogicalDevice* device, system::ILogger* logger=nullptr); + ProtoPipeline(core::smart_refctd_ptr vertexShader, const char* vertexEntryPoint="__nbl__hlsl__ext__FullScreenTriangle__vertex_main"); - inline operator bool() const {return m_vxShader.get();} + operator bool() const; - inline core::smart_refctd_ptr createPipeline( + core::smart_refctd_ptr createPipeline( const video::IGPUPipelineBase::SShaderSpecInfo& fragShader, video::IGPUPipelineLayout* layout, const video::IGPURenderpass* renderpass, const uint32_t subpassIx=0, asset::SBlendParams blendParams = {}, const hlsl::SurfaceTransform::FLAG_BITS swapchainTransform=hlsl::SurfaceTransform::FLAG_BITS::IDENTITY_BIT - ) - { - if (!renderpass || !bool(*this) || hlsl::bitCount(swapchainTransform)!=1) - return nullptr; - - using namespace ::nbl::video; - auto device = const_cast(renderpass->getOriginDevice()); - - core::smart_refctd_ptr m_retval; - { - const auto orientationAsUint32 = static_cast(swapchainTransform); - - IGPUPipelineBase::SShaderEntryMap specConstants; - specConstants[0] = std::span{ reinterpret_cast(&orientationAsUint32), sizeof(orientationAsUint32)}; - - IGPUGraphicsPipeline::SCreationParams params[1]; - params[0].layout = layout; - params[0].vertexShader = { .shader = m_vxShader.get(), .entryPoint = m_vxEntryPoint, .entries = &specConstants }; - params[0].fragmentShader = fragShader; - params[0].cached = { - .vertexInput = {}, // The Full Screen Triangle doesn't use any HW vertex input state - .primitiveAssembly = {}, - .rasterization = DefaultRasterParams, - .blend = blendParams, - .subpassIx = subpassIx - }; - params[0].renderpass = renderpass; - - if (!device->createGraphicsPipelines(nullptr,params,&m_retval)) - return nullptr; - } - return m_retval; - } - + ); core::smart_refctd_ptr m_vxShader; - std::string m_vxEntryPoint = "main"; - // The default is correct for us - 
constexpr static inline asset::SRasterizationParams DefaultRasterParams = { - .faceCullingMode = asset::EFCM_NONE, - .depthWriteEnable = false, - .depthCompareOp = asset::ECO_ALWAYS - }; }; - -/* - Helper function for drawing full screen triangle. - It should be called between command buffer render pass - records. -*/ -static inline bool recordDrawCall(video::IGPUCommandBuffer* commandBuffer) -{ - constexpr auto VERTEX_COUNT = 3; - constexpr auto INSTANCE_COUNT = 1; - return commandBuffer->draw(VERTEX_COUNT,INSTANCE_COUNT,0,0); -} +bool recordDrawCall(video::IGPUCommandBuffer* commandBuffer); } #endif diff --git a/src/nbl/ext/CMakeLists.txt b/src/nbl/ext/CMakeLists.txt index af46b29aab..b4c6cf2b64 100644 --- a/src/nbl/ext/CMakeLists.txt +++ b/src/nbl/ext/CMakeLists.txt @@ -66,6 +66,16 @@ if(NBL_BUILD_DEBUG_DRAW) ) endif() +add_subdirectory(FullScreenTriangle) +set(NBL_EXT_FULL_SCREEN_TRIANGLE_INCLUDE_DIRS + ${NBL_EXT_FULL_SCREEN_TRIANGLE_INCLUDE_DIRS} + PARENT_SCOPE +) +set(NBL_EXT_FULL_SCREEN_TRIANGLE_LIB + ${NBL_EXT_FULL_SCREEN_TRIANGLE_LIB} + PARENT_SCOPE +) + propagate_changed_variables_to_parent_scope() -NBL_ADJUST_FOLDERS(ext) \ No newline at end of file +NBL_ADJUST_FOLDERS(ext) diff --git a/src/nbl/ext/DebugDraw/CMakeLists.txt b/src/nbl/ext/DebugDraw/CMakeLists.txt index dfa4a7624f..812abbce1b 100644 --- a/src/nbl/ext/DebugDraw/CMakeLists.txt +++ b/src/nbl/ext/DebugDraw/CMakeLists.txt @@ -37,7 +37,6 @@ set(JSON [=[ "INPUT": "${NBL_DEBUG_DRAW_HLSL_MOUNT_POINT}/draw_aabb.unified.hlsl", "KEY": "draw_aabb", } - ] ]=]) string(CONFIGURE "${JSON}" JSON) @@ -69,5 +68,4 @@ NBL_CREATE_RESOURCE_ARCHIVE( BUILTINS ${KEYS} ) - -add_library(Nabla::ext::DebugDraw ALIAS ${LIB_NAME}) +add_library(Nabla::ext::DebugDraw ALIAS ${LIB_NAME}) \ No newline at end of file diff --git a/src/nbl/ext/FullScreenTriangle/CFullScreenTriangle.cpp b/src/nbl/ext/FullScreenTriangle/CFullScreenTriangle.cpp new file mode 100644 index 0000000000..5c58af3388 --- /dev/null +++ 
b/src/nbl/ext/FullScreenTriangle/CFullScreenTriangle.cpp @@ -0,0 +1,134 @@ +// Copyright (C) 2018-2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" + +#ifdef NBL_EMBED_BUILTIN_RESOURCES +#include "nbl/ext/FullScreenTriangle/builtin/build/CArchive.h" +#endif + +#include "nbl/ext/FullScreenTriangle/builtin/build/spirv/keys.hpp" + +using namespace nbl; +using namespace core; +using namespace video; +using namespace system; +using namespace asset; + +namespace nbl::ext::FullScreenTriangle +{ + +constexpr std::string_view NBL_EXT_MOUNT_ENTRY = "nbl/ext/FullScreenTriangle"; +constexpr std::string_view VertexEntryPoint = "__nbl__hlsl__ext__FullScreenTriangle__vertex_main"; + +smart_refctd_ptr ProtoPipeline::mount(smart_refctd_ptr logger, ISystem* system, ILogicalDevice* device, const std::string_view archiveAlias) +{ + assert(system); + if (!system) + return nullptr; + + const auto composed = path(archiveAlias.data()) / builtin::build::get_spirv_key<"full_screen_triangle_vertex">(device); + if (system->exists(composed, {})) + return nullptr; + +#ifdef NBL_EMBED_BUILTIN_RESOURCES + auto archive = make_smart_refctd_ptr(smart_refctd_ptr(logger)); +#else + auto archive = make_smart_refctd_ptr(std::string_view(NBL_FULL_SCREEN_TRIANGLE_HLSL_MOUNT_POINT), smart_refctd_ptr(logger), system); +#endif + + system->mount(smart_refctd_ptr(archive), archiveAlias.data()); + return smart_refctd_ptr(archive); +} + +smart_refctd_ptr ProtoPipeline::createDefaultVertexShader(IAssetManager* assMan, ILogicalDevice* device, ILogger* logger) +{ + if (!assMan || !device) + return nullptr; + + auto system = smart_refctd_ptr(assMan->getSystem()); + if (system) + ProtoPipeline::mount(smart_refctd_ptr(logger), system.get(), device, NBL_EXT_MOUNT_ENTRY); + + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = logger; + 
lp.workingDirectory = NBL_EXT_MOUNT_ENTRY.data(); + + const auto key = builtin::build::get_spirv_key<"full_screen_triangle_vertex">(device); + auto bundle = assMan->getAsset(key.c_str(), lp); + const auto assets = bundle.getContents(); + if (assets.empty()) + return nullptr; + + auto source = IAsset::castDown(assets[0]); + if (!source) + return nullptr; + + return device->compileShader({.source = source.get(), .stage = hlsl::ESS_VERTEX}); +} + +ProtoPipeline::ProtoPipeline(IAssetManager* assMan, ILogicalDevice* device, ILogger* logger) +{ + m_vxShader = createDefaultVertexShader(assMan, device, logger); +} + +ProtoPipeline::ProtoPipeline(smart_refctd_ptr vertexShader, const char*) : m_vxShader(std::move(vertexShader)) {} + +ProtoPipeline::operator bool() const +{ + return m_vxShader.get(); +} + +smart_refctd_ptr ProtoPipeline::createPipeline( + const IGPUPipelineBase::SShaderSpecInfo& fragShader, + IGPUPipelineLayout* layout, + const IGPURenderpass* renderpass, + const uint32_t subpassIx, + SBlendParams blendParams, + const hlsl::SurfaceTransform::FLAG_BITS swapchainTransform) +{ + if (!renderpass || !bool(*this) || hlsl::bitCount(swapchainTransform) != 1) + return nullptr; + + auto device = const_cast(renderpass->getOriginDevice()); + + smart_refctd_ptr m_retval; + { + constexpr SRasterizationParams defaultRasterParams = { + .faceCullingMode = EFCM_NONE, + .depthWriteEnable = false, + .depthCompareOp = ECO_ALWAYS + }; + const auto orientationAsUint32 = static_cast(swapchainTransform); + + IGPUPipelineBase::SShaderEntryMap specConstants; + specConstants[0] = std::span{ reinterpret_cast(&orientationAsUint32), sizeof(orientationAsUint32) }; + + IGPUGraphicsPipeline::SCreationParams params[1]; + params[0].layout = layout; + params[0].vertexShader = { .shader = m_vxShader.get(), .entryPoint = VertexEntryPoint.data(), .entries = &specConstants }; + params[0].fragmentShader = fragShader; + params[0].cached = { + .vertexInput = {}, // The Full Screen Triangle doesn't use 
any HW vertex input state + .primitiveAssembly = {}, + .rasterization = defaultRasterParams, + .blend = blendParams, + .subpassIx = subpassIx + }; + params[0].renderpass = renderpass; + + if (!device->createGraphicsPipelines(nullptr, params, &m_retval)) + return nullptr; + } + return m_retval; +} + +bool recordDrawCall(IGPUCommandBuffer* commandBuffer) +{ + constexpr auto VERTEX_COUNT = 3; + constexpr auto INSTANCE_COUNT = 1; + return commandBuffer->draw(VERTEX_COUNT, INSTANCE_COUNT, 0, 0); +} + +} diff --git a/src/nbl/ext/FullScreenTriangle/CMakeLists.txt b/src/nbl/ext/FullScreenTriangle/CMakeLists.txt new file mode 100644 index 0000000000..6d30fd81bc --- /dev/null +++ b/src/nbl/ext/FullScreenTriangle/CMakeLists.txt @@ -0,0 +1,59 @@ +include(common) + +set(HEADERS + ${NBL_EXT_INTERNAL_INCLUDE_DIR}/nbl/ext/FullScreenTriangle/FullScreenTriangle.h +) + +set(SRCS + ${CMAKE_CURRENT_SOURCE_DIR}/CFullScreenTriangle.cpp +) + +nbl_create_ext_library_project(FULL_SCREEN_TRIANGLE + "${HEADERS}" + "${SRCS}" + "" + "" + "" +) + +get_filename_component(DIR "${NBL_ROOT_PATH}/include/nbl/builtin/hlsl/ext/FullScreenTriangle" ABSOLUTE) +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") + +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "${DIR}/default.vert.hlsl", + "KEY": "full_screen_triangle_vertex", + } +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -T vs_${SM} + -E __nbl__hlsl__ext__FullScreenTriangle__vertex_main +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${LIB_NAME}SPIRV + LINK_TO ${LIB_NAME} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_FULL_SCREEN_TRIANGLE_HLSL_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/ext/FullScreenTriangle/builtin/build/spirv/keys.hpp + NAMESPACE nbl::ext::FullScreenTriangle::builtin::build + INPUTS ${JSON} + GLOB_DIR ${DIR} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::ext::FullScreenTriangle::builtin::build + TARGET ${LIB_NAME}_builtinsBuild + LINK_TO ${LIB_NAME} + 
BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) + +add_library(Nabla::ext::FullScreenTriangle ALIAS ${LIB_NAME}) \ No newline at end of file From 25b4abacab4d9e3c94470697f944870c1d623d2e Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 29 Jan 2026 09:59:20 +0100 Subject: [PATCH 449/472] remove old trash --- include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h | 1 - src/nbl/ext/FullScreenTriangle/CFullScreenTriangle.cpp | 2 -- 2 files changed, 3 deletions(-) diff --git a/include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h b/include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h index 1779c6126e..597ebdbd4e 100644 --- a/include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h +++ b/include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h @@ -15,7 +15,6 @@ struct ProtoPipeline final static core::smart_refctd_ptr mount(core::smart_refctd_ptr logger, system::ISystem* system, video::ILogicalDevice* device, const std::string_view archiveAlias = "nbl/ext/FullScreenTriangle"); ProtoPipeline(asset::IAssetManager* assMan, video::ILogicalDevice* device, system::ILogger* logger=nullptr); - ProtoPipeline(core::smart_refctd_ptr vertexShader, const char* vertexEntryPoint="__nbl__hlsl__ext__FullScreenTriangle__vertex_main"); operator bool() const; diff --git a/src/nbl/ext/FullScreenTriangle/CFullScreenTriangle.cpp b/src/nbl/ext/FullScreenTriangle/CFullScreenTriangle.cpp index 5c58af3388..fd5411c2ab 100644 --- a/src/nbl/ext/FullScreenTriangle/CFullScreenTriangle.cpp +++ b/src/nbl/ext/FullScreenTriangle/CFullScreenTriangle.cpp @@ -73,8 +73,6 @@ ProtoPipeline::ProtoPipeline(IAssetManager* assMan, ILogicalDevice* device, ILog m_vxShader = createDefaultVertexShader(assMan, device, logger); } -ProtoPipeline::ProtoPipeline(smart_refctd_ptr vertexShader, const char*) : m_vxShader(std::move(vertexShader)) {} - ProtoPipeline::operator bool() const { return m_vxShader.get(); From 00c5379bc9a606fdf27776bf989f1fd47bfbe9b3 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 29 
Jan 2026 16:55:34 +0700 Subject: [PATCH 450/472] enforce fullwidth if dim == storage dim --- examples_tests | 2 +- .../nbl/builtin/hlsl/sampling/quantized_sequence.hlsl | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/examples_tests b/examples_tests index 533a90e027..53667051b8 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 533a90e027fc263b6091fbc00eecd29a37f1d254 +Subproject commit 53667051b8dbc53ab8273df1b716b7faa9d97b54 diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index d3313d2875..64573ac85f 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -58,7 +58,10 @@ struct encode_helper uniform_storage_type asuint; NBL_UNROLL for(uint16_t i = 0; i < Dim; i++) asuint[i] = uniform_storage_scalar_type(unormvec[i] * UNormMultiplier); - return sequence_type::create(asuint); + NBL_IF_CONSTEXPR(Dim==1) + return sequence_type::create(asuint[0]); + else + return sequence_type::create(asuint); } }; @@ -134,7 +137,7 @@ struct QuantizedSequence static this_t encode(const vector value) { - return impl::encode_helper::__call(value); + return impl::encode_helper::__call(value); } template @@ -235,7 +238,7 @@ struct QuantizedSequence static this_t encode(const vector value) { - return impl::encode_helper::__call(value); + return impl::encode_helper::__call(value); } template @@ -346,7 +349,7 @@ struct QuantizedSequence && Di static this_t create(const vector value) { this_t seq; - seq.data = store_type(0u,0u); + seq.data = hlsl::promote(0u); NBL_UNROLL for (uint16_t i = 0; i < Dimension; i++) seq.set(i, value[i]); return seq; From ab5e771db460276b8f3accf7d869e91279a88f6d Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 29 Jan 2026 16:59:02 +0700 Subject: [PATCH 451/472] latest example --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/examples_tests b/examples_tests index 53667051b8..8485356fc4 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 53667051b8dbc53ab8273df1b716b7faa9d97b54 +Subproject commit 8485356fc4263232746b517d4eca602d56a16816 From a5f431d39c90a50229529be3561052a0fa214485 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 29 Jan 2026 17:50:49 +0700 Subject: [PATCH 452/472] latest example 2 --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 8485356fc4..40bc21ae4a 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 8485356fc4263232746b517d4eca602d56a16816 +Subproject commit 40bc21ae4aae26cf467910bf696d1e195ec8fc77 From a51e2700e427b6f754ae9beea0caea271eb6bc3a Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 29 Jan 2026 12:57:34 +0100 Subject: [PATCH 453/472] update examples_tests --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index fd49d0a66f..f9e3554fcc 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit fd49d0a66fc43c6c1e282b1fd3a8943e2c584af4 +Subproject commit f9e3554fcc0e105f7e5fa742ed91816125455cd6 From 20aa5ab34a5203b532c4a12a6f43d631f265791d Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 30 Jan 2026 10:43:09 +0700 Subject: [PATCH 454/472] fixes to bitfield insert/extract requires --- examples_tests | 2 +- include/nbl/builtin/hlsl/glsl_compat/core.hlsl | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/examples_tests b/examples_tests index 40bc21ae4a..63f0079781 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 40bc21ae4aae26cf467910bf696d1e195ec8fc77 +Subproject commit 63f0079781fe189c672297343b30308333646294 diff --git a/include/nbl/builtin/hlsl/glsl_compat/core.hlsl b/include/nbl/builtin/hlsl/glsl_compat/core.hlsl index b112762568..235cdde8e4 
100644 --- a/include/nbl/builtin/hlsl/glsl_compat/core.hlsl +++ b/include/nbl/builtin/hlsl/glsl_compat/core.hlsl @@ -29,7 +29,7 @@ struct bitfieldInsert; template NBL_PARTIAL_REQ_TOP(concepts::Integral::scalar_type> && size_of_v::scalar_type> >= 4) -struct bitfieldInsert::scalar_type> && size_of_v::scalar_type> == 4) > +struct bitfieldInsert::scalar_type> && size_of_v::scalar_type> >= 4) > { static T __call( T base, T insert, uint32_t offset, uint32_t bits ) { @@ -42,7 +42,7 @@ struct bitfieldExtract; template NBL_PARTIAL_REQ_TOP(concepts::Integral::scalar_type> && size_of_v::scalar_type> >= 4) -struct bitfieldExtract::scalar_type> && size_of_v::scalar_type> == 4) > +struct bitfieldExtract::scalar_type> && size_of_v::scalar_type> >= 4) > { static T __call( T val, uint32_t offsetBits, uint32_t numBits ) { @@ -206,7 +206,7 @@ struct bitfieldInsert; template NBL_PARTIAL_REQ_TOP(concepts::Integral::scalar_type> && size_of_v::scalar_type> >= 4) -struct bitfieldInsert::scalar_type> && size_of_v::scalar_type> == 4) > +struct bitfieldInsert::scalar_type> && size_of_v::scalar_type> >= 4) > { static T __call( T base, T insert, uint32_t offset, uint32_t bits ) { @@ -219,7 +219,7 @@ struct bitfieldExtract; template NBL_PARTIAL_REQ_TOP(concepts::SignedIntegral::scalar_type> && size_of_v::scalar_type> >= 4) -struct bitfieldExtract::scalar_type> && size_of_v::scalar_type> == 4) > +struct bitfieldExtract::scalar_type> && size_of_v::scalar_type> >= 4) > { static T __call( T val, uint32_t offsetBits, uint32_t numBits ) { @@ -228,8 +228,8 @@ struct bitfieldExtract -NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegral::scalar_type> && size_of_v::scalar_type> == 4) -struct bitfieldExtract::scalar_type> && size_of_v::scalar_type> == 4) > +NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegral::scalar_type> && size_of_v::scalar_type> >= 4) +struct bitfieldExtract::scalar_type> && size_of_v::scalar_type> >= 4) > { static T __call( T val, uint32_t offsetBits, uint32_t numBits ) { From 
36965e1bec51690efd3484bd4ded523cbbffaef5 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 30 Jan 2026 13:55:50 +0700 Subject: [PATCH 455/472] latest example --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 63f0079781..301fb402e6 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 63f0079781fe189c672297343b30308333646294 +Subproject commit 301fb402e6d0d3c204b1da67e920283d6f9abca5 From cfab6f35f2e4816b288ff684a2e4e5bee5584bae Mon Sep 17 00:00:00 2001 From: devsh Date: Fri, 30 Jan 2026 22:08:30 +0100 Subject: [PATCH 456/472] start implementing ICPUScene ! --- include/nbl/asset/ICPUGeometryCollection.h | 12 +- include/nbl/asset/ICPUScene.h | 199 ++- include/nbl/asset/IGeometryCollection.h | 4 +- include/nbl/asset/IMorphTargets.h | 20 +- include/nbl/asset/interchange/IAssetLoader.h | 16 +- .../nbl/asset/material_compiler3/CTrueIR.h | 2 +- include/nbl/asset/metadata/IAssetMetadata.h | 9 +- .../metadata/IGeometryCollectionMetadata.h | 30 + include/nbl/core/containers/CMemoryPool.h | 7 +- .../nbl/ext/MitsubaLoader/CMitsubaLoader.h | 7 +- .../nbl/ext/MitsubaLoader/CMitsubaMetadata.h | 98 +- include/nbl/ext/MitsubaLoader/ParserUtil.h | 50 +- include/nbl/ext/MitsubaLoader/SContext.h | 58 +- src/nbl/ext/MitsubaLoader/CElementEmitter.cpp | 2 - src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp | 1279 ++++++++--------- src/nbl/ext/MitsubaLoader/ParserUtil.cpp | 44 +- src/nbl/video/CVulkanCommandPool.h | 1 - 17 files changed, 964 insertions(+), 874 deletions(-) create mode 100644 include/nbl/asset/metadata/IGeometryCollectionMetadata.h diff --git a/include/nbl/asset/ICPUGeometryCollection.h b/include/nbl/asset/ICPUGeometryCollection.h index 6202b4de12..d231f1df00 100644 --- a/include/nbl/asset/ICPUGeometryCollection.h +++ b/include/nbl/asset/ICPUGeometryCollection.h @@ -24,11 +24,15 @@ class NBL_API2 ICPUGeometryCollection : public IAsset, public IGeometryCollectio inline E_TYPE 
getAssetType() const override {return AssetType;} // - inline bool valid() const //override + inline bool valid() const override { for (const auto& ref : m_geometries) - if (!ref.geometry->valid()) - return false; + { + if (!ref.operator bool() || !ref.geometry->valid()) + return false; + if (ref.jointRedirectView.src && ref.jointRedirectView.composed.getRange>().maxVx[0]>=getJointCount()) + return false; + } return true; } @@ -61,6 +65,8 @@ class NBL_API2 ICPUGeometryCollection : public IAsset, public IGeometryCollectio return false; } + // + inline const core::vector& getGeometries() const {return base_t::getGeometries();} // inline core::vector* getGeometries() { diff --git a/include/nbl/asset/ICPUScene.h b/include/nbl/asset/ICPUScene.h index 4ea7a485b4..5b5c8dd4cd 100644 --- a/include/nbl/asset/ICPUScene.h +++ b/include/nbl/asset/ICPUScene.h @@ -5,49 +5,224 @@ #define _NBL_ASSET_I_CPU_SCENE_H_INCLUDED_ +#include "nbl/core/containers/CMemoryPool.h" + #include "nbl/asset/IScene.h" -// TODO: change to true IR later -#include "nbl/asset/material_compiler3/CFrontendIR.h" +#include "nbl/asset/material_compiler3/CTrueIR.h" namespace nbl::asset { // -class NBL_API2 ICPUScene : public IAsset, public IScene +class NBL_API2 ICPUScene final : public IAsset, public IScene { using base_t = IScene; + using material_table_allocator_t = core::GeneralpurposeAddressAllocatorST; +// using material_table_t = core::CMemoryPool<,core::allocator,false>; public: - inline ICPUScene() = default; + using material_pool_t = material_compiler3::CTrueIR; + // + static inline core::smart_refctd_ptr create(core::smart_refctd_ptr&& ir, const uint8_t maxMorphTargetGeometryCountLog2=16) + { + return core::smart_refctd_ptr(new ICPUScene(std::move(ir),maxMorphTargetGeometryCountLog2),core::dont_grab); + } constexpr static inline auto AssetType = ET_SCENE; inline E_TYPE getAssetType() const override { return AssetType; } inline bool valid() const override { + if (!m_instances) + return false; + auto 
materialTableOffsetIt = m_instances.materials.begin(); + for (const auto& targets : m_instances.morphTargets) + { + const auto materialTableOffset = *(materialTableOffsetIt++); + // TODO: check if `materialTableOffset` can be contained in `materialTable` + if (!targets || targets->valid()) + return false; + const auto geoCount = targets->getGeometryExclusiveCount({}); + // TODO: check if `materialTableOffset+geoCount` can be contained in `materialTable` + // TODO: check every material is either null or belongs in `m_materialPool` + } return true; } inline core::smart_refctd_ptr clone(uint32_t _depth=~0u) const { const auto nextDepth = _depth ? (_depth-1):0; - auto retval = core::smart_refctd_ptr(); + // TODO: copy the material_table state/contents! + // the True IR isn't an asset (yet), but it probably should be? + auto retval = create(core::smart_refctd_ptr(m_materialPool),m_maxMorphTargetGeometryCountLog2); + if (nextDepth) + { + retval->m_instances.morphTargets.resize(retval->m_instances.size()); + for (auto& targets : m_instances.morphTargets) + retval->m_instances.morphTargets.push_back(core::move_and_static_cast(targets->clone(nextDepth))); + retval->m_envLightTexs.reserve(m_envLightTexs.size()); + for (const auto& tex : m_envLightTexs) + retval->m_envLightTexs.push_back(core::move_and_static_cast(tex->clone(nextDepth))); + } + else + { + retval->m_instances = m_instances; + retval->m_envLightTexs = m_envLightTexs; + } + retval->m_instances.materials = m_instances.materials; + retval->m_instances.initialTransforms = m_instances.initialTransforms; + retval->m_envLightTypes = m_envLightTypes; return retval; } + // TODO: change to CRootNode + using material_t = material_pool_t::TypedHandle; + inline material_compiler3::CTrueIR* getMaterialPool() {return m_materialPool.get();} + inline const material_compiler3::CTrueIR* getMaterialPool() const {return m_materialPool.get();} + + // + using material_table_offset_t = uint32_t; + material_table_offset_t 
allocateMaterialTable(const ICPUMorphTargets* targets) + { + if (!targets) + return material_table_allocator_t::invalid_address; + return allocateMaterialTable(targets->getGeometryExclusiveCount({})); + } + material_table_offset_t allocateMaterialTable(const uint32_t count) + { + // TODO: implement + return material_table_allocator_t::invalid_address; + } + void deallocateMaterialTable(const material_table_offset_t offset, const ICPUMorphTargets* targets) + { + return deallocateMaterialTable(offset,targets->getGeometryExclusiveCount({})); + } + void deallocateMaterialTable(const material_table_offset_t offset, const uint32_t count) + { + // TODO: implement + } + + // TODO: get material table pointer + + // TODO: wrap up in some ECS storage class + struct SInstanceStorage final + { + public: + inline SInstanceStorage(const size_t size=1) : morphTargets(size), materials(size), initialTransforms(size) {} + + inline void clearInitialTransforms() {initialTransforms.clear();} + + inline operator bool() const + { + if (morphTargets.size()!=materials.size()) + return false; + if (initialTransforms.empty()) + return true; + return morphTargets.size()==initialTransforms.size(); + } + + inline size_t resize(const size_t newSize) + { + morphTargets.resize(newSize); + materials.resize(newSize); + if (!initialTransforms.empty()) + initialTransforms.resize(newSize); + } + + inline void erase(const size_t first, const size_t last) + { + morphTargets.erase(morphTargets.begin()+first,morphTargets.begin()+last); + materials.erase(materials.begin()+first, materials.begin()+last); + initialTransforms.erase(initialTransforms.begin()+first,initialTransforms.begin()+last); + } + inline void erase(const size_t ix) {return erase(ix,ix+1);} + + inline size_t size() const {return morphTargets.size();} + + private: + friend class ICPUScene; + + core::vector> morphTargets; + // One material table per morph target, + // Within each morph target, one material per geometry + core::vector 
materials; + core::vector initialTransforms; + // TODO: animations (keyframed transforms, skeleton instance) + }; + + // + inline SInstanceStorage& getInstances() {return m_instances;} + inline const SInstanceStorage& getInstances() const {return m_instances;} + + inline void setInstanceInitialTransform(const uint32_t index, const hlsl::float32_t3x4& xform) + { + if (index&& tex) + { + if (!tex) + return false; + using view_e = IImageViewBase::E_TYPE; + switch (tex->getCreationParameters().viewType) + { + case view_e::ET_2D: [[fallthrough]]; + case view_e::ET_2D_ARRAY: + m_envLightTypes.push_back(type); + break; + case view_e::ET_CUBE_MAP: [[fallthrough]]; + case view_e::ET_CUBE_MAP_ARRAY: + if (type!=EEnvLightType::Cubemap) + return false; + m_envLightTypes.push_back(EEnvLightType::Cubemap); + break; + default: + return false; + } + m_envLightTexs.push_back(std::move(tex)); + return true; + } + // + inline std::span getEnviornmentLightTypes() const {return m_envLightTypes;} + inline std::span getEnvironmentLightTextures() const {return {&m_envLightTexs.data()->get(),m_envLightTexs.size()}; } + // TODO: add an erase_if and erase with begin/end iterators + inline void clearEnvLights() + { + m_envLightTexs.clear(); + m_envLightTypes.clear(); + } + + // + hlsl::float32_t3 m_ambientLight; + protected: + inline ICPUScene(core::smart_refctd_ptr&& materialPool, const uint32_t maxMorphTargetGeometryCountLog2) : + m_materialPool(std::move(materialPool)), m_maxMorphTargetGeometryCountLog2(maxMorphTargetGeometryCountLog2) {} // inline void visitDependents_impl(std::function visit) const override { + assert(false && "Unimplemented"); // we'd probalby be going over the: morph targets, image views, ... 
} - - // suggested contents: - // - morph target list - // - material table - // - instance list (morph target, keyframed transforms, material table indexings, FUTURE: reference skeleton) - // - area light list (OBB decompositions, material table indexings) - // - envlight data + // +// TODO material_table_t m_materialTable; + core::smart_refctd_ptr m_materialPool; + // + SInstanceStorage m_instances; + // + core::vector> m_envLightTexs; + core::vector m_envLightTypes; + // + const uint8_t m_maxMorphTargetGeometryCountLog2; }; } diff --git a/include/nbl/asset/IGeometryCollection.h b/include/nbl/asset/IGeometryCollection.h index 29c145c115..8c649435f6 100644 --- a/include/nbl/asset/IGeometryCollection.h +++ b/include/nbl/asset/IGeometryCollection.h @@ -29,9 +29,9 @@ class NBL_API2 IGeometryCollection : public virtual core::IReferenceCounted return false; if (jointRedirectView.src) { - if (!jointRedirectView.isFormattedScalarInteger()) + if (!jointRedirectView.composed.isFormattedScalarInteger()) return false; - if (jointRedirectView.getElementCount()getJointCount()*2) + if (jointRedirectView.getElementCount()getJointCount()) return false; } else diff --git a/include/nbl/asset/IMorphTargets.h b/include/nbl/asset/IMorphTargets.h index 14265aa71a..6f208c6f73 100644 --- a/include/nbl/asset/IMorphTargets.h +++ b/include/nbl/asset/IMorphTargets.h @@ -19,6 +19,7 @@ class NBL_API2 IMorphTargets : public virtual core::IReferenceCounted public: struct index_t { + inline index_t() = default; explicit inline index_t(uint32_t _value) : value(_value) {} inline operator bool() const {return value!=(~0u);} @@ -30,6 +31,19 @@ class NBL_API2 IMorphTargets : public virtual core::IReferenceCounted { return static_cast(m_targets.size()); } + // + virtual inline uint32_t getGeometryExclusiveCount(index_t index) const + { + if (const auto targetCount=getTargetCount(); index.value>targetCount) + index.value = targetCount; + uint32_t retval = 0; + for (uint32_t i=0; igetGeometries().size(); 
+ } + return retval; + } template requires std::is_floating_point_v struct SInterpolants @@ -56,7 +70,11 @@ class NBL_API2 IMorphTargets : public virtual core::IReferenceCounted { inline operator bool() const { - return geoCollection && (!jointRedirectView || jointRedirectView.composed.isFormattedScalarInteger()); + if (!geoCollection) + return false; + if (!jointRedirectView) + return true; + return jointRedirectView.composed.isFormattedScalarInteger() && jointRedirectView.getElementCount()>=geoCollection->getJointCount(); } core::smart_refctd_ptr geoCollection = {}; diff --git a/include/nbl/asset/interchange/IAssetLoader.h b/include/nbl/asset/interchange/IAssetLoader.h index 3658f67026..09f842e659 100644 --- a/include/nbl/asset/interchange/IAssetLoader.h +++ b/include/nbl/asset/interchange/IAssetLoader.h @@ -87,24 +87,24 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted enum E_LOADER_PARAMETER_FLAGS : uint64_t { - ELPF_NONE = 0, //!< default value, it doesn't do anything - /*deprecated*/ELPF_RIGHT_HANDED_MESHES = 0x1, //!< specifies that a mesh will be flipped in such a way that it'll look correctly in right-handed camera system - /*deprecated*/ELPF_DONT_COMPILE_GLSL = 0x2, //!< it states that GLSL won't be compiled to SPIR-V if it is loaded or generated - ELPF_LOAD_METADATA_ONLY = 0x4 //!< it forces the loader to not load the entire scene for performance in special cases to fetch metadata. + ELPF_NONE = 0, //!< default value, it doesn't do anything +//[[deprecated]] ELPF_RIGHT_HANDED_MESHES = 0x1, //!< specifies that a mesh will be flipped in such a way that it'll look correctly in right-handed camera system +//[[deprecated]] ELPF_DONT_COMPILE_GLSL = 0x2, //!< it states that GLSL won't be compiled to SPIR-V if it is loaded or generated + ELPF_LOAD_METADATA_ONLY = 0x4 //!< it forces the loader to not load the entire scene for performance in special cases to fetch metadata. 
}; struct SAssetLoadParams { - SAssetLoadParams(size_t _decryptionKeyLen = 0u, const uint8_t* _decryptionKey = nullptr, - E_CACHING_FLAGS _cacheFlags = ECF_CACHE_EVERYTHING,const E_LOADER_PARAMETER_FLAGS& _loaderFlags = ELPF_NONE, - system::logger_opt_ptr _logger = nullptr, const std::filesystem::path& cwd = "") : + inline SAssetLoadParams(const size_t _decryptionKeyLen = 0u, const uint8_t* const _decryptionKey = nullptr, + const E_CACHING_FLAGS _cacheFlags = ECF_CACHE_EVERYTHING,const E_LOADER_PARAMETER_FLAGS _loaderFlags = ELPF_NONE, + const system::logger_opt_ptr _logger = nullptr, const std::filesystem::path& cwd = "") : decryptionKeyLen(_decryptionKeyLen), decryptionKey(_decryptionKey), cacheFlags(_cacheFlags), loaderFlags(_loaderFlags), logger(std::move(_logger)), workingDirectory(cwd) { } - SAssetLoadParams(const SAssetLoadParams& rhs, bool _reload = false) : + inline SAssetLoadParams(const SAssetLoadParams& rhs, const bool _reload=false) : decryptionKeyLen(rhs.decryptionKeyLen), decryptionKey(rhs.decryptionKey), cacheFlags(rhs.cacheFlags), diff --git a/include/nbl/asset/material_compiler3/CTrueIR.h b/include/nbl/asset/material_compiler3/CTrueIR.h index a6937f00ab..3d5c89ff58 100644 --- a/include/nbl/asset/material_compiler3/CTrueIR.h +++ b/include/nbl/asset/material_compiler3/CTrueIR.h @@ -17,7 +17,7 @@ class CTrueIR : public CNodePool { public: // constructor - inline core::smart_refctd_ptr create(const uint8_t chunkSizeLog2=19, const uint8_t maxNodeAlignLog2=4, refctd_pmr_t&& _pmr={}) + static inline core::smart_refctd_ptr create(const uint8_t chunkSizeLog2=19, const uint8_t maxNodeAlignLog2=4, refctd_pmr_t&& _pmr={}) { if (chunkSizeLog2<14 || maxNodeAlignLog2<4) return nullptr; diff --git a/include/nbl/asset/metadata/IAssetMetadata.h b/include/nbl/asset/metadata/IAssetMetadata.h index 584fe474f8..d574091dd2 100644 --- a/include/nbl/asset/metadata/IAssetMetadata.h +++ b/include/nbl/asset/metadata/IAssetMetadata.h @@ -10,6 +10,7 @@ #include 
"nbl/asset/metadata/IImageMetadata.h" #include "nbl/asset/metadata/IImageViewMetadata.h" #include "nbl/asset/metadata/IPolygonGeometryMetadata.h" +#include "nbl/asset/metadata/IGeometryCollectionMetadata.h" namespace nbl::asset @@ -38,6 +39,11 @@ struct IAssetMetadata_base::asset_metadata { using type = IPolygonGeometryMetadata; }; +template<> +struct IAssetMetadata_base::asset_metadata +{ + using type = IGeometryCollectionMetadata; +}; } @@ -79,7 +85,8 @@ class IAssetMetadata : public impl::IAssetMetadata_base std::tuple< asset_metadata_map_t, asset_metadata_map_t, - asset_metadata_map_t + asset_metadata_map_t, + asset_metadata_map_t > m_metaMaps; diff --git a/include/nbl/asset/metadata/IGeometryCollectionMetadata.h b/include/nbl/asset/metadata/IGeometryCollectionMetadata.h new file mode 100644 index 0000000000..02ace03f82 --- /dev/null +++ b/include/nbl/asset/metadata/IGeometryCollectionMetadata.h @@ -0,0 +1,30 @@ +// Copyright (C) 2025-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_ASSET_I_GEOMETRY_COLLECTION_METADATA_H_INCLUDED_ +#define _NBL_ASSET_I_GEOMETRY_COLLECTION_METADATA_H_INCLUDED_ + + +#include "nbl/asset/ICPUGeometryCollection.h" + + +namespace nbl::asset +{ + +//! 
+class IGeometryCollectionMetadata : public core::Interface +{ + public: + inline IGeometryCollectionMetadata() = default; + + protected: + virtual ~IGeometryCollectionMetadata() = default; + + inline IGeometryCollectionMetadata& operator=(IGeometryCollectionMetadata&& other) + { + return *this; + } +}; + +} +#endif diff --git a/include/nbl/core/containers/CMemoryPool.h b/include/nbl/core/containers/CMemoryPool.h index 85488d8e7d..737848ebff 100644 --- a/include/nbl/core/containers/CMemoryPool.h +++ b/include/nbl/core/containers/CMemoryPool.h @@ -1,5 +1,6 @@ -#ifndef __NBL_C_MEMORY_POOL_H_INCLUDED__ -#define __NBL_C_MEMORY_POOL_H_INCLUDED__ +#ifndef _NBL_CORE_C_MEMORY_POOL_H_INCLUDED_ +#define _NBL_CORE_C_MEMORY_POOL_H_INCLUDED_ + #include "nbl/core/decl/compile_config.h" #include "nbl/core/alloc/SimpleBlockBasedAllocator.h" @@ -8,9 +9,11 @@ #include #include + namespace nbl::core { +// TODO: change DataAllocator to PMR template class DataAllocator, bool isThreadSafe, typename... Args> class CMemoryPool : public Uncopyable { diff --git a/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h b/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h index 95b2f45c41..f918962b3c 100644 --- a/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h +++ b/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h @@ -9,7 +9,6 @@ #include "nbl/ext/MitsubaLoader/CSerializedLoader.h" #include "nbl/ext/MitsubaLoader/ParserUtil.h" -//#include "nbl/ext/MitsubaLoader/CElementShape.h" #include "nbl/ext/MitsubaLoader/SContext.h" @@ -57,12 +56,8 @@ class CMitsubaLoader final : public asset::ISceneLoader //! 
Destructor virtual ~CMitsubaLoader() = default; -#if 0 - // - core::vector getMesh(SContext& ctx, uint32_t hierarchyLevel, CElementShape* shape); - core::vector loadShapeGroup(SContext& ctx, uint32_t hierarchyLevel, const CElementShape::ShapeGroup* shapegroup, const core::matrix3x4SIMD& relTform); - SContext::shape_ass_type loadBasicShape(SContext& ctx, uint32_t hierarchyLevel, CElementShape* shape, const core::matrix3x4SIMD& relTform); +#if 0 void cacheTexture(SContext& ctx, uint32_t hierarchyLevel, const CElementTexture* texture, const CMitsubaMaterialCompilerFrontend::E_IMAGE_VIEW_SEMANTIC semantic); void cacheEmissionProfile(SContext& ctx, const CElementEmissionProfile* profile); diff --git a/include/nbl/ext/MitsubaLoader/CMitsubaMetadata.h b/include/nbl/ext/MitsubaLoader/CMitsubaMetadata.h index 6f24951c50..7c073e9058 100644 --- a/include/nbl/ext/MitsubaLoader/CMitsubaMetadata.h +++ b/include/nbl/ext/MitsubaLoader/CMitsubaMetadata.h @@ -8,17 +8,15 @@ #include "nbl/asset/metadata/IAssetMetadata.h" #include "nbl/asset/ICPUImage.h" -//#include "nbl/ext/MitsubaLoader/SContext.h" -//#include "nbl/ext/MitsubaLoader/CElementEmitter.h" #include "nbl/ext/MitsubaLoader/CElementIntegrator.h" #include "nbl/ext/MitsubaLoader/CElementSensor.h" -//#include "nbl/ext/MitsubaLoader/CElementShape.h" +#include "nbl/ext/MitsubaLoader/CElementShape.h" namespace nbl::ext::MitsubaLoader { -//! A class to derive mitsuba mesh loader metadata objects from +//! 
A class to derive mitsuba scene loader metadata objects from class CMitsubaMetadata : public asset::IAssetMetadata { public: @@ -27,84 +25,70 @@ class CMitsubaMetadata : public asset::IAssetMetadata public: std::string m_id; }; -#if 0 - class CMesh : public asset::IMeshMetadata, public CID + class IGeometry : public CID { public: - CMesh() : IMeshMetadata(), CID(), type(CElementShape::Type::INVALID) {} - ~CMesh() {} + inline IGeometry() : CID(), type(CElementShape::Type::INVALID) {} + inline ~IGeometry() = default; CElementShape::Type type; }; -#endif + class CPolygonGeometry final : public asset::IPolygonGeometryMetadata, public IGeometry + { + public: + inline CPolygonGeometry() : asset::IPolygonGeometryMetadata(), IGeometry() {} + inline ~CPolygonGeometry() = default; + + inline CPolygonGeometry& operator=(CPolygonGeometry&& other) = default; + }; + class CGeometryCollection final : public asset::IGeometryCollectionMetadata, public CID + { + public: + inline CGeometryCollection() : asset::IGeometryCollectionMetadata(), CID() {} + inline ~CGeometryCollection() = default; + }; + struct SGlobal { public: - inline SGlobal() : m_integrator("invalid") {}// TODO + inline SGlobal() : m_integrator("invalid") {} CElementIntegrator m_integrator; core::vector m_sensors; } m_global; - inline CMitsubaMetadata() : IAssetMetadata()/*, m_metaMeshStorage(), m_metaMeshInstanceStorage(), m_metaMeshInstanceAuxStorage(), - m_meshStorageIt(nullptr), m_instanceStorageIt(nullptr), m_instanceAuxStorageIt(nullptr)*/ - { - } + inline CMitsubaMetadata() : IAssetMetadata(), m_metaPolygonGeometryStorage() {} constexpr static inline const char* LoaderName = "ext::MitsubaLoader::CMitsubaLoader"; const char* getLoaderName() const override {return LoaderName;} -#if 0 - //! 
- inline const CMesh* getAssetSpecificMetadata(const asset::ICPUMesh* asset) const + + // add more overloads when more asset implementations of IGeometry exist + inline const CPolygonGeometry* getAssetSpecificMetadata(const asset::ICPUPolygonGeometry* asset) const { const auto found = IAssetMetadata::getAssetSpecificMetadata(asset); - return static_cast(found); + return static_cast(found); } -#endif - private: -// friend class CMitsubaLoader; -#if 0 - meta_container_t m_metaMeshStorage; - CMesh* m_meshStorageIt; - inline void reserveMeshStorage(uint32_t meshCount, uint32_t instanceCount) + private: + friend struct SContext; + struct SGeometryMetaPair { - m_metaMeshStorage = IAssetMetadata::createContainer(meshCount); - m_metaMeshInstanceStorage = IAssetMetadata::createContainer(instanceCount); - m_metaMeshInstanceAuxStorage = IAssetMetadata::createContainer(instanceCount); - m_meshStorageIt = m_metaMeshStorage->begin(); - m_instanceStorageIt = m_metaMeshInstanceStorage->begin(); - m_instanceAuxStorageIt = m_metaMeshInstanceAuxStorage->begin(); - } - template - inline uint32_t addMeshMeta(const asset::ICPUMesh* mesh, std::string&& id, const CElementShape::Type type, InstanceIterator instancesBegin, InstanceIterator instancesEnd) + core::smart_refctd_ptr geom; + CMitsubaMetadata::CPolygonGeometry meta; + }; + inline void setPolygonGeometryMeta(core::unordered_map&& container) { - auto instanceStorageBegin = m_instanceStorageIt; - auto instanceAuxStorageBegin = m_instanceAuxStorageIt; - - auto* meta = m_meshStorageIt++; - meta->m_id = std::move(id); + const uint32_t count = container.size(); + m_metaPolygonGeometryStorage = IAssetMetadata::createContainer(count); + auto outIt = m_metaPolygonGeometryStorage->begin(); + for (auto& el : container) { - // copy instance data - for (auto it=instancesBegin; it!=instancesEnd; ++it) - { - auto& inst = it->second; - (m_instanceStorageIt++)->worldTform = inst.tform; - *(m_instanceAuxStorageIt++) = { - inst.emitter.front, - 
inst.emitter.back, - inst.bsdf - }; - } - meta->m_instances = { instanceStorageBegin,m_instanceStorageIt }; - meta->m_instanceAuxData = { instanceAuxStorageBegin,m_instanceAuxStorageIt }; + *outIt = std::move(el.second.meta); + IAssetMetadata::insertAssetSpecificMetadata(el.second.geom.get(),outIt++); } - meta->type = type; - IAssetMetadata::insertAssetSpecificMetadata(mesh,meta); - - return meta->m_instances.size(); } -#endif + + meta_container_t m_metaPolygonGeometryStorage; }; } diff --git a/include/nbl/ext/MitsubaLoader/ParserUtil.h b/include/nbl/ext/MitsubaLoader/ParserUtil.h index 787c1a534a..5e33951348 100644 --- a/include/nbl/ext/MitsubaLoader/ParserUtil.h +++ b/include/nbl/ext/MitsubaLoader/ParserUtil.h @@ -32,7 +32,7 @@ class IElement; // TODO: replace with common Class for Material Compiler V3 Node Pool template -class ElementPool // similar to : public std::tuple...> +class ElementPool final : public core::Unmovable// similar to : public std::tuple...> { core::SimpleBlockBasedAllocator,core::aligned_allocator> poolAllocator; public: @@ -64,14 +64,32 @@ class ParserManager final system::ISystem* system; asset::IAssetLoader::IAssetLoaderOverride* _override; }; - struct Result + template + struct SNamedElement { - explicit inline operator bool() const {return bool(metadata);} - - // note that its shared between per-file contexts - core::smart_refctd_ptr metadata = nullptr; - // - core::vector > shapegroups = {}; + ElementT* element = nullptr; + core::string name = {}; + }; + struct Result final + { + public: + explicit inline operator bool() const {return bool(metadata);} + + // note that its shared between per-file contexts + core::smart_refctd_ptr metadata = nullptr; + // + using emitter_t = SNamedElement; + core::vector emitters = {}; + // + using shape_group_t = SNamedElement; + core::vector shapegroups = {}; + // + hlsl::float32_t3 ambient = {0,0,0}; + + private: + friend class ParserManager; + // TODO: This leaks memory all over the place because 
destructors are not ran! + std::unique_ptr> objects = std::make_unique>(); }; Result parse(system::IFile* _file, const Params& _params) const; @@ -97,11 +115,6 @@ class ParserManager final private: const core::tuple_transform_t addPropertyMaps; - struct SNamedElement - { - IElement* element = nullptr; - core::string name = {}; - }; // the XMLs can include each other, so this stores the stuff across files struct SessionContext { @@ -120,13 +133,12 @@ class ParserManager final const ParserManager* const manager; // uint32_t sceneDeclCount = 0; - // TODO: This leaks memory all over the place because destructors are not ran! - ElementPool<> objects = {}; // aliases and names (in Mitsbua XML you can give nodes names and `ref` them) core::unordered_map handles = {}; // stack of currently processed elements, each element of index N is parent of the element of index N+1 // the scene element is a parent of all elements of index 0 - core::stack elements = {}; + using named_element_t = SNamedElement; + core::stack elements = {}; }; // This is for a single XML File struct XMLContext @@ -147,7 +159,7 @@ class ParserManager final { // we still push nullptr (failed creation) onto the stack, we only stop parse on catastrphic failure later on if a use of the element pops up // this is why we don't need XMLCOntext for `killParseWithError` - using func_t = SNamedElement(*)(const char**/*attributes*/,SessionContext*); + using func_t = SessionContext::named_element_t(*)(const char**/*attributes*/,SessionContext*); func_t create; bool retvalGoesOnStack; }; @@ -156,8 +168,8 @@ class ParserManager final template struct CreateElement; // - static SNamedElement processAlias(const char** _atts, SessionContext* ctx); - static SNamedElement processRef(const char** _atts, SessionContext* ctx); + static SessionContext::named_element_t processAlias(const char** _atts, SessionContext* ctx); + static SessionContext::named_element_t processRef(const char** _atts, SessionContext* ctx); }; } diff 
--git a/include/nbl/ext/MitsubaLoader/SContext.h b/include/nbl/ext/MitsubaLoader/SContext.h index c1d9c6d9b1..a65854bedc 100644 --- a/include/nbl/ext/MitsubaLoader/SContext.h +++ b/include/nbl/ext/MitsubaLoader/SContext.h @@ -9,15 +9,14 @@ //#include "nbl/asset/utils/IGeometryCreator.h" #include "nbl/asset/interchange/CIESProfileLoader.h" +#include "nbl/ext/MitsubaLoader/CMitsubaMetadata.h" //#include "nbl/ext/MitsubaLoader/CMitsubaMaterialCompilerFrontend.h" -//#include "nbl/ext/MitsubaLoader/CElementShape.h" + namespace nbl::ext::MitsubaLoader { -class CMitsubaMetadata; - -struct SContext +struct SContext final { public: SContext( @@ -28,19 +27,29 @@ struct SContext CMitsubaMetadata* _metadata ); + using shape_ass_type = core::smart_refctd_ptr; + shape_ass_type loadBasicShape(const uint32_t hierarchyLevel, const CElementShape* shape); + using group_ass_type = core::smart_refctd_ptr; + group_ass_type loadShapeGroup(const uint32_t hierarchyLevel, const CElementShape::ShapeGroup* shapegroup); + + inline void transferMetadata() + { + meta->setPolygonGeometryMeta(std::move(shapeCache)); + } + // const asset::IGeometryCreator* creator; // const asset::IMeshManipulator* manipulator; const asset::IAssetLoader::SAssetLoadContext inner; asset::IAssetLoader::IAssetLoaderOverride* override_; CMitsubaMetadata* meta; + core::smart_refctd_ptr scene; -#if 0 + private: // - using group_ass_type = core::vector>; - //core::map groupCache; + core::unordered_map groupCache; // - using shape_ass_type = core::smart_refctd_ptr; - core::map shapeCache; + core::unordered_map shapeCache; +#if 0 //image, sampler using tex_ass_type = std::tuple,core::smart_refctd_ptr>; //image, scale @@ -167,37 +176,8 @@ struct SContext using bsdf_type = const CMitsubaMaterialCompilerFrontend::front_and_back_t; //caches instr buffer instr-wise offset (.first) and instruction count (.second) for each bsdf node core::unordered_map instrStreamCache; - - struct SInstanceData - { - 
SInstanceData(core::matrix3x4SIMD _tform, SContext::bsdf_type _bsdf, const std::string& _id, const CElementEmitter& _emitterFront, const CElementEmitter& _emitterBack) : - tform(_tform), bsdf(_bsdf), -#if defined(_NBL_DEBUG) || defined(_NBL_RELWITHDEBINFO) - bsdf_id(_id), -#endif - emitter{_emitterFront, _emitterBack} - {} - - core::matrix3x4SIMD tform; - SContext::bsdf_type bsdf; -#if defined(_NBL_DEBUG) || defined(_NBL_RELWITHDEBINFO) - std::string bsdf_id; #endif - struct { - // type is invalid if not used - CElementEmitter front; - CElementEmitter back; - } emitter; - }; - core::unordered_multimap mapMesh2instanceData; - - core::unordered_map, SPipelineCacheKey::hash> pipelineCache; -#endif - //material compiler -// core::smart_refctd_ptr ir; -// CMitsubaMaterialCompilerFrontend frontend; - - private: + core::smart_refctd_ptr frontIR; }; } diff --git a/src/nbl/ext/MitsubaLoader/CElementEmitter.cpp b/src/nbl/ext/MitsubaLoader/CElementEmitter.cpp index 00cf848067..93b77b4c3a 100644 --- a/src/nbl/ext/MitsubaLoader/CElementEmitter.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementEmitter.cpp @@ -197,8 +197,6 @@ bool CElementEmitter::onEndTag(CMitsubaMetadata* globalMetadata, system::logger_ case Type::AREA: break; default: - // TODO: slap into the scene instead! 
-// globalMetadata->m_global.m_emitters.push_back(*this); break; } diff --git a/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp b/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp index a852978d75..4c23c41781 100644 --- a/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp +++ b/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp @@ -28,15 +28,6 @@ namespace ext::MitsubaLoader { #if 0 // old material compiler -_NBL_STATIC_INLINE_CONSTEXPR const char* FRAGMENT_SHADER_INPUT_OUTPUT = -R"( -layout (location = 0) in vec3 WorldPos; -layout (location = 1) flat in uint InstanceIndex; -layout (location = 2) in vec3 Normal; -layout (location = 3) in vec2 UV; - -layout (location = 0) out vec4 OutColor; -)"; _NBL_STATIC_INLINE_CONSTEXPR const char* FRAGMENT_SHADER_DEFINITIONS = R"( #include @@ -135,23 +126,6 @@ static core::smart_refctd_ptr createFragmentShader #endif #if 0 -static core::smart_refctd_ptr createImageView(core::smart_refctd_ptr&& _img) // TODO: this should seriously be a utility somewhere -{ - const auto& iparams = _img->getCreationParameters(); - - ICPUImageView::SCreationParams params; - params.format = iparams.format; - params.subresourceRange.baseArrayLayer = 0u; - params.subresourceRange.layerCount = iparams.arrayLayers; - assert(params.subresourceRange.layerCount == 1u); - params.subresourceRange.baseMipLevel = 0u; - params.subresourceRange.levelCount = iparams.mipLevels; - params.viewType = IImageView::ET_2D; - params.flags = static_cast::E_CREATE_FLAGS>(0); - params.image = std::move(_img); - - return ICPUImageView::create(std::move(params)); -} static core::smart_refctd_ptr createDerivMap(SContext& ctx, asset::ICPUImage* _heightMap, const ICPUSampler::SParams& _samplerParams, bool fromNormalMap) { core::smart_refctd_ptr derivmap_img; @@ -302,61 +276,77 @@ SAssetBundle CMitsubaLoader::loadAsset(system::IFile* _file, const IAssetLoader: if (!result) return {}; - auto scene = core::make_smart_refctd_ptr(); if (_params.loaderFlags&IAssetLoader::ELPF_LOAD_METADATA_ONLY) { - 
return SAssetBundle(std::move(result.metadata),{std::move(scene)}); + return SAssetBundle(std::move(result.metadata),{ICPUScene::create(nullptr)}); } else { -#if 0 SContext ctx( // m_assetMgr->getGeometryCreator(), // m_assetMgr->getMeshManipulator(), IAssetLoader::SAssetLoadContext{ - IAssetLoader::SAssetLoadParams(_params.decryptionKeyLen,_params.decryptionKey,_params.cacheFlags,_params.logger,_file->getFileName().parent_path()), + IAssetLoader::SAssetLoadParams(_params.decryptionKeyLen,_params.decryptionKey,_params.cacheFlags,_params.loaderFlags,_params.logger,_file->getFileName().parent_path()), _file }, _override, - parserManager.m_metadata.get() + result.metadata.get() ); + // + ctx.scene->m_ambientLight = result.ambient; - core::map,std::pair> meshes; - for (auto& shapepair : parserManager.shapegroups) + // first go over all actually used shapes which are not shapegroups (regular shapes and instances) + for (auto& shapepair : result.shapegroups) { - auto* shapedef = shapepair.first; - if (shapedef->type == CElementShape::Type::SHAPEGROUP) + auto* shapedef = shapepair.element; + // this should be filtered out while parsing and we should just assert it + if (shapedef->type==CElementShape::Type::SHAPEGROUP) continue; - auto lowermeshes = getMesh(ctx, _hierarchyLevel, shapedef); - for (auto& mesh : lowermeshes) + if (shapedef->type!=CElementShape::Type::INSTANCE) + { + auto geometry = ctx.loadBasicShape(_hierarchyLevel,shapedef); + // TODO: add to geometry collection, make a morph target, and add to scene + shapedef->getAbsoluteTransform(); + } + else // mitsuba is weird and lists instances under a shapegroup instead of having instances reference the shapegroup { - if (!mesh) + // get group reference + const CElementShape* parent = shapedef->instance.parent; + if (!parent) // we should probably assert this continue; - - auto found = meshes.find(mesh); - if (found == meshes.end()) - 
meshes.emplace(std::move(mesh),std::pair(std::move(shapepair.second),shapedef->type)); + assert(parent->type==CElementShape::Type::SHAPEGROUP); + const CElementShape::ShapeGroup* shapegroup = &parent->shapegroup; + auto collection = ctx.loadShapeGroup(_hierarchyLevel,shapegroup); + // TODO: make a morph target and add to scene with transform of + shapedef->getAbsoluteTransform(); } } + result.shapegroups.clear(); - parserManager.m_metadata->reserveMeshStorage(meshes.size(),ctx.mapMesh2instanceData.size()); - for (auto& mesh : meshes) - { - auto instances_rng = ctx.mapMesh2instanceData.equal_range(mesh.first.get()); - assert(instances_rng.first!=instances_rng.second); +#if 0 + // TODO: add materials (incl emission) to the instances + auto addInstance = [shape,&ctx,&relTform,this](SContext::shape_ass_type& mesh) + { + auto emitter = shape->obtainEmitter(); + core::matrix3x4SIMD tform = core::concatenateBFollowedByA(relTform, shape->getAbsoluteTransform()); + auto bsdf = getBSDFtreeTraversal(ctx, shape->bsdf, &emitter, core::matrix4SIMD(tform)); - const uint32_t instanceCount = parserManager.m_metadata->addMeshMeta(mesh.first.get(),std::move(mesh.second.first),mesh.second.second,instances_rng.first,instances_rng.second); - for (auto mb : mesh.first.get()->getMeshBuffers()) - mb->setInstanceCount(instanceCount); - } + SContext::SInstanceData instance( + tform, + bsdf, +#if defined(_NBL_DEBUG) || defined(_NBL_RELWITHDEBINFO) + shape->bsdf ? 
shape->bsdf->id : "", #endif + emitter, + CElementEmitter{} // no backface emission + ); + ctx.mapMesh2instanceData.insert({ mesh.get(), instance }); + }; -#if 0 // TODO: put IR and stuff in metadata so that we can recompile the materials after load auto compResult = ctx.backend.compile(&ctx.backend_ctx, ctx.ir.get(), decltype(ctx.backend)::EGST_PRESENT_WITH_AOV_EXTRACTION); ctx.backend_ctx.vt.commitAll(); - auto pipelineLayout = createPipelineLayout(m_assetMgr, ctx.backend_ctx.vt.getCPUVirtualTexture()); auto fragShader = createFragmentShader(compResult, ctx.backend_ctx.vt.getCPUVirtualTexture()->getFloatViews().size()); auto ds0 = createDS0(ctx, pipelineLayout.get(), compResult, meshes.begin(), meshes.end()); auto basePipeline = createPipeline( @@ -373,570 +363,211 @@ SAssetBundle CMitsubaLoader::loadAsset(system::IFile* _file, const IAssetLoader: { ctx.meta->addDerivMapMeta(derivMap.first.get(), derivMap.second); } - - auto meshSmartPtrArray = core::make_refctd_dynamic_array(meshes.size()); - auto meshSmartPtrArrayIt = meshSmartPtrArray->begin(); - for (const auto& mesh_ : meshes) - { - for (auto mb : mesh_.first.get()->getMeshBuffers()) - { - const auto* prevPipeline = mb->getPipeline(); - SContext::SPipelineCacheKey cacheKey; - cacheKey.vtxParams = prevPipeline->getVertexInputParams(); - cacheKey.primParams = prevPipeline->getPrimitiveAssemblyParams(); - auto found = ctx.pipelineCache.find(cacheKey); - core::smart_refctd_ptr pipeline; - if (found != ctx.pipelineCache.end()) - { - pipeline = found->second; - } - else - { - pipeline = core::smart_refctd_ptr_static_cast(//shallow copy because we're only going to override parameter structs - basePipeline->clone(0u) - ); - pipeline->getVertexInputParams() = cacheKey.vtxParams; - pipeline->getPrimitiveAssemblyParams() = cacheKey.primParams; - ctx.pipelineCache.insert({ cacheKey, pipeline }); - } - - mb->setPipeline(core::smart_refctd_ptr(pipeline)); - } - *(meshSmartPtrArrayIt++) = std::move(mesh_.first); - } - - 
parserManager.m_metadata->reservePplnStorage(ctx.pipelineCache.size(),core::smart_refctd_ptr(IRenderpassIndependentPipelineLoader::m_basicViewParamsSemantics)); - for (auto& ppln : ctx.pipelineCache) - parserManager.m_metadata->addPplnMeta(ppln.second.get(),core::smart_refctd_ptr(ds0)); - - for (const auto& emitter : parserManager.m_metadata->m_global.m_emitters) +#endif + for (const auto& emitter : result.emitters) { - if(emitter.type == ext::MitsubaLoader::CElementEmitter::Type::ENVMAP) + if(emitter.element->type == ext::MitsubaLoader::CElementEmitter::Type::ENVMAP) { - assert(emitter.envmap.filename.type==ext::MitsubaLoader::SPropertyElementData::Type::STRING); - auto envfilename = emitter.envmap.filename.svalue; - SAssetBundle envmapImageBundle = interm_getAssetInHierarchy(m_assetMgr, envfilename, ctx.inner.params, _hierarchyLevel, ctx.override_); + const auto& envmap = emitter.element->envmap; +#if 0 + SAssetBundle envmapImageBundle = interm_getAssetInHierarchy(m_assetMgr,envmap.filename,ctx.inner.params,_hierarchyLevel,ctx.override_); auto contentRange = envmapImageBundle.getContents(); if (contentRange.empty()) { os::Printer::log(std::string("[ERROR] Could Not Find Envmap Image: ") + envfilename, ELL_ERROR); continue; } - if (envmapImageBundle.getAssetType()!=asset::IAsset::ET_IMAGE) + core::smart_refctd_ptr view = {}; + switch(envmapImageBundle.getAssetType()) { - os::Printer::log("[ERROR] Loaded an Asset for the Envmap but it wasn't an image, was E_ASSET_TYPE " + std::to_string(envmapImageBundle.getAssetType()), ELL_ERROR); - continue; + case asset::IAsset::ET_IMAGE: + { + // TODO: create image view + } + [[fallthrough]]; + case asset::IAsset::ET_IMAGE_VIEW: + view = core::smart_refctd_ptr_static_cast(*contentRange.begin()); + break; + default: + os::Printer::log("[ERROR] Loaded an Asset for the Envmap but it wasn't an image, was E_ASSET_TYPE " + std::to_string(envmapImageBundle.getAssetType()), ELL_ERROR); + break; } - 
parserManager.m_metadata->m_global.m_envMapImages.push_back(core::smart_refctd_ptr_static_cast(*contentRange.begin())); + ctx.scene->addEnvLight(ICPUScene::EEnvLightType::SphereMap,std::move(view)); +#endif } } -#endif - return asset::SAssetBundle(std::move(result.metadata),{std::move(scene)}); + + ctx.transferMetadata(); + return asset::SAssetBundle(std::move(result.metadata),{std::move(ctx.scene)}); } } #if 0 -core::vector CMitsubaLoader::getMesh(SContext& ctx, uint32_t hierarchyLevel, CElementShape* shape) +void CMitsubaLoader::cacheEmissionProfile(SContext& ctx, const CElementEmissionProfile* profile) { - if (!shape) - return {}; + if (!profile) + return; - if (shape->type!=CElementShape::Type::INSTANCE) - return {loadBasicShape(ctx, hierarchyLevel, shape, core::matrix3x4SIMD())}; - else + auto params = ctx.inner.params; + params.loaderFlags = asset::IAssetLoader::ELPF_LOAD_METADATA_ONLY; + + auto assetLoaded = interm_getAssetInHierarchy(m_assetMgr, profile->filename, params, 0u, ctx.override_); + + if (!assetLoaded.getMetadata()) { - core::matrix3x4SIMD relTform = shape->getAbsoluteTransform(); - // get group reference - const CElementShape* parent = shape->instance.parent; - if (!parent) - return {}; - assert(parent->type==CElementShape::Type::SHAPEGROUP); - const CElementShape::ShapeGroup* shapegroup = &parent->shapegroup; - - return loadShapeGroup(ctx, hierarchyLevel, shapegroup, relTform); + os::Printer::log("[ERROR] Could Not Find Emission Profile: " + profile->filename, ELL_ERROR); + return; } } -core::vector CMitsubaLoader::loadShapeGroup(SContext& ctx, uint32_t hierarchyLevel, const CElementShape::ShapeGroup* shapegroup, const core::matrix3x4SIMD& relTform) +void CMitsubaLoader::cacheTexture(SContext& ctx, uint32_t hierarchyLevel, const CElementTexture* tex, const CMitsubaMaterialCompilerFrontend::E_IMAGE_VIEW_SEMANTIC semantic) { - // @Crisspl why no group cache? 
- // find group - //auto found = ctx.groupCache.find(shapegroup); - //if (found != ctx.groupCache.end()) - // return found->second; - - const auto children = shapegroup->children; + if (!tex) + return; - core::vector meshes; - for (auto i=0u; ichildCount; i++) + switch (tex->type) { - auto child = children[i]; - if (!child) - continue; - - assert(child->type!=CElementShape::Type::INSTANCE); - if (child->type != CElementShape::Type::SHAPEGROUP) { - auto lowermesh = loadBasicShape(ctx, hierarchyLevel, child, relTform); - meshes.push_back(std::move(lowermesh)); - } - else { - auto lowermeshes = loadShapeGroup(ctx, hierarchyLevel, &child->shapegroup, relTform); - meshes.insert(meshes.begin(), std::make_move_iterator(lowermeshes.begin()), std::make_move_iterator(lowermeshes.end())); - } - } - - //ctx.groupCache.insert({shapegroup,meshes}); - return meshes; -} + case CElementTexture::Type::BITMAP: + { + // get sampler parameters + const auto samplerParams = ctx.computeSamplerParameters(tex->bitmap); -static core::smart_refctd_ptr createMeshFromGeomCreatorReturnType(IGeometryCreator::return_type&& _data, asset::IAssetManager* _manager) -{ - //creating pipeline just to forward vtx and primitive params - auto pipeline = core::make_smart_refctd_ptr( - nullptr, nullptr, nullptr, //no layout nor shaders - _data.inputParams, - asset::SBlendParams(), - _data.assemblyParams, - asset::SRasterizationParams() - ); + // search the cache for the imageview + const auto cacheKey = ctx.imageViewCacheKey(tex->bitmap,semantic); + const asset::IAsset::E_TYPE types[]{asset::IAsset::ET_IMAGE_VIEW,asset::IAsset::ET_TERMINATING_ZERO}; + // could not find view in the cache + if (ctx.override_->findCachedAsset(cacheKey,types,ctx.inner,hierarchyLevel).getContents().empty()) + { + ICPUImageView::SCreationParams viewParams = {}; + // find or restore image from cache + { + auto loadParams = ctx.inner.params; + // always restore, the only reason we haven't found a view is because either the image 
wasnt loaded yet, or its going to be processed with channel extraction or derivative mapping + const uint32_t restoreLevels = semantic==CMitsubaMaterialCompilerFrontend::EIVS_IDENTITIY&&tex->bitmap.channel==CElementTexture::Bitmap::CHANNEL::INVALID ? 0u:2u; // all the way to the buffer providing the pixels + loadParams.restoreLevels = std::max(loadParams.restoreLevels,hierarchyLevel+restoreLevels); + // load using the actual filename, not the cache key + asset::SAssetBundle bundle = interm_getAssetInHierarchy(m_assetMgr,tex->bitmap.filename.svalue,loadParams,hierarchyLevel,ctx.override_); - auto mb = core::make_smart_refctd_ptr( - nullptr, nullptr, - _data.bindings, std::move(_data.indexBuffer) - ); - mb->setIndexCount(_data.indexCount); - mb->setIndexType(_data.indexType); - mb->setBoundingBox(_data.bbox); - mb->setPipeline(std::move(pipeline)); - constexpr auto NORMAL_ATTRIBUTE = 3; - mb->setNormalAttributeIx(NORMAL_ATTRIBUTE); + // check if found + auto contentRange = bundle.getContents(); + if (contentRange.empty()) + { + os::Printer::log("[ERROR] Could Not Find Texture: "+cacheKey,ELL_ERROR); + return; + } + auto asset = contentRange.begin()[0]; + if (asset->getAssetType()!=asset::IAsset::ET_IMAGE) + { + os::Printer::log("[ERROR] Loaded an Asset but it wasn't a texture, was E_ASSET_TYPE "+std::to_string(asset->getAssetType()),ELL_ERROR); + return; + } - auto mesh = core::make_smart_refctd_ptr(); - mesh->getMeshBufferVector().push_back(std::move(mb)); + viewParams.image = core::smart_refctd_ptr_static_cast(asset); + } + // adjust gamma on pixels (painful and long process) + if (!std::isnan(tex->bitmap.gamma)) + { + _NBL_DEBUG_BREAK_IF(true); // TODO : use an image filter (unify with the below maybe?)! 
+ } + switch (semantic) + { + case CMitsubaMaterialCompilerFrontend::EIVS_IDENTITIY: + case CMitsubaMaterialCompilerFrontend::EIVS_BLEND_WEIGHT: + { + switch (tex->bitmap.channel) + { + // no GL_R8_SRGB support yet + case CElementTexture::Bitmap::CHANNEL::R: + viewParams.image = createSingleChannelImage(viewParams.image.get(),asset::ICPUImageView::SComponentMapping::ES_R); + break; + case CElementTexture::Bitmap::CHANNEL::G: + viewParams.image = createSingleChannelImage(viewParams.image.get(),asset::ICPUImageView::SComponentMapping::ES_G); + break; + case CElementTexture::Bitmap::CHANNEL::B: + viewParams.image = createSingleChannelImage(viewParams.image.get(),asset::ICPUImageView::SComponentMapping::ES_B); + break; + case CElementTexture::Bitmap::CHANNEL::A: + viewParams.image = createSingleChannelImage(viewParams.image.get(),asset::ICPUImageView::SComponentMapping::ES_A); + break; + /* special conversions needed to CIE space + case CElementTexture::Bitmap::CHANNEL::X: + case CElementTexture::Bitmap::CHANNEL::Y: + case CElementTexture::Bitmap::CHANNEL::Z:*/ + case CElementTexture::Bitmap::CHANNEL::INVALID: + [[fallthrough]]; + default: + if (semantic==CMitsubaMaterialCompilerFrontend::EIVS_BLEND_WEIGHT && asset::getFormatChannelCount(viewParams.image->getCreationParameters().format)<3u) + viewParams.image = createSingleChannelImage(viewParams.image.get(),asset::ICPUImageView::SComponentMapping::ES_IDENTITY); + break; + } + } + break; + case CMitsubaMaterialCompilerFrontend::EIVS_NORMAL_MAP: + viewParams.image = createDerivMap(ctx,viewParams.image.get(),samplerParams,true); + break; + case CMitsubaMaterialCompilerFrontend::EIVS_BUMP_MAP: + viewParams.image = createDerivMap(ctx,viewParams.image.get(),samplerParams,false); + break; + default: + _NBL_DEBUG_BREAK_IF(true); + assert(false); + break; + } + // get rest of view params and insert into cache + { + viewParams.flags = static_cast(0); + viewParams.viewType = IImageView::ET_2D; + viewParams.format = 
viewParams.image->getCreationParameters().format; + viewParams.subresourceRange.aspectMask = static_cast(0); + viewParams.subresourceRange.levelCount = viewParams.image->getCreationParameters().mipLevels; + viewParams.subresourceRange.layerCount = 1u; + //! TODO: this stuff (custom shader sampling code?) + _NBL_DEBUG_BREAK_IF(tex->bitmap.uoffset != 0.f); + _NBL_DEBUG_BREAK_IF(tex->bitmap.voffset != 0.f); + _NBL_DEBUG_BREAK_IF(tex->bitmap.uscale != 1.f); + _NBL_DEBUG_BREAK_IF(tex->bitmap.vscale != 1.f); - return mesh; + asset::SAssetBundle viewBundle(nullptr,{ICPUImageView::create(std::move(viewParams))}); + ctx.override_->insertAssetIntoCache(std::move(viewBundle),cacheKey,ctx.inner,hierarchyLevel); + } + } + } + break; + case CElementTexture::Type::SCALE: + cacheTexture(ctx,hierarchyLevel,tex->scale.texture,semantic); + break; + default: + _NBL_DEBUG_BREAK_IF(true); + break; + } } -SContext::shape_ass_type CMitsubaLoader::loadBasicShape(SContext& ctx, uint32_t hierarchyLevel, CElementShape* shape, const core::matrix3x4SIMD& relTform) +auto CMitsubaLoader::getBSDFtreeTraversal(SContext& ctx, const CElementBSDF* bsdf, const CElementEmitter* emitter, core::matrix4SIMD tform) -> SContext::bsdf_type { - constexpr uint32_t UV_ATTRIB_ID = 2u; - - auto addInstance = [shape,&ctx,&relTform,this](SContext::shape_ass_type& mesh) + if (!bsdf) { - auto emitter = shape->obtainEmitter(); - core::matrix3x4SIMD tform = core::concatenateBFollowedByA(relTform, shape->getAbsoluteTransform()); - auto bsdf = getBSDFtreeTraversal(ctx, shape->bsdf, &emitter, core::matrix4SIMD(tform)); - - SContext::SInstanceData instance( - tform, - bsdf, -#if defined(_NBL_DEBUG) || defined(_NBL_RELWITHDEBINFO) - shape->bsdf ? 
shape->bsdf->id : "", -#endif - emitter, - CElementEmitter{} - ); - ctx.mapMesh2instanceData.insert({ mesh.get(), instance }); - }; + static auto blackBSDF = []() -> auto + { + CElementBSDF retval("nullptr BSDF"); + retval.type = CElementBSDF::Type::DIFFUSE, + retval.diffuse.reflectance = 0.f; + retval.diffuse.alpha = 0.f; + return retval; + }(); + bsdf = &blackBSDF; + } - auto found = ctx.shapeCache.find(shape); - if (found != ctx.shapeCache.end()) { - addInstance(found->second); + auto found = ctx.instrStreamCache.find(bsdf); + if (found == ctx.instrStreamCache.end()) + found = ctx.instrStreamCache.insert({ bsdf,genBSDFtreeTraversal(ctx, bsdf) }).first; + auto compiled_bsdf = found->second; - return found->second; - } - - auto loadModel = [&](const ext::MitsubaLoader::SPropertyElementData& filename, int64_t index=-1) -> core::smart_refctd_ptr - { - assert(filename.type==ext::MitsubaLoader::SPropertyElementData::Type::STRING); - auto loadParams = ctx.inner.params; - loadParams.loaderFlags = static_cast(loadParams.loaderFlags | IAssetLoader::ELPF_RIGHT_HANDED_MESHES); - auto retval = interm_getAssetInHierarchy(m_assetMgr, filename.svalue, loadParams, hierarchyLevel/*+ICPUScene::MESH_HIERARCHY_LEVELS_BELOW*/, ctx.override_); - if (retval.getContents().empty()) - { - os::Printer::log(std::string("[ERROR] Could Not Find Mesh: ") + filename.svalue, ELL_ERROR); - return nullptr; - } - if (retval.getAssetType()!=asset::IAsset::ET_MESH) - { - os::Printer::log("[ERROR] Loaded an Asset but it wasn't a mesh, was E_ASSET_TYPE " + std::to_string(retval.getAssetType()), ELL_ERROR); - return nullptr; - } - auto contentRange = retval.getContents(); - auto serializedMeta = retval.getMetadata()->selfCast(); - // - uint32_t actualIndex = 0; - if (index>=0ll && serializedMeta) - for (auto it=contentRange.begin(); it!=contentRange.end(); it++) - { - auto meshMeta = static_cast(serializedMeta->getAssetSpecificMetadata(IAsset::castDown(*it).get())); - if 
(meshMeta->m_id!=static_cast(index)) - continue; - actualIndex = it-contentRange.begin(); - break; - } - // - if (contentRange.begin()+actualIndex < contentRange.end()) - { - auto asset = contentRange.begin()[actualIndex]; - if (!asset) - return nullptr; - return core::smart_refctd_ptr_static_cast(asset); - } - else - return nullptr; - }; - - core::smart_refctd_ptr mesh,newMesh; - bool flipNormals = false; - bool faceNormals = false; - float maxSmoothAngle = NAN; - switch (shape->type) - { - case CElementShape::Type::CUBE: - { - auto cubeData = ctx.creator->createCubeMesh(core::vector3df(2.f)); - - mesh = createMeshFromGeomCreatorReturnType(ctx.creator->createCubeMesh(core::vector3df(2.f)), m_assetMgr); - flipNormals = flipNormals!=shape->cube.flipNormals; - } - break; - case CElementShape::Type::SPHERE: - mesh = createMeshFromGeomCreatorReturnType(ctx.creator->createSphereMesh(1.f,64u,64u), m_assetMgr); - flipNormals = flipNormals!=shape->sphere.flipNormals; - { - core::matrix3x4SIMD tform; - tform.setScale(core::vectorSIMDf(shape->sphere.radius,shape->sphere.radius,shape->sphere.radius)); - tform.setTranslation(shape->sphere.center); - shape->transform.matrix = core::concatenateBFollowedByA(shape->transform.matrix,core::matrix4SIMD(tform)); - } - break; - case CElementShape::Type::CYLINDER: - { - auto diff = shape->cylinder.p0-shape->cylinder.p1; - mesh = createMeshFromGeomCreatorReturnType(ctx.creator->createCylinderMesh(1.f, 1.f, 64), m_assetMgr); - core::vectorSIMDf up(0.f); - float maxDot = diff[0]; - uint32_t index = 0u; - for (auto i = 1u; i < 3u; i++) - if (diff[i] < maxDot) - { - maxDot = diff[i]; - index = i; - } - up[index] = 1.f; - core::matrix3x4SIMD tform; - // mesh is left haded so transforming by LH matrix is fine (I hope but lets check later on) - core::matrix3x4SIMD::buildCameraLookAtMatrixLH(shape->cylinder.p0,shape->cylinder.p1,up).getInverse(tform); - core::matrix3x4SIMD scale; - 
scale.setScale(core::vectorSIMDf(shape->cylinder.radius,shape->cylinder.radius,core::length(diff).x)); - shape->transform.matrix = core::concatenateBFollowedByA(shape->transform.matrix,core::matrix4SIMD(core::concatenateBFollowedByA(tform,scale))); - } - flipNormals = flipNormals!=shape->cylinder.flipNormals; - break; - case CElementShape::Type::RECTANGLE: - mesh = createMeshFromGeomCreatorReturnType(ctx.creator->createRectangleMesh(core::vector2df_SIMD(1.f,1.f)), m_assetMgr); - flipNormals = flipNormals!=shape->rectangle.flipNormals; - break; - case CElementShape::Type::DISK: - mesh = createMeshFromGeomCreatorReturnType(ctx.creator->createDiskMesh(1.f,64u), m_assetMgr); - flipNormals = flipNormals!=shape->disk.flipNormals; - break; - case CElementShape::Type::OBJ: - mesh = loadModel(shape->obj.filename); - flipNormals = flipNormals!=shape->obj.flipNormals; - faceNormals = shape->obj.faceNormals; - maxSmoothAngle = shape->obj.maxSmoothAngle; - if (mesh && shape->obj.flipTexCoords) - { - newMesh = core::smart_refctd_ptr_static_cast (mesh->clone(1u)); - for (auto& meshbuffer : mesh->getMeshBufferVector()) - { - auto binding = meshbuffer->getVertexBufferBindings()[UV_ATTRIB_ID]; - if (binding.buffer) - { - binding.buffer = core::smart_refctd_ptr_static_cast(binding.buffer->clone(0u)); - meshbuffer->setVertexBufferBinding(std::move(binding),UV_ATTRIB_ID); - core::vectorSIMDf uv; - for (uint32_t i=0u; meshbuffer->getAttribute(uv,UV_ATTRIB_ID,i); i++) - { - uv.y = -uv.y; - meshbuffer->setAttribute(uv,UV_ATTRIB_ID,i); - } - } - } - } - // collapse parameter gets ignored - break; - case CElementShape::Type::PLY: - _NBL_DEBUG_BREAK_IF(true); // this code has never been tested - mesh = loadModel(shape->ply.filename); - flipNormals = flipNormals!=shape->ply.flipNormals; - faceNormals = shape->ply.faceNormals; - maxSmoothAngle = shape->ply.maxSmoothAngle; - if (mesh && shape->ply.srgb) - { - uint32_t totalVertexCount = 0u; - for (auto meshbuffer : mesh->getMeshBuffers()) - 
totalVertexCount += IMeshManipulator::upperBoundVertexID(meshbuffer); - if (totalVertexCount) - { - constexpr uint32_t hidefRGBSize = 4u; - auto newRGBbuff = core::make_smart_refctd_ptr(hidefRGBSize*totalVertexCount); - newMesh = core::smart_refctd_ptr_static_cast(mesh->clone(1u)); - constexpr uint32_t COLOR_ATTR = 1u; - constexpr uint32_t COLOR_BUF_BINDING = 15u; - uint32_t* newRGB = reinterpret_cast(newRGBbuff->getPointer()); - uint32_t offset = 0u; - for (auto& meshbuffer : mesh->getMeshBufferVector()) - { - core::vectorSIMDf rgb; - for (uint32_t i=0u; meshbuffer->getAttribute(rgb,COLOR_ATTR,i); i++,offset++) - { - for (auto i=0; i<3u; i++) - rgb[i] = core::srgb2lin(rgb[i]); - ICPUMeshBuffer::setAttribute(rgb,newRGB+offset,asset::EF_A2B10G10R10_UNORM_PACK32); - } - auto newPipeline = core::smart_refctd_ptr_static_cast(meshbuffer->getPipeline()->clone(0u)); - auto& vtxParams = newPipeline->getVertexInputParams(); - vtxParams.attributes[COLOR_ATTR].format = EF_A2B10G10R10_UNORM_PACK32; - vtxParams.attributes[COLOR_ATTR].relativeOffset = 0u; - vtxParams.attributes[COLOR_ATTR].binding = COLOR_BUF_BINDING; - vtxParams.bindings[COLOR_BUF_BINDING].inputRate = EVIR_PER_VERTEX; - vtxParams.bindings[COLOR_BUF_BINDING].stride = hidefRGBSize; - vtxParams.enabledBindingFlags |= (1u<setPipeline(std::move(newPipeline)); - meshbuffer->setVertexBufferBinding({offset*hidefRGBSize,core::smart_refctd_ptr(newRGBbuff)},COLOR_BUF_BINDING); - } - } - } - break; - case CElementShape::Type::SERIALIZED: - mesh = loadModel(shape->serialized.filename,shape->serialized.shapeIndex); - flipNormals = flipNormals!=shape->serialized.flipNormals; - faceNormals = shape->serialized.faceNormals; - maxSmoothAngle = shape->serialized.maxSmoothAngle; - break; - case CElementShape::Type::SHAPEGROUP: - [[fallthrough]]; - case CElementShape::Type::INSTANCE: - assert(false); - break; - default: - _NBL_DEBUG_BREAK_IF(true); - break; - } - // - if (!mesh) - return nullptr; - - // mesh including meshbuffers 
needs to be cloned because instance counts and base instances will be changed - if (!newMesh) - newMesh = core::smart_refctd_ptr_static_cast(mesh->clone(1u)); - // flip normals if necessary - if (flipNormals) - { - for (auto& meshbuffer : mesh->getMeshBufferVector()) - { - auto binding = meshbuffer->getIndexBufferBinding(); - binding.buffer = core::smart_refctd_ptr_static_cast(binding.buffer->clone(0u)); - meshbuffer->setIndexBufferBinding(std::move(binding)); - ctx.manipulator->flipSurfaces(meshbuffer.get()); - } - } - // recompute normalis if necessary - if (faceNormals || !std::isnan(maxSmoothAngle)) - for (auto& meshbuffer : mesh->getMeshBufferVector()) - { - const float smoothAngleCos = cos(core::radians(maxSmoothAngle)); - - // TODO: make these mesh manipulator functions const-correct - auto newMeshBuffer = ctx.manipulator->createMeshBufferUniquePrimitives(meshbuffer.get()); - ctx.manipulator->filterInvalidTriangles(newMeshBuffer.get()); - ctx.manipulator->calculateSmoothNormals(newMeshBuffer.get(), false, 0.f, newMeshBuffer->getNormalAttributeIx(), - [&](const asset::IMeshManipulator::SSNGVertexData& a, const asset::IMeshManipulator::SSNGVertexData& b, asset::ICPUMeshBuffer* buffer) - { - if (faceNormals) - return a.indexOffset == b.indexOffset; - else - return core::dot(a.parentTriangleFaceNormal, b.parentTriangleFaceNormal).x >= smoothAngleCos; - }); - meshbuffer = std::move(newMeshBuffer); - } - IMeshManipulator::recalculateBoundingBox(newMesh.get()); - mesh = std::move(newMesh); - - addInstance(mesh); - // cache and return - ctx.shapeCache.insert({ shape,mesh }); - return mesh; -} - -void CMitsubaLoader::cacheEmissionProfile(SContext& ctx, const CElementEmissionProfile* profile) -{ - if (!profile) - return; - - auto params = ctx.inner.params; - params.loaderFlags = asset::IAssetLoader::ELPF_LOAD_METADATA_ONLY; - - auto assetLoaded = interm_getAssetInHierarchy(m_assetMgr, profile->filename, params, 0u, ctx.override_); - - if (!assetLoaded.getMetadata()) - 
{ - os::Printer::log("[ERROR] Could Not Find Emission Profile: " + profile->filename, ELL_ERROR); - return; - } -} - -void CMitsubaLoader::cacheTexture(SContext& ctx, uint32_t hierarchyLevel, const CElementTexture* tex, const CMitsubaMaterialCompilerFrontend::E_IMAGE_VIEW_SEMANTIC semantic) -{ - if (!tex) - return; - - switch (tex->type) - { - case CElementTexture::Type::BITMAP: - { - // get sampler parameters - const auto samplerParams = ctx.computeSamplerParameters(tex->bitmap); - - // search the cache for the imageview - const auto cacheKey = ctx.imageViewCacheKey(tex->bitmap,semantic); - const asset::IAsset::E_TYPE types[]{asset::IAsset::ET_IMAGE_VIEW,asset::IAsset::ET_TERMINATING_ZERO}; - // could not find view in the cache - if (ctx.override_->findCachedAsset(cacheKey,types,ctx.inner,hierarchyLevel).getContents().empty()) - { - ICPUImageView::SCreationParams viewParams = {}; - // find or restore image from cache - { - auto loadParams = ctx.inner.params; - // always restore, the only reason we haven't found a view is because either the image wasnt loaded yet, or its going to be processed with channel extraction or derivative mapping - const uint32_t restoreLevels = semantic==CMitsubaMaterialCompilerFrontend::EIVS_IDENTITIY&&tex->bitmap.channel==CElementTexture::Bitmap::CHANNEL::INVALID ? 
0u:2u; // all the way to the buffer providing the pixels - loadParams.restoreLevels = std::max(loadParams.restoreLevels,hierarchyLevel+restoreLevels); - // load using the actual filename, not the cache key - asset::SAssetBundle bundle = interm_getAssetInHierarchy(m_assetMgr,tex->bitmap.filename.svalue,loadParams,hierarchyLevel,ctx.override_); - - // check if found - auto contentRange = bundle.getContents(); - if (contentRange.empty()) - { - os::Printer::log("[ERROR] Could Not Find Texture: "+cacheKey,ELL_ERROR); - return; - } - auto asset = contentRange.begin()[0]; - if (asset->getAssetType()!=asset::IAsset::ET_IMAGE) - { - os::Printer::log("[ERROR] Loaded an Asset but it wasn't a texture, was E_ASSET_TYPE "+std::to_string(asset->getAssetType()),ELL_ERROR); - return; - } - - viewParams.image = core::smart_refctd_ptr_static_cast(asset); - } - // adjust gamma on pixels (painful and long process) - if (!std::isnan(tex->bitmap.gamma)) - { - _NBL_DEBUG_BREAK_IF(true); // TODO : use an image filter (unify with the below maybe?)! 
- } - switch (semantic) - { - case CMitsubaMaterialCompilerFrontend::EIVS_IDENTITIY: - case CMitsubaMaterialCompilerFrontend::EIVS_BLEND_WEIGHT: - { - switch (tex->bitmap.channel) - { - // no GL_R8_SRGB support yet - case CElementTexture::Bitmap::CHANNEL::R: - viewParams.image = createSingleChannelImage(viewParams.image.get(),asset::ICPUImageView::SComponentMapping::ES_R); - break; - case CElementTexture::Bitmap::CHANNEL::G: - viewParams.image = createSingleChannelImage(viewParams.image.get(),asset::ICPUImageView::SComponentMapping::ES_G); - break; - case CElementTexture::Bitmap::CHANNEL::B: - viewParams.image = createSingleChannelImage(viewParams.image.get(),asset::ICPUImageView::SComponentMapping::ES_B); - break; - case CElementTexture::Bitmap::CHANNEL::A: - viewParams.image = createSingleChannelImage(viewParams.image.get(),asset::ICPUImageView::SComponentMapping::ES_A); - break; - /* special conversions needed to CIE space - case CElementTexture::Bitmap::CHANNEL::X: - case CElementTexture::Bitmap::CHANNEL::Y: - case CElementTexture::Bitmap::CHANNEL::Z:*/ - case CElementTexture::Bitmap::CHANNEL::INVALID: - [[fallthrough]]; - default: - if (semantic==CMitsubaMaterialCompilerFrontend::EIVS_BLEND_WEIGHT && asset::getFormatChannelCount(viewParams.image->getCreationParameters().format)<3u) - viewParams.image = createSingleChannelImage(viewParams.image.get(),asset::ICPUImageView::SComponentMapping::ES_IDENTITY); - break; - } - } - break; - case CMitsubaMaterialCompilerFrontend::EIVS_NORMAL_MAP: - viewParams.image = createDerivMap(ctx,viewParams.image.get(),samplerParams,true); - break; - case CMitsubaMaterialCompilerFrontend::EIVS_BUMP_MAP: - viewParams.image = createDerivMap(ctx,viewParams.image.get(),samplerParams,false); - break; - default: - _NBL_DEBUG_BREAK_IF(true); - assert(false); - break; - } - // get rest of view params and insert into cache - { - viewParams.flags = static_cast(0); - viewParams.viewType = IImageView::ET_2D; - viewParams.format = 
viewParams.image->getCreationParameters().format; - viewParams.subresourceRange.aspectMask = static_cast(0); - viewParams.subresourceRange.levelCount = viewParams.image->getCreationParameters().mipLevels; - viewParams.subresourceRange.layerCount = 1u; - //! TODO: this stuff (custom shader sampling code?) - _NBL_DEBUG_BREAK_IF(tex->bitmap.uoffset != 0.f); - _NBL_DEBUG_BREAK_IF(tex->bitmap.voffset != 0.f); - _NBL_DEBUG_BREAK_IF(tex->bitmap.uscale != 1.f); - _NBL_DEBUG_BREAK_IF(tex->bitmap.vscale != 1.f); - - asset::SAssetBundle viewBundle(nullptr,{ICPUImageView::create(std::move(viewParams))}); - ctx.override_->insertAssetIntoCache(std::move(viewBundle),cacheKey,ctx.inner,hierarchyLevel); - } - } - } - break; - case CElementTexture::Type::SCALE: - cacheTexture(ctx,hierarchyLevel,tex->scale.texture,semantic); - break; - default: - _NBL_DEBUG_BREAK_IF(true); - break; - } -} - -auto CMitsubaLoader::getBSDFtreeTraversal(SContext& ctx, const CElementBSDF* bsdf, const CElementEmitter* emitter, core::matrix4SIMD tform) -> SContext::bsdf_type -{ - if (!bsdf) - { - static auto blackBSDF = []() -> auto - { - CElementBSDF retval("nullptr BSDF"); - retval.type = CElementBSDF::Type::DIFFUSE, - retval.diffuse.reflectance = 0.f; - retval.diffuse.alpha = 0.f; - return retval; - }(); - bsdf = &blackBSDF; - } - - auto found = ctx.instrStreamCache.find(bsdf); - if (found == ctx.instrStreamCache.end()) - found = ctx.instrStreamCache.insert({ bsdf,genBSDFtreeTraversal(ctx, bsdf) }).first; - auto compiled_bsdf = found->second; - - // TODO cache the IR Node - CMitsubaMaterialCompilerFrontend::EmitterNode* emitterIRNode = nullptr; - if (emitter->type == CElementEmitter::AREA) - { - cacheEmissionProfile(ctx,emitter->area.emissionProfile); - emitterIRNode = ctx.frontend.createEmitterNode(ctx.ir.get(),emitter,tform); + // TODO cache the IR Node + CMitsubaMaterialCompilerFrontend::EmitterNode* emitterIRNode = nullptr; + if (emitter->type == CElementEmitter::AREA) + { + 
cacheEmissionProfile(ctx,emitter->area.emissionProfile); + emitterIRNode = ctx.frontend.createEmitterNode(ctx.ir.get(),emitter,tform); } // A new root node gets made for every {bsdf,emitter} combo @@ -1043,163 +674,401 @@ auto CMitsubaLoader::genBSDFtreeTraversal(SContext& ctx, const CElementBSDF* _bs template inline core::smart_refctd_ptr CMitsubaLoader::createDS0(const SContext& _ctx, asset::ICPUPipelineLayout* _layout, const asset::material_compiler::CMaterialCompilerGLSLBackendCommon::result_t& _compResult, Iter meshBegin, Iter meshEnd) { - auto* ds0layout = _layout->getDescriptorSetLayout(0u); - auto ds0 = core::make_smart_refctd_ptr(core::smart_refctd_ptr(ds0layout)); - { - auto count = _ctx.backend_ctx.vt.getCPUVirtualTexture()->getDescriptorSetWrites(nullptr, nullptr, nullptr); +#ifdef DEBUG_MITSUBA_LOADER + std::ofstream ofile("log.txt"); +#endif + core::vector instanceData; + for (auto it=meshBegin; it != meshEnd; ++it) + { + auto mesh = it->first.get(); + + core::vectorSIMDf emissive; + for (auto& mb : mesh->getMeshBuffers()) + mb->setBaseInstance(instanceData.size()); + auto* meshMeta = _ctx.meta->getAssetSpecificMetadata(mesh); + auto baseInstanceDataIt = meshMeta->m_instances.begin(); + for (const auto& inst : meshMeta->m_instanceAuxData) + { + nbl_glsl_ext_Mitsuba_Loader_instance_data_t instData; - auto writes = core::make_refctd_dynamic_array>(count.first); - auto info = core::make_refctd_dynamic_array>(count.second); + instData.tform = baseInstanceDataIt->worldTform; + instData.tform.getSub3x3InverseTranspose(reinterpret_cast(instData.normalMatrixRow0)); + reinterpret_cast(instData.determinantSignBit) = instData.tform.getPseudoDeterminant().x; + instData.determinantSignBit &= 0x80000000; - _ctx.backend_ctx.vt.getCPUVirtualTexture()->getDescriptorSetWrites(writes->data(), info->data(), ds0.get()); + const auto& bsdf = inst.bsdf; + auto bsdf_front = bsdf.front; + auto bsdf_back = bsdf.back; + auto material_it = _compResult.materials.find(bsdf_front); 
+ { + const asset::material_compiler::oriented_material_t* material = + (material_it != _compResult.materials.end()) ? &material_it->second : nullptr; - for (const auto& w : (*writes)) - { - auto descRng = ds0->getDescriptors(w.binding); - for (uint32_t i = 0u; i < w.count; ++i) - descRng.begin()[w.arrayElement+i].assign(w.info[i], w.descriptorType); + if (material) { +#ifdef DEBUG_MITSUBA_LOADER + //os::Printer::log("Debug print front BSDF with id = ", std::to_string(&bsdf), ELL_INFORMATION); + + ofile << "Debug print front BSDF with id = " << &bsdf << std::endl; + _ctx.backend.debugPrint(ofile, *material, _compResult, &_ctx.backend_ctx); + +#endif + instData.material.front = *material; + } + } + material_it = _compResult.materials.find(bsdf_back); + { + const asset::material_compiler::oriented_material_t* material = + (material_it != _compResult.materials.end()) ? &material_it->second : nullptr; + + if (material) + { +#ifdef DEBUG_MITSUBA_LOADER + //os::Printer::log("Debug print back BSDF with id = ", std::to_string(&bsdf), ELL_INFORMATION); + ofile << "Debug print back BSDF with id = " << &bsdf << std::endl; + _ctx.backend.debugPrint(ofile, *material, _compResult, &_ctx.backend_ctx); +#endif + + instData.material.back = *material; + } + } + + instanceData.push_back(instData); + baseInstanceDataIt++; } } - auto d = ds0->getDescriptors(PRECOMPUTED_VT_DATA_BINDING).begin(); +#ifdef DEBUG_MITSUBA_LOADER + ofile.close(); +#endif + d = ds0->getDescriptors(INSTANCE_DATA_BINDING).begin(); { - auto precompDataBuf = core::make_smart_refctd_ptr(sizeof(asset::ICPUVirtualTexture::SPrecomputedData)); - memcpy(precompDataBuf->getPointer(), &_ctx.backend_ctx.vt.getCPUVirtualTexture()->getPrecomputedData(), precompDataBuf->getSize()); + auto instDataBuf = core::make_smart_refctd_ptr(instanceData.size()*sizeof(nbl_glsl_ext_Mitsuba_Loader_instance_data_t)); + memcpy(instDataBuf->getPointer(), instanceData.data(), instDataBuf->getSize()); d->buffer.offset = 0u; - d->buffer.size = 
precompDataBuf->getSize(); - d->desc = std::move(precompDataBuf); + d->buffer.size = instDataBuf->getSize(); + d->desc = std::move(instDataBuf); } - d = ds0->getDescriptors(INSTR_BUF_BINDING).begin(); - { - auto instrbuf = core::make_smart_refctd_ptr(_compResult.instructions.size()*sizeof(decltype(_compResult.instructions)::value_type)); - memcpy(instrbuf->getPointer(), _compResult.instructions.data(), instrbuf->getSize()); - d->buffer.offset = 0u; - d->buffer.size = instrbuf->getSize(); - d->desc = std::move(instrbuf); - } - d = ds0->getDescriptors(BSDF_BUF_BINDING).begin(); + return ds0; +} +#endif + +using namespace std::string_literals; + +SContext::SContext( +// const asset::IGeometryCreator* _geomCreator, +// const asset::IMeshManipulator* _manipulator, + const asset::IAssetLoader::SAssetLoadContext& _ctx, + asset::IAssetLoader::IAssetLoaderOverride* _override, + CMitsubaMetadata* _metadata +) : /*creator(_geomCreator), manipulator(_manipulator),*/ inner(_ctx), override_(_override), meta(_metadata) +//,ir(core::make_smart_refctd_ptr()), frontend(this) +{ + auto materialPool = material_compiler3::CTrueIR::create(); + scene = ICPUScene::create(core::smart_refctd_ptr(materialPool)); // TODO: feed it max shapes per group + frontIR = material_compiler3::CFrontendIR::create(); +} + +auto SContext::loadShapeGroup(const uint32_t hierarchyLevel, const CElementShape::ShapeGroup* shapegroup) -> SContext::group_ass_type +{ + auto found = groupCache.find(shapegroup); + if (found!=groupCache.end()) + return found->second; + + const auto children = shapegroup->children; +#if 0 + core::vector meshes; + for (auto i=0u; ichildCount; i++) { - auto bsdfbuf = core::make_smart_refctd_ptr(_compResult.bsdfData.size()*sizeof(decltype(_compResult.bsdfData)::value_type)); - memcpy(bsdfbuf->getPointer(), _compResult.bsdfData.data(), bsdfbuf->getSize()); + auto child = children[i]; + if (!child) + continue; - d->buffer.offset = 0u; - d->buffer.size = bsdfbuf->getSize(); - d->desc = 
std::move(bsdfbuf); + assert(child->type!=CElementShape::Type::INSTANCE); + if (child->type != CElementShape::Type::SHAPEGROUP) { + auto lowermesh = loadBasicShape(hierarchyLevel, child, relTform); + meshes.push_back(std::move(lowermesh)); + } + else { + auto lowermeshes = loadShapeGroup(hierarchyLevel, &child->shapegroup, relTform); + meshes.insert(meshes.begin(), std::make_move_iterator(lowermeshes.begin()), std::make_move_iterator(lowermeshes.end())); + } } - d = ds0->getDescriptors(EMITTER_DATA_BUF_BINDING).begin(); + + ctx.groupCache.insert({shapegroup,meshes}); + return meshes; +#endif + return nullptr; +} + +#if 0 +static core::smart_refctd_ptr createMeshFromGeomCreatorReturnType(IGeometryCreator::return_type&& _data, asset::IAssetManager* _manager) +{ + //creating pipeline just to forward vtx and primitive params + auto pipeline = core::make_smart_refctd_ptr( + nullptr, nullptr, nullptr, //no layout nor shaders + _data.inputParams, + asset::SBlendParams(), + _data.assemblyParams, + asset::SRasterizationParams() + ); + + auto mb = core::make_smart_refctd_ptr( + nullptr, nullptr, + _data.bindings, std::move(_data.indexBuffer) + ); + mb->setIndexCount(_data.indexCount); + mb->setIndexType(_data.indexType); + mb->setBoundingBox(_data.bbox); + mb->setPipeline(std::move(pipeline)); + constexpr auto NORMAL_ATTRIBUTE = 3; + mb->setNormalAttributeIx(NORMAL_ATTRIBUTE); + + auto mesh = core::make_smart_refctd_ptr(); + mesh->getMeshBufferVector().push_back(std::move(mb)); + + return mesh; +} +#endif + +auto SContext::loadBasicShape(const uint32_t hierarchyLevel, const CElementShape* shape) -> SContext::shape_ass_type +{ + auto found = shapeCache.find(shape); + if (found!=shapeCache.end()) + return found->second.geom; + +#if 0 + constexpr uint32_t UV_ATTRIB_ID = 2u; + + + + auto loadModel = [&](const ext::MitsubaLoader::SPropertyElementData& filename, int64_t index=-1) -> core::smart_refctd_ptr { - auto emitterbuf = 
core::make_smart_refctd_ptr(_compResult.emitterData.size() * sizeof(decltype(_compResult.emitterData)::value_type)); - memcpy(emitterbuf->getPointer(), _compResult.emitterData.data(), emitterbuf->getSize()); + assert(filename.type==ext::MitsubaLoader::SPropertyElementData::Type::STRING); + auto loadParams = ctx.inner.params; + loadParams.loaderFlags = static_cast(loadParams.loaderFlags | IAssetLoader::ELPF_RIGHT_HANDED_MESHES); + auto retval = interm_getAssetInHierarchy(m_assetMgr, filename.svalue, loadParams, hierarchyLevel/*+ICPUScene::MESH_HIERARCHY_LEVELS_BELOW*/, ctx.override_); + if (retval.getContents().empty()) + { + os::Printer::log(std::string("[ERROR] Could Not Find Mesh: ") + filename.svalue, ELL_ERROR); + return nullptr; + } + if (retval.getAssetType()!=asset::IAsset::ET_MESH) + { + os::Printer::log("[ERROR] Loaded an Asset but it wasn't a mesh, was E_ASSET_TYPE " + std::to_string(retval.getAssetType()), ELL_ERROR); + return nullptr; + } + auto contentRange = retval.getContents(); + auto serializedMeta = retval.getMetadata()->selfCast(); + // + uint32_t actualIndex = 0; + if (index>=0ll && serializedMeta) + for (auto it=contentRange.begin(); it!=contentRange.end(); it++) + { + auto meshMeta = static_cast(serializedMeta->getAssetSpecificMetadata(IAsset::castDown(*it).get())); + if (meshMeta->m_id!=static_cast(index)) + continue; + actualIndex = it-contentRange.begin(); + break; + } + // + if (contentRange.begin()+actualIndex < contentRange.end()) + { + auto asset = contentRange.begin()[actualIndex]; + if (!asset) + return nullptr; + return core::smart_refctd_ptr_static_cast(asset); + } + else + return nullptr; + }; - d->buffer.offset = 0u; - d->buffer.size = emitterbuf->getSize(); - d->desc = std::move(emitterbuf); - } - d = ds0->getDescriptors(PREFETCH_INSTR_BUF_BINDING).begin(); + core::smart_refctd_ptr mesh,newMesh; + bool flipNormals = false; + bool faceNormals = false; + float maxSmoothAngle = NAN; + switch (shape->type) { - const size_t sz = 
_compResult.prefetch_stream.size()*sizeof(decltype(_compResult.prefetch_stream)::value_type); - - constexpr size_t MIN_SSBO_SZ = 128ull; //prefetch stream won't be generated if no textures are used, so make sure we're not creating 0-size buffer - auto prefetch_instr_buf = core::make_smart_refctd_ptr(std::max(MIN_SSBO_SZ, sz)); - memcpy(prefetch_instr_buf->getPointer(), _compResult.prefetch_stream.data(), sz); - - d->buffer.offset = 0u; - d->buffer.size = prefetch_instr_buf->getSize(); - d->desc = std::move(prefetch_instr_buf); - } - -#ifdef DEBUG_MITSUBA_LOADER - std::ofstream ofile("log.txt"); -#endif - core::vector instanceData; - for (auto it=meshBegin; it != meshEnd; ++it) - { - auto mesh = it->first.get(); - - core::vectorSIMDf emissive; - for (auto& mb : mesh->getMeshBuffers()) - mb->setBaseInstance(instanceData.size()); - auto* meshMeta = _ctx.meta->getAssetSpecificMetadata(mesh); - auto baseInstanceDataIt = meshMeta->m_instances.begin(); - for (const auto& inst : meshMeta->m_instanceAuxData) + case CElementShape::Type::CUBE: { - nbl_glsl_ext_Mitsuba_Loader_instance_data_t instData; - - instData.tform = baseInstanceDataIt->worldTform; - instData.tform.getSub3x3InverseTranspose(reinterpret_cast(instData.normalMatrixRow0)); - reinterpret_cast(instData.determinantSignBit) = instData.tform.getPseudoDeterminant().x; - instData.determinantSignBit &= 0x80000000; + auto cubeData = ctx.creator->createCubeMesh(core::vector3df(2.f)); - const auto& bsdf = inst.bsdf; - auto bsdf_front = bsdf.front; - auto bsdf_back = bsdf.back; - auto material_it = _compResult.materials.find(bsdf_front); + mesh = createMeshFromGeomCreatorReturnType(ctx.creator->createCubeMesh(core::vector3df(2.f)), m_assetMgr); + flipNormals = flipNormals!=shape->cube.flipNormals; + } + break; + case CElementShape::Type::SPHERE: + mesh = createMeshFromGeomCreatorReturnType(ctx.creator->createSphereMesh(1.f,64u,64u), m_assetMgr); + flipNormals = flipNormals!=shape->sphere.flipNormals; { - const 
asset::material_compiler::oriented_material_t* material = - (material_it != _compResult.materials.end()) ? &material_it->second : nullptr; - - if (material) { -#ifdef DEBUG_MITSUBA_LOADER - //os::Printer::log("Debug print front BSDF with id = ", std::to_string(&bsdf), ELL_INFORMATION); - - ofile << "Debug print front BSDF with id = " << &bsdf << std::endl; - _ctx.backend.debugPrint(ofile, *material, _compResult, &_ctx.backend_ctx); - -#endif - instData.material.front = *material; + core::matrix3x4SIMD tform; + tform.setScale(core::vectorSIMDf(shape->sphere.radius,shape->sphere.radius,shape->sphere.radius)); + tform.setTranslation(shape->sphere.center); + shape->transform.matrix = core::concatenateBFollowedByA(shape->transform.matrix,core::matrix4SIMD(tform)); + } + break; + case CElementShape::Type::CYLINDER: + { + auto diff = shape->cylinder.p0-shape->cylinder.p1; + mesh = createMeshFromGeomCreatorReturnType(ctx.creator->createCylinderMesh(1.f, 1.f, 64), m_assetMgr); + core::vectorSIMDf up(0.f); + float maxDot = diff[0]; + uint32_t index = 0u; + for (auto i = 1u; i < 3u; i++) + if (diff[i] < maxDot) + { + maxDot = diff[i]; + index = i; + } + up[index] = 1.f; + core::matrix3x4SIMD tform; + // mesh is left haded so transforming by LH matrix is fine (I hope but lets check later on) + core::matrix3x4SIMD::buildCameraLookAtMatrixLH(shape->cylinder.p0,shape->cylinder.p1,up).getInverse(tform); + core::matrix3x4SIMD scale; + scale.setScale(core::vectorSIMDf(shape->cylinder.radius,shape->cylinder.radius,core::length(diff).x)); + shape->transform.matrix = core::concatenateBFollowedByA(shape->transform.matrix,core::matrix4SIMD(core::concatenateBFollowedByA(tform,scale))); + } + flipNormals = flipNormals!=shape->cylinder.flipNormals; + break; + case CElementShape::Type::RECTANGLE: + mesh = createMeshFromGeomCreatorReturnType(ctx.creator->createRectangleMesh(core::vector2df_SIMD(1.f,1.f)), m_assetMgr); + flipNormals = flipNormals!=shape->rectangle.flipNormals; + break; + case 
CElementShape::Type::DISK: + mesh = createMeshFromGeomCreatorReturnType(ctx.creator->createDiskMesh(1.f,64u), m_assetMgr); + flipNormals = flipNormals!=shape->disk.flipNormals; + break; + case CElementShape::Type::OBJ: + mesh = loadModel(shape->obj.filename); + flipNormals = flipNormals!=shape->obj.flipNormals; + faceNormals = shape->obj.faceNormals; + maxSmoothAngle = shape->obj.maxSmoothAngle; + if (mesh && shape->obj.flipTexCoords) + { + newMesh = core::smart_refctd_ptr_static_cast (mesh->clone(1u)); + for (auto& meshbuffer : mesh->getMeshBufferVector()) + { + auto binding = meshbuffer->getVertexBufferBindings()[UV_ATTRIB_ID]; + if (binding.buffer) + { + binding.buffer = core::smart_refctd_ptr_static_cast(binding.buffer->clone(0u)); + meshbuffer->setVertexBufferBinding(std::move(binding),UV_ATTRIB_ID); + core::vectorSIMDf uv; + for (uint32_t i=0u; meshbuffer->getAttribute(uv,UV_ATTRIB_ID,i); i++) + { + uv.y = -uv.y; + meshbuffer->setAttribute(uv,UV_ATTRIB_ID,i); + } + } } } - material_it = _compResult.materials.find(bsdf_back); + // collapse parameter gets ignored + break; + case CElementShape::Type::PLY: + _NBL_DEBUG_BREAK_IF(true); // this code has never been tested + mesh = loadModel(shape->ply.filename); + flipNormals = flipNormals!=shape->ply.flipNormals; + faceNormals = shape->ply.faceNormals; + maxSmoothAngle = shape->ply.maxSmoothAngle; + if (mesh && shape->ply.srgb) { - const asset::material_compiler::oriented_material_t* material = - (material_it != _compResult.materials.end()) ? 
&material_it->second : nullptr; - - if (material) + uint32_t totalVertexCount = 0u; + for (auto meshbuffer : mesh->getMeshBuffers()) + totalVertexCount += IMeshManipulator::upperBoundVertexID(meshbuffer); + if (totalVertexCount) { -#ifdef DEBUG_MITSUBA_LOADER - //os::Printer::log("Debug print back BSDF with id = ", std::to_string(&bsdf), ELL_INFORMATION); - ofile << "Debug print back BSDF with id = " << &bsdf << std::endl; - _ctx.backend.debugPrint(ofile, *material, _compResult, &_ctx.backend_ctx); -#endif - - instData.material.back = *material; + constexpr uint32_t hidefRGBSize = 4u; + auto newRGBbuff = core::make_smart_refctd_ptr(hidefRGBSize*totalVertexCount); + newMesh = core::smart_refctd_ptr_static_cast(mesh->clone(1u)); + constexpr uint32_t COLOR_ATTR = 1u; + constexpr uint32_t COLOR_BUF_BINDING = 15u; + uint32_t* newRGB = reinterpret_cast(newRGBbuff->getPointer()); + uint32_t offset = 0u; + for (auto& meshbuffer : mesh->getMeshBufferVector()) + { + core::vectorSIMDf rgb; + for (uint32_t i=0u; meshbuffer->getAttribute(rgb,COLOR_ATTR,i); i++,offset++) + { + for (auto i=0; i<3u; i++) + rgb[i] = core::srgb2lin(rgb[i]); + ICPUMeshBuffer::setAttribute(rgb,newRGB+offset,asset::EF_A2B10G10R10_UNORM_PACK32); + } + auto newPipeline = core::smart_refctd_ptr_static_cast(meshbuffer->getPipeline()->clone(0u)); + auto& vtxParams = newPipeline->getVertexInputParams(); + vtxParams.attributes[COLOR_ATTR].format = EF_A2B10G10R10_UNORM_PACK32; + vtxParams.attributes[COLOR_ATTR].relativeOffset = 0u; + vtxParams.attributes[COLOR_ATTR].binding = COLOR_BUF_BINDING; + vtxParams.bindings[COLOR_BUF_BINDING].inputRate = EVIR_PER_VERTEX; + vtxParams.bindings[COLOR_BUF_BINDING].stride = hidefRGBSize; + vtxParams.enabledBindingFlags |= (1u<setPipeline(std::move(newPipeline)); + meshbuffer->setVertexBufferBinding({offset*hidefRGBSize,core::smart_refctd_ptr(newRGBbuff)},COLOR_BUF_BINDING); + } } } + break; + case CElementShape::Type::SERIALIZED: + mesh = 
loadModel(shape->serialized.filename,shape->serialized.shapeIndex); + flipNormals = flipNormals!=shape->serialized.flipNormals; + faceNormals = shape->serialized.faceNormals; + maxSmoothAngle = shape->serialized.maxSmoothAngle; + break; + case CElementShape::Type::SHAPEGROUP: + [[fallthrough]]; + case CElementShape::Type::INSTANCE: + assert(false); + break; + default: + _NBL_DEBUG_BREAK_IF(true); + break; + } + // + if (!mesh) + return nullptr; - instanceData.push_back(instData); - baseInstanceDataIt++; + // mesh including meshbuffers needs to be cloned because instance counts and base instances will be changed + if (!newMesh) + newMesh = core::smart_refctd_ptr_static_cast(mesh->clone(1u)); + // flip normals if necessary + if (flipNormals) + { + for (auto& meshbuffer : mesh->getMeshBufferVector()) + { + auto binding = meshbuffer->getIndexBufferBinding(); + binding.buffer = core::smart_refctd_ptr_static_cast(binding.buffer->clone(0u)); + meshbuffer->setIndexBufferBinding(std::move(binding)); + ctx.manipulator->flipSurfaces(meshbuffer.get()); } } -#ifdef DEBUG_MITSUBA_LOADER - ofile.close(); -#endif - d = ds0->getDescriptors(INSTANCE_DATA_BINDING).begin(); + // recompute normalis if necessary + if (faceNormals || !std::isnan(maxSmoothAngle)) + for (auto& meshbuffer : mesh->getMeshBufferVector()) { - auto instDataBuf = core::make_smart_refctd_ptr(instanceData.size()*sizeof(nbl_glsl_ext_Mitsuba_Loader_instance_data_t)); - memcpy(instDataBuf->getPointer(), instanceData.data(), instDataBuf->getSize()); + const float smoothAngleCos = cos(core::radians(maxSmoothAngle)); - d->buffer.offset = 0u; - d->buffer.size = instDataBuf->getSize(); - d->desc = std::move(instDataBuf); + // TODO: make these mesh manipulator functions const-correct + auto newMeshBuffer = ctx.manipulator->createMeshBufferUniquePrimitives(meshbuffer.get()); + ctx.manipulator->filterInvalidTriangles(newMeshBuffer.get()); + ctx.manipulator->calculateSmoothNormals(newMeshBuffer.get(), false, 0.f, 
newMeshBuffer->getNormalAttributeIx(), + [&](const asset::IMeshManipulator::SSNGVertexData& a, const asset::IMeshManipulator::SSNGVertexData& b, asset::ICPUMeshBuffer* buffer) + { + if (faceNormals) + return a.indexOffset == b.indexOffset; + else + return core::dot(a.parentTriangleFaceNormal, b.parentTriangleFaceNormal).x >= smoothAngleCos; + }); + meshbuffer = std::move(newMeshBuffer); } + IMeshManipulator::recalculateBoundingBox(newMesh.get()); + mesh = std::move(newMesh); - return ds0; -} + addInstance(mesh); + // cache and return + ctx.shapeCache.insert({ shape,mesh }); + return mesh; #endif - -using namespace std::string_literals; - -SContext::SContext( -// const asset::IGeometryCreator* _geomCreator, -// const asset::IMeshManipulator* _manipulator, - const asset::IAssetLoader::SAssetLoadContext& _ctx, - asset::IAssetLoader::IAssetLoaderOverride* _override, - CMitsubaMetadata* _metadata -) : /*creator(_geomCreator), manipulator(_manipulator),*/ inner(_ctx), override_(_override), meta(_metadata) -//,ir(core::make_smart_refctd_ptr()), frontend(this) -{ + return nullptr; } } diff --git a/src/nbl/ext/MitsubaLoader/ParserUtil.cpp b/src/nbl/ext/MitsubaLoader/ParserUtil.cpp index fb54fab0ef..e8b18d22a8 100644 --- a/src/nbl/ext/MitsubaLoader/ParserUtil.cpp +++ b/src/nbl/ext/MitsubaLoader/ParserUtil.cpp @@ -28,10 +28,8 @@ using namespace nbl::system; auto ParserManager::parse(IFile* _file, const Params& _params) const -> Result { -// CMitsubaMetadata* obj = new CMitsubaMetadata(); - Result result = { - .metadata = core::make_smart_refctd_ptr() - }; + Result result = {}; + result.metadata = core::make_smart_refctd_ptr(); SessionContext ctx = { .result = &result, .params = &_params, @@ -325,11 +323,27 @@ void ParserManager::XMLContext::onEnd(const char* _el) return; } - if (element.element && element.element->getType()==IElement::Type::SHAPE) + // TODO: only allow this for top level elements so we don't load unused shapes + if (element.element) + switch 
(element.element->getType()) { - auto shape = static_cast(element.element); - if (shape) + case IElement::Type::EMITTER: + { + auto emitter = static_cast(element.element); + if (emitter->type!=CElementEmitter::Type::CONSTANT) + result.emitters.emplace_back(emitter,std::move(element.name)); + else + result.ambient += emitter->constant.radiance; + break; + } + case IElement::Type::SHAPE: + { + auto shape = static_cast(element.element); result.shapegroups.emplace_back(shape,std::move(element.name)); + break; + } + default: + break; } } @@ -337,20 +351,20 @@ void ParserManager::XMLContext::onEnd(const char* _el) template<> struct ParserManager::CreateElement { - static inline SNamedElement __call(const char** _atts, SessionContext* ctx) + static inline SessionContext::named_element_t __call(const char** _atts, SessionContext* ctx) { if (IElement::invalidAttributeCount(_atts,2u)) return {}; if (core::strcmpi(_atts[0],"name")) return {}; - return {ctx->objects.construct(),_atts[1]}; + return {ctx->result->objects->construct(),_atts[1]}; }; }; template<> struct ParserManager::CreateElement { - static inline SNamedElement __call(const char** _atts, SessionContext* ctx) + static inline SessionContext::named_element_t __call(const char** _atts, SessionContext* ctx) { const char* type; const char* id; @@ -358,7 +372,7 @@ struct ParserManager::CreateElement if (!IElement::getTypeIDAndNameStrings(type, id, name, _atts)) return {}; - CElementEmissionProfile* obj = ctx->objects.construct(id); + CElementEmissionProfile* obj = ctx->result->objects->construct(id); if (!obj) return {}; @@ -378,7 +392,7 @@ concept HasVisit = requires() { template requires HasTypeMap struct ParserManager::CreateElement { - static inline SNamedElement __call(const char** _atts, SessionContext* ctx) + static inline SessionContext::named_element_t __call(const char** _atts, SessionContext* ctx) { const char* type; const char* id; @@ -394,7 +408,7 @@ struct ParserManager::CreateElement return {}; } - 
Element* obj = ctx->objects.construct(id); + Element* obj = ctx->result->objects->construct(id); if (!obj) return {}; @@ -446,7 +460,7 @@ ParserManager::ParserManager() : propertyElements({ CElementEmissionProfile::compAddPropertyMap() }) { } -auto ParserManager::processAlias(const char** _atts, SessionContext* ctx) -> SNamedElement +auto ParserManager::processAlias(const char** _atts, SessionContext* ctx) -> SessionContext::named_element_t { const char* id = nullptr; const char* as = nullptr; @@ -480,7 +494,7 @@ auto ParserManager::processAlias(const char** _atts, SessionContext* ctx) -> SNa return {original,std::move(name)}; } -auto ParserManager::processRef(const char** _atts, SessionContext* ctx) -> SNamedElement +auto ParserManager::processRef(const char** _atts, SessionContext* ctx) -> SessionContext::named_element_t { const char* id; std::string name; diff --git a/src/nbl/video/CVulkanCommandPool.h b/src/nbl/video/CVulkanCommandPool.h index 8775553b99..d1c3085b4e 100644 --- a/src/nbl/video/CVulkanCommandPool.h +++ b/src/nbl/video/CVulkanCommandPool.h @@ -2,7 +2,6 @@ #define _NBL_VIDEO_C_VULKAN_COMMAND_POOL_H_INCLUDED_ #include "nbl/video/IGPUCommandPool.h" -#include "nbl/core/containers/CMemoryPool.h" #include From 4978b3666044602fe7432b3a2de4f99d7f68f31f Mon Sep 17 00:00:00 2001 From: devsh Date: Fri, 30 Jan 2026 22:17:30 +0100 Subject: [PATCH 457/472] make the example submodule point at the correct thing --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index cf8231abe7..79053ed2c4 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit cf8231abe7326f397bca0dac97c18c31bbd7b5eb +Subproject commit 79053ed2c48ba8e0aa629c1c70ca781da3def4d2 From 4e093f8c6bab958b542254adab3f3fc91c37be0a Mon Sep 17 00:00:00 2001 From: devsh Date: Sat, 31 Jan 2026 01:04:25 +0100 Subject: [PATCH 458/472] mark TODOs for @AnastaZIuk to coordinate --- 
.../hlsl/math/linalg/matrix_runtime_traits.hlsl | 2 +- .../matrix_utils/transformation_matrix_utils.hlsl | 2 +- include/nbl/ext/MitsubaLoader/CElementShape.h | 12 +++++++----- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl b/include/nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl index dc74c45ddd..3ed2f549c9 100644 --- a/include/nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl +++ b/include/nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl @@ -56,7 +56,7 @@ struct RuntimeTraits bool invertible; bool orthogonal; - scalar_t uniformScaleSq; + scalar_t uniformScaleSq; // TODO: rename to `uniformColumnSqNorm` and move this whole header to `nbl/builtin/hlsl/matrix_utils/` and associated namespace bool orthonormal; }; diff --git a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl index 1ad16dc28d..3869c7aaed 100644 --- a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl +++ b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl @@ -71,7 +71,7 @@ inline matrix getCastedMatrix(const matrix& in) return out; } -// TODO: use portable_float when merged +// TODO: remove //! 
multiplies matrices a and b, 3x4 matrices are treated as 4x4 matrices with 4th row set to (0, 0, 0 ,1) template inline matrix concatenateBFollowedByA(const matrix& a, const matrix& b) diff --git a/include/nbl/ext/MitsubaLoader/CElementShape.h b/include/nbl/ext/MitsubaLoader/CElementShape.h index 1ce78a399b..db0ca020e2 100644 --- a/include/nbl/ext/MitsubaLoader/CElementShape.h +++ b/include/nbl/ext/MitsubaLoader/CElementShape.h @@ -10,6 +10,9 @@ #include "nbl/ext/MitsubaLoader/CElementBSDF.h" #include "nbl/ext/MitsubaLoader/CElementEmitter.h" +// awful path +#include "nbl/builtin/hlsl/math/linalg/matrix_utils/transformation_matrix_utils.hlsl" + namespace nbl::ext::MitsubaLoader { @@ -236,15 +239,14 @@ class CElementShape final : public IElement inline std::string getLogName() const override { return "shape"; } - inline hlsl::float32_t3x4 getAbsoluteTransform() const + inline hlsl::float32_t3x4 getTransform() const { // explicit truncation auto local = hlsl::float32_t3x4(transform.matrix); - // TODO restore at some point (and make it actually work??) 
- // note: INSTANCE can only contain SHAPEGROUP and the latter doesnt have its own transform - //if (type==CElementShape::INSTANCE && instance.parent) - // return mul(instance.parent->getAbsoluteTransform(),local); + // SHAPEGROUP cannot have its own transformation + assert(type!=Type::SHAPEGROUP || hlsl::math::linalg::diagonal(1)==local); + return local; } From a39059c85b4713b9fc1985b745a29236f00fef3f Mon Sep 17 00:00:00 2001 From: devsh Date: Sat, 31 Jan 2026 01:06:04 +0100 Subject: [PATCH 459/472] do the ICPUScene filling code with instances (everything but materials) --- include/nbl/asset/ICPUScene.h | 33 ++- include/nbl/asset/IGeometryCollection.h | 2 +- .../nbl/ext/MitsubaLoader/CMitsubaMetadata.h | 8 +- src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp | 248 +++++++++++------- 4 files changed, 188 insertions(+), 103 deletions(-) diff --git a/include/nbl/asset/ICPUScene.h b/include/nbl/asset/ICPUScene.h index 5b5c8dd4cd..6205e651fa 100644 --- a/include/nbl/asset/ICPUScene.h +++ b/include/nbl/asset/ICPUScene.h @@ -8,13 +8,14 @@ #include "nbl/core/containers/CMemoryPool.h" #include "nbl/asset/IScene.h" +#include "nbl/asset/ICPUMorphTargets.h" #include "nbl/asset/material_compiler3/CTrueIR.h" namespace nbl::asset { // -class NBL_API2 ICPUScene final : public IAsset, public IScene +class ICPUScene final : public IAsset, public IScene { using base_t = IScene; using material_table_allocator_t = core::GeneralpurposeAddressAllocatorST; @@ -82,6 +83,7 @@ class NBL_API2 ICPUScene final : public IAsset, public IScene // using material_table_offset_t = uint32_t; + constexpr static inline material_table_offset_t InvalidMaterialTable = ~0u; material_table_offset_t allocateMaterialTable(const ICPUMorphTargets* targets) { if (!targets) @@ -121,12 +123,20 @@ class NBL_API2 ICPUScene final : public IAsset, public IScene return morphTargets.size()==initialTransforms.size(); } - inline size_t resize(const size_t newSize) + inline void reserve(const size_t newSize) + { + 
morphTargets.reserve(newSize); + materials.reserve(newSize); + if (!initialTransforms.empty()) + initialTransforms.reserve(newSize); + } + + inline void resize(const size_t newSize) { morphTargets.resize(newSize); - materials.resize(newSize); + materials.resize(newSize,InvalidMaterialTable); if (!initialTransforms.empty()) - initialTransforms.resize(newSize); + initialTransforms.resize(newSize,ICPUGeometryCollection::SGeometryReference{}.transform); } inline void erase(const size_t first, const size_t last) @@ -139,6 +149,15 @@ class NBL_API2 ICPUScene final : public IAsset, public IScene inline size_t size() const {return morphTargets.size();} + inline std::span> getMorphTargets() {return morphTargets;} + inline std::span> getMorphTargets() const {return morphTargets;} + + inline std::span getMaterialTables() {return materials;} + inline std::span getMaterialTables() const {return materials;} + + inline std::span getInitialTransforms() {return initialTransforms;} + inline std::span getInitialTransforms() const {return initialTransforms;} + private: friend class ICPUScene; @@ -154,12 +173,6 @@ class NBL_API2 ICPUScene final : public IAsset, public IScene inline SInstanceStorage& getInstances() {return m_instances;} inline const SInstanceStorage& getInstances() const {return m_instances;} - inline void setInstanceInitialTransform(const uint32_t index, const hlsl::float32_t3x4& xform) - { - if (indexm_ambientLight = result.ambient; + // TODO: abstract/move away since many loaders will need to do this + core::unordered_map> morphTargetCache; + auto createMorphTargets = [&_params,&morphTargetCache](core::smart_refctd_ptr&& collection)->core::smart_refctd_ptr + { + auto found = morphTargetCache.find(collection.get()); + if (found!=morphTargetCache.end()) + return found->second; + auto targets = core::make_smart_refctd_ptr(); + if (targets) + { + morphTargetCache[collection.get()] = targets; + targets->getTargets()->push_back({.geoCollection=std::move(collection)}); + } 
+ return targets; + }; + + // + auto& instances = ctx.scene->getInstances(); + instances.reserve(result.shapegroups.size()); + auto addToScene = [&](const CElementShape* shape, core::smart_refctd_ptr&& collection)->void + { + assert(shape && collection); + auto targets = createMorphTargets(std::move(collection)); + if (!targets) + { + _params.logger.log("Failed to create ICPUMorphTargets for Shape with id %s",system::ILogger::ELL_ERROR,shape->id.c_str()); + return; + } + const auto index = instances.size(); + instances.resize(index+1); + instances.getMorphTargets()[index] = std::move(targets); + // TODO: add materials (incl emission) to the instances + /* + auto emitter = shape->obtainEmitter(); + auto bsdf = getBSDFtreeTraversal(ctx, shape->bsdf, &emitter, getAbsoluteTransform()); + + SContext::SInstanceData instance( + tform, + bsdf, + #if defined(_NBL_DEBUG) || defined(_NBL_RELWITHDEBINFO) + shape->bsdf ? shape->bsdf->id : "", + #endif + emitter, + CElementEmitter{} // no backface emission + ); + */ + instances.getInitialTransforms()[index] = shape->getTransform(); + }; + // first go over all actually used shapes which are not shapegroups (regular shapes and instances) for (auto& shapepair : result.shapegroups) { @@ -306,8 +355,17 @@ SAssetBundle CMitsubaLoader::loadAsset(system::IFile* _file, const IAssetLoader: if (shapedef->type!=CElementShape::Type::INSTANCE) { auto geometry = ctx.loadBasicShape(_hierarchyLevel,shapedef); - // TODO: add to geometry collection, make a morph target, and add to scene - shapedef->getAbsoluteTransform(); + if (!geometry) + continue; + auto collection = core::make_smart_refctd_ptr(); + if (!collection) + { + _params.logger.log("Failed to create an ICPUGeometryCollection non-Instanced Shape with id %s",system::ILogger::ELL_ERROR,shapedef->id.c_str()); + continue; + } + // we don't put a transform on the geometry, because we want the transform on the instance + collection->getGeometries()->push_back({.geometry=std::move(geometry)}); 
+ addToScene(shapedef,std::move(collection)); } else // mitsuba is weird and lists instances under a shapegroup instead of having instances reference the shapegroup { @@ -316,34 +374,13 @@ SAssetBundle CMitsubaLoader::loadAsset(system::IFile* _file, const IAssetLoader: if (!parent) // we should probably assert this continue; assert(parent->type==CElementShape::Type::SHAPEGROUP); - const CElementShape::ShapeGroup* shapegroup = &parent->shapegroup; - auto collection = ctx.loadShapeGroup(_hierarchyLevel,shapegroup); - // TODO: make a morph target and add to scene with transform of - shapedef->getAbsoluteTransform(); + auto collection = ctx.loadShapeGroup(_hierarchyLevel,&parent->shapegroup); + addToScene(shapedef,std::move(collection)); } } result.shapegroups.clear(); #if 0 - // TODO: add materials (incl emission) to the instances - auto addInstance = [shape,&ctx,&relTform,this](SContext::shape_ass_type& mesh) - { - auto emitter = shape->obtainEmitter(); - core::matrix3x4SIMD tform = core::concatenateBFollowedByA(relTform, shape->getAbsoluteTransform()); - auto bsdf = getBSDFtreeTraversal(ctx, shape->bsdf, &emitter, core::matrix4SIMD(tform)); - - SContext::SInstanceData instance( - tform, - bsdf, -#if defined(_NBL_DEBUG) || defined(_NBL_RELWITHDEBINFO) - shape->bsdf ? 
shape->bsdf->id : "", -#endif - emitter, - CElementEmitter{} // no backface emission - ); - ctx.mapMesh2instanceData.insert({ mesh.get(), instance }); - }; - // TODO: put IR and stuff in metadata so that we can recompile the materials after load auto compResult = ctx.backend.compile(&ctx.backend_ctx, ctx.ir.get(), decltype(ctx.backend)::EGST_PRESENT_WITH_AOV_EXTRACTION); ctx.backend_ctx.vt.commitAll(); @@ -775,31 +812,47 @@ auto SContext::loadShapeGroup(const uint32_t hierarchyLevel, const CElementShape auto found = groupCache.find(shapegroup); if (found!=groupCache.end()) return found->second; - - const auto children = shapegroup->children; -#if 0 - core::vector meshes; - for (auto i=0u; ichildCount; i++) + + auto collection = core::make_smart_refctd_ptr(); + if (!collection) + inner.params.logger.log("Failed to create an ICPUGeometryCollection for Shape Group",system::ILogger::ELL_ERROR); + else { - auto child = children[i]; - if (!child) - continue; - - assert(child->type!=CElementShape::Type::INSTANCE); - if (child->type != CElementShape::Type::SHAPEGROUP) { - auto lowermesh = loadBasicShape(hierarchyLevel, child, relTform); - meshes.push_back(std::move(lowermesh)); - } - else { - auto lowermeshes = loadShapeGroup(hierarchyLevel, &child->shapegroup, relTform); - meshes.insert(meshes.begin(), std::make_move_iterator(lowermeshes.begin()), std::make_move_iterator(lowermeshes.end())); + auto* geometries = collection->getGeometries(); + const auto children = shapegroup->children; + for (auto i=0u; ichildCount; i++) + { + auto child = children[i]; + if (!child) + continue; + + assert(child->type!=CElementShape::Type::INSTANCE); + if (child->type!=CElementShape::Type::SHAPEGROUP) + { + auto geometry = loadBasicShape(hierarchyLevel,child); + if (geometry) + geometries->push_back({.transform=child->getTransform(),.geometry=std::move(geometry)}); + } + else + { + auto nestedCollection = loadShapeGroup(hierarchyLevel,&child->shapegroup); + if (!nestedCollection) + 
continue; + auto* nestedGeometries = nestedCollection->getGeometries(); + for (auto& ref : *nestedGeometries) + { + auto& newRef = geometries->emplace_back(std::move(ref)); + // thankfully because SHAPEGROUPS are not allowed to have transforms we don't need to rack them up + //if (newRef.hasTransform()) + // newRef.transform = hlsl::mul(thisTransform,newRef.transform); + //else + // newRef.transform = thisTransform; + } + } } + groupCache.insert({shapegroup,collection}); } - - ctx.groupCache.insert({shapegroup,meshes}); - return meshes; -#endif - return nullptr; + return collection; } #if 0 @@ -838,6 +891,15 @@ auto SContext::loadBasicShape(const uint32_t hierarchyLevel, const CElementShape if (found!=shapeCache.end()) return found->second.geom; + core::smart_refctd_ptr geo; + auto exiter = core::makeRAIIExiter<>([&]()->void + { + if (geo) + return; + this->inner.params.logger.log("Failed to Load/Create Basic non-Instanced Shape with id %s",system::ILogger::ELL_ERROR,shape->id.c_str()); + } + ); + #if 0 constexpr uint32_t UV_ATTRIB_ID = 2u; @@ -883,21 +945,21 @@ auto SContext::loadBasicShape(const uint32_t hierarchyLevel, const CElementShape else return nullptr; }; - - core::smart_refctd_ptr mesh,newMesh; +#endif bool flipNormals = false; bool faceNormals = false; - float maxSmoothAngle = NAN; + float maxSmoothAngle = hlsl::bit_cast(hlsl::numeric_limits::quiet_NaN); switch (shape->type) { +#if 0 case CElementShape::Type::CUBE: { auto cubeData = ctx.creator->createCubeMesh(core::vector3df(2.f)); mesh = createMeshFromGeomCreatorReturnType(ctx.creator->createCubeMesh(core::vector3df(2.f)), m_assetMgr); flipNormals = flipNormals!=shape->cube.flipNormals; - } break; + } case CElementShape::Type::SPHERE: mesh = createMeshFromGeomCreatorReturnType(ctx.creator->createSphereMesh(1.f,64u,64u), m_assetMgr); flipNormals = flipNormals!=shape->sphere.flipNormals; @@ -939,6 +1001,8 @@ auto SContext::loadBasicShape(const uint32_t hierarchyLevel, const CElementShape mesh = 
createMeshFromGeomCreatorReturnType(ctx.creator->createDiskMesh(1.f,64u), m_assetMgr); flipNormals = flipNormals!=shape->disk.flipNormals; break; +#endif +#if 0 case CElementShape::Type::OBJ: mesh = loadModel(shape->obj.filename); flipNormals = flipNormals!=shape->obj.flipNormals; @@ -1014,61 +1078,63 @@ auto SContext::loadBasicShape(const uint32_t hierarchyLevel, const CElementShape faceNormals = shape->serialized.faceNormals; maxSmoothAngle = shape->serialized.maxSmoothAngle; break; +#endif case CElementShape::Type::SHAPEGROUP: [[fallthrough]]; case CElementShape::Type::INSTANCE: assert(false); break; default: - _NBL_DEBUG_BREAK_IF(true); +// _NBL_DEBUG_BREAK_IF(true); break; } // - if (!mesh) - return nullptr; - - // mesh including meshbuffers needs to be cloned because instance counts and base instances will be changed - if (!newMesh) - newMesh = core::smart_refctd_ptr_static_cast(mesh->clone(1u)); - // flip normals if necessary - if (flipNormals) + if (geo) { - for (auto& meshbuffer : mesh->getMeshBufferVector()) +#if 0 + // mesh including meshbuffers needs to be cloned because instance counts and base instances will be changed + if (!newMesh) + newMesh = core::smart_refctd_ptr_static_cast(mesh->clone(1u)); + // flip normals if necessary + if (flipNormals) { - auto binding = meshbuffer->getIndexBufferBinding(); - binding.buffer = core::smart_refctd_ptr_static_cast(binding.buffer->clone(0u)); - meshbuffer->setIndexBufferBinding(std::move(binding)); - ctx.manipulator->flipSurfaces(meshbuffer.get()); + for (auto& meshbuffer : mesh->getMeshBufferVector()) + { + auto binding = meshbuffer->getIndexBufferBinding(); + binding.buffer = core::smart_refctd_ptr_static_cast(binding.buffer->clone(0u)); + meshbuffer->setIndexBufferBinding(std::move(binding)); + ctx.manipulator->flipSurfaces(meshbuffer.get()); + } } - } - // recompute normalis if necessary - if (faceNormals || !std::isnan(maxSmoothAngle)) - for (auto& meshbuffer : mesh->getMeshBufferVector()) - { - const 
float smoothAngleCos = cos(core::radians(maxSmoothAngle)); - - // TODO: make these mesh manipulator functions const-correct - auto newMeshBuffer = ctx.manipulator->createMeshBufferUniquePrimitives(meshbuffer.get()); - ctx.manipulator->filterInvalidTriangles(newMeshBuffer.get()); - ctx.manipulator->calculateSmoothNormals(newMeshBuffer.get(), false, 0.f, newMeshBuffer->getNormalAttributeIx(), - [&](const asset::IMeshManipulator::SSNGVertexData& a, const asset::IMeshManipulator::SSNGVertexData& b, asset::ICPUMeshBuffer* buffer) + // recompute normalis if necessary + if (faceNormals || !std::isnan(maxSmoothAngle)) + for (auto& meshbuffer : mesh->getMeshBufferVector()) { - if (faceNormals) - return a.indexOffset == b.indexOffset; - else - return core::dot(a.parentTriangleFaceNormal, b.parentTriangleFaceNormal).x >= smoothAngleCos; - }); - meshbuffer = std::move(newMeshBuffer); - } - IMeshManipulator::recalculateBoundingBox(newMesh.get()); - mesh = std::move(newMesh); + const float smoothAngleCos = cos(core::radians(maxSmoothAngle)); - addInstance(mesh); - // cache and return - ctx.shapeCache.insert({ shape,mesh }); - return mesh; + // TODO: make these mesh manipulator functions const-correct + auto newMeshBuffer = ctx.manipulator->createMeshBufferUniquePrimitives(meshbuffer.get()); + ctx.manipulator->filterInvalidTriangles(newMeshBuffer.get()); + ctx.manipulator->calculateSmoothNormals(newMeshBuffer.get(), false, 0.f, newMeshBuffer->getNormalAttributeIx(), + [&](const asset::IMeshManipulator::SSNGVertexData& a, const asset::IMeshManipulator::SSNGVertexData& b, asset::ICPUMeshBuffer* buffer) + { + if (faceNormals) + return a.indexOffset == b.indexOffset; + else + return core::dot(a.parentTriangleFaceNormal, b.parentTriangleFaceNormal).x >= smoothAngleCos; + }); + meshbuffer = std::move(newMeshBuffer); + } + IMeshManipulator::recalculateBoundingBox(newMesh.get()); + mesh = std::move(newMesh); #endif - return nullptr; + // cache and return + 
CMitsubaMetadata::SGeometryMetaPair geoMeta = {.geom=std::move(geo)}; + geoMeta.meta.m_id = shape->id; + geoMeta.meta.type = shape->type; + shapeCache.insert({shape,std::move(geoMeta)}); + } + return geo; } } From 925023e98ac8697c65604c512b90e38fac4af328 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 31 Jan 2026 14:11:56 +0100 Subject: [PATCH 460/472] adjust to comments --- examples_tests | 2 +- include/nbl/builtin/hlsl/array_accessors.hlsl | 3 +- include/nbl/builtin/hlsl/cpp_compat/basic.h | 2 + include/nbl/builtin/hlsl/ies/profile.hlsl | 56 ++--- include/nbl/builtin/hlsl/ies/sampler.hlsl | 194 +++++++++--------- include/nbl/builtin/hlsl/ies/texture.hlsl | 109 +++++----- include/nbl/builtin/hlsl/math/octahedral.hlsl | 52 ++--- include/nbl/builtin/hlsl/math/polar.hlsl | 14 +- .../transformation_matrix_utils.hlsl | 5 +- include/nbl/builtin/hlsl/surface_transform.h | 8 +- include/nbl/ext/ScreenShot/ScreenShot.h | 136 +++++++++++- .../asset/interchange/CIESProfileLoader.cpp | 7 +- src/nbl/asset/interchange/CIESProfileLoader.h | 3 - src/nbl/asset/utils/CIESProfile.cpp | 45 ++-- src/nbl/asset/utils/CIESProfile.h | 54 ++--- src/nbl/asset/utils/CIESProfileParser.cpp | 15 +- src/nbl/builtin/CMakeLists.txt | 4 + src/nbl/ext/ImGui/ImGui.cpp | 40 +++- .../CMitsubaMaterialCompilerFrontend.cpp | 40 +--- src/nbl/video/utilities/CAssetConverter.cpp | 4 +- 20 files changed, 458 insertions(+), 335 deletions(-) diff --git a/examples_tests b/examples_tests index 00e3812432..870dc85170 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 00e3812432d9c1a9e1bfdc6d00cfa852bb7e9f32 +Subproject commit 870dc8517033585b25f6f9f139f4611fad8f2c35 diff --git a/include/nbl/builtin/hlsl/array_accessors.hlsl b/include/nbl/builtin/hlsl/array_accessors.hlsl index b025f9d677..73a4b83102 100644 --- a/include/nbl/builtin/hlsl/array_accessors.hlsl +++ b/include/nbl/builtin/hlsl/array_accessors.hlsl @@ -24,7 +24,8 @@ struct array_set arr[index] = val; } }; + } } 
-#endif \ No newline at end of file +#endif diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index a5715efa15..89753d4ba5 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -84,6 +84,8 @@ struct add_pointer #endif + + namespace nbl { namespace hlsl diff --git a/include/nbl/builtin/hlsl/ies/profile.hlsl b/include/nbl/builtin/hlsl/ies/profile.hlsl index a85141aebd..a6bbfdb692 100644 --- a/include/nbl/builtin/hlsl/ies/profile.hlsl +++ b/include/nbl/builtin/hlsl/ies/profile.hlsl @@ -6,6 +6,7 @@ #define _NBL_BUILTIN_HLSL_IES_PROFILE_INCLUDED_ #include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/cpp_compat/basic.h" namespace nbl { @@ -16,31 +17,21 @@ namespace ies struct ProfileProperties { - //! max 16K resolution - NBL_CONSTEXPR_STATIC_INLINE uint32_t CDC_MAX_TEXTURE_WIDTH = 15360u; - NBL_CONSTEXPR_STATIC_INLINE uint32_t CDC_MAX_TEXTURE_HEIGHT = 8640u; - - // TODO: This constraint is hack because the mitsuba loader and its material compiler use Virtual Texturing, and there's some bug with IES not sampling sub 128x128 mip levels - // don't want to spend time to fix this since we'll be using descriptor indexing for the next iteration - NBL_CONSTEXPR_STATIC_INLINE uint32_t CDC_MIN_TEXTURE_WIDTH = 128u; - NBL_CONSTEXPR_STATIC_INLINE uint32_t CDC_MIN_TEXTURE_HEIGHT = 128u; - - NBL_CONSTEXPR_STATIC_INLINE uint32_t CDC_DEFAULT_TEXTURE_WIDTH = 1024u; - NBL_CONSTEXPR_STATIC_INLINE uint32_t CDC_DEFAULT_TEXTURE_HEIGHT = 1024u; - - NBL_CONSTEXPR_STATIC_INLINE float32_t MAX_VANGLE = 180.f; - NBL_CONSTEXPR_STATIC_INLINE float32_t MAX_HANGLE = 360.f; + NBL_CONSTEXPR_STATIC_INLINE float32_t MaxVAngleDegrees = 180.f; + NBL_CONSTEXPR_STATIC_INLINE float32_t MaxHAngleDegrees = 360.f; // TODO: could change to uint8_t once we get implemented // https://github.com/microsoft/hlsl-specs/pull/538 - using packed_flags_t = uint16_t; + using packed_flags_t = 
uint32_t; - NBL_CONSTEXPR_STATIC_INLINE packed_flags_t VERSION_BITS = 2u; - NBL_CONSTEXPR_STATIC_INLINE packed_flags_t TYPE_BITS = 2u; - NBL_CONSTEXPR_STATIC_INLINE packed_flags_t SYMM_BITS = 3u; - NBL_CONSTEXPR_STATIC_INLINE packed_flags_t VERSION_MASK = (packed_flags_t(1u) << VERSION_BITS) - packed_flags_t(1u); - NBL_CONSTEXPR_STATIC_INLINE packed_flags_t TYPE_MASK = (packed_flags_t(1u) << TYPE_BITS) - packed_flags_t(1u); - NBL_CONSTEXPR_STATIC_INLINE packed_flags_t SYMM_MASK = (packed_flags_t(1u) << SYMM_BITS) - packed_flags_t(1u); + NBL_CONSTEXPR_STATIC_INLINE packed_flags_t VersionBits = 2u; + NBL_CONSTEXPR_STATIC_INLINE packed_flags_t TypeBits = 2u; + NBL_CONSTEXPR_STATIC_INLINE packed_flags_t SymmetryBits = 3u; + NBL_CONSTEXPR_STATIC_INLINE packed_flags_t VersionMask = (packed_flags_t(1u) << VersionBits) - packed_flags_t(1u); + NBL_CONSTEXPR_STATIC_INLINE packed_flags_t TypeMask = (packed_flags_t(1u) << TypeBits) - packed_flags_t(1u); + NBL_CONSTEXPR_STATIC_INLINE packed_flags_t SymmetryMask = (packed_flags_t(1u) << SymmetryBits) - packed_flags_t(1u); + NBL_CONSTEXPR_STATIC_INLINE packed_flags_t TypeShift = VersionBits; + NBL_CONSTEXPR_STATIC_INLINE packed_flags_t SymmetryShift = VersionBits + TypeBits; enum Version : packed_flags_t { @@ -68,39 +59,35 @@ struct ProfileProperties Version getVersion() NBL_CONST_MEMBER_FUNC { - return (Version)( packed & VERSION_MASK ); + return (Version)(packed & VersionMask); } PhotometricType getType() NBL_CONST_MEMBER_FUNC { - const packed_flags_t shift = VERSION_BITS; - return (PhotometricType)((packed >> shift) & TYPE_MASK); + return (PhotometricType)((packed >> TypeShift) & TypeMask); } LuminairePlanesSymmetry getSymmetry() NBL_CONST_MEMBER_FUNC { - const packed_flags_t shift = VERSION_BITS + TYPE_BITS; - return (LuminairePlanesSymmetry)((packed >> shift) & SYMM_MASK); + return (LuminairePlanesSymmetry)((packed >> SymmetryShift) & SymmetryMask); } void setVersion(Version v) { - packed_flags_t vBits = (packed_flags_t)(v) 
& VERSION_MASK; - packed = (packed & ~VERSION_MASK) | vBits; + packed_flags_t vBits = (packed_flags_t)(v) & VersionMask; + packed = (packed & ~VersionMask) | vBits; } void setType(PhotometricType t) { - const packed_flags_t shift = VERSION_BITS; - packed_flags_t tBits = ((packed_flags_t)(t) & TYPE_MASK) << shift; - packed = (packed & ~(TYPE_MASK << shift)) | tBits; + packed_flags_t tBits = ((packed_flags_t)(t) & TypeMask) << TypeShift; + packed = (packed & ~(TypeMask << TypeShift)) | tBits; } void setSymmetry(LuminairePlanesSymmetry s) { - const packed_flags_t shift = VERSION_BITS + TYPE_BITS; - packed_flags_t sBits = ((packed_flags_t)(s) & SYMM_MASK) << shift; - packed = (packed & ~(SYMM_MASK << shift)) | sBits; + packed_flags_t sBits = ((packed_flags_t)(s) & SymmetryMask) << SymmetryShift; + packed = (packed & ~(SymmetryMask << SymmetryShift)) | sBits; } float32_t maxCandelaValue; //! Max candela sample value @@ -111,6 +98,7 @@ struct ProfileProperties }; } + } } diff --git a/include/nbl/builtin/hlsl/ies/sampler.hlsl b/include/nbl/builtin/hlsl/ies/sampler.hlsl index 41f273e82c..ab4046477c 100644 --- a/include/nbl/builtin/hlsl/ies/sampler.hlsl +++ b/include/nbl/builtin/hlsl/ies/sampler.hlsl @@ -6,16 +6,19 @@ #define _NBL_BUILTIN_HLSL_IES_SAMPLER_INCLUDED_ #include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/limits.hlsl" +#include "nbl/builtin/hlsl/bit.hlsl" +#include "nbl/builtin/hlsl/algorithm.hlsl" #include "nbl/builtin/hlsl/math/polar.hlsl" #include "nbl/builtin/hlsl/math/octahedral.hlsl" #include "nbl/builtin/hlsl/concepts.hlsl" #include "nbl/builtin/hlsl/ies/profile.hlsl" -namespace nbl +namespace nbl { -namespace hlsl +namespace hlsl { -namespace ies +namespace ies { namespace concepts { @@ -27,26 +30,26 @@ NBL_CONCEPT_BEGIN(1) #define accessor NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 #define req_key_t uint32_t #define req_key_t2 uint32_t2 -#define req_value_t float32_t +#define req_angle_t float32_t +#define req_candela_t float32_t 
NBL_CONCEPT_END( - ((NBL_CONCEPT_REQ_TYPE)(accessor_t::key_t)) - ((NBL_CONCEPT_REQ_TYPE)(accessor_t::key_t2)) - ((NBL_CONCEPT_REQ_TYPE)(accessor_t::value_t)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((req_key_t(0)), is_same_v, typename accessor_t::key_t)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((req_key_t2(0, 0)), is_same_v, typename accessor_t::key_t2)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((req_value_t(0)), is_same_v, typename accessor_t::value_t)) + ((NBL_CONCEPT_REQ_TYPE)(accessor_t::angle_t)) + ((NBL_CONCEPT_REQ_TYPE)(accessor_t::candela_t)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((req_angle_t(0)), is_same_v, typename accessor_t::angle_t)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((req_candela_t(0)), is_same_v, typename accessor_t::candela_t)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.getProperties()), is_same_v, ProfileProperties)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.value(req_key_t2(0, 0))), is_same_v, typename accessor_t::candela_t)) ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.vAnglesCount()), is_same_v, req_key_t)) ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.hAnglesCount()), is_same_v, req_key_t)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.getProperties()), is_same_v, ProfileProperties)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.template vAngle((req_key_t)0)), is_same_v, req_value_t)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.template hAngle((req_key_t)0)), is_same_v, req_value_t)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.template value((req_key_t2)0)), is_same_v, req_value_t)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.vAngle(req_key_t(0))), is_same_v, typename accessor_t::angle_t)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.hAngle(req_key_t(0))), is_same_v, typename accessor_t::angle_t)) ); #undef accessor #undef req_key_t #undef req_key_t2 -#undef req_value_t +#undef req_angle_t +#undef req_candela_t #include template @@ -57,109 +60,106 @@ template) struct CandelaSampler { using accessor_t = Accessor; - using value_t = typename accessor_t::value_t; + 
using angle_t = typename accessor_t::angle_t; + using candela_t = typename accessor_t::candela_t; using symmetry_t = ProfileProperties::LuminairePlanesSymmetry; - using polar_t = math::Polar; - using octahedral_t = math::OctahedralTransform; + using polar_t = math::Polar; + using octahedral_t = math::OctahedralTransform; + using vector2_type = float32_t2; + + vector2_type halfMinusHalfPixel; - static value_t sample(NBL_CONST_REF_ARG(accessor_t) accessor, NBL_CONST_REF_ARG(math::Polar) polar) + static inline CandelaSampler create(NBL_CONST_REF_ARG(vector2_type) lastTexelRcp) { - // TODO: DXC seems to have a bug and cannot use symmetry_t directly with == operator https://godbolt.devsh.eu/z/P9Kc5x - const ProfileProperties::LuminairePlanesSymmetry symmetry = accessor.getProperties().getSymmetry(); - const float32_t vAngle = degrees(polar.theta); - const float32_t hAngle = degrees(wrapPhi(polar.phi, symmetry)); + CandelaSampler retval; + retval.halfMinusHalfPixel = vector2_type(0.5f, 0.5f) / (vector2_type(1.f, 1.f) + lastTexelRcp); + return retval; + } - const float32_t vABack = accessor.vAngle(accessor.vAnglesCount() - 1u); - if (vAngle > vABack) - return 0.f; + inline candela_t operator()(NBL_CONST_REF_ARG(accessor_t) accessor, NBL_CONST_REF_ARG(polar_t) polar) NBL_CONST_MEMBER_FUNC + { + assert(polar.theta >= float32_t(0.0) && polar.theta <= numbers::pi); + assert(hlsl::abs(polar.phi) <= numbers::pi * float32_t(2.0)); - const uint32_t j0 = getVLB(accessor, vAngle); - const uint32_t j1 = getVUB(accessor, vAngle); - const uint32_t i0 = (symmetry == ProfileProperties::LuminairePlanesSymmetry::ISOTROPIC) ? 0u : getHLB(accessor, hAngle); - const uint32_t i1 = (symmetry == ProfileProperties::LuminairePlanesSymmetry::ISOTROPIC) ? 
0u : getHUB(accessor, hAngle); + const symmetry_t symmetry = accessor.getProperties().getSymmetry(); + const angle_t vAngle = degrees(polar.theta); + const angle_t hAngle = degrees(__wrapPhi(polar.phi, symmetry)); - const float32_t uReciprocal = ((i1 == i0) ? 1.f : 1.f / (accessor.hAngle(i1) - accessor.hAngle(i0))); - const float32_t vReciprocal = ((j1 == j0) ? 1.f : 1.f / (accessor.vAngle(j1) - accessor.vAngle(j0))); +#define NBL_IES_DEF_ANGLE_ACC(T, EXPR) struct T { using value_type = angle_t; accessor_t acc; value_type operator[](uint32_t idx) NBL_CONST_MEMBER_FUNC { return EXPR; } }; - const float32_t u = ((hAngle - accessor.hAngle(i0)) * uReciprocal); - const float32_t v = ((vAngle - accessor.vAngle(j0)) * vReciprocal); + NBL_IES_DEF_ANGLE_ACC(VAcc, acc.vAngle(idx)) + NBL_IES_DEF_ANGLE_ACC(HAcc, acc.hAngle(idx)) - const float32_t s0 = (accessor.value(uint32_t2(i0, j0)) * (1.f - v) + accessor.value(uint32_t2(i0, j1)) * v); - const float32_t s1 = (accessor.value(uint32_t2(i1, j0)) * (1.f - v) + accessor.value(uint32_t2(i1, j1)) * v); + VAcc vAcc; vAcc.acc = accessor; HAcc hAcc; hAcc.acc = accessor; - return s0 * (1.f - u) + s1 * u; - } +#undef NBL_IES_DEF_ANGLE_ACC - static value_t sample(NBL_CONST_REF_ARG(accessor_t) accessor, NBL_CONST_REF_ARG(float32_t2) uv) - { - const float32_t3 dir = octahedral_t::uvToDir(uv); - const polar_t polar = polar_t::createFromCartesian(dir); - return sample(accessor, polar); - } + const uint32_t vCount = accessor.vAnglesCount(); + const uint32_t hCount = accessor.hAnglesCount(); + const angle_t vABack = vAcc[vCount - 1u]; + if (vAngle > vABack) + return candela_t(0); - static float32_t wrapPhi(const float32_t phi, const symmetry_t symmetry) - { - switch (symmetry) - { - case symmetry_t::ISOTROPIC: //! axial symmetry - return 0.0f; - case symmetry_t::QUAD_SYMETRIC: //! phi MIRROR_REPEAT wrap onto [0, 90] degrees range - { - NBL_CONSTEXPR float32_t M_HALF_PI = numbers::pi * 0.5f; - float32_t wrapPhi = abs(phi); //! 
first MIRROR - if (wrapPhi > M_HALF_PI) //! then REPEAT - wrapPhi = hlsl::clamp(M_HALF_PI - (wrapPhi - M_HALF_PI), 0.f, M_HALF_PI); - return wrapPhi; //! eg. maps (in degrees) 91,269,271 -> 89 and 179,181,359 -> 1 - } - case symmetry_t::HALF_SYMETRIC: //! phi MIRROR wrap onto [0, 180] degrees range - case symmetry_t::OTHER_HALF_SYMMETRIC: //! eg. maps (in degress) 181 -> 179 or 359 -> 1 - return abs(phi); - case symmetry_t::NO_LATERAL_SYMMET: //! plot onto whole (in degress) [0, 360] range - { - NBL_CONSTEXPR float32_t M_TWICE_PI = numbers::pi *2.f; - return (phi < 0.f) ? (phi + M_TWICE_PI) : phi; - } - } - return 69.f; - } + const uint32_t vUbRaw = __upperBound(vAcc, vCount, vAngle); + const uint32_t vLb = __lowerFromUpper(vUbRaw); + const uint32_t vUb = __clampUpper(vUbRaw, vCount); - struct impl_t - { - static uint32_t getVUB(NBL_CONST_REF_ARG(accessor_t) accessor, const float32_t angle) - { - for (uint32_t i = 0u; i < accessor.vAnglesCount(); ++i) - if (accessor.vAngle(i) > angle) - return i; - return accessor.vAnglesCount(); - } + const bool isotropic = (symmetry == symmetry_t::ISOTROPIC); + const uint32_t hUbRaw = isotropic ? 0u : __upperBound(hAcc, hCount, hAngle); + const uint32_t hLb = isotropic ? 0u : __lowerFromUpper(hUbRaw); + const uint32_t hUb = isotropic ? 0u : __clampUpper(hUbRaw, hCount); - static uint32_t getHUB(NBL_CONST_REF_ARG(accessor_t) accessor, const float32_t angle) - { - for (uint32_t i = 0u; i < accessor.hAnglesCount(); ++i) - if (accessor.hAngle(i) > angle) - return i; - return accessor.hAnglesCount(); - } - }; + const angle_t uReciprocal = (hUb == hLb) ? angle_t(1) : angle_t(1) / (hAcc[hUb] - hAcc[hLb]); + const angle_t vReciprocal = (vUb == vLb) ? 
angle_t(1) : angle_t(1) / (vAcc[vUb] - vAcc[vLb]); - static uint32_t getVLB(NBL_CONST_REF_ARG(accessor_t) accessor, const float32_t angle) - { - return (uint32_t)hlsl::max((int64_t)impl_t::getVUB(accessor, angle) - 1ll, 0ll); - } + const angle_t u = (hAngle - hAcc[hLb]) * uReciprocal; + const angle_t v = (vAngle - vAcc[vLb]) * vReciprocal; - static uint32_t getHLB(NBL_CONST_REF_ARG(accessor_t) accessor, const float32_t angle) - { - return (uint32_t)hlsl::max((int64_t)impl_t::getHUB(accessor, angle) - 1ll, 0ll); + const candela_t s0 = accessor.value(uint32_t2(hLb, vLb)) * (angle_t(1) - v) + accessor.value(uint32_t2(hLb, vUb)) * v; + const candela_t s1 = accessor.value(uint32_t2(hUb, vLb)) * (angle_t(1) - v) + accessor.value(uint32_t2(hUb, vUb)) * v; + + return s0 * (angle_t(1) - u) + s1 * u; } - static uint32_t getVUB(NBL_CONST_REF_ARG(accessor_t) accessor, const float32_t angle) + inline candela_t operator()(NBL_CONST_REF_ARG(accessor_t) accessor, NBL_CONST_REF_ARG(float32_t2) uv) NBL_CONST_MEMBER_FUNC { - return (uint32_t)hlsl::min((int64_t)impl_t::getVUB(accessor, angle), (int64_t)(accessor.vAnglesCount() - 1u)); + const float32_t3 dir = octahedral_t::uvToDir(uv, halfMinusHalfPixel); + const polar_t polar = polar_t::createFromCartesian(dir); + return operator()(accessor, polar); } - static uint32_t getHUB(NBL_CONST_REF_ARG(accessor_t) accessor, const float32_t angle) + template + static inline uint32_t __upperBound(NBL_REF_ARG(View) view, const uint32_t count, const angle_t angle) { return nbl::hlsl::upper_bound(view, 0u, count, angle); } + + static inline uint32_t __lowerFromUpper(const uint32_t ubRaw) { return ubRaw > 0u ? (ubRaw - 1u) : 0u; } + + static inline uint32_t __clampUpper(const uint32_t ubRaw, const uint32_t count) { return ubRaw < count ? 
ubRaw : (count - 1u); } + + static inline angle_t __wrapPhi(const angle_t phi, const symmetry_t symmetry) { - return (uint32_t)hlsl::min((int64_t)impl_t::getHUB(accessor, angle), (int64_t)(accessor.hAnglesCount() - 1u)); + switch (symmetry) + { + case symmetry_t::ISOTROPIC: //! axial symmetry + return angle_t(0.0); + case symmetry_t::QUAD_SYMETRIC: //! phi MIRROR_REPEAT wrap onto [0, 90] degrees range + { + const angle_t HalfPI = numbers::pi * angle_t(0.5); + angle_t wrapPhi = hlsl::abs(phi); //! first MIRROR + if (wrapPhi > HalfPI) //! then REPEAT + wrapPhi = hlsl::clamp(HalfPI - (wrapPhi - HalfPI), angle_t(0), HalfPI); + return wrapPhi; //! eg. maps (in degrees) 91,269,271 -> 89 and 179,181,359 -> 1 + } + case symmetry_t::HALF_SYMETRIC: //! phi MIRROR wrap onto [0, 180] degrees range + case symmetry_t::OTHER_HALF_SYMMETRIC: //! eg. maps (in degress) 181 -> 179 or 359 -> 1 + return hlsl::abs(phi); + case symmetry_t::NO_LATERAL_SYMMET: //! plot onto whole (in degress) [0, 360] range + { + const angle_t TwicePI = numbers::pi * angle_t(2.0); + return (phi < angle_t(0)) ? 
(phi + TwicePI) : phi; + } + } + + return bit_cast(numeric_limits::quiet_NaN); } }; diff --git a/include/nbl/builtin/hlsl/ies/texture.hlsl b/include/nbl/builtin/hlsl/ies/texture.hlsl index 4ea04755df..6d26963c87 100644 --- a/include/nbl/builtin/hlsl/ies/texture.hlsl +++ b/include/nbl/builtin/hlsl/ies/texture.hlsl @@ -12,85 +12,74 @@ namespace nbl { namespace hlsl { - -// TODO(?): should be in nbl::hlsl::ies (or in the Texutre struct) but I get -// error GA3909C62: class template specialization of 'member_count' not in a namespace enclosing 'bda' -// which I don't want to deal with rn to not (eventually) break stuff - +namespace ies +{ struct IESTextureInfo; -NBL_HLSL_DEFINE_STRUCT((IESTextureInfo), - ((inv, float32_t2)) - ((flatten, float32_t)) - ((maxValueRecip, float32_t)) - ((flattenTarget, float32_t)) - ((domainLo, float32_t)) - ((domainHi, float32_t)) - ((fullDomainFlatten, uint16_t)) // bool +} +} +} + +NBL_HLSL_DEFINE_STRUCT((::nbl::hlsl::ies::IESTextureInfo), + ((lastTexelRcp, float32_t2)) + ((maxValueRecip, float32_t)) ); +namespace nbl +{ +namespace hlsl +{ namespace ies { -template) -struct Texture +struct SProceduralTexture { - using accessor_t = Accessor; - using value_t = typename accessor_t::value_t; - using sampler_t = CandelaSampler; - using polar_t = math::Polar; - using octahedral_t = math::OctahedralTransform; - using SInfo = nbl::hlsl::IESTextureInfo; + using info_t = IESTextureInfo; + using octahedral_t = math::OctahedralTransform; + using polar_t = math::Polar; - static inline SInfo createInfo(NBL_CONST_REF_ARG(accessor_t) accessor, NBL_CONST_REF_ARG(uint32_t2) size, float32_t flatten, bool fullDomainFlatten) - { - SInfo retval; - const ProfileProperties props = accessor.getProperties(); - - // There is one huge issue, the IES files love to give us values for degrees 0, 90, 180 an 360 - // So standard octahedral mapping won't work, because for above data points you need corner sampled images. 
+ NBL_CONSTEXPR_STATIC_INLINE uint32_t MaxTextureWidth = 15360u; + NBL_CONSTEXPR_STATIC_INLINE uint32_t MaxTextureHeight = 8640u; - retval.inv = float32_t2(1.f, 1.f) / float32_t2(size - 1u); - retval.flatten = flatten; - retval.maxValueRecip = 1.0f / props.maxCandelaValue; // Late Optimization TODO: Modify the Max Value for the UNORM texture to be the Max Value after flatten blending - retval.domainLo = radians(accessor.vAngle(0u)); - retval.domainHi = radians(accessor.vAngle(accessor.vAnglesCount() - 1u)); - retval.fullDomainFlatten = fullDomainFlatten; + NBL_CONSTEXPR_STATIC_INLINE uint32_t MinTextureWidth = 3u; + NBL_CONSTEXPR_STATIC_INLINE uint32_t MinTextureHeight = 3u; - if(fullDomainFlatten) - retval.flattenTarget = props.fullDomainAvgEmission; - else - retval.flattenTarget = props.avgEmmision; + info_t info; + static inline SProceduralTexture create(const float32_t maxCandelaValue, const uint32_t2 resolution) + { + SProceduralTexture retval; + retval.info.lastTexelRcp = float32_t2(1.f, 1.f) / (float32_t2(resolution) - float32_t2(1.f, 1.f)); + retval.info.maxValueRecip = maxCandelaValue > 0.f ? (1.f / maxCandelaValue) : 0.f; return retval; } - static inline float32_t eval(NBL_CONST_REF_ARG(accessor_t) accessor, NBL_CONST_REF_ARG(SInfo) info, NBL_CONST_REF_ARG(float32_t2) uv) + // NOTE: DXC fails overload resolution for templated operator() in HLSL, so we use templated __call instead. + template + inline float32_t __call(NBL_CONST_REF_ARG(Accessor) accessor, NBL_CONST_REF_ARG(float32_t2) uv) NBL_CONST_MEMBER_FUNC { - // We don't currently support generating IES images that exploit symmetries or reduced domains, all are full octahederal mappings of a sphere. - // If we did, we'd rely on MIRROR and CLAMP samplers to do some of the work for us while handling the discontinuity due to corner sampling. 
- const float32_t3 dir = octahedral_t::uvToDir(uv); - const polar_t polar = polar_t::createFromCartesian(dir); - - sampler_t sampler; - const float32_t intensity = sampler.sample(accessor, polar); - - //! blend the IES texture with "flatten" - float32_t blendV = intensity * (1.f - info.flatten); - - const bool inDomain = (info.domainLo <= polar.theta) && (polar.theta <= info.domainHi); - - if ((info.fullDomainFlatten && inDomain) || intensity > 0.0f) - blendV += info.flattenTarget * info.flatten; - - blendV *= info.maxValueRecip; + const float32_t2 halfMinusHalfPixel = float32_t2(0.5f, 0.5f) / (float32_t2(1.f, 1.f) + info.lastTexelRcp); + const float32_t2 ndc = (uv - float32_t2(0.5f, 0.5f)) / halfMinusHalfPixel; + return __evalNDC(accessor, ndc); + } - return blendV; + template + inline float32_t __call(NBL_CONST_REF_ARG(Accessor) accessor, NBL_CONST_REF_ARG(uint32_t2) coord) NBL_CONST_MEMBER_FUNC + { + const float32_t2 ndc = float32_t2(coord) * info.lastTexelRcp * float32_t2(2.f, 2.f) - float32_t2(1.f, 1.f); + return __evalNDC(accessor, ndc); } - static inline float32_t eval(NBL_CONST_REF_ARG(accessor_t) accessor, NBL_CONST_REF_ARG(SInfo) info, NBL_CONST_REF_ARG(uint32_t2) position) + template + inline float32_t __evalNDC(NBL_CONST_REF_ARG(Accessor) accessor, NBL_CONST_REF_ARG(float32_t2) ndc) NBL_CONST_MEMBER_FUNC { - const float32_t2 uv = float32_t2(position) * info.inv; - return eval(accessor, info, uv); + // We don't currently support generating IES images that exploit symmetries or reduced domains, + // all are full octahederal mappings of a sphere. + // If we did, we'd rely on MIRROR and CLAMP samplers to do some of the work for us while handling the discontinuity due to corner sampling. 
+ const float32_t3 dir = octahedral_t::ndcToDir(ndc); + const polar_t polar = polar_t::createFromCartesian(dir); + CandelaSampler _sampler = CandelaSampler::create(info.lastTexelRcp); + const float32_t intensity = _sampler(accessor, polar); + return intensity * info.maxValueRecip; } }; diff --git a/include/nbl/builtin/hlsl/math/octahedral.hlsl b/include/nbl/builtin/hlsl/math/octahedral.hlsl index 45fe35b2d8..5f12fc79c0 100644 --- a/include/nbl/builtin/hlsl/math/octahedral.hlsl +++ b/include/nbl/builtin/hlsl/math/octahedral.hlsl @@ -7,6 +7,7 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" #include "nbl/builtin/hlsl/numbers.hlsl" +#include "nbl/builtin/hlsl/math/functions.hlsl" namespace nbl { @@ -23,49 +24,54 @@ struct OctahedralTransform using vector2_type = vector; using vector3_type = vector; - // F : [0, 1]^2 -> S^2 - static vector3_type uvToDir(NBL_CONST_REF_ARG(vector2_type) uv) + // F : [-1, 1]^2 -> S^2 + static vector3_type ndcToDir(NBL_CONST_REF_ARG(vector2_type) ndc) { - vector3_type p = vector3_type((uv * scalar_type(2) - scalar_type(1)), scalar_type(0)); - const scalar_type a_x = abs(p.x); const scalar_type a_y = abs(p.y); + const vector2_type a = abs(ndc); + vector3_type p = vector3_type(ndc, scalar_type(1) - a.x - a.y); - p.z = scalar_type(1) - a_x - a_y; - - if (p.z < scalar_type(0)) - { - p.x = (p.x < scalar_type(0) ? scalar_type(-1) : scalar_type(1)) * (scalar_type(1) - a_y); - p.y = (p.y < scalar_type(0) ? 
scalar_type(-1) : scalar_type(1)) * (scalar_type(1) - a_x); - } + if (p.z < scalar_type(0)) + p.xy = __foldToUpperHemisphere(ndc); return hlsl::normalize(p); } + // F : [0, 1]^2 -> S^2 (UV with half-texel handling) + static vector3_type uvToDir(NBL_CONST_REF_ARG(vector2_type) uv, NBL_CONST_REF_ARG(vector2_type) halfMinusHalfPixel) + { + const vector2_type ndc = (uv - vector2_type(scalar_type(0.5), scalar_type(0.5))) / halfMinusHalfPixel; + return ndcToDir(ndc); + } + // F^-1 : S^2 -> [-1, 1]^2 static vector2_type dirToNDC(NBL_CONST_REF_ARG(vector3_type) d) { - vector3_type dir = hlsl::normalize(d); - const scalar_type sum = dot(vector3_type(scalar_type(1), scalar_type(1), scalar_type(1)), abs(dir)); - vector3_type s = dir / sum; + const scalar_type sum = lpNorm(d); + vector3_type s = d / sum; if (s.z < scalar_type(0)) - { - s.x = (s.x < scalar_type(0) ? scalar_type(-1) : scalar_type(1)) * (scalar_type(1) - abs(s.y)); - s.y = (s.y < scalar_type(0) ? scalar_type(-1) : scalar_type(1)) * (scalar_type(1) - abs(s.x)); - } + s.xy = __foldToUpperHemisphere(s.xy); return s.xy; } - // transforms direction vector into UV for corner sampling + // transforms direction vector into UV with half-texel handling // dir in S^2, halfMinusHalfPixel in [0, 0.5)^2, // where halfMinusHalfPixel = 0.5-0.5/texSize - // and texSize.x >= 1, texSize.y >= 1 - // NOTE/TODO: not best place to keep it here - static vector2_type toCornerSampledUV(NBL_CONST_REF_ARG(vector3_type) dir, NBL_CONST_REF_ARG(vector2_type) halfMinusHalfPixel) + static vector2_type dirToUV(NBL_CONST_REF_ARG(vector3_type) dir, NBL_CONST_REF_ARG(vector2_type) halfMinusHalfPixel) { - // note: cornerSampled(NDC*0.5+0.5) = NDC*0.5*(1-1/texSize)+0.5 return dirToNDC(dir) * halfMinusHalfPixel + scalar_type(0.5); } + + static vector2_type __foldToUpperHemisphere(NBL_CONST_REF_ARG(vector2_type) v) + { + // Use copySign instead of sign() to preserve -0 and avoid DXC corner cases. 
+ const vector2_type factor = vector2_type( + ieee754::copySign(scalar_type(1), v.x), + ieee754::copySign(scalar_type(1), v.y)); + const vector2_type swapped = vector2_type(v.y, v.x); + return factor * (vector2_type(scalar_type(1), scalar_type(1)) - abs(swapped)); + } }; } diff --git a/include/nbl/builtin/hlsl/math/polar.hlsl b/include/nbl/builtin/hlsl/math/polar.hlsl index 01a95f61ef..98d37a3978 100644 --- a/include/nbl/builtin/hlsl/math/polar.hlsl +++ b/include/nbl/builtin/hlsl/math/polar.hlsl @@ -22,20 +22,20 @@ struct Polar using vector3_type = vector; // input must be normalized - static Polar createFromCartesian(NBL_CONST_REF_ARG(vector3_type) dir) + static Polar createFromCartesian(const vector3_type dir) { Polar retval; - retval.theta = acos(dir.z); - retval.phi = atan2(dir.y, dir.x); + retval.theta = hlsl::acos(dir.z); + retval.phi = hlsl::atan2(dir.y, dir.x); return retval; } - static vector3_type ToCartesian(NBL_CONST_REF_ARG(scalar_type) theta, NBL_CONST_REF_ARG(scalar_type) phi) + static vector3_type ToCartesian(const scalar_type theta, const scalar_type phi) { return vector( - cos(phi) * cos(theta), - sin(phi) * cos(theta), - sin(theta) + hlsl::cos(phi) * hlsl::cos(theta), + hlsl::sin(phi) * hlsl::cos(theta), + hlsl::sin(theta) ); } diff --git a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl index 1ad16dc28d..4c91a13c0d 100644 --- a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl +++ b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl @@ -2,6 +2,7 @@ #define _NBL_BUILTIN_HLSL_TRANSFORMATION_MATRIX_UTILS_INCLUDED_ #include +#include namespace nbl { @@ -125,7 +126,7 @@ inline matrix buildCameraLookAtMatrixRH( //! 
Replaces curent rocation and scale by rotation represented by quaternion `quat`, leaves 4th row and 4th colum unchanged template -inline void setRotation(matrix& outMat, NBL_CONST_REF_ARG(core::quaternion) quat) +inline void setRotation(matrix& outMat, NBL_CONST_REF_ARG(math::quaternion) quat) { static_assert(N == 3 || N == 4); @@ -232,4 +233,4 @@ inline matrix buildProjectionMatrixOrthoLH(float widthOfViewVolume, flo } } -#endif \ No newline at end of file +#endif diff --git a/include/nbl/builtin/hlsl/surface_transform.h b/include/nbl/builtin/hlsl/surface_transform.h index 0b93434fe0..39d3011072 100644 --- a/include/nbl/builtin/hlsl/surface_transform.h +++ b/include/nbl/builtin/hlsl/surface_transform.h @@ -4,6 +4,7 @@ #ifndef _NBL_BUILTIN_HLSL_SURFACE_TRANSFORM_INCLUDED_ #define _NBL_BUILTIN_HLSL_SURFACE_TRANSFORM_INCLUDED_ #include +#include #include namespace nbl @@ -64,7 +65,7 @@ inline float32_t2x2 transformMatrix(const FLAG_BITS transform) return float32_t2x2(_nan,_nan,_nan,_nan); } -//! [width,height] might switch to [height, width] in orientations such as 90°CW +//! [width,height] might switch to [height, width] in orientations such as 90�CW //! Usecase: Find out how big the viewport has to be after or before a tranform is applied inline uint16_t2 transformedExtents(const FLAG_BITS transform, const uint16_t2 screenSize) { @@ -93,7 +94,7 @@ inline float transformedAspectRatio(const FLAG_BITS transform, const uint16_t2 s } //! Use this function to apply the INVERSE of swapchain tranformation to the screenspace coordinate `coord` -//! For example when the device orientation is 90°CW then this transforms the point 90°CCW. +//! For example when the device orientation is 90�CW then this transforms the point 90�CCW. //! Usecase = [Gather]: //! Applications such as raytracing in shaders where you would want to generate rays from screen space coordinates. //! 
Warnings: @@ -174,7 +175,7 @@ inline float32_t2 applyToNDC(const FLAG_BITS transform, const float32_t2 ndc) template TwoColumns applyToDerivatives(const FLAG_BITS transform, TwoColumns dDx_dDy) { - return mul(inverse(transformMatrix(transform)),dDx_dDy); + return mul(inverse(transformMatrix(transform)), dDx_dDy); } } @@ -183,3 +184,4 @@ TwoColumns applyToDerivatives(const FLAG_BITS transform, TwoColumns dDx_dDy) #endif // _NBL_BUILTIN_HLSL_SURFACE_TRANSFORM_INCLUDED_ + diff --git a/include/nbl/ext/ScreenShot/ScreenShot.h b/include/nbl/ext/ScreenShot/ScreenShot.h index 4e71749cd7..64f5e526d1 100644 --- a/include/nbl/ext/ScreenShot/ScreenShot.h +++ b/include/nbl/ext/ScreenShot/ScreenShot.h @@ -27,7 +27,12 @@ inline core::smart_refctd_ptr createScreenShot( const ACCESS_FLAGS accessMask, const IImage::LAYOUT imageLayout) { - assert(bool(logicalDevice->getPhysicalDevice()->getQueueFamilyProperties().begin()[queue->getFamilyIndex()].queueFlags.value & IQueue::FAMILY_FLAGS::TRANSFER_BIT)); + { + const auto queueFlags = logicalDevice->getPhysicalDevice()->getQueueFamilyProperties().begin()[queue->getFamilyIndex()].queueFlags; + const auto required = core::bitflag(IQueue::FAMILY_FLAGS::TRANSFER_BIT) | IQueue::FAMILY_FLAGS::GRAPHICS_BIT | IQueue::FAMILY_FLAGS::COMPUTE_BIT; + if (!queueFlags.hasAnyFlag(required)) + logicalDevice->getLogger()->log("ScreenShot: queue family %u lacks transfer/graphics/compute flags; continuing anyway.", system::ILogger::ELL_WARNING, queue->getFamilyIndex()); + } auto fetchedImageViewParmas = gpuImageView->getCreationParameters(); auto gpuImage = fetchedImageViewParmas.image; @@ -35,12 +40,17 @@ inline core::smart_refctd_ptr createScreenShot( if(!fetchedGpuImageParams.usage.hasFlags(IImage::EUF_TRANSFER_SRC_BIT)) { - assert(false); + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: source image missing TRANSFER_SRC usage.", system::ILogger::ELL_ERROR); return nullptr; } if 
(isBlockCompressionFormat(fetchedGpuImageParams.format)) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: block-compressed formats are not supported.", system::ILogger::ELL_ERROR); return nullptr; + } core::smart_refctd_ptr gpuTexelBuffer; @@ -48,10 +58,28 @@ inline core::smart_refctd_ptr createScreenShot( { // commandbuffer should refcount the pool, so it should be 100% legal to drop at the end of the scope auto gpuCommandPool = logicalDevice->createCommandPool(queue->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::TRANSIENT_BIT); + if (!gpuCommandPool) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: failed to create command pool.", system::ILogger::ELL_ERROR); + return nullptr; + } gpuCommandPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &gpuCommandBuffer); - assert(gpuCommandBuffer); + if (!gpuCommandBuffer) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: failed to create command buffer.", system::ILogger::ELL_ERROR); + return nullptr; + } + } + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: recording command buffer.", system::ILogger::ELL_INFO); + if (!gpuCommandBuffer->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT)) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: failed to begin command buffer.", system::ILogger::ELL_ERROR); + return nullptr; } - gpuCommandBuffer->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); { auto extent = gpuImage->getMipSize(); @@ -68,9 +96,27 @@ inline core::smart_refctd_ptr createScreenShot( bufferCreationParams.size = extent.x*extent.y*extent.z*getTexelOrBlockBytesize(fetchedGpuImageParams.format); bufferCreationParams.usage = IBuffer::EUF_TRANSFER_DST_BIT; gpuTexelBuffer = logicalDevice->createBuffer(std::move(bufferCreationParams)); + if (!gpuTexelBuffer) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: failed to 
create GPU texel buffer.", system::ILogger::ELL_ERROR); + return nullptr; + } auto gpuTexelBufferMemReqs = gpuTexelBuffer->getMemoryReqs(); gpuTexelBufferMemReqs.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDownStreamingMemoryTypeBits(); + if (!gpuTexelBufferMemReqs.memoryTypeBits) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: no down-streaming memory type for texel buffer.", system::ILogger::ELL_ERROR); + return nullptr; + } auto gpuTexelBufferMem = logicalDevice->allocate(gpuTexelBufferMemReqs, gpuTexelBuffer.get()); + if (!gpuTexelBufferMem.isValid()) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: failed to allocate texel buffer memory.", system::ILogger::ELL_ERROR); + return nullptr; + } IGPUCommandBuffer::SPipelineBarrierDependencyInfo info = {}; decltype(info)::image_barrier_t barrier = {}; @@ -102,7 +148,12 @@ inline core::smart_refctd_ptr createScreenShot( gpuCommandBuffer->pipelineBarrier(EDF_NONE,info); } } - gpuCommandBuffer->end(); + if (!gpuCommandBuffer->end()) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: failed to end command buffer.", system::ILogger::ELL_ERROR); + return nullptr; + } auto signalSemaphore = logicalDevice->createSemaphore(0); @@ -124,22 +175,63 @@ inline core::smart_refctd_ptr createScreenShot( info.waitSemaphores = { &waitSemaphoreInfo, &waitSemaphoreInfo + 1 }; } - queue->submit({ &info, &info + 1}); + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: submitting copy command buffer.", system::ILogger::ELL_INFO); + if (queue->submit({ &info, &info + 1}) != IQueue::RESULT::SUCCESS) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: failed to submit copy command buffer.", system::ILogger::ELL_ERROR); + return nullptr; + } ISemaphore::SWaitInfo waitInfo{ signalSemaphore.get(), 1u}; + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: waiting for 
copy completion.", system::ILogger::ELL_INFO); if (logicalDevice->blockForSemaphores({&waitInfo, &waitInfo + 1}) != ISemaphore::WAIT_RESULT::SUCCESS) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: failed to wait for copy completion.", system::ILogger::ELL_ERROR); return nullptr; + } core::smart_refctd_ptr cpuImageView; { const auto gpuTexelBufferSize = gpuTexelBuffer->getSize(); // If you get validation errors from the `invalidateMappedMemoryRanges` we need to expose VK_WHOLE_BUFFER equivalent constant - ILogicalDevice::MappedMemoryRange mappedMemoryRange(gpuTexelBuffer->getBoundMemory().memory,0u,gpuTexelBufferSize); + auto* allocation = gpuTexelBuffer->getBoundMemory().memory; + if (!allocation) + return nullptr; - if (gpuTexelBuffer->getBoundMemory().memory->haveToMakeVisible()) + bool mappedHere = false; + if (!allocation->getMappedPointer()) + { + const IDeviceMemoryAllocation::MemoryRange range = { 0u, gpuTexelBufferSize }; + if (!allocation->map(range, IDeviceMemoryAllocation::EMCAF_READ)) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: failed to map texel buffer memory.", system::ILogger::ELL_ERROR); + return nullptr; + } + mappedHere = true; + } + + ILogicalDevice::MappedMemoryRange mappedMemoryRange(allocation,0u,gpuTexelBufferSize); + if (allocation->haveToMakeVisible()) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: invalidating mapped range.", system::ILogger::ELL_INFO); logicalDevice->invalidateMappedMemoryRanges(1u,&mappedMemoryRange); + } auto cpuNewImage = ICPUImage::create(std::move(fetchedGpuImageParams)); + if (!cpuNewImage) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: failed to create CPU image.", system::ILogger::ELL_ERROR); + if (mappedHere) + allocation->unmap(); + return nullptr; + } auto regions = core::make_refctd_dynamic_array>(1u); ICPUImage::SBufferCopy& region = regions->front(); @@ -155,10 +247,22 @@ 
inline core::smart_refctd_ptr createScreenShot( region.imageExtent = cpuNewImage->getCreationParameters().extent; auto cpuNewTexelBuffer = ICPUBuffer::create({ gpuTexelBufferSize }); + if (!cpuNewTexelBuffer) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: failed to create CPU buffer.", system::ILogger::ELL_ERROR); + if (mappedHere) + allocation->unmap(); + return nullptr; + } + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: copying GPU data to CPU buffer.", system::ILogger::ELL_INFO); { - memcpy(cpuNewTexelBuffer->getPointer(), gpuTexelBuffer->getBoundMemory().memory->getMappedPointer(), gpuTexelBuffer->getSize()); + memcpy(cpuNewTexelBuffer->getPointer(), allocation->getMappedPointer(), gpuTexelBuffer->getSize()); } cpuNewImage->setBufferAndRegions(core::smart_refctd_ptr(cpuNewTexelBuffer), regions); + if (mappedHere) + allocation->unmap(); { auto newCreationParams = cpuNewImage->getCreationParameters(); @@ -190,6 +294,12 @@ inline bool createScreenShot( { assert(outFile->getFlags()&system::IFile::ECF_WRITE); auto cpuImageView = createScreenShot(logicalDevice,queue,semaphore,gpuImageView,accessMask,imageLayout); + if (!cpuImageView) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: GPU readback failed, no image to write.", system::ILogger::ELL_ERROR); + return false; + } IAssetWriter::SAssetWriteParams writeParams(cpuImageView.get()); return assetManager->writeAsset(outFile,writeParams); } @@ -205,6 +315,12 @@ inline bool createScreenShot( const ACCESS_FLAGS accessMask = ACCESS_FLAGS::MEMORY_WRITE_BITS) { auto cpuImageView = createScreenShot(logicalDevice,queue,semaphore,gpuImageView,accessMask,imageLayout); + if (!cpuImageView) + { + if (auto* logger = logicalDevice->getLogger()) + logger->log("ScreenShot: GPU readback failed, no image to write.", system::ILogger::ELL_ERROR); + return false; + } IAssetWriter::SAssetWriteParams writeParams(cpuImageView.get()); return 
assetManager->writeAsset(filename.string(),writeParams); // TODO: Use std::filesystem::path } @@ -212,4 +328,4 @@ inline bool createScreenShot( } // namespace nbl::ext::ScreenShot -#endif \ No newline at end of file +#endif diff --git a/src/nbl/asset/interchange/CIESProfileLoader.cpp b/src/nbl/asset/interchange/CIESProfileLoader.cpp index a78c9af0a2..1bf60be905 100644 --- a/src/nbl/asset/interchange/CIESProfileLoader.cpp +++ b/src/nbl/asset/interchange/CIESProfileLoader.cpp @@ -15,11 +15,10 @@ bool CIESProfileLoader::isALoadableFileFormat(system::IFile* _file, const system for (const auto& it : CIESProfileParser::VALID_SIGNATURES) if (versionBuffer.find(it.data()) != std::string::npos) return true; - - logger.log("%s: Invalid IES signature for \"%s\" file!", system::ILogger::ELL_ERROR, __FUNCTION__, fName); + logger.log("%s: Invalid IES signature for \"%s\" file!", system::ILogger::ELL_DEBUG, __FUNCTION__, fName); } else - logger.log("%s: Failed to read \"%s\" file!", system::ILogger::ELL_ERROR, __FUNCTION__, fName); + logger.log("%s: Failed to read \"%s\" file!", system::ILogger::ELL_DEBUG, __FUNCTION__, fName); return false; } @@ -61,7 +60,7 @@ asset::SAssetBundle CIESProfileLoader::loadAsset(system::IFile* _file, const ass else { const auto optimalResolution = profile.getAccessor().properties.optimalIESResolution; - cpuImageView = profile.createIESTexture(0.f, false, optimalResolution.x, optimalResolution.y); + cpuImageView = profile.createIESTexture(optimalResolution); } return asset::SAssetBundle(std::move(meta), { core::smart_refctd_ptr(cpuImageView) }); diff --git a/src/nbl/asset/interchange/CIESProfileLoader.h b/src/nbl/asset/interchange/CIESProfileLoader.h index 809b1840da..ba89915278 100644 --- a/src/nbl/asset/interchange/CIESProfileLoader.h +++ b/src/nbl/asset/interchange/CIESProfileLoader.h @@ -9,8 +9,6 @@ #include "nbl/asset/IAssetManager.h" #include "nbl/asset/interchange/IAssetLoader.h" -#if 0 // TODO: Arek - #include 
"nbl/asset/utils/CIESProfileParser.h" // TODO: move to `src/asset/interchange` #include "nbl/asset/metadata/CIESProfileMetadata.h" @@ -46,6 +44,5 @@ class CIESProfileLoader final : public asset::IAssetLoader asset::SAssetBundle loadAsset(system::IFile* _file, const asset::IAssetLoader::SAssetLoadParams& _params, asset::IAssetLoader::IAssetLoaderOverride* _override = nullptr, uint32_t _hierarchyLevel = 0u) override; }; } // namespace nbl::asset -#endif // end TODO: Arek #endif // __NBL_ASSET_C_IES_PROFILE_LOADER_H_INCLUDED__ diff --git a/src/nbl/asset/utils/CIESProfile.cpp b/src/nbl/asset/utils/CIESProfile.cpp index d4289191f6..24f1427481 100644 --- a/src/nbl/asset/utils/CIESProfile.cpp +++ b/src/nbl/asset/utils/CIESProfile.cpp @@ -7,20 +7,29 @@ using namespace nbl; using namespace asset; template -core::smart_refctd_ptr CIESProfile::createIESTexture(ExecutionPolicy&& policy, const float flatten, const bool fullDomainFlatten, uint32_t width, uint32_t height) const +core::smart_refctd_ptr CIESProfile::createIESTexture(ExecutionPolicy&& policy, hlsl::uint32_t2 resolution) const { - const bool inFlattenDomain = flatten >= 0.0 && flatten <= 1.0; // [0, 1] range for blend equation, 1 is normally invalid but we use it to for special implied domain flatten mode - assert(inFlattenDomain); + uint32_t width = resolution.x; + uint32_t height = resolution.y; - if (width > properties_t::CDC_MAX_TEXTURE_WIDTH) - width = properties_t::CDC_MAX_TEXTURE_WIDTH; + if (width > texture_t::MaxTextureWidth) + width = texture_t::MaxTextureWidth; - if (height > properties_t::CDC_MAX_TEXTURE_HEIGHT) - height = properties_t::CDC_MAX_TEXTURE_HEIGHT; + if (height > texture_t::MaxTextureHeight) + height = texture_t::MaxTextureHeight; - // TODO: If no symmetry (no folding in half and abuse of mirror sampler) make dimensions odd-sized so middle texel taps the south pole - width = core::max(width,properties_t::CDC_MIN_TEXTURE_WIDTH); - height = core::max(height,properties_t::CDC_MIN_TEXTURE_HEIGHT); 
+ width = core::max(width, texture_t::MinTextureWidth); + height = core::max(height, texture_t::MinTextureHeight); + + auto makeOdd = [](uint32_t value, const uint32_t maxValue) -> uint32_t + { + if (value & 1u) + return value; + return (value < maxValue) ? (value + 1u) : (value - 1u); + }; + // TODO: remove this once we exploit symmetries and fold the domain. + width = makeOdd(width, texture_t::MaxTextureWidth); + height = makeOdd(height, texture_t::MaxTextureHeight); asset::ICPUImage::SCreationParams imgInfo; imgInfo.type = asset::ICPUImage::ET_2D; @@ -65,11 +74,11 @@ core::smart_refctd_ptr CIESProfile::createIESTexture(Execu state.outRange.extent = creationParams.extent; const IImageFilter::IState::ColorValue::WriteMemoryInfo wInfo(creationParams.format, outImg->getBuffer()->getPointer()); - const auto tInfo = texture_t::createInfo(accessor, hlsl::uint32_t2(width, height), flatten, fullDomainFlatten); + const auto texture = texture_t::create(accessor.properties.maxCandelaValue, hlsl::uint32_t2(width, height)); auto fill = [&](uint32_t blockArrayOffset, core::vectorSIMDu32 position) -> void { - auto texel = texture_t::eval(accessor, tInfo, hlsl::uint32_t2(position.x, position.y)); + const auto texel = texture.__call(accessor, hlsl::uint32_t2(position.x, position.y)); asset::IImageFilter::IState::ColorValue color; constexpr float UI16_MAX_D = static_cast(std::numeric_limits::max()); @@ -97,11 +106,11 @@ core::smart_refctd_ptr CIESProfile::createIESTexture(Execu } //! 
Explicit instantiations -template core::smart_refctd_ptr CIESProfile::createIESTexture(const std::execution::sequenced_policy&, const float, const bool, uint32_t, uint32_t) const; -template core::smart_refctd_ptr CIESProfile::createIESTexture(const std::execution::parallel_policy&, const float, const bool, uint32_t, uint32_t) const; -template core::smart_refctd_ptr CIESProfile::createIESTexture(const std::execution::parallel_unsequenced_policy&, const float, const bool, uint32_t, uint32_t) const; +template core::smart_refctd_ptr CIESProfile::createIESTexture(const std::execution::sequenced_policy&, hlsl::uint32_t2) const; +template core::smart_refctd_ptr CIESProfile::createIESTexture(const std::execution::parallel_policy&, hlsl::uint32_t2) const; +template core::smart_refctd_ptr CIESProfile::createIESTexture(const std::execution::parallel_unsequenced_policy&, hlsl::uint32_t2) const; -core::smart_refctd_ptr CIESProfile::createIESTexture(const float flatten, const bool fullDomainFlatten, uint32_t width, uint32_t height) const +core::smart_refctd_ptr CIESProfile::createIESTexture(hlsl::uint32_t2 resolution) const { - return createIESTexture(std::execution::seq, flatten, fullDomainFlatten, width, height); -} \ No newline at end of file + return createIESTexture(std::execution::seq, resolution); +} diff --git a/src/nbl/asset/utils/CIESProfile.h b/src/nbl/asset/utils/CIESProfile.h index 2a063e7b15..f84f2753e5 100644 --- a/src/nbl/asset/utils/CIESProfile.h +++ b/src/nbl/asset/utils/CIESProfile.h @@ -6,7 +6,8 @@ #define __NBL_ASSET_C_IES_PROFILE_H_INCLUDED__ #include "nbl/asset/metadata/CIESProfileMetadata.h" -#include "nbl/builtin/hlsl/ies/texture.hlsl" +#include "nbl/builtin/hlsl/ies/profile.hlsl" +namespace nbl { namespace hlsl { namespace ies { struct SProceduralTexture; } } } namespace nbl { @@ -27,42 +28,41 @@ class CIESProfile struct accessor_t { - using key_t = uint32_t; - using key_t2 = hlsl::uint32_t2; - using value_t = hlsl::float32_t; + using angle_t = 
hlsl::float32_t; + using candela_t = hlsl::float32_t; accessor_t() = default; - accessor_t(const key_t2& resolution, const properties_t& props) : hAngles(resolution.x), vAngles(resolution.y), data(resolution.x * resolution.y), properties(props) {} + accessor_t(const hlsl::uint32_t2& resolution, const properties_t& props) : hAngles(resolution.x), vAngles(resolution.y), data(resolution.x * resolution.y), properties(props) {} ~accessor_t() = default; - template) - inline value_t vAngle(T j) const { return (value_t)vAngles[j]; } - - template) - inline value_t hAngle(T i) const { return (value_t)hAngles[i]; } - - template) - inline value_t value(T ij) const { return (value_t)data[vAnglesCount() * ij.x + ij.y]; } - - template) - inline void setValue(T ij, value_t val) { data[vAnglesCount() * ij.x + ij.y] = val; } + inline angle_t vAngle(const uint32_t idx) const { return vAngles[idx]; } + inline angle_t hAngle(const uint32_t idx) const { return hAngles[idx]; } + inline uint32_t vAnglesCount() const { return static_cast(vAngles.size()); } + inline uint32_t hAnglesCount() const { return static_cast(hAngles.size()); } + inline candela_t value(hlsl::uint32_t2 ij) const { const uint32_t vCount = static_cast(vAngles.size()); return data[vCount * ij.x + ij.y]; } + inline void setValue(hlsl::uint32_t2 ij, candela_t val) { const uint32_t vCount = static_cast(vAngles.size()); data[vCount * ij.x + ij.y] = val; } - inline key_t vAnglesCount() const { return (key_t)vAngles.size(); } - inline key_t hAnglesCount() const { return (key_t)hAngles.size(); } - inline const properties_t::base_t& getProperties() const { return static_cast(properties); } + inline properties_t::base_t getProperties() const { return properties; } - core::vector hAngles; //! The angular displacement indegreesfrom straight down, a value represents spherical coordinate "theta" with physics convention. 
Note that if symmetry is OTHER_HALF_SYMMETRIC then real horizontal angle provided by IES data is (hAngles[index] + 90) - the reason behind it is we patch 1995 IES OTHER_HALF_SYMETRIC case to be HALF_SYMETRIC - core::vector vAngles; //! Measurements in degrees of angular displacement measured counterclockwise in a horizontal plane for Type C photometry and clockwise for Type A and B photometry, a value represents spherical coordinate "phi" with physics convention - core::vector data; //! Candela scalar values + core::vector hAngles; //! The angular displacement indegreesfrom straight down, a value represents spherical coordinate "theta" with physics convention. Note that if symmetry is OTHER_HALF_SYMMETRIC then real horizontal angle provided by IES data is (hAngles[index] + 90) - the reason behind it is we patch 1995 IES OTHER_HALF_SYMETRIC case to be HALF_SYMETRIC + core::vector vAngles; //! Measurements in degrees of angular displacement measured counterclockwise in a horizontal plane for Type C photometry and clockwise for Type A and B photometry, a value represents spherical coordinate "phi" with physics convention + core::vector data; //! Candela scalar values properties_t properties; //! Profile properties }; - using texture_t = nbl::hlsl::ies::Texture; + using texture_t = nbl::hlsl::ies::SProceduralTexture; inline const accessor_t& getAccessor() const { return accessor; } + inline float getMaxCandelaValue() const { return accessor.properties.maxCandelaValue; } + inline hlsl::uint32_t2 getOptimalIESResolution() const { return accessor.properties.optimalIESResolution; } + inline float getAvgEmmision(const bool fullDomain) const { return fullDomain ? 
accessor.properties.fullDomainAvgEmission : accessor.properties.avgEmmision; } template - core::smart_refctd_ptr createIESTexture(ExecutionPolicy&& policy, const float flatten = 0.0, const bool fullDomainFlatten=false, uint32_t width = properties_t::CDC_DEFAULT_TEXTURE_WIDTH, uint32_t height = properties_t::CDC_DEFAULT_TEXTURE_HEIGHT) const; - core::smart_refctd_ptr createIESTexture(const float flatten = 0.0, const bool fullDomainFlatten=false, uint32_t width = properties_t::CDC_DEFAULT_TEXTURE_WIDTH, uint32_t height = properties_t::CDC_DEFAULT_TEXTURE_HEIGHT) const; + core::smart_refctd_ptr createIESTexture(ExecutionPolicy&& policy, hlsl::uint32_t2 resolution) const; + core::smart_refctd_ptr createIESTexture(hlsl::uint32_t2 resolution) const; + + template + inline core::smart_refctd_ptr createIESTexture(ExecutionPolicy&& policy) const { const auto res = getOptimalIESResolution(); return createIESTexture(policy, res); } + inline core::smart_refctd_ptr createIESTexture() const { const auto res = getOptimalIESResolution(); return createIESTexture(res); } private: CIESProfile(const properties_t& props, const hlsl::uint32_t2& resolution) : accessor(resolution, props) {} @@ -72,4 +72,6 @@ class CIESProfile } } -#endif // __NBL_ASSET_C_IES_PROFILE_H_INCLUDED__ \ No newline at end of file +#include "nbl/builtin/hlsl/ies/texture.hlsl" + +#endif // __NBL_ASSET_C_IES_PROFILE_H_INCLUDED__ diff --git a/src/nbl/asset/utils/CIESProfileParser.cpp b/src/nbl/asset/utils/CIESProfileParser.cpp index 6d9bf1ea32..c86fff9f7b 100644 --- a/src/nbl/asset/utils/CIESProfileParser.cpp +++ b/src/nbl/asset/utils/CIESProfileParser.cpp @@ -112,9 +112,12 @@ bool CIESProfileParser::parse(CIESProfile& result) if (vSize < 2) return false; + using angle_t = CIESProfile::accessor_t::angle_t; + using candela_t = CIESProfile::accessor_t::candela_t; + auto& vAngles = result.accessor.vAngles; for (int i = 0; i < vSize; i++) { - vAngles[i] = static_cast(getDouble("vertical angle truncated")); + vAngles[i] = 
static_cast(getDouble("vertical angle truncated")); } if (!std::is_sorted(vAngles.begin(), vAngles.end())) { errorMsg = "Vertical angles should be sorted"; @@ -131,7 +134,7 @@ bool CIESProfileParser::parse(CIESProfile& result) auto& hAngles = result.accessor.hAngles; for (int i = 0; i < hSize; i++) { - hAngles[i] = static_cast(getDouble("horizontal angle truncated")); + hAngles[i] = static_cast(getDouble("horizontal angle truncated")); if (i != 0 && hAngles[i - 1] > hAngles[i]) return false; // Angles should be sorted } @@ -175,7 +178,7 @@ bool CIESProfileParser::parse(CIESProfile& result) const double factor = ballastFactor * candelaMultiplier; for (int i = 0; i < hSize; i++) for (int j = 0; j < vSize; j++) - result.accessor.setValue(hlsl::uint32_t2(i, j), static_cast(factor * getDouble("intensity value truncated"))); + result.accessor.setValue(hlsl::uint32_t2(i, j), static_cast(factor * getDouble("intensity value truncated"))); } float totalEmissionIntegral = 0.0, nonZeroEmissionDomainSize = 0.0; @@ -227,8 +230,8 @@ bool CIESProfileParser::parse(CIESProfile& result) const uint32_t maxDimMeasureSize = core::sqrt(FULL_SOLID_ANGLE/smallestRangeSolidAngle); result.accessor.properties.optimalIESResolution = decltype(result.accessor.properties.optimalIESResolution){ maxDimMeasureSize, maxDimMeasureSize }; auto& res = result.accessor.properties.optimalIESResolution *= 2u; // safe bias for our bilinear interpolation to work nicely and increase resolution of a profile - res.x = core::max(res.x,CIESProfile::properties_t::CDC_MIN_TEXTURE_WIDTH); - res.y = core::max(res.y,CIESProfile::properties_t::CDC_MIN_TEXTURE_HEIGHT); + res.x = core::max(res.x, CIESProfile::texture_t::MinTextureWidth); + res.y = core::max(res.y, CIESProfile::texture_t::MinTextureHeight); } assert(nonZeroEmissionDomainSize >= 0.f); @@ -246,4 +249,4 @@ bool CIESProfileParser::parse(CIESProfile& result) } return !error; -} \ No newline at end of file +} diff --git a/src/nbl/builtin/CMakeLists.txt 
b/src/nbl/builtin/CMakeLists.txt index 665f3134f8..2ce9b8357a 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -232,6 +232,10 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/polar.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/angle_adding.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/octahedral.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/quaternions.hlsl") +# ies +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ies/profile.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ies/sampler.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ies/texture.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quadratic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/cubic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quartic.hlsl") diff --git a/src/nbl/ext/ImGui/ImGui.cpp b/src/nbl/ext/ImGui/ImGui.cpp index e449484bdc..a0de287559 100644 --- a/src/nbl/ext/ImGui/ImGui.cpp +++ b/src/nbl/ext/ImGui/ImGui.cpp @@ -978,26 +978,48 @@ bool UI::createMDIBuffer(SCreationParameters& creationParams) return flags; }; + auto* device = creationParams.utilities->getLogicalDevice(); + const auto* physDev = device->getPhysicalDevice(); + const auto upStreamingBits = physDev->getUpStreamingMemoryTypeBits(); + const auto hostVisibleBits = physDev->getHostVisibleMemoryTypeBits(); + bool usedFallback = false; + if (!creationParams.streamingBuffer) { IGPUBuffer::SCreationParams mdiCreationParams = {}; mdiCreationParams.usage = SCachedCreationParams::RequiredUsageFlags; mdiCreationParams.size = mdiBufferDefaultSize; - auto buffer = creationParams.utilities->getLogicalDevice()->createBuffer(std::move(mdiCreationParams)); + auto buffer = device->createBuffer(std::move(mdiCreationParams)); buffer->setObjectDebugName("MDI Upstream Buffer"); - auto memoryReqs = buffer->getMemoryReqs(); - memoryReqs.memoryTypeBits &= 
creationParams.utilities->getLogicalDevice()->getPhysicalDevice()->getUpStreamingMemoryTypeBits(); + const auto baseReqs = buffer->getMemoryReqs(); + + auto tryAllocate = [&](uint32_t typeBits)->IDeviceMemoryAllocator::SAllocation + { + auto reqs = baseReqs; + reqs.memoryTypeBits &= typeBits; + if (!reqs.memoryTypeBits) + return {}; + return device->allocate(reqs,buffer.get(),SCachedCreationParams::RequiredAllocateFlags); + }; - auto allocation = creationParams.utilities->getLogicalDevice()->allocate(memoryReqs,buffer.get(),SCachedCreationParams::RequiredAllocateFlags); + auto allocation = tryAllocate(upStreamingBits); + if (!allocation.isValid()) { - const bool allocated = allocation.isValid(); - assert(allocated); + allocation = tryAllocate(hostVisibleBits); + usedFallback = allocation.isValid(); + if (usedFallback) + creationParams.utilities->getLogger()->log("ImGui MDI buffer: up-streaming allocation failed, falling back to host-visible memory.", ILogger::ELL_WARNING); + } + if (!allocation.isValid()) + { + creationParams.utilities->getLogger()->log("ImGui MDI buffer: failed to allocate device memory!", ILogger::ELL_ERROR); + return false; } auto memory = allocation.memory; - if (!memory->map({ 0ull, memoryReqs.size }, getRequiredAccessFlags(memory->getMemoryPropertyFlags()))) + if (!memory->map({ 0ull, baseReqs.size }, getRequiredAccessFlags(memory->getMemoryPropertyFlags()))) creationParams.utilities->getLogger()->log("Could not map device memory!", ILogger::ELL_ERROR); creationParams.streamingBuffer = make_smart_refctd_ptr(SBufferRange{0ull,mdiCreationParams.size,std::move(buffer)},maxStreamingBufferAllocationAlignment,minStreamingBufferAllocationSize); @@ -1009,7 +1031,7 @@ bool UI::createMDIBuffer(SCreationParameters& creationParams) const auto validation = std::to_array ({ std::make_pair(buffer->getCreationParams().usage.hasFlags(SCachedCreationParams::RequiredUsageFlags), "MDI buffer must be created with IBuffer::EUF_INDIRECT_BUFFER_BIT | 
IBuffer::EUF_INDEX_BUFFER_BIT | IBuffer::EUF_VERTEX_BUFFER_BIT | IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT enabled!"), - std::make_pair(bool(buffer->getMemoryReqs().memoryTypeBits & creationParams.utilities->getLogicalDevice()->getPhysicalDevice()->getUpStreamingMemoryTypeBits()), "MDI buffer must have up-streaming memory type bits enabled!"), + std::make_pair(bool(buffer->getMemoryReqs().memoryTypeBits & (usedFallback ? hostVisibleBits : upStreamingBits)), "MDI buffer must have suitable host-visible memory type bits enabled!"), std::make_pair(binding.memory->getAllocateFlags().hasFlags(SCachedCreationParams::RequiredAllocateFlags), "MDI buffer's memory must be allocated with IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT enabled!"), std::make_pair(binding.memory->isCurrentlyMapped(), "MDI buffer's memory must be mapped!"), // streaming buffer contructor already validates it, but cannot assume user won't unmap its own buffer for some reason (sorry if you have just hit it) std::make_pair(binding.memory->getCurrentMappingAccess().hasFlags(getRequiredAccessFlags(binding.memory->getMemoryPropertyFlags())), "MDI buffer's memory current mapping access flags don't meet requirements!") @@ -1496,4 +1518,4 @@ void UI::setContext(void* imguiContext) { ImGui::SetCurrentContext(reinterpret_cast(imguiContext)); } -} \ No newline at end of file +} diff --git a/src/nbl/ext/MitsubaLoader/CMitsubaMaterialCompilerFrontend.cpp b/src/nbl/ext/MitsubaLoader/CMitsubaMaterialCompilerFrontend.cpp index a3c1c0949a..45a2592445 100644 --- a/src/nbl/ext/MitsubaLoader/CMitsubaMaterialCompilerFrontend.cpp +++ b/src/nbl/ext/MitsubaLoader/CMitsubaMaterialCompilerFrontend.cpp @@ -119,24 +119,9 @@ CMitsubaMaterialCompilerFrontend::EmitterNode* CMitsubaMaterialCompilerFrontend: profile.view = core::normalize(worldSpaceIESTransform[2]); } - float flatten = inProfile->flatten; - - // negative means full domain - const bool fullDomain = flatten < 0.f; - if (fullDomain) - flatten = -flatten; - - if 
(flatten > 1.f) - { - flatten = 1.f; - os::Printer::log("ERROR: Flatten property = " + std::to_string(inProfile->flatten) + " is outside it's [0, 1] domain, clamping!", ELL_ERROR); - } - else if (inProfile->flatten < std::numeric_limits::epsilon()-1) - os::Printer::log("WARNING: Full domain flatten mode detected with abs(flatten) = " + std::to_string(flatten), ELL_WARNING); - - core::smart_refctd_ptr flattenIES = nullptr; + core::smart_refctd_ptr iesTexture = nullptr; { - const auto cacheName = inProfile->filename + "?flatten=" + std::to_string(flatten); + const auto cacheName = inProfile->filename + "?ies"; // try cache { @@ -144,26 +129,26 @@ CMitsubaMaterialCompilerFrontend::EmitterNode* CMitsubaMaterialCompilerFrontend: asset::SAssetBundle bundle = m_loaderContext->override_->findCachedAsset(cacheName,types,m_loaderContext->inner,0u); auto contents = bundle.getContents(); if (!contents.empty() && bundle.getAssetType() == asset::IAsset::ET_IMAGE_VIEW) - flattenIES = core::smart_refctd_ptr_static_cast(*contents.begin()); + iesTexture = core::smart_refctd_ptr_static_cast(*contents.begin()); } // failed to find, have to create - if (!flattenIES) + if (!iesTexture) { const auto optimalResolution = meta->profile.getOptimalIESResolution(); - flattenIES = meta->profile.createIESTexture(flatten, fullDomain, optimalResolution.x, optimalResolution.y); + iesTexture = meta->profile.createIESTexture(optimalResolution); } // now must be loaded to proceed - if (!flattenIES) + if (!iesTexture) return false; // insert into cache asset::IAssetLoader::SAssetLoadContext ctx = { {}, nullptr }; - asset::SAssetBundle bundle = asset::SAssetBundle(nullptr, { core::smart_refctd_ptr(flattenIES) }); + asset::SAssetBundle bundle = asset::SAssetBundle(nullptr, { core::smart_refctd_ptr(iesTexture) }); m_loaderContext->override_->insertAssetIntoCache(bundle, cacheName, m_loaderContext->inner, 0u); } - profile.texture = { flattenIES, sampler, 1.f }; + profile.texture = { iesTexture, sampler, 
1.f }; // success res->emissionProfile = profile; @@ -174,14 +159,11 @@ CMitsubaMaterialCompilerFrontend::EmitterNode* CMitsubaMaterialCompilerFrontend: { case CElementEmissionProfile::EN_UNIT_MAX: { - // true Max value changes because of flatten - // can be reverted back to do nothing if the TODO about adjusting max-value while flattening gets done - res->intensity *= maxIntesity/(maxIntesity+(meta->profile.getAvgEmmision(fullDomain)-maxIntesity)*flatten); + // already normalized to max } break; case CElementEmissionProfile::EN_UNIT_AVERAGE_OVER_IMPLIED_DOMAIN: { - // because negative flatten (`!fullDomain`) expands the domain so implied==full - res->intensity *= maxIntesity / meta->profile.getAvgEmmision(fullDomain); + res->intensity *= maxIntesity / meta->profile.getAvgEmmision(false); } break; case CElementEmissionProfile::EN_UNIT_AVERAGE_OVER_FULL_DOMAIN: { @@ -681,4 +663,4 @@ auto CMitsubaMaterialCompilerFrontend::compileToIRTree(asset::material_compiler: return { frontroot, backroot }; } -} \ No newline at end of file +} diff --git a/src/nbl/video/utilities/CAssetConverter.cpp b/src/nbl/video/utilities/CAssetConverter.cpp index 69346a4049..d7f2d7dbbc 100644 --- a/src/nbl/video/utilities/CAssetConverter.cpp +++ b/src/nbl/video/utilities/CAssetConverter.cpp @@ -2480,7 +2480,7 @@ class MetaDeviceMemoryAllocator final // bind everything for (auto i=0; i CAssetConverter::convert_impl(SReserveResul } #endif } -} \ No newline at end of file +} From 4c0da882416638c2f646b819ce246ef503953b54 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 31 Jan 2026 15:23:51 +0100 Subject: [PATCH 461/472] adjust to comments --- include/nbl/builtin/hlsl/surface_transform.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/surface_transform.h b/include/nbl/builtin/hlsl/surface_transform.h index 39d3011072..475f0a6163 100644 --- a/include/nbl/builtin/hlsl/surface_transform.h +++ b/include/nbl/builtin/hlsl/surface_transform.h @@ 
-65,7 +65,7 @@ inline float32_t2x2 transformMatrix(const FLAG_BITS transform) return float32_t2x2(_nan,_nan,_nan,_nan); } -//! [width,height] might switch to [height, width] in orientations such as 90�CW +//! [width,height] might switch to [height, width] in orientations such as 90° CW //! Usecase: Find out how big the viewport has to be after or before a tranform is applied inline uint16_t2 transformedExtents(const FLAG_BITS transform, const uint16_t2 screenSize) { @@ -94,7 +94,7 @@ inline float transformedAspectRatio(const FLAG_BITS transform, const uint16_t2 s } //! Use this function to apply the INVERSE of swapchain tranformation to the screenspace coordinate `coord` -//! For example when the device orientation is 90�CW then this transforms the point 90�CCW. +//! For example when the device orientation is 90° CW then this transforms the point 90° CCW. //! Usecase = [Gather]: //! Applications such as raytracing in shaders where you would want to generate rays from screen space coordinates. //! 
Warnings: From e895a3fa803a7d8c02560482cb6172e6b474f293 Mon Sep 17 00:00:00 2001 From: devsh Date: Sat, 31 Jan 2026 17:15:42 +0100 Subject: [PATCH 462/472] post merge submodule update --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 11e54fd525..779ce1fe46 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 11e54fd525cf45f96c8e43869ddf1d15f3f18f9b +Subproject commit 779ce1fe4630135f82c1bcee1c57e3a37c3fa5cf From 73f8f5a43c0e3d57aabf71e89824c23dd6f5bc4e Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 1 Feb 2026 20:40:18 +0100 Subject: [PATCH 463/472] cleanup --- examples_tests | 2 +- include/nbl/asset/utils/CQuantQuaternionCache.h | 3 ++- .../transformation_matrix_utils.hlsl => matrix_utils.hlsl} | 7 +++---- .../linalg}/transformation_matrix_utils.hlsl | 6 +++--- include/nbl/builtin/hlsl/math/quaternions.hlsl | 2 +- .../linalg => matrix_utils}/matrix_runtime_traits.hlsl | 6 +++--- include/nbl/core/math/plane3dSIMD.h | 3 +-- include/nbl/ext/MitsubaLoader/CElementShape.h | 5 ++--- src/nbl/builtin/CMakeLists.txt | 5 +++-- 9 files changed, 19 insertions(+), 20 deletions(-) rename include/nbl/builtin/hlsl/math/linalg/{matrix_utils/transformation_matrix_utils.hlsl => matrix_utils.hlsl} (89%) rename include/nbl/builtin/hlsl/{matrix_utils => math/linalg}/transformation_matrix_utils.hlsl (97%) rename include/nbl/builtin/hlsl/{math/linalg => matrix_utils}/matrix_runtime_traits.hlsl (90%) diff --git a/examples_tests b/examples_tests index 779ce1fe46..b784970abd 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 779ce1fe4630135f82c1bcee1c57e3a37c3fa5cf +Subproject commit b784970abd76f1feabb76902ec922e9edf37ef0c diff --git a/include/nbl/asset/utils/CQuantQuaternionCache.h b/include/nbl/asset/utils/CQuantQuaternionCache.h index dc8d18545a..c4b13e9c38 100644 --- a/include/nbl/asset/utils/CQuantQuaternionCache.h +++ 
b/include/nbl/asset/utils/CQuantQuaternionCache.h @@ -7,6 +7,7 @@ #include "nbl/asset/utils/CDirQuantCacheBase.h" +#include "nbl/builtin/hlsl/math/quaternions.hlsl" namespace nbl @@ -68,4 +69,4 @@ class CQuantQuaternionCache : public CDirQuantCacheBase +#ifndef _NBL_BUILTIN_HLSL_MATH_LINALG_MATRIX_UTILS_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MATH_LINALG_MATRIX_UTILS_INCLUDED_ // TODO: remove this header when deleting vectorSIMDf.hlsl #ifndef __HLSL_VERSION #include @@ -94,4 +93,4 @@ namespace impl } } -#endif \ No newline at end of file +#endif diff --git a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl b/include/nbl/builtin/hlsl/math/linalg/transformation_matrix_utils.hlsl similarity index 97% rename from include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl rename to include/nbl/builtin/hlsl/math/linalg/transformation_matrix_utils.hlsl index dc2cdf3f02..92666d3b28 100644 --- a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl +++ b/include/nbl/builtin/hlsl/math/linalg/transformation_matrix_utils.hlsl @@ -1,5 +1,5 @@ -#ifndef _NBL_BUILTIN_HLSL_TRANSFORMATION_MATRIX_UTILS_INCLUDED_ -#define _NBL_BUILTIN_HLSL_TRANSFORMATION_MATRIX_UTILS_INCLUDED_ +#ifndef _NBL_BUILTIN_HLSL_MATH_LINALG_TRANSFORMATION_MATRIX_UTILS_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MATH_LINALG_TRANSFORMATION_MATRIX_UTILS_INCLUDED_ #include #include @@ -219,4 +219,4 @@ inline matrix buildProjectionMatrixOrthoLH(float widthOfViewVolume, flo } } -#endif \ No newline at end of file +#endif diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index 25fa61162d..c510dea477 100644 --- a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -6,7 +6,7 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" #include "nbl/builtin/hlsl/tgmath.hlsl" -#include "nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl" +#include 
"nbl/builtin/hlsl/matrix_utils/matrix_runtime_traits.hlsl" namespace nbl { diff --git a/include/nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl b/include/nbl/builtin/hlsl/matrix_utils/matrix_runtime_traits.hlsl similarity index 90% rename from include/nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl rename to include/nbl/builtin/hlsl/matrix_utils/matrix_runtime_traits.hlsl index 3ed2f549c9..4a7a0c2df3 100644 --- a/include/nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl +++ b/include/nbl/builtin/hlsl/matrix_utils/matrix_runtime_traits.hlsl @@ -1,8 +1,8 @@ // Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h -#ifndef _NBL_BUILTIN_HLSL_MATH_LINALG_MATRIX_RUNTIME_TRAITS_INCLUDED_ -#define _NBL_BUILTIN_HLSL_MATH_LINALG_MATRIX_RUNTIME_TRAITS_INCLUDED_ +#ifndef _NBL_BUILTIN_HLSL_MATRIX_UTILS_MATRIX_RUNTIME_TRAITS_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MATRIX_UTILS_MATRIX_RUNTIME_TRAITS_INCLUDED_ #include "nbl/builtin/hlsl/cpp_compat.hlsl" #include "nbl/builtin/hlsl/tgmath.hlsl" @@ -56,7 +56,7 @@ struct RuntimeTraits bool invertible; bool orthogonal; - scalar_t uniformScaleSq; // TODO: rename to `uniformColumnSqNorm` and move this whole header to `nbl/builtin/hlsl/matrix_utils/` and associated namespace + scalar_t uniformScaleSq; // TODO: rename to `uniformColumnSqNorm` bool orthonormal; }; diff --git a/include/nbl/core/math/plane3dSIMD.h b/include/nbl/core/math/plane3dSIMD.h index edad0a1287..25451553c1 100644 --- a/include/nbl/core/math/plane3dSIMD.h +++ b/include/nbl/core/math/plane3dSIMD.h @@ -4,8 +4,7 @@ // See the original file in irrlicht source for authors #include "vectorSIMD.h" -#include -#include +#include #ifndef __NBL_CORE_PLANE_3D_H_INCLUDED__ #define __NBL_CORE_PLANE_3D_H_INCLUDED__ diff --git a/include/nbl/ext/MitsubaLoader/CElementShape.h b/include/nbl/ext/MitsubaLoader/CElementShape.h index 
db0ca020e2..862fbe159e 100644 --- a/include/nbl/ext/MitsubaLoader/CElementShape.h +++ b/include/nbl/ext/MitsubaLoader/CElementShape.h @@ -10,8 +10,7 @@ #include "nbl/ext/MitsubaLoader/CElementBSDF.h" #include "nbl/ext/MitsubaLoader/CElementEmitter.h" -// awful path -#include "nbl/builtin/hlsl/math/linalg/matrix_utils/transformation_matrix_utils.hlsl" +#include "nbl/builtin/hlsl/math/linalg/matrix_utils.hlsl" namespace nbl::ext::MitsubaLoader @@ -295,4 +294,4 @@ class CElementShape final : public IElement } -#endif \ No newline at end of file +#endif diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 9a439987b5..a2d27b8529 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -161,6 +161,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ieee754/impl.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/array_accessors.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/vector_utils/vector_traits.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/matrix_utils/matrix_traits.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/matrix_utils/matrix_runtime_traits.hlsl") #spirv intrinsics LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/spirv_intrinsics/core.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/spirv_intrinsics/fragment_shader_pixel_interlock.hlsl") @@ -222,8 +223,8 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/format.hlsl") #linear algebra LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/fast_affine.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/transform.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/matrix_utils/transformation_matrix_utils.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/matrix_runtime_traits.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/matrix_utils.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED 
"hlsl/math/linalg/transformation_matrix_utils.hlsl") # TODO: rename `equations` to `polynomials` probably LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/functions.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/geometry.hlsl") From 0270548f009dc163b008bb68668df877fca921a8 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 1 Feb 2026 21:03:34 +0100 Subject: [PATCH 464/472] cleanup --- include/nbl/builtin/hlsl/math/quaternions.hlsl | 10 +++++----- .../hlsl/matrix_utils/matrix_runtime_traits.hlsl | 12 ++++++------ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index c510dea477..49a8f95d22 100644 --- a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -102,12 +102,12 @@ struct quaternion static this_t create(NBL_CONST_REF_ARG(matrix_type) _m, const bool dontAssertValidMatrix=false) { - scalar_type uniformScaleSq; + scalar_type uniformColumnSqNorm; { // only orthogonal and uniform scale mats can be converted linalg::RuntimeTraits traits = linalg::RuntimeTraits::create(_m); - bool valid = traits.orthogonal && !hlsl::isnan(traits.uniformScaleSq); - uniformScaleSq = traits.uniformScaleSq; + bool valid = traits.orthogonal && !hlsl::isnan(traits.uniformColumnSqNorm); + uniformColumnSqNorm = traits.uniformColumnSqNorm; if (dontAssertValidMatrix) { @@ -121,14 +121,14 @@ struct quaternion else assert(valid); } - if (uniformScaleSq < numeric_limits::min) + if (uniformColumnSqNorm < numeric_limits::min) { this_t retval; retval.data = hlsl::promote(bit_cast(numeric_limits::quiet_NaN)); return retval; } - const scalar_type uniformScale = hlsl::sqrt(uniformScaleSq); + const scalar_type uniformScale = hlsl::sqrt(uniformColumnSqNorm); matrix_type m = _m; m /= uniformScale; diff --git a/include/nbl/builtin/hlsl/matrix_utils/matrix_runtime_traits.hlsl 
b/include/nbl/builtin/hlsl/matrix_utils/matrix_runtime_traits.hlsl index 4a7a0c2df3..02f28dafde 100644 --- a/include/nbl/builtin/hlsl/matrix_utils/matrix_runtime_traits.hlsl +++ b/include/nbl/builtin/hlsl/matrix_utils/matrix_runtime_traits.hlsl @@ -38,25 +38,25 @@ struct RuntimeTraits } { const matrix_t m_T = hlsl::transpose(m); - scalar_t uniformScaleSq = hlsl::dot(m_T[0], m_T[0]); + scalar_t uniformColumnSqNorm = hlsl::dot(m_T[0], m_T[0]); NBL_UNROLL for (uint16_t i = 1; i < N; i++) { - if (!testing::relativeApproxCompare(hlsl::dot(m_T[i], m_T[i]), uniformScaleSq, 1e-4)) + if (!testing::relativeApproxCompare(hlsl::dot(m_T[i], m_T[i]), uniformColumnSqNorm, 1e-4)) { - uniformScaleSq = bit_cast(numeric_limits::quiet_NaN); + uniformColumnSqNorm = bit_cast(numeric_limits::quiet_NaN); break; } } - retval.uniformScaleSq = uniformScaleSq; - retval.orthonormal = retval.orthogonal && testing::relativeApproxCompare(uniformScaleSq, scalar_t(1.0), 1e-5); + retval.uniformColumnSqNorm = uniformColumnSqNorm; + retval.orthonormal = retval.orthogonal && testing::relativeApproxCompare(uniformColumnSqNorm, scalar_t(1.0), 1e-5); } return retval; } bool invertible; bool orthogonal; - scalar_t uniformScaleSq; // TODO: rename to `uniformColumnSqNorm` + scalar_t uniformColumnSqNorm; bool orthonormal; }; From ca783f8559ccc0c3553731904830be0aebaf1d5f Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 1 Feb 2026 21:53:04 +0100 Subject: [PATCH 465/472] address comments --- src/nbl/asset/utils/CWaveStringResolver.cpp | 17 +++--- tools/nsc/main.cpp | 58 ++++++++++----------- 2 files changed, 38 insertions(+), 37 deletions(-) diff --git a/src/nbl/asset/utils/CWaveStringResolver.cpp b/src/nbl/asset/utils/CWaveStringResolver.cpp index 8d8b81da79..d49ca1ac32 100644 --- a/src/nbl/asset/utils/CWaveStringResolver.cpp +++ b/src/nbl/asset/utils/CWaveStringResolver.cpp @@ -59,17 +59,18 @@ namespace nbl::wave // to match boost wave syntax // 
https://www.boost.org/doc/libs/1_82_0/libs/wave/doc/class_reference_context.html#:~:text=Maintain%20defined%20macros-,add_macro_definition,-bool%20add_macro_definition - for (const auto& define : preprocessOptions.extraDefines) - { - const std::string macroDefinition = define.identifier.data() + core::string("=") + define.definition.data(); - const bool isMacroAdded = context.add_macro_definition(macroDefinition); - assert(isMacroAdded); - } - // preprocess core::string resolvedString; try { + for (const auto& define : preprocessOptions.extraDefines) + { + std::string macroDefinition(define.identifier); + macroDefinition.push_back('='); + macroDefinition.append(define.definition); + context.add_macro_definition(macroDefinition); + } + auto stream = std::stringstream(); for (auto i = context.begin(); i != context.end(); i++) stream << i->get_value(); @@ -90,4 +91,4 @@ namespace nbl::wave return resolvedString; } -} \ No newline at end of file +} diff --git a/tools/nsc/main.cpp b/tools/nsc/main.cpp index 806af7f6e7..64ad684b0c 100644 --- a/tools/nsc/main.cpp +++ b/tools/nsc/main.cpp @@ -472,34 +472,6 @@ class ShaderCompiler final : public IApplicationFramework for (const auto& p : m_include_search_paths) includeFinder->addSearchPath(p, includeLoader); - if (preprocessOnly) - { - CHLSLCompiler::SPreprocessorOptions opt = {}; - opt.sourceIdentifier = sourceIdentifier; - opt.logger = m_logger.get(); - opt.includeFinder = includeFinder.get(); - opt.depfile = dep.enabled; - opt.depfilePath = dep.path; - - const char* codePtr = (const char*)shader->getContent()->getPointer(); - std::string_view code(codePtr, std::strlen(codePtr)); - - r.text = hlslcompiler->preprocessShader(std::string(code), shaderStage, opt, nullptr); - r.ok = !r.text.empty(); - r.view = r.text; - return r; - } - - CHLSLCompiler::SOptions opt = {}; - opt.stage = shaderStage; - opt.preprocessorOptions.sourceIdentifier = sourceIdentifier; - opt.preprocessorOptions.logger = m_logger.get(); - 
opt.preprocessorOptions.includeFinder = includeFinder.get(); - opt.preprocessorOptions.depfile = dep.enabled; - opt.preprocessorOptions.depfilePath = dep.path; - opt.debugInfoFlags = bitflag(IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_TOOL_BIT); - opt.dxcOptions = std::span(m_arguments); - // need this struct becuase fields of IShaderCompiler::SMacroDefinition are string views struct SMacroDefinitionBuffer { @@ -524,7 +496,7 @@ class ShaderCompiler final : public IApplicationFramework if (equalPos == std::string::npos) { identifier = argumentTmp; - definition = ""; + definition = "1"; } else { @@ -541,7 +513,35 @@ class ShaderCompiler final : public IApplicationFramework macroDefinitions.emplace_back(macroDefinitionBuffer.identifier, macroDefinitionBuffer.definition); } + if (preprocessOnly) + { + CHLSLCompiler::SPreprocessorOptions opt = {}; + opt.sourceIdentifier = sourceIdentifier; + opt.logger = m_logger.get(); + opt.includeFinder = includeFinder.get(); + opt.depfile = dep.enabled; + opt.depfilePath = dep.path; + opt.extraDefines = macroDefinitions; + + const char* codePtr = (const char*)shader->getContent()->getPointer(); + std::string_view code(codePtr, std::strlen(codePtr)); + + r.text = hlslcompiler->preprocessShader(std::string(code), shaderStage, opt, nullptr); + r.ok = !r.text.empty(); + r.view = r.text; + return r; + } + + CHLSLCompiler::SOptions opt = {}; + opt.stage = shaderStage; + opt.preprocessorOptions.sourceIdentifier = sourceIdentifier; + opt.preprocessorOptions.logger = m_logger.get(); + opt.preprocessorOptions.includeFinder = includeFinder.get(); + opt.preprocessorOptions.depfile = dep.enabled; + opt.preprocessorOptions.depfilePath = dep.path; opt.preprocessorOptions.extraDefines = macroDefinitions; + opt.debugInfoFlags = bitflag(IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_TOOL_BIT); + opt.dxcOptions = std::span(m_arguments); r.compiled = hlslcompiler->compileToSPIRV((const char*)shader->getContent()->getPointer(), opt); r.ok = bool(r.compiled); 
From e3a57df94bf0fac8aaf840c67148b5d7fc152284 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 2 Feb 2026 12:33:00 +0100 Subject: [PATCH 466/472] update examples_tests submodule pointer --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index bf34d4e030..e7c20f7715 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit bf34d4e03023903dc10fbe03ef5670c83eab3df9 +Subproject commit e7c20f7715895288c7dca16f3f271f207ac11ca4 From c327eb97da4d814cd003cc8750f8ea3a5499a1bf Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 2 Feb 2026 18:55:54 +0100 Subject: [PATCH 467/472] adjust to comments --- examples_tests | 2 +- .../linalg/{matrix_utils.hlsl => basic.hlsl} | 70 +++++- .../builtin/hlsl/math/linalg/fast_affine.hlsl | 24 +- .../builtin/hlsl/math/linalg/transform.hlsl | 91 +------ .../linalg/transformation_matrix_utils.hlsl | 222 ------------------ include/nbl/ext/MitsubaLoader/CElementShape.h | 2 +- src/nbl/asset/utils/CGeometryCreator.cpp | 4 +- src/nbl/builtin/CMakeLists.txt | 3 +- src/nbl/ext/MitsubaLoader/PropertyElement.cpp | 9 +- 9 files changed, 100 insertions(+), 327 deletions(-) rename include/nbl/builtin/hlsl/math/linalg/{matrix_utils.hlsl => basic.hlsl} (51%) delete mode 100644 include/nbl/builtin/hlsl/math/linalg/transformation_matrix_utils.hlsl diff --git a/examples_tests b/examples_tests index b784970abd..f90ce30d64 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit b784970abd76f1feabb76902ec922e9edf37ef0c +Subproject commit f90ce30d643ba03c6e0585f2db015009e27aa912 diff --git a/include/nbl/builtin/hlsl/math/linalg/matrix_utils.hlsl b/include/nbl/builtin/hlsl/math/linalg/basic.hlsl similarity index 51% rename from include/nbl/builtin/hlsl/math/linalg/matrix_utils.hlsl rename to include/nbl/builtin/hlsl/math/linalg/basic.hlsl index 1c53593e1b..15b9014998 100644 --- 
a/include/nbl/builtin/hlsl/math/linalg/matrix_utils.hlsl +++ b/include/nbl/builtin/hlsl/math/linalg/basic.hlsl @@ -1,10 +1,11 @@ -#ifndef _NBL_BUILTIN_HLSL_MATH_LINALG_MATRIX_UTILS_INCLUDED_ -#define _NBL_BUILTIN_HLSL_MATH_LINALG_MATRIX_UTILS_INCLUDED_ +#ifndef _NBL_BUILTIN_HLSL_MATH_LINALG_BASIC_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MATH_LINALG_BASIC_INCLUDED_ // TODO: remove this header when deleting vectorSIMDf.hlsl #ifndef __HLSL_VERSION #include #include "vectorSIMD.h" #endif +#include #include #include @@ -52,10 +53,50 @@ inline matrix truncate(const NBL_CONST_REF_ARG(matrix -inline matrix getSub3x3(NBL_CONST_REF_ARG(matrix) mat) +namespace impl +{ +template +struct zero_expand_helper +{ + static vector __call(const vector inVec) + { + return vector(inVec, vector(0)); + } +}; +template +struct zero_expand_helper { - return matrix(mat); + static vector __call(const vector inVec) + { + return inVec; + } +}; +} + +template= MIn) +vector zero_expand(vector inVec) +{ + return impl::zero_expand_helper::__call(inVec); +} + +template = NIn && MOut >= MIn) +matrix promote_affine(const matrix inMatrix) +{ + matrix retval; + + using out_row_t = hlsl::vector; + + NBL_UNROLL for (uint32_t row_i = 0; row_i < NIn; row_i++) + { + retval[row_i] = zero_expand(inMatrix[row_i]); + } + NBL_UNROLL for (uint32_t row_i = NIn; row_i < NOut; row_i++) + { + retval[row_i] = promote(0.0); + if (row_i < MOut) + retval[row_i][row_i] = T(1.0); + } + return retval; } } @@ -88,6 +129,25 @@ namespace impl return retval; } }; + + template + struct static_cast_helper, vector, void> + { + using To = vector; + using From = vector; + + static inline To cast(From vec) + { + To retval; + + NBL_UNROLL for (int i = 0; i < N; ++i) + { + retval[i] = hlsl::_static_cast(vec[i]); + } + + return retval; + } + }; } } diff --git a/include/nbl/builtin/hlsl/math/linalg/fast_affine.hlsl b/include/nbl/builtin/hlsl/math/linalg/fast_affine.hlsl index f9d7cd3546..605a107b83 100644 --- 
a/include/nbl/builtin/hlsl/math/linalg/fast_affine.hlsl +++ b/include/nbl/builtin/hlsl/math/linalg/fast_affine.hlsl @@ -8,6 +8,7 @@ #include #include #include +#include namespace nbl @@ -77,6 +78,27 @@ vector promoted_mul(NBL_CONST_REF_ARG(matrix) lhs, const vector return retval; } +template +inline void setRotation(NBL_REF_ARG(matrix) outMat, NBL_CONST_REF_ARG(math::quaternion) quat) +{ + static_assert(N == 3 || N == 4); + matrix mat = _static_cast >(quat); + + outMat[0] = mat[0]; + outMat[1] = mat[1]; + outMat[2] = mat[2]; +} + +template +inline void setTranslation(NBL_REF_ARG(matrix) outMat, NBL_CONST_REF_ARG(vector) translation) +{ + static_assert(N == 3 || N == 4); + + outMat[0].w = translation.x; + outMat[1].w = translation.y; + outMat[2].w = translation.z; +} + // useful for fast computation of a Normal Matrix template struct cofactors_base; @@ -173,4 +195,4 @@ Mat3x4 pseudoInverse3x4(NBL_CONST_REF_ARG(Mat3x4) tform) } } } -#endif \ No newline at end of file +#endif diff --git a/include/nbl/builtin/hlsl/math/linalg/transform.hlsl b/include/nbl/builtin/hlsl/math/linalg/transform.hlsl index e13a333ade..e46dfe997b 100644 --- a/include/nbl/builtin/hlsl/math/linalg/transform.hlsl +++ b/include/nbl/builtin/hlsl/math/linalg/transform.hlsl @@ -7,6 +7,7 @@ #include #include #include +#include namespace nbl { @@ -17,83 +18,6 @@ namespace math namespace linalg { -/// Builds a rotation 3 * 3 matrix created from an axis vector and an angle. -/// -/// @param angle Rotation angle expressed in radians. -/// @param axis Rotation axis, must be normalized. 
-/// -/// @tparam T A floating-point scalar type -template -matrix rotation_mat(T angle, const vector axis) -{ - const T a = angle; - const T c = cos(a); - const T s = sin(a); - - vector temp = hlsl::promote >((T(1.0) - c) * axis); - - matrix rotation; - rotation[0][0] = c + temp[0] * axis[0]; - rotation[0][1] = temp[1] * axis[0] - s * axis[2]; - rotation[0][2] = temp[2] * axis[0] + s * axis[1]; - - rotation[1][0] = temp[0] * axis[1] + s * axis[2]; - rotation[1][1] = c + temp[1] * axis[1]; - rotation[1][2] = temp[2] * axis[1] - s * axis[0]; - - rotation[2][0] = temp[0] * axis[2] - s * axis[1]; - rotation[2][1] = temp[1] * axis[2] + s * axis[0]; - rotation[2][2] = c + temp[2] * axis[2]; - - return rotation; -} - -namespace impl -{ -template -struct zero_expand_helper -{ - static vector __call(const vector inVec) - { - return vector(inVec, vector(0)); - } -}; -template -struct zero_expand_helper -{ - static vector __call(const vector inVec) - { - return inVec; - } -}; -} - -template= MIn) -vector zero_expand(vector inVec) -{ - return impl::zero_expand_helper::__call(inVec); -} - -template = NIn && MOut >= MIn) -matrix promote_affine(const matrix inMatrix) -{ - matrix retval; - - using out_row_t = hlsl::vector; - - NBL_UNROLL for (uint32_t row_i = 0; row_i < NIn; row_i++) - { - retval[row_i] = zero_expand(inMatrix[row_i]); - } - NBL_UNROLL for (uint32_t row_i = NIn; row_i < NOut; row_i++) - { - retval[row_i] = promote(0.0); - if (row_i < MOut) - retval[row_i][row_i] = T(1.0); - } - return retval; -} - // /Arek: glm:: for normalize till dot product is fixed (ambiguity with glm namespace + linker issues) template inline matrix lhLookAt( @@ -131,19 +55,6 @@ inline matrix rhLookAt( return r; } -template -inline void setTranslation(NBL_REF_ARG(matrix) outMat, NBL_CONST_REF_ARG(vector) translation) -{ - // TODO: not sure if it will be compatible with hlsl - static_assert(M > 0 && N > 0); - static_assert(M >= VecN); - - NBL_CONSTEXPR int16_t indexOfTheLastRowComponent = M - 
1; - - for(int i = 0; i < VecN; ++i) - outMat[i][indexOfTheLastRowComponent] = translation[i]; -} - } } } diff --git a/include/nbl/builtin/hlsl/math/linalg/transformation_matrix_utils.hlsl b/include/nbl/builtin/hlsl/math/linalg/transformation_matrix_utils.hlsl deleted file mode 100644 index 92666d3b28..0000000000 --- a/include/nbl/builtin/hlsl/math/linalg/transformation_matrix_utils.hlsl +++ /dev/null @@ -1,222 +0,0 @@ -#ifndef _NBL_BUILTIN_HLSL_MATH_LINALG_TRANSFORMATION_MATRIX_UTILS_INCLUDED_ -#define _NBL_BUILTIN_HLSL_MATH_LINALG_TRANSFORMATION_MATRIX_UTILS_INCLUDED_ - -#include -#include - -namespace nbl -{ -namespace hlsl -{ -//TODO: stolen from cameraz branch, don't have epsilonEqual here, maybe uncomment when merging from imguizmo-lights branch -//// TODO: -> move somewhere else and nbl:: to implement it -//template -//bool isOrthoBase(const T& x, const T& y, const T& z, const E epsilon = 1e-6) -//{ -// auto isNormalized = [](const auto& v, const auto& epsilon) -> bool -// { -// return glm::epsilonEqual(glm::length(v), 1.0, epsilon); -// }; -// -// auto isOrthogonal = [](const auto& a, const auto& b, const auto& epsilon) -> bool -// { -// return glm::epsilonEqual(glm::dot(a, b), 0.0, epsilon); -// }; -// -// return isNormalized(x, epsilon) && isNormalized(y, epsilon) && isNormalized(z, epsilon) && -// isOrthogonal(x, y, epsilon) && isOrthogonal(x, z, epsilon) && isOrthogonal(y, z, epsilon); -//} -//// <- - -template -matrix getMatrix3x4As4x4(const matrix& mat) -{ - matrix output; - for (int i = 0; i < 3; ++i) - output[i] = mat[i]; - output[3] = float32_t4(0.0f, 0.0f, 0.0f, 1.0f); - - return output; -} - -template -matrix getMatrix3x3As4x4(const matrix& mat) -{ - matrix output; - for (int i = 0; i < 3; ++i) - output[i] = float32_t4(mat[i], 1.0f); - output[3] = float32_t4(0.0f, 0.0f, 0.0f, 1.0f); - - return output; -} - -template -inline vector getCastedVector(const vector& in) -{ - vector out; - - for (int i = 0; i < N; ++i) - out[i] = (Tout)(in[i]); - - 
return out; -} - -template -inline matrix getCastedMatrix(const matrix& in) -{ - matrix out; - - for (int i = 0; i < N; ++i) - out[i] = getCastedVector(in[i]); - - return out; -} - -// TODO: remove -//! multiplies matrices a and b, 3x4 matrices are treated as 4x4 matrices with 4th row set to (0, 0, 0 ,1) -template -inline matrix concatenateBFollowedByA(const matrix& a, const matrix& b) -{ - const auto a4x4 = getMatrix3x4As4x4(a); - const auto b4x4 = getMatrix3x4As4x4(b); - return matrix(mul(a4x4, b4x4)); -} - -// /Arek: glm:: for normalize till dot product is fixed (ambiguity with glm namespace + linker issues) - -template -inline matrix buildCameraLookAtMatrixLH( - const vector& position, - const vector& target, - const vector& upVector) -{ - const vector zaxis = glm::normalize(target - position); - const vector xaxis = glm::normalize(hlsl::cross(upVector, zaxis)); - const vector yaxis = hlsl::cross(zaxis, xaxis); - - matrix r; - r[0] = vector(xaxis, -hlsl::dot(xaxis, position)); - r[1] = vector(yaxis, -hlsl::dot(yaxis, position)); - r[2] = vector(zaxis, -hlsl::dot(zaxis, position)); - - return r; -} - -template -inline matrix buildCameraLookAtMatrixRH( - const vector& position, - const vector& target, - const vector& upVector) -{ - const vector zaxis = glm::normalize(position - target); - const vector xaxis = glm::normalize(hlsl::cross(upVector, zaxis)); - const vector yaxis = hlsl::cross(zaxis, xaxis); - - matrix r; - r[0] = vector(xaxis, -hlsl::dot(xaxis, position)); - r[1] = vector(yaxis, -hlsl::dot(yaxis, position)); - r[2] = vector(zaxis, -hlsl::dot(zaxis, position)); - - return r; -} - -// TODO: test, check if there is better implementation -// TODO: move quaternion to nbl::hlsl -// TODO: why NBL_REF_ARG(MatType) doesn't work????? - -//! 
Replaces curent rocation and scale by rotation represented by quaternion `quat`, leaves 4th row and 4th colum unchanged -template -inline void setRotation(matrix& outMat, NBL_CONST_REF_ARG(math::quaternion) quat) -{ - static_assert(N == 3 || N == 4); - matrix mat = _static_cast>(quat); - - outMat[0] = mat[0]; - - outMat[1] = mat[1]; - - outMat[2] = mat[2]; -} - -template -inline void setTranslation(matrix& outMat, NBL_CONST_REF_ARG(vector) translation) -{ - static_assert(N == 3 || N == 4); - - outMat[0].w = translation.x; - outMat[1].w = translation.y; - outMat[2].w = translation.z; -} - - -template -inline matrix buildProjectionMatrixPerspectiveFovRH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar) -{ - const float h = core::reciprocal(tanf(fieldOfViewRadians * 0.5f)); - _NBL_DEBUG_BREAK_IF(aspectRatio == 0.f); //division by zero - const float w = h / aspectRatio; - - _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero - - matrix m; - m[0] = vector(w, 0.f, 0.f, 0.f); - m[1] = vector(0.f, -h, 0.f, 0.f); - m[2] = vector(0.f, 0.f, -zFar / (zFar - zNear), -zNear * zFar / (zFar - zNear)); - m[3] = vector(0.f, 0.f, -1.f, 0.f); - - return m; -} -template -inline matrix buildProjectionMatrixPerspectiveFovLH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar) -{ - const float h = core::reciprocal(tanf(fieldOfViewRadians * 0.5f)); - _NBL_DEBUG_BREAK_IF(aspectRatio == 0.f); //division by zero - const float w = h / aspectRatio; - - _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero - - matrix m; - m[0] = vector(w, 0.f, 0.f, 0.f); - m[1] = vector(0.f, -h, 0.f, 0.f); - m[2] = vector(0.f, 0.f, zFar / (zFar - zNear), -zNear * zFar / (zFar - zNear)); - m[3] = vector(0.f, 0.f, 1.f, 0.f); - - return m; -} - -template -inline matrix buildProjectionMatrixOrthoRH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar) -{ - _NBL_DEBUG_BREAK_IF(widthOfViewVolume == 0.f); //division by zero - 
_NBL_DEBUG_BREAK_IF(heightOfViewVolume == 0.f); //division by zero - _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero - - matrix m; - m[0] = vector(2.f / widthOfViewVolume, 0.f, 0.f, 0.f); - m[1] = vector(0.f, -2.f / heightOfViewVolume, 0.f, 0.f); - m[2] = vector(0.f, 0.f, -1.f / (zFar - zNear), -zNear / (zFar - zNear)); - m[3] = vector(0.f, 0.f, 0.f, 1.f); - - return m; -} - -template -inline matrix buildProjectionMatrixOrthoLH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar) -{ - _NBL_DEBUG_BREAK_IF(widthOfViewVolume == 0.f); //division by zero - _NBL_DEBUG_BREAK_IF(heightOfViewVolume == 0.f); //division by zero - _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero - - matrix m; - m[0] = vector(2.f / widthOfViewVolume, 0.f, 0.f, 0.f); - m[1] = vector(0.f, -2.f / heightOfViewVolume, 0.f, 0.f); - m[2] = vector(0.f, 0.f, 1.f / (zFar - zNear), -zNear / (zFar - zNear)); - m[3] = vector(0.f, 0.f, 0.f, 1.f); - - return m; -} - -} -} - -#endif diff --git a/include/nbl/ext/MitsubaLoader/CElementShape.h b/include/nbl/ext/MitsubaLoader/CElementShape.h index 862fbe159e..b641c964d2 100644 --- a/include/nbl/ext/MitsubaLoader/CElementShape.h +++ b/include/nbl/ext/MitsubaLoader/CElementShape.h @@ -10,7 +10,7 @@ #include "nbl/ext/MitsubaLoader/CElementBSDF.h" #include "nbl/ext/MitsubaLoader/CElementEmitter.h" -#include "nbl/builtin/hlsl/math/linalg/matrix_utils.hlsl" +#include "nbl/builtin/hlsl/math/linalg/basic.hlsl" namespace nbl::ext::MitsubaLoader diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp index 2aa2e08fe5..b8f9ffc4e2 100644 --- a/src/nbl/asset/utils/CGeometryCreator.cpp +++ b/src/nbl/asset/utils/CGeometryCreator.cpp @@ -6,6 +6,7 @@ #include "nbl/asset/utils/CGeometryCreator.h" #include "nbl/builtin/hlsl/tgmath.hlsl" #include "nbl/builtin/hlsl/math/linalg/transform.hlsl" +#include "nbl/builtin/hlsl/math/quaternions.hlsl" #include #include @@ -746,7 +747,8 @@ core::smart_refctd_ptr 
CGeometryCreator::createArrow( geometries->push_back({ .geometry = cylinder }); - const auto coneTransform = hlsl::math::linalg::rotation_mat(hlsl::numbers::pi * -0.5f, hlsl::float32_t3(1.f, 0.f, 0.f)); + const auto coneRotation = hlsl::math::quaternion::create(hlsl::float32_t3(1.f, 0.f, 0.f), hlsl::numbers::pi * -0.5f); + const auto coneTransform = hlsl::math::linalg::promote_affine<3, 4>(hlsl::_static_cast(coneRotation)); geometries->push_back({ .transform = hlsl::math::linalg::promote_affine<3, 4>(coneTransform), .geometry = cone diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index a2d27b8529..631c531767 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -223,8 +223,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/format.hlsl") #linear algebra LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/fast_affine.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/transform.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/matrix_utils.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/transformation_matrix_utils.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/basic.hlsl") # TODO: rename `equations` to `polynomials` probably LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/functions.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/geometry.hlsl") diff --git a/src/nbl/ext/MitsubaLoader/PropertyElement.cpp b/src/nbl/ext/MitsubaLoader/PropertyElement.cpp index 6283076825..5ea0353a48 100644 --- a/src/nbl/ext/MitsubaLoader/PropertyElement.cpp +++ b/src/nbl/ext/MitsubaLoader/PropertyElement.cpp @@ -12,6 +12,7 @@ #include "nbl/ext/MitsubaLoader/ParserUtil.h" #include "nbl/builtin/hlsl/math/linalg/transform.hlsl" +#include "nbl/builtin/hlsl/math/quaternions.hlsl" #include "glm/gtc/matrix_transform.hpp" @@ -202,9 +203,9 @@ std::optional CPropertyElementManager::createPropertyData 
invalidXMLFileStructure(logger,"Invalid element, name:\'"+result.name+"\' Axis can't be (0,0,0)"); return {}; } - // TODO: quaternion after the rework - using namespace nbl::hlsl::math;//::linalg; - result.mvalue = linalg::promote_affine<4,4>(linalg::rotation_mat(hlsl::radians(atof(desiredAttributes[0])),axis)); + using namespace nbl::hlsl::math; + const auto rotation = quaternion::create(axis, hlsl::radians(atof(desiredAttributes[0]))); + result.mvalue = linalg::promote_affine<4,4>(hlsl::_static_cast(rotation)); } break; case SPropertyElementData::Type::SCALE: @@ -376,4 +377,4 @@ hlsl::float32_t4 CPropertyElementManager::retrieveHex(const std::string_view& _d return retval/255.f; } -} \ No newline at end of file +} From b1ef469b8770facb26fc97927f25996e5f712ba1 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 2 Feb 2026 19:34:32 +0100 Subject: [PATCH 468/472] more cleanup --- .../hlsl/math/thin_lens_projection.hlsl | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/math/thin_lens_projection.hlsl b/include/nbl/builtin/hlsl/math/thin_lens_projection.hlsl index 70c46fdb37..985ec8a6a3 100644 --- a/include/nbl/builtin/hlsl/math/thin_lens_projection.hlsl +++ b/include/nbl/builtin/hlsl/math/thin_lens_projection.hlsl @@ -82,7 +82,30 @@ inline matrix lhProjectionOrthoMatrix(FloatingPoint widthOf } } + +template) +inline matrix buildProjectionMatrixPerspectiveFovRH(FloatingPoint fieldOfViewRadians, FloatingPoint aspectRatio, FloatingPoint zNear, FloatingPoint zFar) +{ + return math::thin_lens::rhPerspectiveFovMatrix(fieldOfViewRadians, aspectRatio, zNear, zFar); +} +template) +inline matrix buildProjectionMatrixPerspectiveFovLH(FloatingPoint fieldOfViewRadians, FloatingPoint aspectRatio, FloatingPoint zNear, FloatingPoint zFar) +{ + return math::thin_lens::lhPerspectiveFovMatrix(fieldOfViewRadians, aspectRatio, zNear, zFar); +} + +template) +inline matrix buildProjectionMatrixOrthoRH(FloatingPoint 
widthOfViewVolume, FloatingPoint heightOfViewVolume, FloatingPoint zNear, FloatingPoint zFar) +{ + return math::thin_lens::rhProjectionOrthoMatrix(widthOfViewVolume, heightOfViewVolume, zNear, zFar); +} +template) +inline matrix buildProjectionMatrixOrthoLH(FloatingPoint widthOfViewVolume, FloatingPoint heightOfViewVolume, FloatingPoint zNear, FloatingPoint zFar) +{ + return math::thin_lens::lhProjectionOrthoMatrix(widthOfViewVolume, heightOfViewVolume, zNear, zFar); +} + } } -#endif \ No newline at end of file +#endif From 37a18e6335e9032e9b8385a5ce21ad1c2041d1c5 Mon Sep 17 00:00:00 2001 From: devsh Date: Thu, 5 Feb 2026 17:05:18 +0100 Subject: [PATCH 469/472] submodule update --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 9723c5fe5b..fcb43e613a 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 9723c5fe5beb6ee05a2903f4a8f37049f819539e +Subproject commit fcb43e613a9f9ec8ea6c4ad2835410cb5d907237 From 620ebe0bbbe687ede4400d24b4f413d9c1adb1e9 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 5 Feb 2026 19:11:18 +0100 Subject: [PATCH 470/472] IES parser: avoid isspace assert on non-ASCII bytes --- src/nbl/asset/utils/CIESProfileParser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nbl/asset/utils/CIESProfileParser.cpp b/src/nbl/asset/utils/CIESProfileParser.cpp index c86fff9f7b..66196b1902 100644 --- a/src/nbl/asset/utils/CIESProfileParser.cpp +++ b/src/nbl/asset/utils/CIESProfileParser.cpp @@ -17,7 +17,7 @@ bool CIESProfileParser::parse(CIESProfile& result) { auto removeTrailingWhiteChars = [](std::string& str) -> void { - if (std::isspace(str.back())) + if (!str.empty() && std::isspace(static_cast(str.back()))) { auto it = str.rbegin(); while (it != str.rend() && std::isspace(static_cast(*it))) From e0a23fcda9a86fa817bd3f2d1d23eced63a0ecf6 Mon Sep 17 00:00:00 2001 From: devsh Date: Fri, 6 Feb 2026 21:48:45 +0100 Subject: 
[PATCH 471/472] change the signatures of IAssetLoader interm_get a little bit, and make our first indirect load example --- include/nbl/asset/IAssetManager.h | 16 +- include/nbl/asset/ICPUScene.h | 4 +- include/nbl/asset/interchange/IAssetLoader.h | 91 ++++++- include/nbl/ext/MitsubaLoader/SContext.h | 57 +---- src/nbl/asset/IAssetManager.cpp | 4 +- src/nbl/asset/interchange/IAssetLoader.cpp | 62 +++-- ...GLSLMitsubaLoaderBuiltinIncludeGenerator.h | 56 ----- src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp | 237 +++--------------- 8 files changed, 185 insertions(+), 342 deletions(-) delete mode 100644 src/nbl/ext/MitsubaLoader/CGLSLMitsubaLoaderBuiltinIncludeGenerator.h diff --git a/include/nbl/asset/IAssetManager.h b/include/nbl/asset/IAssetManager.h index 2105b6c4fe..d9995526bc 100644 --- a/include/nbl/asset/IAssetManager.h +++ b/include/nbl/asset/IAssetManager.h @@ -385,8 +385,10 @@ class NBL_API2 IAssetManager : public core::IReferenceCounted uint32_t getAssetLoaderCount() { return static_cast(m_loaders.vector.size()); } //! @returns 0xdeadbeefu on failure or 0-based index on success. 
- uint32_t addAssetLoader(core::smart_refctd_ptr&& _loader) + inline uint32_t addAssetLoader(core::smart_refctd_ptr&& _loader) { + if (!_loader) + return 0xdeadbeefu; // there's no way it ever fails, so no 0xdeadbeef return const char** exts = _loader->getAssociatedFileExtensions(); size_t extIx = 0u; @@ -395,8 +397,10 @@ class NBL_API2 IAssetManager : public core::IReferenceCounted m_loaders.pushToVector(std::move(_loader)); return static_cast(m_loaders.vector.size())-1u; } - void removeAssetLoader(IAssetLoader* _loader) + inline void removeAssetLoader(IAssetLoader* _loader) { + if (!_loader) + return; m_loaders.eraseFromVector( std::find_if(std::begin(m_loaders.vector), std::end(m_loaders.vector), [_loader](const core::smart_refctd_ptr& a)->bool { return a.get()==_loader; }) ); @@ -409,8 +413,10 @@ class NBL_API2 IAssetManager : public core::IReferenceCounted // Asset Writers [FOLLOWING ARE NOT THREAD SAFE] uint32_t getAssetWriterCount() { return static_cast(m_writers.perType.getSize()); } // todo.. well, it's not really writer count.. 
but rather type<->writer association count - void addAssetWriter(core::smart_refctd_ptr&& _writer) + inline void addAssetWriter(core::smart_refctd_ptr&& _writer) { + if (!_writer) + return; const uint64_t suppTypes = _writer->getSupportedAssetTypesBitfield(); const char** exts = _writer->getAssociatedFileExtensions(); for (uint32_t i = 0u; i < IAsset::ET_STANDARD_TYPES_COUNT; ++i) @@ -425,8 +431,10 @@ class NBL_API2 IAssetManager : public core::IReferenceCounted } } } - void removeAssetWriter(IAssetWriter* _writer) + inline void removeAssetWriter(IAssetWriter* _writer) { + if (!_writer) + return; const uint64_t suppTypes = _writer->getSupportedAssetTypesBitfield(); const char** exts = _writer->getAssociatedFileExtensions(); size_t extIx = 0u; diff --git a/include/nbl/asset/ICPUScene.h b/include/nbl/asset/ICPUScene.h index 6205e651fa..56a069c469 100644 --- a/include/nbl/asset/ICPUScene.h +++ b/include/nbl/asset/ICPUScene.h @@ -181,7 +181,7 @@ class ICPUScene final : public IAsset, public IScene Count }; // - inline bool addEnvLight(const EEnvLightType type, core::smart_refctd_ptr&& tex) + inline bool addEnvLight(const EEnvLightType type, core::smart_refctd_ptr&& tex) { if (!tex) return false; @@ -232,7 +232,7 @@ class ICPUScene final : public IAsset, public IScene // SInstanceStorage m_instances; // - core::vector> m_envLightTexs; + core::vector> m_envLightTexs; core::vector m_envLightTypes; // const uint8_t m_maxMorphTargetGeometryCountLog2; diff --git a/include/nbl/asset/interchange/IAssetLoader.h b/include/nbl/asset/interchange/IAssetLoader.h index 09f842e659..a194f0e13e 100644 --- a/include/nbl/asset/interchange/IAssetLoader.h +++ b/include/nbl/asset/interchange/IAssetLoader.h @@ -177,6 +177,9 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted public: NBL_API2 IAssetLoaderOverride(IAssetManager* _manager); + // + inline IAssetManager* getManager() const {return m_manager;} + //! 
template inline std::pair,const IAssetMetadata*> findDefaultAsset(const std::string& inSearchKey, const SAssetLoadContext& ctx, const uint32_t hierarchyLevel) @@ -270,7 +273,7 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted //! After a successful load of an asset or sub-asset //TODO change name - virtual void insertAssetIntoCache(SAssetBundle& asset, const std::string& supposedKey, const SAssetLoadContext& ctx, const uint32_t hierarchyLevel); + virtual void insertAssetIntoCache(SAssetBundle& asset, const std::string& supposedKey, const SAssetLoadParams& _params, const uint32_t hierarchyLevel); }; public: @@ -291,14 +294,18 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted //! Loads an asset from an opened file, returns nullptr in case of failure. virtual SAssetBundle loadAsset(system::IFile* _file, const SAssetLoadParams& _params, IAssetLoaderOverride* _override, uint32_t _hierarchyLevel = 0u) = 0; + // virtual void initialize() {} + // + static core::smart_refctd_ptr createDefaultImageView(core::smart_refctd_ptr&& image); + protected: // accessors for loaders - SAssetBundle interm_getAssetInHierarchy(IAssetManager* _mgr, system::IFile* _file, const std::string& _supposedFilename, const IAssetLoader::SAssetLoadParams& _params, uint32_t _hierarchyLevel, IAssetLoader::IAssetLoaderOverride* _override); - SAssetBundle interm_getAssetInHierarchy(IAssetManager* _mgr, const std::string& _filename, const IAssetLoader::SAssetLoadParams& _params, uint32_t _hierarchyLevel, IAssetLoader::IAssetLoaderOverride* _override); + SAssetBundle interm_getAssetInHierarchy(system::IFile* _file, const std::string& _supposedFilename, const IAssetLoader::SAssetLoadParams& _params, uint32_t _hierarchyLevel, IAssetLoader::IAssetLoaderOverride* _override); + SAssetBundle interm_getAssetInHierarchy(const std::string& _filename, const IAssetLoader::SAssetLoadParams& _params, uint32_t _hierarchyLevel, IAssetLoader::IAssetLoaderOverride* _override); // 
only the overload we use for now - SAssetBundle interm_getAssetInHierarchyWithAllContent(IAssetManager* _mgr, const std::string& _filename, const IAssetLoader::SAssetLoadParams& _params, uint32_t _hierarchyLevel, IAssetLoader::IAssetLoaderOverride* _override); + SAssetBundle interm_getAssetInHierarchyWithAllContent(const std::string& _filename, const IAssetLoader::SAssetLoadParams& _params, uint32_t _hierarchyLevel, IAssetLoader::IAssetLoaderOverride* _override); void interm_setAssetMutability(const IAssetManager* _mgr, IAsset* _asset, const bool _val); @@ -306,10 +313,86 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted bool insertBuiltinAssetIntoCache(IAssetManager* _mgr, core::smart_refctd_ptr& _asset, core::smart_refctd_ptr&& metadata, const std::string _path); bool insertBuiltinAssetIntoCache(IAssetManager* _mgr, core::smart_refctd_ptr&& _asset, core::smart_refctd_ptr&& metadata, const std::string _path); + // TODO: make static? inline void setAssetInBundle(SAssetBundle& bundle, const uint32_t offset, core::smart_refctd_ptr&& _asset) { bundle.setAsset(offset,std::move(_asset)); } + + // + template requires is_any_of_v + SAssetBundle interm_getImageViewInHierarchy(const PathOrFile& pathOrFile, const IAssetLoader::SAssetLoadParams& _params, const uint32_t _hierarchyLevel, IAssetLoader::IAssetLoaderOverride* _override) + { + // TODO: first we needed to try-load to figure out the cache key to be used, but we could do it differently I guess + // TODO: this load shouldn't add to cache until we exit successfully + auto bundle = interm_getAssetInHierarchy(pathOrFile,_params,_hierarchyLevel,_override); + auto contentRange = bundle.getContents(); + if (contentRange.empty()) + return {}; + if (const auto origType=bundle.getAssetType(); origType==IAsset::E_TYPE::ET_IMAGE_VIEW) + return bundle; + else if (origType!=IAsset::E_TYPE::ET_IMAGE) + { + _params.logger.log( + "IAssetLoader::interm_getImageViewInHierarchy loaded assed with key \"%s\" with was 
of type %s not IMAGE", + system::ILogger::ELL_ERROR,bundle.getCacheKey().c_str(),system::to_string(origType).c_str() + ); + return {}; + } + + const auto cacheKey = bundle.getCacheKey()+"?view?IAssetLoader?default"; + SAssetLoadContext ctx(_params,nullptr); + // search the cache for the imageview + { + const asset::IAsset::E_TYPE types[]{asset::IAsset::ET_IMAGE_VIEW,asset::IAsset::ET_TERMINATING_ZERO}; + auto cachedBundle = _override->findCachedAsset(cacheKey,types,ctx,_hierarchyLevel); + // check if found + if (!cachedBundle.getContents().empty()) + return cachedBundle; + } + + // ok now create default views for all the images + auto container = core::make_refctd_dynamic_array(contentRange.size()); + auto outIt = container->begin(); + for (auto& asset : contentRange) + *(outIt++) = createDefaultImageView(core::smart_refctd_ptr_static_cast(asset)); + bundle = SAssetBundle(nullptr,std::move(container)); + _override->insertAssetIntoCache(bundle,cacheKey,_params,_hierarchyLevel); + return bundle; + } + +#if 0 + // should we have derivative, bump and normalmap getters or shall we support all 3 in Frontend? 
+ static std::string imageViewCacheKey(const CElementTexture::Bitmap& bitmap, const CMitsubaMaterialCompilerFrontend::E_IMAGE_VIEW_SEMANTIC semantic) + { + std::string key = bitmap.filename.svalue; + switch (semantic) + { + case CMitsubaMaterialCompilerFrontend::EIVS_NORMAL_MAP: + key += "?deriv?n"; + break; + case CMitsubaMaterialCompilerFrontend::EIVS_BUMP_MAP: + key += "?deriv?h"; + { + static const char* wrap[5] + { + "?repeat", + "?mirror", + "?clamp", + "?zero", + "?one" + }; + key += wrap[bitmap.wrapModeU]; + key += wrap[bitmap.wrapModeV]; + } + break; + default: + break; + } + key += "?view"; + return key; + } +#endif }; } diff --git a/include/nbl/ext/MitsubaLoader/SContext.h b/include/nbl/ext/MitsubaLoader/SContext.h index a65854bedc..f3c952935c 100644 --- a/include/nbl/ext/MitsubaLoader/SContext.h +++ b/include/nbl/ext/MitsubaLoader/SContext.h @@ -6,7 +6,6 @@ #include "nbl/asset/ICPUPolygonGeometry.h" -//#include "nbl/asset/utils/IGeometryCreator.h" #include "nbl/asset/interchange/CIESProfileLoader.h" #include "nbl/ext/MitsubaLoader/CMitsubaMetadata.h" @@ -49,63 +48,13 @@ struct SContext final core::unordered_map groupCache; // core::unordered_map shapeCache; -#if 0 + +#if 0 // stuff that belongs in the Material Compiler backend //image, sampler using tex_ass_type = std::tuple,core::smart_refctd_ptr>; - //image, scale + //image, scale core::map,float> derivMapCache; - // - static std::string imageViewCacheKey(const CElementTexture::Bitmap& bitmap, const CMitsubaMaterialCompilerFrontend::E_IMAGE_VIEW_SEMANTIC semantic) - { - std::string key = bitmap.filename.svalue; - switch (bitmap.channel) - { - case CElementTexture::Bitmap::CHANNEL::R: - key += "?rrrr"; - break; - case CElementTexture::Bitmap::CHANNEL::G: - key += "?gggg"; - break; - case CElementTexture::Bitmap::CHANNEL::B: - key += "?bbbb"; - break; - case CElementTexture::Bitmap::CHANNEL::A: - key += "?aaaa"; - break; - default: - break; - } - switch (semantic) - { - case 
CMitsubaMaterialCompilerFrontend::EIVS_BLEND_WEIGHT: - key += "?blend"; - break; - case CMitsubaMaterialCompilerFrontend::EIVS_NORMAL_MAP: - key += "?deriv?n"; - break; - case CMitsubaMaterialCompilerFrontend::EIVS_BUMP_MAP: - key += "?deriv?h"; - { - static const char* wrap[5] - { - "?repeat", - "?mirror", - "?clamp", - "?zero", - "?one" - }; - key += wrap[bitmap.wrapModeU]; - key += wrap[bitmap.wrapModeV]; - } - break; - default: - break; - } - key += "?view"; - return key; - } - static asset::ISampler::SParams emissionProfileSamplerParams(const CElementEmissionProfile* profile, const asset::CIESProfileMetadata& meta) { return { diff --git a/src/nbl/asset/IAssetManager.cpp b/src/nbl/asset/IAssetManager.cpp index 5f48170c37..29930bccd9 100644 --- a/src/nbl/asset/IAssetManager.cpp +++ b/src/nbl/asset/IAssetManager.cpp @@ -236,14 +236,14 @@ SAssetBundle IAssetManager::getAssetInHierarchy_impl(system::IFile* _file, const ((levelFlags & IAssetLoader::ECF_DONT_CACHE_TOP_LEVEL) != IAssetLoader::ECF_DONT_CACHE_TOP_LEVEL) && ((levelFlags & IAssetLoader::ECF_DUPLICATE_TOP_LEVEL) != IAssetLoader::ECF_DUPLICATE_TOP_LEVEL)) { - _override->insertAssetIntoCache(bundle, filename.string(), ctx, _hierarchyLevel); + _override->insertAssetIntoCache(bundle, filename.string(), ctx.params, _hierarchyLevel); } else if (bundle.getContents().empty()) { bool addToCache; bundle = _override->handleLoadFail(addToCache, file.get(), filename.string(), filename.string(), ctx, _hierarchyLevel); if (!bundle.getContents().empty() && addToCache) - _override->insertAssetIntoCache(bundle, filename.string(), ctx, _hierarchyLevel); + _override->insertAssetIntoCache(bundle, filename.string(), ctx.params, _hierarchyLevel); } return bundle; } diff --git a/src/nbl/asset/interchange/IAssetLoader.cpp b/src/nbl/asset/interchange/IAssetLoader.cpp index edec446747..9a881b300b 100644 --- a/src/nbl/asset/interchange/IAssetLoader.cpp +++ b/src/nbl/asset/interchange/IAssetLoader.cpp @@ -6,8 +6,8 @@ #include 
"nbl/asset/IAssetManager.h" -using namespace nbl; -using namespace asset; +using namespace nbl::core; +using namespace nbl::asset; // todo NEED DOCS IAssetLoader::IAssetLoaderOverride::IAssetLoaderOverride(IAssetManager* _manager) : m_manager(_manager), m_system(m_manager->getSystem()) @@ -26,28 +26,28 @@ SAssetBundle IAssetLoader::IAssetLoaderOverride::findCachedAsset(const std::stri return chooseRelevantFromFound(found->begin(), found->end(), ctx, hierarchyLevel); } -void IAssetLoader::IAssetLoaderOverride::insertAssetIntoCache(SAssetBundle& asset, const std::string& supposedKey, const SAssetLoadContext& ctx, const uint32_t hierarchyLevel) +void IAssetLoader::IAssetLoaderOverride::insertAssetIntoCache(SAssetBundle& asset, const std::string& supposedKey, const SAssetLoadParams& _params, const uint32_t hierarchyLevel) { m_manager->changeAssetKey(asset, supposedKey); - auto levelFlag = ctx.params.cacheFlags >> (uint64_t(hierarchyLevel) * 2ull); + auto levelFlag = _params.cacheFlags >> (uint64_t(hierarchyLevel) * 2ull); if (!(levelFlag&ECF_DONT_CACHE_TOP_LEVEL)) m_manager->insertAssetIntoCache(asset,ASSET_MUTABILITY_ON_CACHE_INSERT); } -SAssetBundle IAssetLoader::interm_getAssetInHierarchy(IAssetManager* _mgr, system::IFile* _file, const std::string& _supposedFilename, const IAssetLoader::SAssetLoadParams& _params, uint32_t _hierarchyLevel, IAssetLoader::IAssetLoaderOverride* _override) +SAssetBundle IAssetLoader::interm_getAssetInHierarchy(system::IFile* _file, const std::string& _supposedFilename, const IAssetLoader::SAssetLoadParams& _params, uint32_t _hierarchyLevel, IAssetLoader::IAssetLoaderOverride* _override) { - return _mgr->getAssetInHierarchy(_file, _supposedFilename, _params, _hierarchyLevel, _override); + return _override->getManager()->getAssetInHierarchy(_file, _supposedFilename, _params, _hierarchyLevel, _override); } -SAssetBundle IAssetLoader::interm_getAssetInHierarchy(IAssetManager* _mgr, const std::string& _filename, const 
IAssetLoader::SAssetLoadParams& _params, uint32_t _hierarchyLevel, IAssetLoader::IAssetLoaderOverride* _override) +SAssetBundle IAssetLoader::interm_getAssetInHierarchy(const std::string& _filename, const IAssetLoader::SAssetLoadParams& _params, uint32_t _hierarchyLevel, IAssetLoader::IAssetLoaderOverride* _override) { - return _mgr->getAssetInHierarchy(_filename, _params, _hierarchyLevel, _override); + return _override->getManager()->getAssetInHierarchy(_filename, _params, _hierarchyLevel, _override); } -SAssetBundle IAssetLoader::interm_getAssetInHierarchyWithAllContent(IAssetManager* _mgr, const std::string& _filename, const IAssetLoader::SAssetLoadParams& _params, uint32_t _hierarchyLevel, IAssetLoader::IAssetLoaderOverride* _override) +SAssetBundle IAssetLoader::interm_getAssetInHierarchyWithAllContent(const std::string& _filename, const IAssetLoader::SAssetLoadParams& _params, uint32_t _hierarchyLevel, IAssetLoader::IAssetLoaderOverride* _override) { - auto firstLoad = interm_getAssetInHierarchy(_mgr,_filename,_params,_hierarchyLevel,_override); + auto firstLoad = interm_getAssetInHierarchy(_filename,_params,_hierarchyLevel,_override); auto bundleHasAllContent = [](const SAssetBundle& retval)->bool { for (const auto& asset : retval.getContents()) @@ -60,10 +60,10 @@ SAssetBundle IAssetLoader::interm_getAssetInHierarchyWithAllContent(IAssetManage IAssetLoader::SAssetLoadParams paramCopy = _params; paramCopy.cacheFlags = ECF_DUPLICATE_REFERENCES; - auto secondLoad = interm_getAssetInHierarchy(_mgr,_filename,paramCopy,_hierarchyLevel,_override); + auto secondLoad = interm_getAssetInHierarchy(_filename,paramCopy,_hierarchyLevel,_override); if (bundleHasAllContent(secondLoad)) { - _mgr->removeAssetFromCache(firstLoad); + _override->getManager()->removeAssetFromCache(firstLoad); return secondLoad; } else @@ -83,15 +83,47 @@ bool IAssetLoader::insertBuiltinAssetIntoCache(IAssetManager* _mgr, SAssetBundle - -bool 
IAssetLoader::insertBuiltinAssetIntoCache(IAssetManager* _mgr, core::smart_refctd_ptr& _asset, core::smart_refctd_ptr&& metadata, const std::string _path) +// if I can figure out the template for this, move to header +bool IAssetLoader::insertBuiltinAssetIntoCache(IAssetManager* _mgr, smart_refctd_ptr& _asset, smart_refctd_ptr&& metadata, const std::string _path) { asset::SAssetBundle bundle(std::move(metadata), { _asset }); return insertBuiltinAssetIntoCache(_mgr, bundle, _path); } -bool IAssetLoader::insertBuiltinAssetIntoCache(IAssetManager* _mgr, core::smart_refctd_ptr&& _asset, core::smart_refctd_ptr&& metadata, const std::string _path) +bool IAssetLoader::insertBuiltinAssetIntoCache(IAssetManager* _mgr, smart_refctd_ptr&& _asset, smart_refctd_ptr&& metadata, const std::string _path) { asset::SAssetBundle bundle(std::move(metadata), { std::move(_asset) }); return insertBuiltinAssetIntoCache(_mgr, bundle, _path); } + + +smart_refctd_ptr IAssetLoader::createDefaultImageView(core::smart_refctd_ptr&& image) +{ + if (!image) + return nullptr; + const auto& imageParams = image->getCreationParameters(); + + using view_type_e = IImageViewBase::E_TYPE; + IImageViewBase::E_TYPE viewType; + switch (imageParams.type) + { + case ICPUImage::ET_1D: + viewType = view_type_e::ET_1D_ARRAY; + break; + case ICPUImage::ET_2D: + viewType = view_type_e::ET_2D_ARRAY; + break; + case ICPUImage::ET_3D: + viewType = view_type_e::ET_3D; + break; + default: + return nullptr; + } + + return ICPUImageView::create({ + //.subUsages = // shall we somehow narrow in-case the image itself has extended usage? 
+ .image = std::move(image), + .viewType = viewType, + .format = imageParams.format + }); +} \ No newline at end of file diff --git a/src/nbl/ext/MitsubaLoader/CGLSLMitsubaLoaderBuiltinIncludeGenerator.h b/src/nbl/ext/MitsubaLoader/CGLSLMitsubaLoaderBuiltinIncludeGenerator.h deleted file mode 100644 index d3ba886067..0000000000 --- a/src/nbl/ext/MitsubaLoader/CGLSLMitsubaLoaderBuiltinIncludeGenerator.h +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#ifndef __C_GLSL_MITSUBA_LOADER_BUILTIN_INCLUDE_LOADER_H_INCLUDED__ -#define __C_GLSL_MITSUBA_LOADER_BUILTIN_INCLUDE_LOADER_H_INCLUDED__ - -#include "nbl/asset/utils/IShaderCompiler.h" - - -namespace nbl -{ -namespace ext -{ -namespace MitsubaLoader -{ - -class CGLSLMitsubaLoaderBuiltinIncludeGenerator : public IShaderCompiler::IIncludeGenerator -{ - public: - using Base = IShaderCompiler::IIncludeGenerator; - using Base::Base; - - std::string_view getPrefix() const override { return "nbl/builtin/glsl/ext/MitsubaLoader"; }; - - private: - static std::string getMaterialCompilerStuff(const std::string& _path) - { - auto args = parseArgumentsFromPath(_path.substr(_path.rfind(".glsl")+6, _path.npos)); - - const auto str = "#define _NBL_EXT_MITSUBA_LOADER_VT_STORAGE_VIEW_COUNT " + args.front() + "\n"; - - return str + - "#include \"nbl/builtin/glsl/ext/MitsubaLoader/material_compiler_compatibility_impl.glsl\"\n"; - } - - protected: - core::vector> getBuiltinNamesToFunctionMapping() const override - { - core::vector> retval; - - const std::string num = "[0-9]+"; - retval.insert(retval.begin(), - { - std::regex{"glsl/ext/MitsubaLoader/material_compiler_compatibility\\.glsl/"+num}, - &getMaterialCompilerStuff - } - ); - return retval; - } -}; - -}}} - - -#endif \ No newline at end of file diff --git a/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp 
b/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp index 9b63f52d23..0151bc5578 100644 --- a/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp +++ b/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp @@ -12,7 +12,6 @@ #include "nbl/asset/utils/CDerivativeMapCreator.h" #include "nbl/ext/MitsubaLoader/CMitsubaSerializedMetadata.h" -#include "nbl/ext/MitsubaLoader/CGLSLMitsubaLoaderBuiltinIncludeLoader.h" #endif @@ -30,33 +29,26 @@ namespace ext::MitsubaLoader #if 0 // old material compiler _NBL_STATIC_INLINE_CONSTEXPR const char* FRAGMENT_SHADER_DEFINITIONS = R"( -#include - -layout (set = 1, binding = 0, row_major, std140) uniform UBO { - nbl_glsl_SBasicViewParameters params; -} CamData; - vec3 nbl_glsl_MC_getNormalizedWorldSpaceV() { - vec3 campos = nbl_glsl_SBasicViewParameters_GetEyePos(CamData.params.NormalMatAndEyePos); + vec3 campos = ....; return normalize(campos - WorldPos); } vec3 nbl_glsl_MC_getNormalizedWorldSpaceN() { return normalize(Normal); } -#ifdef TEX_PREFETCH_STREAM -mat2x3 nbl_glsl_perturbNormal_dPdSomething() {return mat2x3(dFdx(WorldPos),dFdy(WorldPos));} + +mat2x3 nbl_glsl_perturbNormal_dPdSomething() +{ + return mat2x3(dFdx(WorldPos),dFdy(WorldPos)); +} mat2 nbl_glsl_perturbNormal_dUVdSomething() { return mat2(dFdx(UV),dFdy(UV)); } -#endif -#define _NBL_USER_PROVIDED_MATERIAL_COMPILER_GLSL_BACKEND_FUNCTIONS_ )"; _NBL_STATIC_INLINE_CONSTEXPR const char* FRAGMENT_SHADER_IMPL = R"( -#include - #ifndef _NBL_BSDF_COS_EVAL_DEFINED_ #define _NBL_BSDF_COS_EVAL_DEFINED_ // Spectrum can be exchanged to a float for monochrome @@ -123,9 +115,7 @@ static core::smart_refctd_ptr createFragmentShader return createSpecShader(source.c_str(), asset::ISpecializedShader::ESS_FRAGMENT); } -#endif - -#if 0 +// TODO: move to IAssetLoader static core::smart_refctd_ptr createDerivMap(SContext& ctx, asset::ICPUImage* _heightMap, const ICPUSampler::SParams& _samplerParams, bool fromNormalMap) { core::smart_refctd_ptr derivmap_img; @@ -152,73 +142,10 @@ static core::smart_refctd_ptr 
createDerivMap(SContext& ctx, as } static core::smart_refctd_ptr createSingleChannelImage(const asset::ICPUImage* _img, const asset::ICPUImageView::SComponentMapping::E_SWIZZLE srcChannel) { - auto outParams = _img->getCreationParameters(); - const auto inFormat = outParams.format; - - asset::ICPUImage::SBufferCopy region; - // pick format - { - // TODO: redo the format selection when @Erfan's format promotor is operational - if (isSRGBFormat(inFormat)) - outParams.format = asset::EF_B8G8R8A8_SRGB; - else - { - const double prec = asset::getFormatPrecision(inFormat,srcChannel,0.0); - if (prec<=FLT_MIN) - outParams.format = asset::EF_R32G32B32A32_SFLOAT; - else if (prec<=1.0/65535.0) - outParams.format = asset::EF_R16G16B16A16_UNORM; - else if (prec<=exp2f(-14.f)) - outParams.format = asset::EF_R16G16B16A16_SFLOAT; - else if (prec<=1.0/1023.0) - outParams.format = asset::EF_A2B10G10R10_UNORM_PACK32; - else - outParams.format = asset::EF_R8G8B8A8_UNORM; - } - } - const size_t texelBytesz = asset::getTexelOrBlockBytesize(outParams.format); - region.bufferRowLength = asset::IImageAssetHandlerBase::calcPitchInBlocks(outParams.extent.width, texelBytesz); - auto buffer = core::make_smart_refctd_ptr(texelBytesz * region.bufferRowLength * outParams.extent.height); - region.imageOffset = { 0,0,0 }; - region.imageExtent = outParams.extent; - region.imageSubresource.baseArrayLayer = 0u; - region.imageSubresource.layerCount = 1u; - region.imageSubresource.mipLevel = 0u; - region.bufferImageHeight = 0u; - region.bufferOffset = 0u; - auto outImg = asset::ICPUImage::create(std::move(outParams)); - outImg->setBufferAndRegions(std::move(buffer), core::make_refctd_dynamic_array>(1ull, region)); - - using convert_filter_t = asset::CSwizzleAndConvertImageFilter; - convert_filter_t::state_type conv; - conv.extent = outParams.extent; - conv.layerCount = 1u; - conv.inMipLevel = 0u; - conv.outMipLevel = 0u; - conv.inBaseLayer = 0u; - conv.outBaseLayer = 0u; - conv.inOffset = { 0u,0u,0u }; 
- conv.outOffset = { 0u,0u,0u }; - conv.inImage = _img; - conv.outImage = outImg.get(); - if (srcChannel!=asset::ICPUImageView::SComponentMapping::E_SWIZZLE::ES_IDENTITY) - conv.swizzle = {srcChannel,srcChannel,srcChannel,srcChannel}; - else - { - conv.swizzle = {}; - for (auto i=asset::getFormatChannelCount(inFormat); i<4; i++) - conv.swizzle[i] = asset::ICPUImageView::SComponentMapping::E_SWIZZLE::ES_R; - } - - if (!convert_filter_t::execute(std::execution::par_unseq,&conv)) - { - os::Printer::log("Mitsuba XML Loader: blend weight texture creation failed!", ELL_ERROR); - _NBL_DEBUG_BREAK_IF(true); - } - - return outImg; + // deprecated will be expressed in Material Compiler Frontend AST as a swizzle } #endif +constexpr auto LoggerError = system::ILogger::ELL_ERROR; bool CMitsubaLoader::isALoadableFileFormat(system::IFile* _file, const system::logger_opt_ptr logger) const { @@ -295,6 +222,7 @@ SAssetBundle CMitsubaLoader::loadAsset(system::IFile* _file, const IAssetLoader: // ctx.scene->m_ambientLight = result.ambient; + // TODO: abstract/move away since many loaders will need to do this core::unordered_map> morphTargetCache; auto createMorphTargets = [&_params,&morphTargetCache](core::smart_refctd_ptr&& collection)->core::smart_refctd_ptr @@ -320,7 +248,7 @@ SAssetBundle CMitsubaLoader::loadAsset(system::IFile* _file, const IAssetLoader: auto targets = createMorphTargets(std::move(collection)); if (!targets) { - _params.logger.log("Failed to create ICPUMorphTargets for Shape with id %s",system::ILogger::ELL_ERROR,shape->id.c_str()); + _params.logger.log("Failed to create ICPUMorphTargets for Shape with id %s",LoggerError,shape->id.c_str()); return; } const auto index = instances.size(); @@ -360,7 +288,7 @@ SAssetBundle CMitsubaLoader::loadAsset(system::IFile* _file, const IAssetLoader: auto collection = core::make_smart_refctd_ptr(); if (!collection) { - _params.logger.log("Failed to create an ICPUGeometryCollection non-Instanced Shape with id 
%s",system::ILogger::ELL_ERROR,shapedef->id.c_str()); + _params.logger.log("Failed to create an ICPUGeometryCollection non-Instanced Shape with id %s",LoggerError,shapedef->id.c_str()); continue; } // we don't put a transform on the geometry, because we want the transform on the instance @@ -406,31 +334,14 @@ SAssetBundle CMitsubaLoader::loadAsset(system::IFile* _file, const IAssetLoader: if(emitter.element->type == ext::MitsubaLoader::CElementEmitter::Type::ENVMAP) { const auto& envmap = emitter.element->envmap; -#if 0 - SAssetBundle envmapImageBundle = interm_getAssetInHierarchy(m_assetMgr,envmap.filename,ctx.inner.params,_hierarchyLevel,ctx.override_); - auto contentRange = envmapImageBundle.getContents(); + SAssetBundle envmapBundle = interm_getImageViewInHierarchy(envmap.filename,ctx.inner.params,_hierarchyLevel,ctx.override_); + auto contentRange = envmapBundle.getContents(); if (contentRange.empty()) { - os::Printer::log(std::string("[ERROR] Could Not Find Envmap Image: ") + envfilename, ELL_ERROR); + _params.logger.log("Could not load Envnmap image from path: %s",LoggerError,envmap.filename); continue; } - core::smart_refctd_ptr view = {}; - switch(envmapImageBundle.getAssetType()) - { - case asset::IAsset::ET_IMAGE: - { - // TODO: create image view - } - [[fallthrough]]; - case asset::IAsset::ET_IMAGE_VIEW: - view = core::smart_refctd_ptr_static_cast(*contentRange.begin()); - break; - default: - os::Printer::log("[ERROR] Loaded an Asset for the Envmap but it wasn't an image, was E_ASSET_TYPE " + std::to_string(envmapImageBundle.getAssetType()), ELL_ERROR); - break; - } - ctx.scene->addEnvLight(ICPUScene::EEnvLightType::SphereMap,std::move(view)); -#endif + ctx.scene->addEnvLight(ICPUScene::EEnvLightType::SphereMap,core::smart_refctd_ptr_static_cast(contentRange[0])); } } @@ -448,7 +359,7 @@ void CMitsubaLoader::cacheEmissionProfile(SContext& ctx, const CElementEmissionP auto params = ctx.inner.params; params.loaderFlags = 
asset::IAssetLoader::ELPF_LOAD_METADATA_ONLY; - auto assetLoaded = interm_getAssetInHierarchy(m_assetMgr, profile->filename, params, 0u, ctx.override_); + auto assetLoaded = interm_getAssetInHierarchy( profile->filename, params, 0u, ctx.override_); if (!assetLoaded.getMetadata()) { @@ -468,109 +379,25 @@ void CMitsubaLoader::cacheTexture(SContext& ctx, uint32_t hierarchyLevel, const { // get sampler parameters const auto samplerParams = ctx.computeSamplerParameters(tex->bitmap); - - // search the cache for the imageview - const auto cacheKey = ctx.imageViewCacheKey(tex->bitmap,semantic); - const asset::IAsset::E_TYPE types[]{asset::IAsset::ET_IMAGE_VIEW,asset::IAsset::ET_TERMINATING_ZERO}; - // could not find view in the cache - if (ctx.override_->findCachedAsset(cacheKey,types,ctx.inner,hierarchyLevel).getContents().empty()) + + asset::SAssetBundle viewBundle = interm_getImageViewInHierarchy(tex->bitmap.filename.svalue,ctx.inner,hierarchyLevel,ctx.override_); + // TODO: embed the gamma in the material compiler Frontend + // adjust gamma on pixels (painful and long process) + if (!std::isnan(tex->bitmap.gamma)) { - ICPUImageView::SCreationParams viewParams = {}; - // find or restore image from cache - { - auto loadParams = ctx.inner.params; - // always restore, the only reason we haven't found a view is because either the image wasnt loaded yet, or its going to be processed with channel extraction or derivative mapping - const uint32_t restoreLevels = semantic==CMitsubaMaterialCompilerFrontend::EIVS_IDENTITIY&&tex->bitmap.channel==CElementTexture::Bitmap::CHANNEL::INVALID ? 
0u:2u; // all the way to the buffer providing the pixels - loadParams.restoreLevels = std::max(loadParams.restoreLevels,hierarchyLevel+restoreLevels); - // load using the actual filename, not the cache key - asset::SAssetBundle bundle = interm_getAssetInHierarchy(m_assetMgr,tex->bitmap.filename.svalue,loadParams,hierarchyLevel,ctx.override_); - - // check if found - auto contentRange = bundle.getContents(); - if (contentRange.empty()) - { - os::Printer::log("[ERROR] Could Not Find Texture: "+cacheKey,ELL_ERROR); - return; - } - auto asset = contentRange.begin()[0]; - if (asset->getAssetType()!=asset::IAsset::ET_IMAGE) - { - os::Printer::log("[ERROR] Loaded an Asset but it wasn't a texture, was E_ASSET_TYPE "+std::to_string(asset->getAssetType()),ELL_ERROR); - return; - } - - viewParams.image = core::smart_refctd_ptr_static_cast(asset); - } - // adjust gamma on pixels (painful and long process) - if (!std::isnan(tex->bitmap.gamma)) - { - _NBL_DEBUG_BREAK_IF(true); // TODO : use an image filter (unify with the below maybe?)! 
- } - switch (semantic) - { - case CMitsubaMaterialCompilerFrontend::EIVS_IDENTITIY: - case CMitsubaMaterialCompilerFrontend::EIVS_BLEND_WEIGHT: - { - switch (tex->bitmap.channel) - { - // no GL_R8_SRGB support yet - case CElementTexture::Bitmap::CHANNEL::R: - viewParams.image = createSingleChannelImage(viewParams.image.get(),asset::ICPUImageView::SComponentMapping::ES_R); - break; - case CElementTexture::Bitmap::CHANNEL::G: - viewParams.image = createSingleChannelImage(viewParams.image.get(),asset::ICPUImageView::SComponentMapping::ES_G); - break; - case CElementTexture::Bitmap::CHANNEL::B: - viewParams.image = createSingleChannelImage(viewParams.image.get(),asset::ICPUImageView::SComponentMapping::ES_B); - break; - case CElementTexture::Bitmap::CHANNEL::A: - viewParams.image = createSingleChannelImage(viewParams.image.get(),asset::ICPUImageView::SComponentMapping::ES_A); - break; - /* special conversions needed to CIE space - case CElementTexture::Bitmap::CHANNEL::X: - case CElementTexture::Bitmap::CHANNEL::Y: - case CElementTexture::Bitmap::CHANNEL::Z:*/ - case CElementTexture::Bitmap::CHANNEL::INVALID: - [[fallthrough]]; - default: - if (semantic==CMitsubaMaterialCompilerFrontend::EIVS_BLEND_WEIGHT && asset::getFormatChannelCount(viewParams.image->getCreationParameters().format)<3u) - viewParams.image = createSingleChannelImage(viewParams.image.get(),asset::ICPUImageView::SComponentMapping::ES_IDENTITY); - break; - } - } - break; - case CMitsubaMaterialCompilerFrontend::EIVS_NORMAL_MAP: - viewParams.image = createDerivMap(ctx,viewParams.image.get(),samplerParams,true); - break; - case CMitsubaMaterialCompilerFrontend::EIVS_BUMP_MAP: - viewParams.image = createDerivMap(ctx,viewParams.image.get(),samplerParams,false); - break; - default: - _NBL_DEBUG_BREAK_IF(true); - assert(false); - break; - } - // get rest of view params and insert into cache - { - viewParams.flags = static_cast(0); - viewParams.viewType = IImageView::ET_2D; - viewParams.format = 
viewParams.image->getCreationParameters().format; - viewParams.subresourceRange.aspectMask = static_cast(0); - viewParams.subresourceRange.levelCount = viewParams.image->getCreationParameters().mipLevels; - viewParams.subresourceRange.layerCount = 1u; - //! TODO: this stuff (custom shader sampling code?) - _NBL_DEBUG_BREAK_IF(tex->bitmap.uoffset != 0.f); - _NBL_DEBUG_BREAK_IF(tex->bitmap.voffset != 0.f); - _NBL_DEBUG_BREAK_IF(tex->bitmap.uscale != 1.f); - _NBL_DEBUG_BREAK_IF(tex->bitmap.vscale != 1.f); - - asset::SAssetBundle viewBundle(nullptr,{ICPUImageView::create(std::move(viewParams))}); - ctx.override_->insertAssetIntoCache(std::move(viewBundle),cacheKey,ctx.inner,hierarchyLevel); - } + _NBL_DEBUG_BREAK_IF(true); + } + { + //! TODO: this stuff (custom shader sampling code?) + _NBL_DEBUG_BREAK_IF(tex->bitmap.uoffset != 0.f); + _NBL_DEBUG_BREAK_IF(tex->bitmap.voffset != 0.f); + _NBL_DEBUG_BREAK_IF(tex->bitmap.uscale != 1.f); + _NBL_DEBUG_BREAK_IF(tex->bitmap.vscale != 1.f); } } break; case CElementTexture::Type::SCALE: + // get to the linked list end cacheTexture(ctx,hierarchyLevel,tex->scale.texture,semantic); break; default: @@ -910,7 +737,7 @@ auto SContext::loadBasicShape(const uint32_t hierarchyLevel, const CElementShape assert(filename.type==ext::MitsubaLoader::SPropertyElementData::Type::STRING); auto loadParams = ctx.inner.params; loadParams.loaderFlags = static_cast(loadParams.loaderFlags | IAssetLoader::ELPF_RIGHT_HANDED_MESHES); - auto retval = interm_getAssetInHierarchy(m_assetMgr, filename.svalue, loadParams, hierarchyLevel/*+ICPUScene::MESH_HIERARCHY_LEVELS_BELOW*/, ctx.override_); + auto retval = interm_getAssetInHierarchy( filename.svalue, loadParams, hierarchyLevel/*+ICPUScene::MESH_HIERARCHY_LEVELS_BELOW*/, ctx.override_); if (retval.getContents().empty()) { os::Printer::log(std::string("[ERROR] Could Not Find Mesh: ") + filename.svalue, ELL_ERROR); From 68f5a4937ad62652f39b3486fa2f2e8b17a00c85 Mon Sep 17 00:00:00 2001 From: Karim Mohamed
Date: Wed, 18 Feb 2026 02:42:28 +0300 Subject: [PATCH 472/472] fixes after merge, update examples submodule --- examples_tests | 2 +- include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h | 6 ++++++ src/nbl/ext/FullScreenTriangle/CFullScreenTriangle.cpp | 8 ++------ 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/examples_tests b/examples_tests index 3e39f036cd..2b034eb4a7 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 3e39f036cda70bc7a8e4dccdfe99d59a60b0a263 +Subproject commit 2b034eb4a796e043d882e9e6335070466e7a871f diff --git a/include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h b/include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h index 149873d9c1..2046e5b592 100644 --- a/include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h +++ b/include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h @@ -29,6 +29,12 @@ struct ProtoPipeline final ); core::smart_refctd_ptr m_vxShader; + + constexpr static inline asset::SRasterizationParams DefaultRasterParams = { + .faceCullingMode = asset::EFCM_NONE, + .depthWriteEnable = false, + .depthCompareOp = asset::ECO_ALWAYS + }; }; bool recordDrawCall(video::IGPUCommandBuffer* commandBuffer); diff --git a/src/nbl/ext/FullScreenTriangle/CFullScreenTriangle.cpp b/src/nbl/ext/FullScreenTriangle/CFullScreenTriangle.cpp index fd5411c2ab..a825c580a9 100644 --- a/src/nbl/ext/FullScreenTriangle/CFullScreenTriangle.cpp +++ b/src/nbl/ext/FullScreenTriangle/CFullScreenTriangle.cpp @@ -84,6 +84,7 @@ smart_refctd_ptr ProtoPipeline::createPipeline( const IGPURenderpass* renderpass, const uint32_t subpassIx, SBlendParams blendParams, + asset::SRasterizationParams rasterizationParams, const hlsl::SurfaceTransform::FLAG_BITS swapchainTransform) { if (!renderpass || !bool(*this) || hlsl::bitCount(swapchainTransform) != 1) @@ -93,11 +94,6 @@ smart_refctd_ptr ProtoPipeline::createPipeline( smart_refctd_ptr m_retval; { - constexpr SRasterizationParams defaultRasterParams = { - .faceCullingMode = 
EFCM_NONE, - .depthWriteEnable = false, - .depthCompareOp = ECO_ALWAYS - }; const auto orientationAsUint32 = static_cast(swapchainTransform); IGPUPipelineBase::SShaderEntryMap specConstants; @@ -110,7 +106,7 @@ smart_refctd_ptr ProtoPipeline::createPipeline( params[0].cached = { .vertexInput = {}, // The Full Screen Triangle doesn't use any HW vertex input state .primitiveAssembly = {}, - .rasterization = defaultRasterParams, + .rasterization = rasterizationParams, .blend = blendParams, .subpassIx = subpassIx };