From cdcc9adb4474616beb487504bd24862597b1c722 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Fri, 21 Mar 2025 15:48:22 -0300 Subject: [PATCH 01/28] Initial commit --- include/nbl/builtin/hlsl/math/morton.hlsl | 36 +++++++++++++++++++++++ src/nbl/builtin/CMakeLists.txt | 1 + 2 files changed, 37 insertions(+) create mode 100644 include/nbl/builtin/hlsl/math/morton.hlsl diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl new file mode 100644 index 0000000000..22c56f8999 --- /dev/null +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -0,0 +1,36 @@ +#ifndef _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ + +#include "nbl/builtin/hlsl/concepts/core.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace morton +{ + +template<typename I, uint16_t D NBL_PRIMARY_REQUIRES(concepts::IntegralScalar<I>) +struct code +{ + using this_t = code<I, D>; + using U = make_unsigned<I>; + + static this_t create(vector<I, D> cartesian) + { + //... TODO ... + return this_t(); + } + + //operator+, operator-, operator>>, operator<<, and other bitwise ops + + U value; +}; + +} //namespace morton +} //namespace hlsl +} //namespace nbl + + + +#endif \ No newline at end of file diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 291ee64bad..14e5fe67db 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -289,6 +289,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/fast_affine.hlsl" LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/functions.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/geometry.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/intutil.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/morton.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quadratic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/cubic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quartic.hlsl") From 5fe6c0837ff53d156b9fc0500f3899c6c1c546c6 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Sun, 23 Mar 2025 19:30:10 -0300 Subject: [PATCH 02/28] Checkpoint before master merge
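The `decode_mask` recurrence below builds, at compile time, the mask selecting every Dim-th bit of an encoded value, i.e. the bits a single coordinate occupies. A quick sanity check of the recurrence (values assume the `Bits = 8 * sizeof(T) / Dim` default in the definition below):

    decode_mask_v<uint32_t, 2> == 0x55555555 // a one at every 2nd bit
    decode_mask_v<uint32_t, 3> == 0x09249249 // a one at every 3rd bit
    decode_mask_v<uint32_t, 4> == 0x11111111 // a one at every 4th bit

`generateMasks()` then shifts this base mask left by each coordinate's index to get the per-coordinate masks.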
--- examples_tests | 2 +- include/nbl/builtin/hlsl/math/morton.hlsl | 54 ++++++++++++++++++++- 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/examples_tests b/examples_tests index 91dc3afe4c..f2ea51d0b3 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 91dc3afe4c66e5bdfd313ec37e7e1863daa52116 +Subproject commit f2ea51d0b3e3388c0f9bae03602ec3b1f658c124 diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index 22c56f8999..bf339f4d6f 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -10,12 +10,64 @@ namespace hlsl namespace morton { -template<typename I, uint16_t D NBL_PRIMARY_REQUIRES(concepts::IntegralScalar<I>) +namespace impl +{ + +template<typename T, uint16_t Dim, uint16_t Bits> +struct decode_mask; + +template<typename T, uint16_t Dim> +struct decode_mask<T, Dim, 1> : integral_constant<T, 1> {}; + +template<typename T, uint16_t Dim, uint16_t Bits> +struct decode_mask : integral_constant<T, (decode_mask<T, Dim, Bits - 1>::value << Dim) | T(1)> {}; + +template<typename T, uint16_t Dim, uint16_t Bits = 8 * sizeof(T) / Dim> +NBL_CONSTEXPR T decode_mask_v = decode_mask<T, Dim, Bits>::value; + +// Compile-time still a bit primitive in HLSL, we can support arbitrary-dimensional morton codes in C++ but HLSL's have to be hand coded +template<typename T, uint16_t Dim> +struct decode_masks_array; + +#ifndef __HLSL_VERSION + +template<typename T, uint16_t Dim> +struct decode_masks_array +{ + static consteval vector<T, Dim> generateMasks() + { + vector<T, Dim> masks; + for (auto i = 0u; i < Dim; i++) + { + masks[i] = decode_mask_v<T, Dim> << T(i); + } + return masks; + } + + NBL_CONSTEXPR_STATIC_INLINE vector<T, Dim> Masks = generateMasks(); +}; + +#else +template<typename T> +struct decode_masks_array<T, 2> +{ + NBL_CONSTEXPR_STATIC_INLINE vector<T, 2> Masks = vector<T, 2>(decode_mask_v<T, 2>, decode_mask_v<T, 2> << T(1)); +}; +//template<typename T> +//NBL_CONSTEXPR_STATIC_INLINE vector<T, 2> decode_masks_array<T, 2>::Masks = vector<T, 2>(decode_mask_v<T, 2>, decode_mask_v<T, 2> << T(1)); +#endif + +} //namespace impl + + +template<typename I, uint16_t D NBL_PRIMARY_REQUIRES(concepts::IntegralScalar<I> && 1 < D && D < 5) struct code { using this_t = code<I, D>; using U = make_unsigned<I>; + + static this_t create(vector<I, D> cartesian) { //... TODO ... From f18b2fa2925cd7f5c5cc94a808cc518b0bd9baaa Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 24 Mar 2025 17:21:37 -0300 Subject: [PATCH 03/28] Checkpoint before merging new type_traits change --- include/nbl/builtin/hlsl/math/morton.hlsl | 56 +++++++++++++++-------- include/nbl/builtin/hlsl/type_traits.hlsl | 6 +++ 2 files changed, 43 insertions(+), 19 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index bf339f4d6f..22081e2b7f 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -13,7 +13,7 @@ namespace morton namespace impl { -template +template struct decode_mask; @@ -22,15 +22,11 @@ struct decode_mask<T, Dim, 1> : integral_constant<T, 1> {}; template<typename T, uint16_t Dim, uint16_t Bits> struct decode_mask : integral_constant<T, (decode_mask<T, Dim, Bits - 1>::value << Dim) | T(1)> {}; +#ifndef __HLSL_VERSION + template<typename T, uint16_t Dim, uint16_t Bits = 8 * sizeof(T) / Dim> NBL_CONSTEXPR T decode_mask_v = decode_mask<T, Dim, Bits>::value; -// Compile-time still a bit primitive in HLSL, we can support arbitrary-dimensional morton codes in C++ but HLSL's have to be hand coded -template<typename T, uint16_t Dim> -struct decode_masks_array; - -#ifndef __HLSL_VERSION - template<typename T, uint16_t Dim> struct decode_masks_array { @@ -47,31 +43,50 @@ struct decode_masks_array NBL_CONSTEXPR_STATIC_INLINE vector<T, Dim> Masks = generateMasks(); }; -#else -template<typename T> -struct decode_masks_array<T, 2> -{ - NBL_CONSTEXPR_STATIC_INLINE vector<T, 2> Masks = vector<T, 2>(decode_mask_v<T, 2>, decode_mask_v<T, 2> << T(1)); -}; -//template<typename T> -//NBL_CONSTEXPR_STATIC_INLINE vector<T, 2> decode_masks_array<T, 2>::Masks = vector<T, 2>(decode_mask_v<T, 2>, decode_mask_v<T, 2> << T(1)); +template<typename T, uint16_t Dim> +NBL_CONSTEXPR vector<T, Dim> decode_masks = decode_masks_array<T, Dim>::Masks; + #endif } //namespace impl +// HLSL only supports up to D = 4, and even then having this in a more generic manner is blocked by a DXC issue targeting SPIR-V +#ifndef __HLSL_VERSION + +#define NBL_HLSL_MORTON_MASKS(U, D) impl::decode_masks< U , D > + +#else + +// Up to D = 4 supported +#define NBL_HLSL_MORTON_MASKS(U, D) vector< U , 4 >(impl::decode_mask< U , D >::value,\ impl::decode_mask< U , D >::value << U (1),\ impl::decode_mask< U , D >::value << U (2),\ impl::decode_mask< U , D >::value << U (3)\ ) +#endif + +// Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 +// In particular, `Masks` should be a `const static` member field instead of appearing in every method using it template<typename I, uint16_t D NBL_PRIMARY_REQUIRES(concepts::IntegralScalar<I> && 1 < D && D < 5) struct code { using this_t = code<I, D>; using U = make_unsigned_t<I>; +#ifdef __HLSL_VERSION + _Static_assert(is_same_v, + "make_signed requires that T shall be a (possibly cv-qualified) " + "integral type or enumeration but not a bool type."); +#endif static this_t create(vector<I, D> cartesian) { - //... TODO ...
- return this_t(); + NBL_CONSTEXPR_STATIC_INLINE vector Masks = NBL_HLSL_MORTON_MASKS(I, D); + printf("%d %d %d %d", Masks[0], Masks[1], Masks[2], Masks[3]); + this_t foo; + foo.value = U(0); + return foo; } //operator+, operator-, operator>>, operator<<, and other bitwise ops @@ -79,6 +94,9 @@ struct code U value; }; +// Don't forget to delete this macro after usage +#undef NBL_HLSL_MORTON_MASKS + } //namespace morton } //namespace hlsl } //namespace nbl diff --git a/include/nbl/builtin/hlsl/type_traits.hlsl b/include/nbl/builtin/hlsl/type_traits.hlsl index 708f643ab0..222dbcdb7c 100644 --- a/include/nbl/builtin/hlsl/type_traits.hlsl +++ b/include/nbl/builtin/hlsl/type_traits.hlsl @@ -688,6 +688,12 @@ NBL_CONSTEXPR uint64_t extent_v = extent::value; template using make_void_t = typename make_void::type; +template +using make_signed_t = typename make_signed::type; + +template +using make_unsigned_t = typename make_unsigned::type; + template struct conditional_value { From 4ebc555d320cc3e678095d72437e07721dc1441b Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 24 Mar 2025 19:18:49 -0300 Subject: [PATCH 04/28] Works, but throws DXC warning --- include/nbl/builtin/hlsl/math/morton.hlsl | 25 +++++++++-------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index 22081e2b7f..058bdad862 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -22,11 +22,11 @@ struct decode_mask : integral_constant {}; template struct decode_mask : integral_constant::value << Dim) | T(1)> {}; -#ifndef __HLSL_VERSION - template NBL_CONSTEXPR T decode_mask_v = decode_mask::value; +#ifndef __HLSL_VERSION + template struct decode_masks_array { @@ -58,10 +58,11 @@ NBL_CONSTEXPR vector decode_masks = decode_masks_array::Masks; #else // Up to D = 4 supported -#define NBL_HLSL_MORTON_MASKS(U, D) vector< U , 4 >(impl::decode_mask< U , D >::value,\ - impl::decode_mask< U , D >::value << U (1),\ - impl::decode_mask< U , D >::value << U (2),\ - impl::decode_mask< U , D >::value << U (3)\ +// This will throw a DXC warning about the vector being truncated - no way around that +#define NBL_HLSL_MORTON_MASKS(U, D) vector< U , 4 >(impl::decode_mask_v< U , D >,\ + impl::decode_mask_v< U , D > << U (1),\ + impl::decode_mask_v< U , D > << U (2),\ + impl::decode_mask_v< U , D > << U (3)\ ) #endif @@ -74,18 +75,12 @@ struct code using this_t = code; using U = make_unsigned_t; -#ifdef __HLSL_VERSION - _Static_assert(is_same_v, - "make_signed requires that T shall be a (possibly cv-qualified) " - "integral type or enumeration but not a bool type."); -#endif - static this_t create(vector cartesian) { - NBL_CONSTEXPR_STATIC_INLINE vector Masks = NBL_HLSL_MORTON_MASKS(I, D); - printf("%d %d %d %d", Masks[0], Masks[1], Masks[2], Masks[3]); + NBL_CONSTEXPR_STATIC_INLINE vector Masks = NBL_HLSL_MORTON_MASKS(U, D); + printf("%u %u %u %u", Masks[0], Masks[1], Masks[2]); this_t foo; - foo.value = U(0); + foo.value = Masks[0]; return foo; } From 55a2ef637ca12c6c35b6f8001db6f619acfc2315 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 24 Mar 2025 19:41:14 -0300 Subject: [PATCH 05/28] Added concept for valid morton dimensions --- include/nbl/builtin/hlsl/math/morton.hlsl | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index 058bdad862..99980284e9 100644 --- 
a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -13,6 +13,19 @@ namespace morton namespace impl { +// Valid dimension for a morton code +#ifndef __HLSL_VERSION + +template +NBL_BOOL_CONCEPT MortonDimension = D > 1; + +#else + +template +NBL_BOOL_CONCEPT MortonDimension = 1 < D && D < 5; + +#endif + template struct decode_mask; @@ -69,7 +82,7 @@ NBL_CONSTEXPR vector decode_masks = decode_masks_array::Masks; // Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 // In particular, `Masks` should be a `const static` member field instead of appearing in every method using it -template && 1 < D && D < 5) +template && impl::MortonDimension) struct code { using this_t = code; @@ -78,7 +91,6 @@ struct code static this_t create(vector cartesian) { NBL_CONSTEXPR_STATIC_INLINE vector Masks = NBL_HLSL_MORTON_MASKS(U, D); - printf("%u %u %u %u", Masks[0], Masks[1], Masks[2]); this_t foo; foo.value = Masks[0]; return foo; From f5162561ee2203aa51c8c600aed225d679c9408d Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 24 Mar 2025 21:28:07 -0300 Subject: [PATCH 06/28] Creation from vector working as intended --- include/nbl/builtin/hlsl/math/morton.hlsl | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index 99980284e9..aab8511b95 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -2,6 +2,7 @@ #define _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ #include "nbl/builtin/hlsl/concepts/core.hlsl" +#include "nbl/builtin/hlsl/bit.hlsl" namespace nbl { @@ -88,12 +89,22 @@ struct code using this_t = code; using U = make_unsigned_t; - static this_t create(vector cartesian) + static this_t create(NBL_CONST_REF_ARG(vector) cartesian) { - NBL_CONSTEXPR_STATIC_INLINE vector Masks = NBL_HLSL_MORTON_MASKS(U, D); - this_t foo; - foo.value = Masks[0]; - return foo; + NBL_CONSTEXPR_STATIC U BitWidth = U(8 * sizeof(U)); + const vector unsignedCartesian = bit_cast, vector >(cartesian); + U val = U(0); + [[unroll]] + // We want to interleave the bits of each number in `unsignedCartesian`. We do this by enumerating + // val[0] = bit 0 of unsignedCartesian[0], val[1] = bit 0 of unsignedCartesian[1], ..., val[D-1] = bit 0 of unsignedCartesian[D-1], + // val[D] = bit 1 of unsignedCartesian[0], val[D+1] = bit 1 of unsignedCartesian[1], ..., val[2D-1] = bit 1 of unsignedCartesian[D-1] + // and so on until we get val[BitDwidth - 1] and stop. 
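+ // e.g. for D = 2 and unsignedCartesian = (5, 3) = (0b101, 0b011): output bit i takes bit (i / D) of coordinate (i % D),
+ // so x's bits land at positions 0, 2, 4 and y's at positions 1, 3, 5, giving val = 0b011011 = 27.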
+ for (U i = U(0); i < BitWidth; i++) + { + val |= (unsignedCartesian[i % D] & (U(1) << (i / D))) << (i - (i / D)); + } + this_t retVal = {val}; + return retVal; } //operator+, operator-, operator>>, operator<<, and other bitwise ops From 534d81bfc2ab1136d959a41ecee521990115d7bb Mon Sep 17 00:00:00 2001 From: Fletterio Date: Wed, 26 Mar 2025 13:05:20 -0300 Subject: [PATCH 07/28] Added some extra macro specifiers, vector truncation with no warnings on HLSL side by specializing , a bunch of morton operators --- include/nbl/builtin/hlsl/cpp_compat/basic.h | 8 +- .../nbl/builtin/hlsl/cpp_compat/vector.hlsl | 30 +++ include/nbl/builtin/hlsl/math/morton.hlsl | 181 ++++++++++++++++-- 3 files changed, 198 insertions(+), 21 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index 3802bd69ea..a93727815b 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -40,8 +40,11 @@ inline To _static_cast(From v) #define NBL_CONSTEXPR_FUNC constexpr #define NBL_CONSTEXPR_STATIC constexpr static #define NBL_CONSTEXPR_STATIC_INLINE constexpr static inline +#define NBL_CONSTEXPR_STATIC_FUNC constexpr static #define NBL_CONSTEXPR_INLINE_FUNC constexpr inline -#define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE constexpr +#define NBL_CONSTEXPR_STATIC_INLINE_FUNC constexpr static inline +#define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE NBL_CONSTEXPR_FUNC +#define NBL_CONSTEXPR_STATIC_FORCED_INLINE_FUNC NBL_FORCE_INLINE NBL_CONSTEXPR_STATIC #define NBL_CONST_MEMBER_FUNC const namespace nbl::hlsl @@ -70,8 +73,11 @@ namespace nbl::hlsl #define NBL_CONSTEXPR_FUNC #define NBL_CONSTEXPR_STATIC const static #define NBL_CONSTEXPR_STATIC_INLINE const static +#define NBL_CONSTEXPR_STATIC_FUNC static #define NBL_CONSTEXPR_INLINE_FUNC inline +#define NBL_CONSTEXPR_STATIC_INLINE_FUNC static inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC inline +#define NBL_CONSTEXPR_STATIC_FORCED_INLINE_FUNC NBL_CONSTEXPR_STATIC_INLINE_FUNC #define NBL_CONST_MEMBER_FUNC namespace nbl diff --git a/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl b/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl index 354937427a..f6ced52db1 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl @@ -1,6 +1,8 @@ #ifndef _NBL_BUILTIN_HLSL_CPP_COMPAT_VECTOR_INCLUDED_ #define _NBL_BUILTIN_HLSL_CPP_COMPAT_VECTOR_INCLUDED_ +#include "nbl/builtin/hlsl/cpp_compat/basic.h" + // stuff for C++ #ifndef __HLSL_VERSION #include @@ -92,4 +94,32 @@ struct blake3_hasher::update_impl,Dummy> } #endif } + +// To prevent implicit truncation warnings +namespace nbl +{ +namespace hlsl +{ +namespace impl +{ + +template +struct static_cast_helper, vector > +{ + NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(vector) val) + { + vector retVal; + [[unroll]] + for (uint16_t i = 0; i < N; i++) + { + retVal[i] = val[i]; + } + return retVal; + } +}; + +} +} +} + #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index aab8511b95..ecd94ce69e 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -3,6 +3,8 @@ #include "nbl/builtin/hlsl/concepts/core.hlsl" #include "nbl/builtin/hlsl/bit.hlsl" +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/functional.hlsl" namespace nbl { @@ -15,18 +17,9 @@ namespace impl { // Valid dimension for a 
morton code -#ifndef __HLSL_VERSION - -template -NBL_BOOL_CONCEPT MortonDimension = D > 1; - -#else - template NBL_BOOL_CONCEPT MortonDimension = 1 < D && D < 5; -#endif - template struct decode_mask; @@ -73,11 +66,12 @@ NBL_CONSTEXPR vector decode_masks = decode_masks_array::Masks; // Up to D = 4 supported // This will throw a DXC warning about the vector being truncated - no way around that -#define NBL_HLSL_MORTON_MASKS(U, D) vector< U , 4 >(impl::decode_mask_v< U , D >,\ +// The only way to avoid this atm (until they fix issue 7006 below) is to wrap the whole class in a macro and expand it for each possible value of `D` +#define NBL_HLSL_MORTON_MASKS(U, D) _static_cast > (vector< U , 4 >(impl::decode_mask_v< U , D >,\ impl::decode_mask_v< U , D > << U (1),\ impl::decode_mask_v< U , D > << U (2),\ impl::decode_mask_v< U , D > << U (3)\ - ) + )) #endif @@ -88,25 +82,134 @@ struct code { using this_t = code; using U = make_unsigned_t; + NBL_CONSTEXPR_STATIC U BitWidth = U(8 * sizeof(U)); + + // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- + + #ifndef __HLSL_VERSION - static this_t create(NBL_CONST_REF_ARG(vector) cartesian) + code() = default; + + // To immediately get compound operators and functional structs in CPP side + code(const I _value) : value(bit_cast(_value)){} + + #endif + + /** + * @brief Creates a Morton code from a set of cartesian coordinates + * + * @param [in] cartesian Coordinates to encode + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(vector) cartesian) { - NBL_CONSTEXPR_STATIC U BitWidth = U(8 * sizeof(U)); + NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); const vector unsignedCartesian = bit_cast, vector >(cartesian); U val = U(0); + [[unroll]] - // We want to interleave the bits of each number in `unsignedCartesian`. We do this by enumerating - // val[0] = bit 0 of unsignedCartesian[0], val[1] = bit 0 of unsignedCartesian[1], ..., val[D-1] = bit 0 of unsignedCartesian[D-1], - // val[D] = bit 1 of unsignedCartesian[0], val[D+1] = bit 1 of unsignedCartesian[1], ..., val[2D-1] = bit 1 of unsignedCartesian[D-1] - // and so on until we get val[BitDwidth - 1] and stop. 
- for (U i = U(0); i < BitWidth; i++) + for (U dim = 0; dim < U(D); dim++) { - val |= (unsignedCartesian[i % D] & (U(1) << (i / D))) << (i - (i / D)); + [[unroll]] + // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `dim`, but I feel this is clearer + for (U valBit = dim, coordBit = U(1), shift = dim; valBit < BitWidth; valBit += U(D), coordBit <<= 1, shift += U(D) - 1) + { + val |= (unsignedCartesian[dim] & coordBit) << shift; + } } - this_t retVal = {val}; + + this_t retVal; + retVal.value = val; return retVal; } + // CPP can also have a constructor + #ifndef __HLSL_VERSION + + /** + * @brief Creates a Morton code from a set of cartesian coordinates + * + * @param [in] cartesian Coordinates to encode + */ + code(NBL_CONST_REF_ARG(vector) cartesian) + { + *this = create(cartesian); + } + + /** + * @brief Decodes this Morton code back to a set of cartesian coordinates + */ + explicit operator vector() const noexcept + { + // Definition below, we override `impl::static_cast_helper` to have this conversion in both CPP/HLSL + return _static_cast, this_t>(*this); + } + + #endif + + // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value & rhs.value; + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value | rhs.value; + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value ^ rhs.value; + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = ~value; + return retVal; + } + + // Only valid in CPP + #ifndef __HLSL_VERSION + + NBL_CONSTEXPR_INLINE_FUNC this_t operator<<(uint16_t bits) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value << bits; + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator>>(uint16_t bits) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value >> bits; + return retVal; + } + + #endif + + // ------------------------------------------------------- UNARY ARITHMETIC OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_t operator-() NBL_CONST_MEMBER_FUNC + { + this_t allOnes; + // allOnes encodes a cartesian coordinate with all values set to 1 + allOnes.value = (U(1) << D) - U(1); + // Using 2's complement property that arithmetic negation can be obtained by bitwise negation then adding 1 + return operator~() + allOnes; + } + + // ------------------------------------------------------- BINARY ARITHMETIC OPERATORS ------------------------------------------------- + + + //operator+, operator-, operator>>, operator<<, and other bitwise ops U value; @@ -116,6 +219,44 @@ struct code #undef NBL_HLSL_MORTON_MASKS } //namespace morton + +namespace impl +{ + +template +struct static_cast_helper, morton::code > +{ + NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(morton::code) val) + { + using U = typename morton::code::U; + NBL_CONSTEXPR_STATIC U BitWidth = morton::code::BitWidth; + // Converting back has an issue with bit-width: when encoding (if template parameter `I` is signed) we cut off the highest bits + // that actually indicated sign. 
Therefore what we do is set the highest bits instead of the lowest then do an arithmetic right shift + // at the end to preserve sign. + // To this end, we first notice that the coordinate/dimension of index `dim` gets + // `bits(dim) = ceil((BitWidth - dim)/D)` bits when encoded (so the first dimensions get more bits than the last ones if `D` does not + // divide `BitWidth perfectly`). + // Then instead of unpacking all the bits for that coordinate as the lowest bits, we unpack them as the highest ones + // by shifting everything `BitWidth - bits(dim)` bits to the left, then at the end do a final *arithmetic* bitshift right by the same amount. + + vector cartesian; + for (U dim = 0; dim < U(D); dim++) + { + const U bitsDim = (BitWidth - dim + U(D) - 1) / U(D); // <- this computes the ceil + U coordVal = U(0); + // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `dim`, but I feel this is clearer + for (U valBit = dim, coordBit = U(1) << dim, shift = dim; valBit < BitWidth; valBit += U(D), coordBit <<= U(D), shift += U(D) - 1) + { + coordVal |= (val.value & coordBit) << (BitWidth - bitsDim - shift); + } + cartesian[dim] = (bit_cast(coordVal) >> (BitWidth - bitsDim)); + } + return cartesian; + } +}; + +} // namespace impl + } //namespace hlsl } //namespace nbl From 625639031599374d44e8f8a6a79570471f0f4a9c Mon Sep 17 00:00:00 2001 From: Fletterio Date: Wed, 26 Mar 2025 14:53:42 -0300 Subject: [PATCH 08/28] Add safe copile-time vector truncation and some function specifiers for both cpp and hlsl --- include/nbl/builtin/hlsl/cpp_compat.hlsl | 3 + include/nbl/builtin/hlsl/cpp_compat/basic.h | 66 +++++++++---------- .../hlsl/cpp_compat/impl/vector_impl.hlsl | 35 ++++++++++ .../nbl/builtin/hlsl/cpp_compat/vector.hlsl | 30 --------- include/nbl/builtin/hlsl/math/morton.hlsl | 34 ---------- src/nbl/builtin/CMakeLists.txt | 1 + 6 files changed, 72 insertions(+), 97 deletions(-) create mode 100644 include/nbl/builtin/hlsl/cpp_compat/impl/vector_impl.hlsl diff --git a/include/nbl/builtin/hlsl/cpp_compat.hlsl b/include/nbl/builtin/hlsl/cpp_compat.hlsl index 175a3e76c1..cb06447aa1 100644 --- a/include/nbl/builtin/hlsl/cpp_compat.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat.hlsl @@ -6,4 +6,7 @@ #include #include +// Had to push some stuff here to avoid circular dependencies +#include + #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index a93727815b..41e920e41e 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -2,35 +2,7 @@ #define _NBL_BUILTIN_HLSL_CPP_COMPAT_BASIC_INCLUDED_ #include - -namespace nbl -{ -namespace hlsl -{ -namespace impl -{ -template -struct static_cast_helper -{ - static inline To cast(From u) - { -#ifndef __HLSL_VERSION - return static_cast(u); -#else - return To(u); -#endif - } -}; -} - -template -inline To _static_cast(From v) -{ - return impl::static_cast_helper::cast(v); -} - -} -} +#include #ifndef __HLSL_VERSION #include @@ -43,8 +15,7 @@ inline To _static_cast(From v) #define NBL_CONSTEXPR_STATIC_FUNC constexpr static #define NBL_CONSTEXPR_INLINE_FUNC constexpr inline #define NBL_CONSTEXPR_STATIC_INLINE_FUNC constexpr static inline -#define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE NBL_CONSTEXPR_FUNC -#define NBL_CONSTEXPR_STATIC_FORCED_INLINE_FUNC NBL_FORCE_INLINE NBL_CONSTEXPR_STATIC +#define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE constexpr #define NBL_CONST_MEMBER_FUNC 
const namespace nbl::hlsl @@ -68,6 +39,7 @@ namespace nbl::hlsl #else + #define ARROW .arrow(). #define NBL_CONSTEXPR const static // TODO: rename to NBL_CONSTEXPR_VAR #define NBL_CONSTEXPR_FUNC @@ -77,8 +49,7 @@ namespace nbl::hlsl #define NBL_CONSTEXPR_INLINE_FUNC inline #define NBL_CONSTEXPR_STATIC_INLINE_FUNC static inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC inline -#define NBL_CONSTEXPR_STATIC_FORCED_INLINE_FUNC NBL_CONSTEXPR_STATIC_INLINE_FUNC -#define NBL_CONST_MEMBER_FUNC +#define NBL_CONST_MEMBER_FUNC namespace nbl { @@ -106,4 +77,33 @@ struct add_pointer #endif +namespace nbl +{ +namespace hlsl +{ +namespace impl +{ +template +struct static_cast_helper +{ + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { +#ifndef __HLSL_VERSION + return static_cast(u); +#else + return To(u); +#endif + } +}; +} + +template +NBL_CONSTEXPR_INLINE_FUNC To _static_cast(From v) +{ +return impl::static_cast_helper::cast(v); +} + +} +} + #endif diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/vector_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/vector_impl.hlsl new file mode 100644 index 0000000000..524d1fa45e --- /dev/null +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/vector_impl.hlsl @@ -0,0 +1,35 @@ +#ifndef _NBL_BUILTIN_HLSL_CPP_COMPAT_IMPL_VECTOR_IMPL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_CPP_COMPAT_IMPL_VECTOR_IMPL_INCLUDED_ + +#include +#include +#include + +// To prevent implicit truncation warnings +namespace nbl +{ +namespace hlsl +{ +namespace impl +{ + +template NBL_PARTIAL_REQ_TOP(N <= M) +struct static_cast_helper, vector NBL_PARTIAL_REQ_BOT(N <= M) > +{ + NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(vector) val) + { + vector retVal; + [[unroll]] + for (uint16_t i = 0; i < N; i++) + { + retVal[i] = val[i]; + } + return retVal; + } +}; + +} +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl b/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl index f6ced52db1..354937427a 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl @@ -1,8 +1,6 @@ #ifndef _NBL_BUILTIN_HLSL_CPP_COMPAT_VECTOR_INCLUDED_ #define _NBL_BUILTIN_HLSL_CPP_COMPAT_VECTOR_INCLUDED_ -#include "nbl/builtin/hlsl/cpp_compat/basic.h" - // stuff for C++ #ifndef __HLSL_VERSION #include @@ -94,32 +92,4 @@ struct blake3_hasher::update_impl,Dummy> } #endif } - -// To prevent implicit truncation warnings -namespace nbl -{ -namespace hlsl -{ -namespace impl -{ - -template -struct static_cast_helper, vector > -{ - NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(vector) val) - { - vector retVal; - [[unroll]] - for (uint16_t i = 0; i < N; i++) - { - retVal[i] = val[i]; - } - return retVal; - } -}; - -} -} -} - #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index ecd94ce69e..50cf78caae 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -32,49 +32,15 @@ struct decode_mask : integral_constant::value template NBL_CONSTEXPR T decode_mask_v = decode_mask::value; -#ifndef __HLSL_VERSION - -template -struct decode_masks_array -{ - static consteval vector generateMasks() - { - vector masks; - for (auto i = 0u; i < Dim; i++) - { - masks[i] = decode_mask_v << T(i); - } - return masks; - } - - NBL_CONSTEXPR_STATIC_INLINE vector Masks = generateMasks(); -}; - -template -NBL_CONSTEXPR vector decode_masks = decode_masks_array::Masks; - -#endif - } 
//namespace impl -// HLSL only supports up to D = 4, and even then having this in a more generic manner is blocked by a DXC issue targeting SPIR-V -#ifndef __HLSL_VERSION - -#define NBL_HLSL_MORTON_MASKS(U, D) impl::decode_masks< U , D > - -#else - // Up to D = 4 supported -// This will throw a DXC warning about the vector being truncated - no way around that -// The only way to avoid this atm (until they fix issue 7006 below) is to wrap the whole class in a macro and expand it for each possible value of `D` #define NBL_HLSL_MORTON_MASKS(U, D) _static_cast<vector< U , D > > (vector< U , 4 >(impl::decode_mask_v< U , D >,\ impl::decode_mask_v< U , D > << U (1),\ impl::decode_mask_v< U , D > << U (2),\ impl::decode_mask_v< U , D > << U (3)\ )) -#endif - // Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 // In particular, `Masks` should be a `const static` member field instead of appearing in every method using it template<typename I, uint16_t D NBL_PRIMARY_REQUIRES(concepts::IntegralScalar<I> && impl::MortonDimension<D>) struct code diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 2e68d1fdf7..fa548e210a 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -248,6 +248,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/matrix.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/promote.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/vector.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/impl/intrinsics_impl.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/impl/vector_impl.hlsl") #glsl compat LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/glsl_compat/core.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/glsl_compat/subgroup_arithmetic.hlsl") From 246cefc422e8ef7b36cd22c90a1f695d643c3b45 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Thu, 27 Mar 2025 18:44:44 -0300 Subject: [PATCH 09/28] Morton class done!
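A rough usage sketch of the finished interface (host-side C++; `vector` is the glm-backed cpp_compat alias, and the 2D instantiation shown here is an illustration, not checked-in test code):

    using morton2 = nbl::hlsl::morton::code<int32_t, 2>;
    const morton2 m = morton2::create(vector<int32_t, 2>(5, 3));     // encode: m.value == 27
    const morton2 one = morton2::create(vector<int32_t, 2>(1, 1));
    const morton2 n = m + one;                                       // adds per coordinate, no decode round trip
    const vector<int32_t, 2> back = static_cast<vector<int32_t, 2> >(n); // decode: (6, 4)

Addition and subtraction stay in the interleaved domain: each coordinate's bits are masked out, the foreign bits are filled with ones (or zeroed, for subtraction) so carries/borrows propagate across them, and the result is masked back.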
--- include/nbl/builtin/hlsl/math/morton.hlsl | 241 +++++++++++++++++++--- 1 file changed, 215 insertions(+), 26 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index 50cf78caae..dfe53c3446 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -68,18 +68,17 @@ struct code */ NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(vector) cartesian) { - NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); const vector unsignedCartesian = bit_cast, vector >(cartesian); U val = U(0); [[unroll]] - for (U dim = 0; dim < U(D); dim++) + for (U coord = 0; coord < U(D); coord++) { [[unroll]] - // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `dim`, but I feel this is clearer - for (U valBit = dim, coordBit = U(1), shift = dim; valBit < BitWidth; valBit += U(D), coordBit <<= 1, shift += U(D) - 1) + // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `coord`, but I feel this is clearer + for (U valBitIdx = coord, coordBit = U(1), shift = coord; valBitIdx < BitWidth; valBitIdx += U(D), coordBit <<= 1, shift += U(D) - 1) { - val |= (unsignedCartesian[dim] & coordBit) << shift; + val |= (unsignedCartesian[coord] & coordBit) << shift; } } @@ -112,6 +111,68 @@ struct code #endif + // --------------------------------------------------------- AUX METHODS ------------------------------------------------------------------- + + /** + * @brief Extracts a single coordinate + * + * @param [in] coord The coordinate to extract + */ + NBL_CONSTEXPR_INLINE_FUNC I getCoordinate(uint16_t coord) NBL_CONST_MEMBER_FUNC + { + // Converting back has an issue with bit-width: when encoding (if template parameter `I` is signed) we cut off the highest bits + // that actually indicated sign. Therefore what we do is set the highest bits instead of the lowest then do an arithmetic right shift + // at the end to preserve sign. + // To this end, we first notice that the coordinate of index `coord` gets + // `bits(coord) = ceil((BitWidth - coord)/D)` bits when encoded (so the first dimensions get more bits than the last ones if `D` does not + // divide `BitWidth perfectly`). + // Then instead of unpacking all the bits for that coordinate as the lowest bits, we unpack them as the highest ones + // by shifting everything `BitWidth - bits(coord)` bits to the left, then at the end do a final *arithmetic* bitshift right by the same amount. + + const U bitsCoord = BitWidth / U(D) + ((coord < BitWidth % D) ? 
U(1) : U(0)); // <- this computes the ceil + U coordVal = U(0); + // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `coord`, but I feel this is clearer + [[unroll]] + for (U valBitIdx = U(coord), coordBit = U(1) << U(coord), shift = U(coord); valBitIdx < BitWidth; valBitIdx += U(D), coordBit <<= U(D), shift += U(D) - 1) + { + coordVal |= (value & coordBit) << (BitWidth - bitsCoord - shift); + } + return bit_cast(coordVal) >> (BitWidth - bitsCoord); + } + + /** + * @brief Returns an element of type U with the highest bit of the number encoded in `coord` set to its right value, and all other bits set to 0 + * + * @param [in] coord The coordinate whose highest bit we want to get + */ + /* + NBL_CONSTEXPR_INLINE_FUNC U extractHighestBit(uint16_t coord) NBL_CONST_MEMBER_FUNC + { + // Like above, if the number encoded in `coord` gets `bits(coord) = ceil((BitWidth - coord)/D)` bits for representation, then the highest index of these + // bits is `bits(coord) - 1` + const U coordHighestBitIdx = BitWidth / U(D) - ((U(coord) < BitWidth % U(D)) ? U(0) : U(1)); + // This is the index of that bit as an index in the encoded value + const U shift = coordHighestBitIdx * U(D) + U(coord); + return value & (U(1) << shift); + } + */ + + /** + * @brief Returns an element of type U by `or`ing this with rhs and extracting only the highest bit. Useful to know if either coord + * (for each value) has its highest bit set to 1. + * + * @param [in] coord The coordinate whose highest bit we want to get + */ + NBL_CONSTEXPR_INLINE_FUNC U logicalOrHighestBits(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + { + // Like above, if the number encoded in `coord` gets `bits(coord) = ceil((BitWidth - coord)/D)` bits for representation, then the highest index of these + // bits is `bits(coord) - 1` + const U coordHighestBitIdx = BitWidth / U(D) - ((U(coord) < BitWidth % U(D)) ? U(0) : U(1)); + // This is the index of that bit as an index in the encoded value + const U shift = coordHighestBitIdx * U(D) + U(coord); + return (value | rhs.value) & (U(1) << shift); + } + // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC @@ -174,9 +235,153 @@ struct code // ------------------------------------------------------- BINARY ARITHMETIC OPERATORS ------------------------------------------------- + NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); + this_t retVal; + [[unroll]] + for (uint16_t coord = 0; coord < D; coord++) + { + // put 1 bits everywhere in the bits the current axis is not using + // then extract just the axis bits for the right hand coordinate + // carry-1 will propagate the bits across the already set bits + // then clear out the bits not belonging to current axis + // Note: Its possible to clear on `this` and fill on `rhs` but that will + // disable optimizations, we expect the compiler to optimize a lot if the + // value of `rhs` is known at compile time, e.g. 
`static_cast>(glm::ivec3(1,0,0))` + retVal.value |= ((value | (~Masks[coord])) + (rhs.value & Masks[coord])) & Masks[coord]; + } + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); + this_t retVal; + [[unroll]] + for (uint16_t coord = 0; coord < D; coord++) + { + // This is the dual trick of the one used for addition: set all other bits to 0 so borrows propagate + retVal.value |= ((value & Masks[coord]) - (rhs.value & Masks[coord])) & Masks[coord]; + } + return retVal; + } + + // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- + NBL_CONSTEXPR_INLINE_FUNC bool operator!() NBL_CONST_MEMBER_FUNC + { + return value.operator!(); + } - //operator+, operator-, operator>>, operator<<, and other bitwise ops + NBL_CONSTEXPR_INLINE_FUNC bool coordEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + { + NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); + return (value & Masks[coord]) == (rhs.value & Masks[coord]); + } + + NBL_CONSTEXPR_INLINE_FUNC vector operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + vector retVal; + [[unroll]] + for (uint16_t coord = 0; coord < D; coord++) + retVal[coord] = coordEquals(rhs, coord); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC bool allEqual(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return value == rhs.value; + } + + NBL_CONSTEXPR_INLINE_FUNC bool coordNotEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + { + return !coordEquals(rhs, coord); + } + + NBL_CONSTEXPR_INLINE_FUNC vector operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + vector retVal; + [[unroll]] + for (uint16_t coord = 0; coord < D; coord++) + retVal[coord] = coordNotEquals(rhs, coord); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC bool notAllEqual(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return ! allEqual(rhs); + } + + + + template + NBL_CONSTEXPR_INLINE_FUNC bool coordOrderCompare(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + { + NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); + Comparison comparison; + OppositeComparison oppositeComparison; + + // When unsigned, bit representation is the same but with 0s inbetween bits. In particular, we can still use unsigned comparison + #ifndef __HLSL_VERSION + if constexpr (is_unsigned_v) + #else + if (is_unsigned_v) + #endif + { + return comparison(value & Masks[coord], rhs.value & Masks[coord]); + } + // When signed, since the representation is unsigned, we need to divide behaviour based on highest bit + else + { + // I will give an example for the case of `Comparison` being `functional::less`, but other cases are similar + // If both are negative (both bits set to 1) then `x < y` iff `z > w` when `z,w` are the bit representations of `x,y` as unsigned + // If this is nonnegative and rhs is negative, it should return false. Since in this case `highestBit = 0` and `rhsHighestBit = 1` this + // is the same as doing `z > w` again + // If this is negative and rhs is nonnegative, it should return true. But in this case we have `highestBit = 1` and `rhsHighestBit = 0` + // so again we can just return `z > w`. + // All three cases end up in the same expression. 
+ if (logicalOrHighestBits(rhs, coord)) + return oppositeComparison(value & Masks[coord], rhs.value & Masks[coord]); + // If neither of them have their highest bit set, both are nonnegative. Therefore, we can return the unsigned comparison + else + return comparison(value & Masks[coord], rhs.value & Masks[coord]); + } + } + + NBL_CONSTEXPR_INLINE_FUNC bool coordLessThan(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + { + return coordOrderCompare, greater >(rhs, coord); + } + + NBL_CONSTEXPR_INLINE_FUNC bool coordLessThanEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + { + return coordOrderCompare, greater_equal >(rhs, coord); + } + + NBL_CONSTEXPR_INLINE_FUNC bool coordGreaterThan(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + { + return coordOrderCompare, less >(rhs, coord); + } + + NBL_CONSTEXPR_INLINE_FUNC bool coordGreaterThanEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + { + return coordOrderCompare, less_equal >(rhs, coord); + } + + #define DEFINE_OPERATOR(OP, COMPARISON) NBL_CONSTEXPR_INLINE_FUNC vector operator##OP##(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC \ + { \ + vector retVal; \ + [[unroll]] \ + for (uint16_t coord = 0; coord < D; coord++) \ + retVal[coord] = COMPARISON (rhs, coord); \ + return retVal; \ + } + + DEFINE_OPERATOR(< , coordLessThan); + DEFINE_OPERATOR(<= , coordLessThanEquals); + DEFINE_OPERATOR(> , coordGreaterThan); + DEFINE_OPERATOR(>= , coordGreaterThanEquals); U value; }; @@ -186,6 +391,7 @@ struct code } //namespace morton +// Still in nbl::hlsl we can go to nbl::hlsl::impl and specialize the `static_cast_helper` namespace impl { @@ -194,28 +400,11 @@ struct static_cast_helper, morton::code > { NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(morton::code) val) { - using U = typename morton::code::U; - NBL_CONSTEXPR_STATIC U BitWidth = morton::code::BitWidth; - // Converting back has an issue with bit-width: when encoding (if template parameter `I` is signed) we cut off the highest bits - // that actually indicated sign. Therefore what we do is set the highest bits instead of the lowest then do an arithmetic right shift - // at the end to preserve sign. - // To this end, we first notice that the coordinate/dimension of index `dim` gets - // `bits(dim) = ceil((BitWidth - dim)/D)` bits when encoded (so the first dimensions get more bits than the last ones if `D` does not - // divide `BitWidth perfectly`). - // Then instead of unpacking all the bits for that coordinate as the lowest bits, we unpack them as the highest ones - // by shifting everything `BitWidth - bits(dim)` bits to the left, then at the end do a final *arithmetic* bitshift right by the same amount. 
- vector cartesian; - for (U dim = 0; dim < U(D); dim++) + [[unroll]] + for (uint16_t coord = 0; coord < D; coord++) { - const U bitsDim = (BitWidth - dim + U(D) - 1) / U(D); // <- this computes the ceil - U coordVal = U(0); - // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `dim`, but I feel this is clearer - for (U valBit = dim, coordBit = U(1) << dim, shift = dim; valBit < BitWidth; valBit += U(D), coordBit <<= U(D), shift += U(D) - 1) - { - coordVal |= (val.value & coordBit) << (BitWidth - bitsDim - shift); - } - cartesian[dim] = (bit_cast(coordVal) >> (BitWidth - bitsDim)); + cartesian[coord] = val.getCoordinate(coord); } return cartesian; } From 1c7f7911e416c8ec42ba3055b9da9a9da900d23f Mon Sep 17 00:00:00 2001 From: Fletterio Date: Thu, 27 Mar 2025 18:48:35 -0300 Subject: [PATCH 10/28] Remove some leftover commented code --- include/nbl/builtin/hlsl/math/morton.hlsl | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index dfe53c3446..153ec08bf0 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -140,23 +140,6 @@ struct code return bit_cast(coordVal) >> (BitWidth - bitsCoord); } - /** - * @brief Returns an element of type U with the highest bit of the number encoded in `coord` set to its right value, and all other bits set to 0 - * - * @param [in] coord The coordinate whose highest bit we want to get - */ - /* - NBL_CONSTEXPR_INLINE_FUNC U extractHighestBit(uint16_t coord) NBL_CONST_MEMBER_FUNC - { - // Like above, if the number encoded in `coord` gets `bits(coord) = ceil((BitWidth - coord)/D)` bits for representation, then the highest index of these - // bits is `bits(coord) - 1` - const U coordHighestBitIdx = BitWidth / U(D) - ((U(coord) < BitWidth % U(D)) ? U(0) : U(1)); - // This is the index of that bit as an index in the encoded value - const U shift = coordHighestBitIdx * U(D) + U(coord); - return value & (U(1) << shift); - } - */ - /** * @brief Returns an element of type U by `or`ing this with rhs and extracting only the highest bit. Useful to know if either coord * (for each value) has its highest bit set to 1. 
From 508879948064ff01c05a9e1f2166d2261c17697f Mon Sep 17 00:00:00 2001 From: Fletterio Date: Thu, 27 Mar 2025 18:56:57 -0300 Subject: [PATCH 11/28] Remove leaking macro --- include/nbl/builtin/hlsl/math/morton.hlsl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index 153ec08bf0..4dc05738b6 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -366,6 +366,8 @@ struct code DEFINE_OPERATOR(> , coordGreaterThan); DEFINE_OPERATOR(>= , coordGreaterThanEquals); + #undef DEFINE_OPERATOR + U value; }; From e25a35cce8f0554baf98173f9cc1d1dd93629042 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Fri, 28 Mar 2025 20:16:00 -0300 Subject: [PATCH 12/28] Bugfixes with arithmetic --- include/nbl/builtin/hlsl/math/morton.hlsl | 108 +++++++++++++--------- 1 file changed, 63 insertions(+), 45 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index 4dc05738b6..89d1a99749 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -57,7 +57,7 @@ struct code code() = default; // To immediately get compound operators and functional structs in CPP side - code(const I _value) : value(bit_cast(_value)){} + code(const U _value) : value(_value) {} #endif @@ -69,7 +69,7 @@ struct code NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(vector) cartesian) { const vector unsignedCartesian = bit_cast, vector >(cartesian); - U val = U(0); + this_t retVal = { U(0) }; [[unroll]] for (U coord = 0; coord < U(D); coord++) @@ -78,12 +78,10 @@ struct code // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `coord`, but I feel this is clearer for (U valBitIdx = coord, coordBit = U(1), shift = coord; valBitIdx < BitWidth; valBitIdx += U(D), coordBit <<= 1, shift += U(D) - 1) { - val |= (unsignedCartesian[coord] & coordBit) << shift; + retVal.value |= (unsignedCartesian[coord] & coordBit) << shift; } } - this_t retVal; - retVal.value = val; return retVal; } @@ -141,48 +139,43 @@ struct code } /** - * @brief Returns an element of type U by `or`ing this with rhs and extracting only the highest bit. Useful to know if either coord - * (for each value) has its highest bit set to 1. + * @brief Returns an element of type U by extracting only the highest bit (of the bits used to encode `coord`) * - * @param [in] coord The coordinate whose highest bit we want to get + * @param [in] coord The coordinate whose highest bit we want to extract. */ - NBL_CONSTEXPR_INLINE_FUNC U logicalOrHighestBits(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_INLINE_FUNC U extractHighestBit(uint16_t coord) NBL_CONST_MEMBER_FUNC { // Like above, if the number encoded in `coord` gets `bits(coord) = ceil((BitWidth - coord)/D)` bits for representation, then the highest index of these // bits is `bits(coord) - 1` const U coordHighestBitIdx = BitWidth / U(D) - ((U(coord) < BitWidth % U(D)) ? 
U(0) : U(1)); // This is the index of that bit as an index in the encoded value const U shift = coordHighestBitIdx * U(D) + U(coord); - return (value | rhs.value) & (U(1) << shift); + return value & (U(1) << shift); } // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - this_t retVal; - retVal.value = value & rhs.value; + this_t retVal = { value & rhs.value }; return retVal; } NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - this_t retVal; - retVal.value = value | rhs.value; + this_t retVal = { value | rhs.value }; return retVal; } NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - this_t retVal; - retVal.value = value ^ rhs.value; + this_t retVal = { value ^ rhs.value }; return retVal; } NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC { - this_t retVal; - retVal.value = ~value; + this_t retVal = { ~value }; return retVal; } @@ -191,15 +184,13 @@ struct code NBL_CONSTEXPR_INLINE_FUNC this_t operator<<(uint16_t bits) NBL_CONST_MEMBER_FUNC { - this_t retVal; - retVal.value = value << bits; + this_t retVal = { value << U(bits) }; return retVal; } NBL_CONSTEXPR_INLINE_FUNC this_t operator>>(uint16_t bits) NBL_CONST_MEMBER_FUNC { - this_t retVal; - retVal.value = value >> bits; + this_t retVal = { value >> U(bits) }; return retVal; } @@ -209,19 +200,20 @@ struct code NBL_CONSTEXPR_INLINE_FUNC this_t operator-() NBL_CONST_MEMBER_FUNC { - this_t allOnes; // allOnes encodes a cartesian coordinate with all values set to 1 - allOnes.value = (U(1) << D) - U(1); + const static this_t allOnes = { (U(1) << D) - U(1) }; // Using 2's complement property that arithmetic negation can be obtained by bitwise negation then adding 1 return operator~() + allOnes; } // ------------------------------------------------------- BINARY ARITHMETIC OPERATORS ------------------------------------------------- + // CHANGED FOR DEBUG: REMEMBER TO CHANGE BACK + NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); - this_t retVal; + this_t retVal = { U(0) }; [[unroll]] for (uint16_t coord = 0; coord < D; coord++) { @@ -240,7 +232,7 @@ struct code NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); - this_t retVal; + this_t retVal = { U(0) }; [[unroll]] for (uint16_t coord = 0; coord < D; coord++) { @@ -293,17 +285,15 @@ struct code NBL_CONSTEXPR_INLINE_FUNC bool notAllEqual(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - return ! allEqual(rhs); + return !allEqual(rhs); } - - - template + template NBL_CONSTEXPR_INLINE_FUNC bool coordOrderCompare(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC { NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); Comparison comparison; - OppositeComparison oppositeComparison; + OnSignMismatch onSignMismatch; // When unsigned, bit representation is the same but with 0s inbetween bits. 
In particular, we can still use unsigned comparison #ifndef __HLSL_VERSION @@ -317,39 +307,67 @@ struct code // When signed, since the representation is unsigned, we need to divide behaviour based on highest bit else { - // I will give an example for the case of `Comparison` being `functional::less`, but other cases are similar - // If both are negative (both bits set to 1) then `x < y` iff `z > w` when `z,w` are the bit representations of `x,y` as unsigned - // If this is nonnegative and rhs is negative, it should return false. Since in this case `highestBit = 0` and `rhsHighestBit = 1` this - // is the same as doing `z > w` again - // If this is negative and rhs is nonnegative, it should return true. But in this case we have `highestBit = 1` and `rhsHighestBit = 0` - // so again we can just return `z > w`. - // All three cases end up in the same expression. - if (logicalOrHighestBits(rhs, coord)) - return oppositeComparison(value & Masks[coord], rhs.value & Masks[coord]); - // If neither of them have their highest bit set, both are nonnegative. Therefore, we can return the unsigned comparison + // I will give an example for `operator<` but the same reasoning holds for all others. Some abuse of notation but hopefully it's clear. + + // If `this[coord] >= 0` and `rhs[coord] < 0` then `this[coord] < rhs[coord]` returns false. Notice that in this case, the highest bit of + // `value` (of the bits representing the number encoded in `coord`) is `0`, while the highest bit for rhs is `1`. + // Similarly, if `this[coord] < 0` and `rhs[coord] >= 0` then `this[coord] < rhs[coord]` returns true, and the highest bit situation is inverted. + // This means that if the signs of `this[coord]` and `rhs[coord]` are not equal, the result depends on the sign of `this[coord]`. + // What that result should be is controlled by `OnSignMismatch`. + // Finally, notice that if only one of those bits is set to 1, then the `xor` of that highest bit yields 1 as well + const U highestBit = extractHighestBit(coord); + const U rhsHighestBit = rhs.extractHighestBit(coord); + if (highestBit ^ rhsHighestBit) + return onSignMismatch(highestBit); + // If both are nonnegative, then we can just use the comparison as it comes. + // If both are negative, it just so happens that applying the same operator to their unsigned bitcasted representations yields the same result. + // For `operator<`, for example, consider two negative numbers. Starting from the MSB (we know it's `1` for both in this case) and moving to the right, + // consider what happens when we encounter the first bit where they mismatch: the one with a `0` at position `k` (by position I mean counted from the + // left, starting at 0) is adding at most `2^k - 1` in the lowest bits, while the one with a `1` is adding exactly `2^k`. This means that the one + // with a 0 is "more negative". 
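+ // Worked 4-bit example: -3 = 0b1101 (13 as unsigned) vs -5 = 0b1011 (11 as unsigned): 13 > 11, matching -3 > -5;
+ // the first mismatch from the MSB is at bit 2, where -5 has the 0 and is indeed the smaller (more negative) value.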
else return comparison(value & Masks[coord], rhs.value & Masks[coord]); } } + + struct OnSignMismatchLessThan + { + // On a sign mismatch, `thisrhs` is true if this is non-negative (`highestBit` set to `0`) and false otherwise + // Therefore since it takes a number with only the highest bit set we only have to return the opposite of whether there is in fact a bit set + bool operator()(U highestBit) + { + return !bool(highestBit); + } + }; NBL_CONSTEXPR_INLINE_FUNC bool coordLessThan(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC { - return coordOrderCompare, greater >(rhs, coord); + return coordOrderCompare, OnSignMismatchLessThan>(rhs, coord); } NBL_CONSTEXPR_INLINE_FUNC bool coordLessThanEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC { - return coordOrderCompare, greater_equal >(rhs, coord); + return coordOrderCompare, OnSignMismatchLessThan>(rhs, coord); } NBL_CONSTEXPR_INLINE_FUNC bool coordGreaterThan(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC { - return coordOrderCompare, less >(rhs, coord); + return coordOrderCompare, OnSignMismatchGreaterThan>(rhs, coord); } NBL_CONSTEXPR_INLINE_FUNC bool coordGreaterThanEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC { - return coordOrderCompare, less_equal >(rhs, coord); + return coordOrderCompare, OnSignMismatchGreaterThan>(rhs, coord); } #define DEFINE_OPERATOR(OP, COMPARISON) NBL_CONSTEXPR_INLINE_FUNC vector operator##OP##(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC \ From 0d9dd4afa6190dd029cf0e8e311ec132a818ec4a Mon Sep 17 00:00:00 2001 From: Fletterio Date: Tue, 1 Apr 2025 15:25:38 -0300 Subject: [PATCH 13/28] Checkpoint, have to check why vector compat isn't working --- include/nbl/builtin/hlsl/cpp_compat/basic.h | 6 +- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 34 ++ .../builtin/hlsl/cpp_compat/intrinsics.hlsl | 13 + .../nbl/builtin/hlsl/emulated/uint64_t.hlsl | 153 +++++++ include/nbl/builtin/hlsl/functional.hlsl | 34 +- include/nbl/builtin/hlsl/math/morton.hlsl | 423 ------------------ include/nbl/builtin/hlsl/morton.hlsl | 72 +++ .../builtin/hlsl/spirv_intrinsics/core.hlsl | 3 +- src/nbl/builtin/CMakeLists.txt | 4 +- 9 files changed, 311 insertions(+), 431 deletions(-) create mode 100644 include/nbl/builtin/hlsl/emulated/uint64_t.hlsl delete mode 100644 include/nbl/builtin/hlsl/math/morton.hlsl create mode 100644 include/nbl/builtin/hlsl/morton.hlsl diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index 41e920e41e..77d9d887bd 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -17,6 +17,7 @@ #define NBL_CONSTEXPR_STATIC_INLINE_FUNC constexpr static inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE constexpr #define NBL_CONST_MEMBER_FUNC const +#define NBL_IF_CONSTEXPR(...) if constexpr (__VA_ARGS__) namespace nbl::hlsl { @@ -49,7 +50,8 @@ namespace nbl::hlsl #define NBL_CONSTEXPR_INLINE_FUNC inline #define NBL_CONSTEXPR_STATIC_INLINE_FUNC static inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC inline -#define NBL_CONST_MEMBER_FUNC +#define NBL_CONST_MEMBER_FUNC +#define NBL_IF_CONSTEXPR(...) 
if (__VA_ARGS__) namespace nbl { @@ -100,7 +102,7 @@ struct static_cast_helper template NBL_CONSTEXPR_INLINE_FUNC To _static_cast(From v) { -return impl::static_cast_helper::cast(v); + return impl::static_cast_helper::cast(v); } } diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 1d43d9b14a..7b8726566f 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -103,6 +103,10 @@ template struct nMax_helper; template struct nClamp_helper; +template +struct addCarry_helper; +template +struct subBorrow_helper; #ifdef __HLSL_VERSION // HLSL only specializations @@ -162,6 +166,9 @@ template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(refract_hel template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(nMax_helper, nMax, (T), (T)(T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(nMin_helper, nMin, (T), (T)(T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(nClamp_helper, nClamp, (T), (T)(T), T) +// Can use trivial case and not worry about restricting `T` with a concept since `spirv::AddCarryOutput / SubBorrowOutput` already take care of that +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(addCarry_helper, addCarry, (T), (T)(T), spirv::AddCarryOutput) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(subBorrow_helper, subBorrow, (T), (T)(T), spirv::SubBorrowOutput) #define BITCOUNT_HELPER_RETRUN_TYPE conditional_t, vector::Dimension>, int32_t> template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(bitCount_helper, bitCount, (T), (T), BITCOUNT_HELPER_RETRUN_TYPE) @@ -599,6 +606,33 @@ struct nClamp_helper } }; +// Once again no need to restrict the two below with concepts for same reason as HLSL version +template +struct addCarry_helper +{ + using return_t = spirv::AddCarryOutput; + NBL_CONSTEXPR_STATIC_INLINE_FUNC return_t __call(const T operand1, const T operand2) + { + return_t retVal; + retVal.result = operand1 + operand2; + retVal.carry = retVal.result < operand1 ? T(1) : T(0); + return retVal; + } +}; + +template +struct subBorrow_helper +{ + using return_t = spirv::SubBorrowOutput; + NBL_CONSTEXPR_STATIC_INLINE_FUNC return_t __call(const T operand1, const T operand2) + { + return_t retVal; + retVal.result = static_cast(operand1 - operand2); + retVal.borrow = operand1 >= operand2 ? 
T(0) : T(1); + return retVal; + } +}; + #endif // C++ only specializations // C++ and HLSL specializations diff --git a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl index b695c4b82b..1f1957dbbd 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl @@ -217,6 +217,19 @@ inline T refract(NBL_CONST_REF_ARG(T) I, NBL_CONST_REF_ARG(T) N, NBL_CONST_REF_A return cpp_compat_intrinsics_impl::refract_helper::__call(I, N, eta); } +template +NBL_CONSTEXPR_INLINE_FUNC spirv::AddCarryOutput addCarry(NBL_CONST_REF_ARG(T) operand1, NBL_CONST_REF_ARG(T) operand2) +{ + return cpp_compat_intrinsics_impl::addCarry_helper::__call(operand1, operand2); +} + +template +NBL_CONSTEXPR_INLINE_FUNC spirv::SubBorrowOutput subBorrow(NBL_CONST_REF_ARG(T) operand1, NBL_CONST_REF_ARG(T) operand2) +{ + return cpp_compat_intrinsics_impl::subBorrow_helper::__call(operand1, operand2); +} + + #ifdef __HLSL_VERSION #define NAMESPACE spirv #else diff --git a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl new file mode 100644 index 0000000000..3178159794 --- /dev/null +++ b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl @@ -0,0 +1,153 @@ +#ifndef _NBL_BUILTIN_HLSL_EMULATED_UINT64_T_HLSL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_EMULATED_UINT64_T_HLSL_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/functional.hlsl" + +namespace nbl +{ +namespace hlsl +{ + +struct emulated_uint64_t +{ + using storage_t = vector; + using this_t = emulated_uint64_t; + + storage_t data; + + // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- + + + #ifndef __HLSL_VERSION + + emulated_uint64_t() = default; + + // To immediately get compound operators and functional structs in CPP side + explicit emulated_uint64_t(const storage_t _data) : data(_data) {} + + #endif + + /** + * @brief Creates an `emulated_uint64_t` from a vector of two `uint32_t`s representing its bitpattern + * + * @param [in] _data Vector of `uint32_t` encoding the `uint64_t` being emulated + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) + { + this_t retVal; + retVal.data = _data; + return retVal; + } + + // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal = create(data & rhs.data); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal = create(data | rhs.data); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal = create(data ^ rhs.data); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC + { + this_t retVal = create(~data); + return retVal; + } + + // Only valid in CPP + #ifndef __HLSL_VERSION + + constexpr inline this_t operator<<(uint16_t bits) const; + + constexpr inline this_t operator>>(uint16_t bits) const; + + #endif + + // ------------------------------------------------------- ARITHMETIC OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + 
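+        // The low 32-bit halves are summed with an explicit carry-out, and that carry is then folded into the sum of the high halves.
+        // Worked example (illustrative values): 0x00000001'FFFFFFFF + 0x00000000'00000001:
+        //   low:  0xFFFFFFFF + 0x00000001 = 0x00000000, carry = 1
+        //   high: 0x00000001 + 0x00000000 + 1 = 0x00000002, giving 0x00000002'00000000 as expected.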
const spirv::AddCarryOutput lowerAddResult = addCarry(data.y, rhs.data.y); + const storage_t addResult = { data.x + rhs.data.x + lowerAddResult.carry, lowerAddResult.result }; + const this_t retVal = create(addResult); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + const spirv::SubBorrowOutput lowerSubResult = subBorrow(data.y, rhs.data.y); + const storage_t subResult = { data.x - rhs.data.x - lowerSubResult.borrow, lowerSubResult.result }; + const this_t retVal = create(subResult); + return retVal; + } + +}; + +template<> +struct left_shift_operator +{ + using type_t = emulated_uint64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + const uint32_t _bits = uint32_t(bits); + const uint32_t shift = ComponentBitWidth - _bits; + const uint32_t higherBitsMask = ~uint32_t(0) << shift; + // We need the `x` component of the vector (which represents the higher bits of the emulated uint64) to get the `bits` higher bits of the `y` component + const vector retValData = { (operand.data.x << _bits) | ((operand.data.y & higherBitsMask) >> shift), operand.data.y << _bits }; + return emulated_uint64_t::create(retValData); + } +}; + +template<> +struct arithmetic_right_shift_operator +{ + using type_t = emulated_uint64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + const uint32_t _bits = uint32_t(bits); + const uint32_t shift = ComponentBitWidth - _bits; + const uint32_t lowerBitsMask = ~uint32_t(0) >> shift; + // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component + const vector retValData = { operand.data.x >> _bits, ((operand.data.x & lowerBitsMask) << shift) | (operand.data.y >> _bits) }; + return emulated_uint64_t::create(retValData); + } +}; + +#ifndef __HLSL_VERSION + +constexpr inline emulated_uint64_t emulated_uint64_t::operator<<(uint16_t bits) const +{ + left_shift_operator leftShift; + return leftShift(*this, bits); +} + +constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(uint16_t bits) const +{ + arithmetic_right_shift_operator rightShift; + return rightShift(*this, bits); +} + +#endif + +} //namespace nbl +} //namespace hlsl + + + +#endif diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 25d822a940..3cf24193a4 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -165,7 +165,7 @@ COMPOUND_ASSIGN(divides) // ----------------- End of compound assignment ops ---------------- -// Min, Max and Ternary Operator don't use ALIAS_STD because they don't exist in STD +// Min, Max, and Ternary and Shift operators don't use ALIAS_STD because they don't exist in STD // TODO: implement as mix(rhs struct minimum @@ -200,13 +200,39 @@ struct ternary_operator { using type_t = T; - T operator()(bool condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) + NBL_CONSTEXPR_INLINE_FUNC T operator()(bool condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) { return condition ? 
lhs : rhs; } }; -} -} +template +struct left_shift_operator +{ + using type_t = T; + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + return operand << bits; + } +}; + +template +struct arithmetic_right_shift_operator +{ + using type_t = T; + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + return operand >> bits; + } +}; + +// Declare template, but left unimplemented by default +template +struct logical_right_shift_operator; + +} //namespace nbl +} //namespace hlsl #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl deleted file mode 100644 index 89d1a99749..0000000000 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ /dev/null @@ -1,423 +0,0 @@ -#ifndef _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ -#define _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ - -#include "nbl/builtin/hlsl/concepts/core.hlsl" -#include "nbl/builtin/hlsl/bit.hlsl" -#include "nbl/builtin/hlsl/cpp_compat.hlsl" -#include "nbl/builtin/hlsl/functional.hlsl" - -namespace nbl -{ -namespace hlsl -{ -namespace morton -{ - -namespace impl -{ - -// Valid dimension for a morton code -template -NBL_BOOL_CONCEPT MortonDimension = 1 < D && D < 5; - -template -struct decode_mask; - -template -struct decode_mask : integral_constant {}; - -template -struct decode_mask : integral_constant::value << Dim) | T(1)> {}; - -template -NBL_CONSTEXPR T decode_mask_v = decode_mask::value; - -} //namespace impl - -// Up to D = 4 supported -#define NBL_HLSL_MORTON_MASKS(U, D) _static_cast > (vector< U , 4 >(impl::decode_mask_v< U , D >,\ - impl::decode_mask_v< U , D > << U (1),\ - impl::decode_mask_v< U , D > << U (2),\ - impl::decode_mask_v< U , D > << U (3)\ - )) - -// Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 -// In particular, `Masks` should be a `const static` member field instead of appearing in every method using it -template && impl::MortonDimension) -struct code -{ - using this_t = code; - using U = make_unsigned_t; - NBL_CONSTEXPR_STATIC U BitWidth = U(8 * sizeof(U)); - - // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- - - #ifndef __HLSL_VERSION - - code() = default; - - // To immediately get compound operators and functional structs in CPP side - code(const U _value) : value(_value) {} - - #endif - - /** - * @brief Creates a Morton code from a set of cartesian coordinates - * - * @param [in] cartesian Coordinates to encode - */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(vector) cartesian) - { - const vector unsignedCartesian = bit_cast, vector >(cartesian); - this_t retVal = { U(0) }; - - [[unroll]] - for (U coord = 0; coord < U(D); coord++) - { - [[unroll]] - // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `coord`, but I feel this is clearer - for (U valBitIdx = coord, coordBit = U(1), shift = coord; valBitIdx < BitWidth; valBitIdx += U(D), coordBit <<= 1, shift += U(D) - 1) - { - retVal.value |= (unsignedCartesian[coord] & coordBit) << shift; - } - } - - return retVal; - } - - // CPP can also have a constructor - #ifndef __HLSL_VERSION - - /** - * @brief Creates a Morton code from a set of cartesian coordinates - * - * @param [in] cartesian Coordinates to encode - */ - code(NBL_CONST_REF_ARG(vector) cartesian) - { - *this = create(cartesian); 
- } - - /** - * @brief Decodes this Morton code back to a set of cartesian coordinates - */ - explicit operator vector() const noexcept - { - // Definition below, we override `impl::static_cast_helper` to have this conversion in both CPP/HLSL - return _static_cast, this_t>(*this); - } - - #endif - - // --------------------------------------------------------- AUX METHODS ------------------------------------------------------------------- - - /** - * @brief Extracts a single coordinate - * - * @param [in] coord The coordinate to extract - */ - NBL_CONSTEXPR_INLINE_FUNC I getCoordinate(uint16_t coord) NBL_CONST_MEMBER_FUNC - { - // Converting back has an issue with bit-width: when encoding (if template parameter `I` is signed) we cut off the highest bits - // that actually indicated sign. Therefore what we do is set the highest bits instead of the lowest then do an arithmetic right shift - // at the end to preserve sign. - // To this end, we first notice that the coordinate of index `coord` gets - // `bits(coord) = ceil((BitWidth - coord)/D)` bits when encoded (so the first dimensions get more bits than the last ones if `D` does not - // divide `BitWidth perfectly`). - // Then instead of unpacking all the bits for that coordinate as the lowest bits, we unpack them as the highest ones - // by shifting everything `BitWidth - bits(coord)` bits to the left, then at the end do a final *arithmetic* bitshift right by the same amount. - - const U bitsCoord = BitWidth / U(D) + ((coord < BitWidth % D) ? U(1) : U(0)); // <- this computes the ceil - U coordVal = U(0); - // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `coord`, but I feel this is clearer - [[unroll]] - for (U valBitIdx = U(coord), coordBit = U(1) << U(coord), shift = U(coord); valBitIdx < BitWidth; valBitIdx += U(D), coordBit <<= U(D), shift += U(D) - 1) - { - coordVal |= (value & coordBit) << (BitWidth - bitsCoord - shift); - } - return bit_cast(coordVal) >> (BitWidth - bitsCoord); - } - - /** - * @brief Returns an element of type U by extracting only the highest bit (of the bits used to encode `coord`) - * - * @param [in] coord The coordinate whose highest bit we want to extract. - */ - NBL_CONSTEXPR_INLINE_FUNC U extractHighestBit(uint16_t coord) NBL_CONST_MEMBER_FUNC - { - // Like above, if the number encoded in `coord` gets `bits(coord) = ceil((BitWidth - coord)/D)` bits for representation, then the highest index of these - // bits is `bits(coord) - 1` - const U coordHighestBitIdx = BitWidth / U(D) - ((U(coord) < BitWidth % U(D)) ? 
U(0) : U(1)); - // This is the index of that bit as an index in the encoded value - const U shift = coordHighestBitIdx * U(D) + U(coord); - return value & (U(1) << shift); - } - - // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- - - NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - this_t retVal = { value & rhs.value }; - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - this_t retVal = { value | rhs.value }; - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - this_t retVal = { value ^ rhs.value }; - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC - { - this_t retVal = { ~value }; - return retVal; - } - - // Only valid in CPP - #ifndef __HLSL_VERSION - - NBL_CONSTEXPR_INLINE_FUNC this_t operator<<(uint16_t bits) NBL_CONST_MEMBER_FUNC - { - this_t retVal = { value << U(bits) }; - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator>>(uint16_t bits) NBL_CONST_MEMBER_FUNC - { - this_t retVal = { value >> U(bits) }; - return retVal; - } - - #endif - - // ------------------------------------------------------- UNARY ARITHMETIC OPERATORS ------------------------------------------------- - - NBL_CONSTEXPR_INLINE_FUNC this_t operator-() NBL_CONST_MEMBER_FUNC - { - // allOnes encodes a cartesian coordinate with all values set to 1 - const static this_t allOnes = { (U(1) << D) - U(1) }; - // Using 2's complement property that arithmetic negation can be obtained by bitwise negation then adding 1 - return operator~() + allOnes; - } - - // ------------------------------------------------------- BINARY ARITHMETIC OPERATORS ------------------------------------------------- - - // CHANGED FOR DEBUG: REMEMBER TO CHANGE BACK - - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); - this_t retVal = { U(0) }; - [[unroll]] - for (uint16_t coord = 0; coord < D; coord++) - { - // put 1 bits everywhere in the bits the current axis is not using - // then extract just the axis bits for the right hand coordinate - // carry-1 will propagate the bits across the already set bits - // then clear out the bits not belonging to current axis - // Note: Its possible to clear on `this` and fill on `rhs` but that will - // disable optimizations, we expect the compiler to optimize a lot if the - // value of `rhs` is known at compile time, e.g. 
`static_cast>(glm::ivec3(1,0,0))` - retVal.value |= ((value | (~Masks[coord])) + (rhs.value & Masks[coord])) & Masks[coord]; - } - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); - this_t retVal = { U(0) }; - [[unroll]] - for (uint16_t coord = 0; coord < D; coord++) - { - // This is the dual trick of the one used for addition: set all other bits to 0 so borrows propagate - retVal.value |= ((value & Masks[coord]) - (rhs.value & Masks[coord])) & Masks[coord]; - } - return retVal; - } - - // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- - - NBL_CONSTEXPR_INLINE_FUNC bool operator!() NBL_CONST_MEMBER_FUNC - { - return value.operator!(); - } - - NBL_CONSTEXPR_INLINE_FUNC bool coordEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC - { - NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); - return (value & Masks[coord]) == (rhs.value & Masks[coord]); - } - - NBL_CONSTEXPR_INLINE_FUNC vector operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - vector retVal; - [[unroll]] - for (uint16_t coord = 0; coord < D; coord++) - retVal[coord] = coordEquals(rhs, coord); - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC bool allEqual(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - return value == rhs.value; - } - - NBL_CONSTEXPR_INLINE_FUNC bool coordNotEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC - { - return !coordEquals(rhs, coord); - } - - NBL_CONSTEXPR_INLINE_FUNC vector operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - vector retVal; - [[unroll]] - for (uint16_t coord = 0; coord < D; coord++) - retVal[coord] = coordNotEquals(rhs, coord); - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC bool notAllEqual(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - return !allEqual(rhs); - } - - template - NBL_CONSTEXPR_INLINE_FUNC bool coordOrderCompare(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC - { - NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); - Comparison comparison; - OnSignMismatch onSignMismatch; - - // When unsigned, bit representation is the same but with 0s inbetween bits. In particular, we can still use unsigned comparison - #ifndef __HLSL_VERSION - if constexpr (is_unsigned_v) - #else - if (is_unsigned_v) - #endif - { - return comparison(value & Masks[coord], rhs.value & Masks[coord]); - } - // When signed, since the representation is unsigned, we need to divide behaviour based on highest bit - else - { - // I will give an example for `operator<` but the same reasoning holds for all others. Some abuse of notation but hopefully it's clear. - - // If `this[coord] >= 0` and `rhs[coord] < 0` then `this[coord] < rhs[coord]` returns false. Notice that in this case, the highest bit of - // `value` (of the bits representing the number encoded in `coord`) is `0`, while the highest bit for rhs is `1`. - // Similarly, if `this[coord] < 0` and `rhs[coord] >= 0` then `this[coord] < rhs[coord]` returns true, and the highest bit situation is inverted. - // This means that if the signs of `this[coord]` and `rhs[coord]` are not equal, the result depends on the sign of `this[coord]`. - // What that result should be is controlled by `OnSignMismatch`. 
- // Finally, notice that if only one of those bits is set to 1, then the `xor` of that highest bit yields 1 as well - const U highestBit = extractHighestBit(coord); - const U rhsHighestBit = rhs.extractHighestBit(coord); - if (highestBit ^ rhsHighestBit) - return onSignMismatch(highestBit); - // If both are nonnegative, then we can just use the comparison as it comes. - // If both are negative, it just so happens that applying the same operator to their unsigned bitcasted representations yields the same result. - // For `operator<`, for example, consider two negative numbers. Starting from the MSB (we know it's `1` for both in this case) and moving to the right, - // consider what happens when we encounter the first bit where they mismatch: the one with a `0` at position `k` (by position I mean counted from the - // left, starting at 0) is adding at most `2^k - 1` in the lowest bits, while the one with a `1` is adding exactly `2^k`. This means that the one - // with a 0 is "more negative". - else - return comparison(value & Masks[coord], rhs.value & Masks[coord]); - } - } - - struct OnSignMismatchLessThan - { - // On a sign mismatch, `thisrhs` is true if this is non-negative (`highestBit` set to `0`) and false otherwise - // Therefore since it takes a number with only the highest bit set we only have to return the opposite of whether there is in fact a bit set - bool operator()(U highestBit) - { - return !bool(highestBit); - } - }; - - NBL_CONSTEXPR_INLINE_FUNC bool coordLessThan(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC - { - return coordOrderCompare, OnSignMismatchLessThan>(rhs, coord); - } - - NBL_CONSTEXPR_INLINE_FUNC bool coordLessThanEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC - { - return coordOrderCompare, OnSignMismatchLessThan>(rhs, coord); - } - - NBL_CONSTEXPR_INLINE_FUNC bool coordGreaterThan(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC - { - return coordOrderCompare, OnSignMismatchGreaterThan>(rhs, coord); - } - - NBL_CONSTEXPR_INLINE_FUNC bool coordGreaterThanEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC - { - return coordOrderCompare, OnSignMismatchGreaterThan>(rhs, coord); - } - - #define DEFINE_OPERATOR(OP, COMPARISON) NBL_CONSTEXPR_INLINE_FUNC vector operator##OP##(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC \ - { \ - vector retVal; \ - [[unroll]] \ - for (uint16_t coord = 0; coord < D; coord++) \ - retVal[coord] = COMPARISON (rhs, coord); \ - return retVal; \ - } - - DEFINE_OPERATOR(< , coordLessThan); - DEFINE_OPERATOR(<= , coordLessThanEquals); - DEFINE_OPERATOR(> , coordGreaterThan); - DEFINE_OPERATOR(>= , coordGreaterThanEquals); - - #undef DEFINE_OPERATOR - - U value; -}; - -// Don't forget to delete this macro after usage -#undef NBL_HLSL_MORTON_MASKS - -} //namespace morton - -// Still in nbl::hlsl we can go to nbl::hlsl::impl and specialize the `static_cast_helper` -namespace impl -{ - -template -struct static_cast_helper, morton::code > -{ - NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(morton::code) val) - { - vector cartesian; - [[unroll]] - for (uint16_t coord = 0; coord < D; coord++) - { - cartesian[coord] = val.getCoordinate(coord); - } - return cartesian; - } -}; - -} // namespace impl - -} //namespace hlsl -} //namespace nbl - - - -#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl new file mode 100644 index 0000000000..89eddf8675 --- /dev/null 
+++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -0,0 +1,72 @@ +#ifndef _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/concepts/core.hlsl" +#include "nbl/builtin/hlsl/bit.hlsl" +#include "nbl/builtin/hlsl/functional.hlsl" +#include "nbl/builtin/hlsl/emulated/uint64_t.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace morton +{ + +namespace impl +{ + +// Valid dimension for a morton code +template +NBL_BOOL_CONCEPT MortonDimension = 1 < D && D < 5; + +// Masks + +template +struct decode_mask; + +template +struct decode_mask : integral_constant {}; + +template +struct decode_mask : integral_constant::value << Dim) | T(1)> {}; + +template +NBL_CONSTEXPR T decode_mask_v = decode_mask::value; + +// Decode masks are different for each dimension + +template +struct MortonDecoder; + +} //namespace impl + +// Up to D = 4 supported +#define NBL_HLSL_MORTON_MASKS(U, D) _static_cast > (vector< U , 4 >(impl::decode_mask_v< U , D >,\ + impl::decode_mask_v< U , D > << U (1),\ + impl::decode_mask_v< U , D > << U (2),\ + impl::decode_mask_v< U , D > << U (3)\ + )) + +// Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 +// In particular, `Masks` should be a `const static` member field instead of appearing in every method using it +template && D * Bits <= 64) +struct code +{ + using this_t = code; + NBL_CONSTEXPR_STATIC uint16_t TotalBitWidth = D * Bits; + using storage_t = conditional_t<(TotalBitWidth>16), conditional_t<(TotalBitWidth>32), _uint64_t, uint32_t>, uint16_t> ; + + + storage_t value; +}; + +// Don't forget to delete this macro after usage +#undef NBL_HLSL_MORTON_MASKS + +} //namespace morton +} //namespace hlsl +} //namespace nbl + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl index d351cab07d..d8d90de726 100644 --- a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl +++ b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl @@ -4,13 +4,14 @@ #ifndef _NBL_BUILTIN_HLSL_SPIRV_INTRINSICS_CORE_INCLUDED_ #define _NBL_BUILTIN_HLSL_SPIRV_INTRINSICS_CORE_INCLUDED_ +#include + #ifdef __HLSL_VERSION // TODO: AnastZIuk fix public search paths so we don't choke #include "spirv/unified1/spirv.hpp" #include #include #include -#include namespace nbl { diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index fa548e210a..a11a26d69a 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -214,6 +214,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/macros.h") # emulated LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/float64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/float64_t_impl.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/uint64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/vector_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/matrix_t.hlsl") # portable @@ -291,7 +292,6 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/fast_affine.hlsl" LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/functions.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/geometry.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/intutil.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/morton.hlsl") 
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quadratic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/cubic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quartic.hlsl") @@ -368,5 +368,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/tgmath/output_structs.hlsl") #blur LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/blur.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/box_sampler.hlsl") +#morton codes +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/morton.hlsl") ADD_CUSTOM_BUILTIN_RESOURCES(nblBuiltinResourceData NBL_RESOURCES_TO_EMBED "${NBL_ROOT_PATH}/include" "nbl/builtin" "nbl::builtin" "${NBL_ROOT_PATH_BINARY}/include" "${NBL_ROOT_PATH_BINARY}/src" "STATIC" "INTERNAL") From 89d2bf2a5d9fab347850babe31fdc8f0a95c64f6 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Wed, 2 Apr 2025 16:19:20 -0300 Subject: [PATCH 14/28] Refactor morton class, get new conversion running --- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 8 +- .../nbl/builtin/hlsl/emulated/uint64_t.hlsl | 11 ++ include/nbl/builtin/hlsl/morton.hlsl | 175 +++++++++++++++++- 3 files changed, 186 insertions(+), 8 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 7b8726566f..92fc9e929b 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -611,11 +611,11 @@ template struct addCarry_helper { using return_t = spirv::AddCarryOutput; - NBL_CONSTEXPR_STATIC_INLINE_FUNC return_t __call(const T operand1, const T operand2) + constexpr static inline return_t __call(const T operand1, const T operand2) { return_t retVal; retVal.result = operand1 + operand2; - retVal.carry = retVal.result < operand1 ? T(1) : T(0); + retVal.carry = T(retVal.result < operand1); return retVal; } }; @@ -624,11 +624,11 @@ template struct subBorrow_helper { using return_t = spirv::SubBorrowOutput; - NBL_CONSTEXPR_STATIC_INLINE_FUNC return_t __call(const T operand1, const T operand2) + constexpr static inline return_t __call(const T operand1, const T operand2) { return_t retVal; retVal.result = static_cast(operand1 - operand2); - retVal.borrow = operand1 >= operand2 ? 
T(0) : T(1); + retVal.borrow = T(operand1 < operand2); return retVal; } }; diff --git a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl index 3178159794..c4f1f1ef1b 100644 --- a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl @@ -40,6 +40,17 @@ struct emulated_uint64_t return retVal; } + /** + * @brief Creates an `emulated_uint64_t` from two `uint32_t`s representing its bitpattern + * + * @param [in] hi Highest 32 bits of the `uint64` being emulated + * @param [in] lo Lowest 32 bits of the `uint64` being emulated + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) hi, NBL_CONST_REF_ARG(uint32_t) lo) + { + return create(storage_t(hi, lo)); + } + // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 89eddf8675..d4ada29d70 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -1,11 +1,12 @@ -#ifndef _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ -#define _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ +#ifndef _NBL_BUILTIN_HLSL_MORTON_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MORTON_INCLUDED_ #include "nbl/builtin/hlsl/cpp_compat.hlsl" #include "nbl/builtin/hlsl/concepts/core.hlsl" #include "nbl/builtin/hlsl/bit.hlsl" #include "nbl/builtin/hlsl/functional.hlsl" #include "nbl/builtin/hlsl/emulated/uint64_t.hlsl" +#include "nbl/builtin/hlsl/mpl.hlsl" namespace nbl { @@ -35,11 +36,177 @@ struct decode_mask : integral_constant::value template NBL_CONSTEXPR T decode_mask_v = decode_mask::value; +// ----------------------------------------------------------------- MORTON DECODERS --------------------------------------------------- + // Decode masks are different for each dimension +// Decoder works with unsigned, cast to sign depends on the Morton class +// Bit width checks happen in Morton class as well -template +template struct MortonDecoder; +// Specializations for lack of uint64_t + +template +struct MortonDecoder<2, Bits, emulated_uint64_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(emulated_uint64_t) encodedValue) + { + NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); + + NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[6] = { emulated_uint64_t::create(uint32_t(0x55555555), uint32_t(0x55555555)), // Groups bits by 1 on, 1 off + emulated_uint64_t::create(uint32_t(0x33333333), uint32_t(0x33333333)), // Groups bits by 2 on, 2 off + emulated_uint64_t::create(uint32_t(0x0F0F0F0F), uint32_t(0x0F0F0F0F)), // Groups bits by 4 on, 4 off + emulated_uint64_t::create(uint32_t(0x00FF00FF), uint32_t(0x00FF00FF)), // Groups bits by 8 on, 8 off + emulated_uint64_t::create(uint32_t(0x0000FFFF), uint32_t(0x0000FFFF)), // Groups bits by 16 on, 16 off + emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0xFFFFFFFF)) };// Groups bits by 32 on, 32 off + + arithmetic_right_shift_operator rightShift; + + emulated_uint64_t decoded = encodedValue & DecodeMasks[0]; + [[unroll]] + for (uint16_t i = 0, shift = 1; i < MaxIterations; i++, shift <<= 1) + { + decoded = (decoded | rightShift(decoded, shift)) & DecodeMasks[i + 1]; + } + return _static_cast(decoded.data.y); + } +}; + +template +struct MortonDecoder<3, Bits, 
emulated_uint64_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(emulated_uint64_t) encodedValue) + { + NBL_CONSTEXPR_STATIC uint16_t MaxIterations = conditional_value<(Bits <= 3), uint16_t, uint16_t(1), + conditional_value<(Bits <= 6), uint16_t, uint16_t(2), + conditional_value<(Bits <= 12), uint16_t, uint16_t(3), uint16_t(4)>::value>::value>::value; + + NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[5] = { emulated_uint64_t::create(uint32_t(0x12492492), uint32_t(0x49249249)), // Groups bits by 1 on, 2 off (also only considers 21 bits) + emulated_uint64_t::create(uint32_t(0x01C0E070), uint32_t(0x381C0E07)), // Groups bits by 3 on, 6 off + emulated_uint64_t::create(uint32_t(0x0FC003F0), uint32_t(0x00FC003F)), // Groups bits by 6 on, 12 off + emulated_uint64_t::create(uint32_t(0x0000FFF0), uint32_t(0x00000FFF)), // Groups bits by 12 on, 24 off + emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0x00FFFFFF)) };// Groups bits by 24 on, 48 off (40 off if you're feeling pedantic) + + arithmetic_right_shift_operator rightShift; + + emulated_uint64_t decoded = encodedValue & DecodeMasks[0]; + // First iteration is special + decoded = (decoded | rightShift(decoded, 2) | rightShift(decoded, 4)) & DecodeMasks[1]; + [[unroll]] + for (uint16_t i = 0, shift = 6; i < MaxIterations - 1; i++, shift <<= 1) + { + decoded = (decoded | rightShift(decoded, shift)) & DecodeMasks[i + 2]; + } + return _static_cast(decoded.data.y); + } +}; + +template +struct MortonDecoder<4, Bits, emulated_uint64_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(emulated_uint64_t) encodedValue) + { + NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); + + NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[5] = { emulated_uint64_t::create(uint32_t(0x11111111), uint32_t(0x11111111)), // Groups bits by 1 on, 3 off + emulated_uint64_t::create(uint32_t(0x03030303), uint32_t(0x03030303)), // Groups bits by 2 on, 6 off + emulated_uint64_t::create(uint32_t(0x000F000F), uint32_t(0x000F000F)), // Groups bits by 4 on, 12 off + emulated_uint64_t::create(uint32_t(0x000000FF), uint32_t(0x000000FF)), // Groups bits by 8 on, 24 off + emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0x0000FFFF)) };// Groups bits by 16 on, 48 off + + arithmetic_right_shift_operator rightShift; + + emulated_uint64_t decoded = encodedValue & DecodeMasks[0]; + [[unroll]] + for (uint16_t i = 0, shift = 3; i < MaxIterations; i++, shift <<= 1) + { + decoded = (decoded | rightShift(decoded, shift)) & DecodeMasks[i + 1]; + } + return _static_cast(decoded.data.y); + } +}; + +template +struct MortonDecoder<2, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + { + NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); + + NBL_CONSTEXPR_STATIC encode_t DecodeMasks[6] = { _static_cast(0x5555555555555555), // Groups bits by 1 on, 1 off + _static_cast(0x3333333333333333), // Groups bits by 2 on, 2 off + _static_cast(0x0F0F0F0F0F0F0F0F), // Groups bits by 4 on, 4 off + _static_cast(0x00FF00FF00FF00FF), // Groups bits by 8 on, 8 off + _static_cast(0x0000FFFF0000FFFF), // Groups bits by 16 on, 16 off + _static_cast(0x00000000FFFFFFFF) };// Groups bits by 32 on, 32 off + + encode_t decoded = encodedValue & DecodeMasks[0]; + [[unroll]] + for (uint16_t i = 0, shift = 1; i < MaxIterations; i++, shift <<= 1) + { + decoded = (decoded | 
(decoded >> shift)) & DecodeMasks[i + 1]; + } + return _static_cast(decoded); + } +}; + +template +struct MortonDecoder<3, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + { + NBL_CONSTEXPR_STATIC uint16_t MaxIterations = conditional_value<(Bits <= 3), uint16_t, uint16_t(1), + conditional_value<(Bits <= 6), uint16_t, uint16_t(2), + conditional_value<(Bits <= 12), uint16_t, uint16_t(3), uint16_t(4)>::value>::value>::value; + + NBL_CONSTEXPR_STATIC encode_t DecodeMasks[5] = { _static_cast(0x1249249249249249), // Groups bits by 1 on, 2 off (also only considers 21 bits) + _static_cast(0x01C0E070381C0E07), // Groups bits by 3 on, 6 off + _static_cast(0x0FC003F000FC003F), // Groups bits by 6 on, 12 off + _static_cast(0x0000FFF000000FFF), // Groups bits by 12 on, 24 off + _static_cast(0x0000000000FFFFFF) };// Groups bits by 24 on, 48 off (40 off if you're feeling pedantic) + + encode_t decoded = encodedValue & DecodeMasks[0]; + // First iteration is special + decoded = (decoded | (decoded >> 2) | (decoded >> 4)) & DecodeMasks[1]; + [[unroll]] + for (uint16_t i = 0, shift = 6; i < MaxIterations - 1; i++, shift <<= 1) + { + decoded = (decoded | (decoded >> shift)) & DecodeMasks[i + 2]; + } + return _static_cast(decoded); + } +}; + +template +struct MortonDecoder<4, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + { + NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); + + NBL_CONSTEXPR_STATIC encode_t DecodeMasks[5] = { _static_cast(0x1111111111111111), // Groups bits by 1 on, 3 off + _static_cast(0x0303030303030303), // Groups bits by 2 on, 6 off + _static_cast(0x000F000F000F000F), // Groups bits by 4 on, 12 off + _static_cast(0x000000FF000000FF), // Groups bits by 8 on, 24 off + _static_cast(0x000000000000FFFF) };// Groups bits by 16 on, 48 off + + encode_t decoded = encodedValue & DecodeMasks[0]; + [[unroll]] + for (uint16_t i = 0, shift = 3; i < MaxIterations; i++, shift <<= 1) + { + decoded = (decoded | (decoded >> shift)) & DecodeMasks[i + 1]; + } + return _static_cast(decoded); + } +}; + } //namespace impl // Up to D = 4 supported @@ -56,7 +223,7 @@ struct code { using this_t = code; NBL_CONSTEXPR_STATIC uint16_t TotalBitWidth = D * Bits; - using storage_t = conditional_t<(TotalBitWidth>16), conditional_t<(TotalBitWidth>32), _uint64_t, uint32_t>, uint16_t> ; + using storage_t = conditional_t<(TotalBitWidth > 16), conditional_t<(TotalBitWidth > 32), _uint64_t, uint32_t>, uint16_t>; storage_t value; From de4d0fb2f266da125d94801c5c38bd81a9260acd Mon Sep 17 00:00:00 2001 From: Fletterio Date: Wed, 2 Apr 2025 23:45:53 -0300 Subject: [PATCH 15/28] Add new classes for encoding/decoding of mortn codes --- .../nbl/builtin/hlsl/emulated/uint64_t.hlsl | 57 ++++ include/nbl/builtin/hlsl/morton.hlsl | 287 ++++++++++++++++-- 2 files changed, 312 insertions(+), 32 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl index c4f1f1ef1b..3794031c8e 100644 --- a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl @@ -3,6 +3,7 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" #include "nbl/builtin/hlsl/functional.hlsl" +#include "nbl/builtin/hlsl/concepts/core.hlsl" namespace nbl { @@ -156,6 +157,62 @@ constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(uint16_t bits) #endif +namespace impl 
+{ + +template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) <= sizeof(uint32_t))) +struct static_cast_helper && (sizeof(Unsigned) <= sizeof(uint32_t))) > +{ + using To = Unsigned; + using From = emulated_uint64_t; + + // Return only the lowest bits + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + return _static_cast(u.data.y); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) > sizeof(uint32_t))) +struct static_cast_helper && (sizeof(Unsigned) > sizeof(uint32_t))) > +{ + using To = Unsigned; + using From = emulated_uint64_t; + + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + const To highBits = _static_cast(u.data.x) << To(32); + return highBits | _static_cast(u.data.y); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) <= sizeof(uint32_t))) +struct static_cast_helper && (sizeof(Unsigned) <= sizeof(uint32_t))) > +{ + using To = emulated_uint64_t; + using From = Unsigned; + + // Set only lower bits + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + return emulated_uint64_t::create(uint32_t(0), _static_cast(u)); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) > sizeof(uint32_t))) +struct static_cast_helper && (sizeof(Unsigned) > sizeof(uint32_t))) > +{ + using To = emulated_uint64_t; + using From = Unsigned; + + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + return emulated_uint64_t::create(_static_cast(u >> 32), _static_cast(u)); + } +}; + +} //namespace impl + } //namespace nbl } //namespace hlsl diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index d4ada29d70..e2e1596587 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -22,7 +22,7 @@ namespace impl template NBL_BOOL_CONCEPT MortonDimension = 1 < D && D < 5; -// Masks +// Basic decode masks template struct decode_mask; @@ -36,17 +36,240 @@ struct decode_mask : integral_constant::value template NBL_CONSTEXPR T decode_mask_v = decode_mask::value; -// ----------------------------------------------------------------- MORTON DECODERS --------------------------------------------------- +// --------------------------------------------------------- MORTON ENCODE/DECODE MASKS --------------------------------------------------- +// Proper encode masks (either generic `T array[masksPerDImension]` or `morton_mask`) impossible to have until at best HLSL202y + +#define NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, HEX_VALUE) template struct morton_mask_##DIM##_##MASK \ +{\ + NBL_CONSTEXPR_STATIC_INLINE T value = _static_cast(HEX_VALUE);\ +}; + +#ifndef __HLSL_VERSION + +#define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_HIGH_VALUE, HEX_LOW_VALUE) template<> struct morton_mask_##DIM##_##MASK##\ +{\ + NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t value = emulated_uint64_t::create(uint32_t(0x##HEX_HIGH_VALUE), uint32_t(0x##HEX_LOW_VALUE));\ +}; + +#else + +#define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_HIGH_VALUE, HEX_LOW_VALUE) template<> struct morton_mask_##DIM##_##MASK##\ +{\ + NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t value;\ +};\ +NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t morton_mask_##DIM##_##MASK##::value = emulated_uint64_t::create(uint32_t(0x##HEX_HIGH_VALUE), uint32_t(0x##HEX_LOW_VALUE)); +#endif + +#define NBL_MORTON_DECODE_MASK(DIM, MASK, HEX_HIGH_VALUE, HEX_LOW_VALUE) template struct morton_mask_##DIM##_##MASK ;\ + NBL_MORTON_EMULATED_DECODE_MASK(DIM, 
MASK, HEX_HIGH_VALUE, HEX_LOW_VALUE)\ + NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, 0x##HEX_HIGH_VALUE##HEX_LOW_VALUE)\ + template\ + NBL_CONSTEXPR T morton_mask_##DIM##_##MASK##_v = morton_mask_##DIM##_##MASK##::value; + +NBL_MORTON_DECODE_MASK(2, 0, 55555555, 55555555) // Groups bits by 1 on, 1 off +NBL_MORTON_DECODE_MASK(2, 1, 33333333, 33333333) // Groups bits by 2 on, 2 off +NBL_MORTON_DECODE_MASK(2, 2, 0F0F0F0F, 0F0F0F0F) // Groups bits by 4 on, 4 off +NBL_MORTON_DECODE_MASK(2, 3, 00FF00FF, 00FF00FF) // Groups bits by 8 on, 8 off +NBL_MORTON_DECODE_MASK(2, 4, 0000FFFF, 0000FFFF) // Groups bits by 16 on, 16 off +NBL_MORTON_DECODE_MASK(2, 5, 00000000, FFFFFFFF) // Groups bits by 32 on, 32 off + +NBL_MORTON_DECODE_MASK(3, 0, 12492492, 49249249) // Groups bits by 1 on, 2 off - also limits each dimension to 21 bits +NBL_MORTON_DECODE_MASK(3, 1, 01C0E070, 381C0E07) // Groups bits by 3 on, 6 off +NBL_MORTON_DECODE_MASK(3, 2, 0FC003F0, 00FC003F) // Groups bits by 6 on, 12 off +NBL_MORTON_DECODE_MASK(3, 3, 0000FFF0, 00000FFF) // Groups bits by 12 on, 24 off +NBL_MORTON_DECODE_MASK(3, 4, 00000000, 00FFFFFF) // Groups bits by 24 on, 48 off + +NBL_MORTON_DECODE_MASK(4, 0, 11111111, 11111111) // Groups bits by 1 on, 3 off +NBL_MORTON_DECODE_MASK(4, 1, 03030303, 03030303) // Groups bits by 2 on, 6 off +NBL_MORTON_DECODE_MASK(4, 2, 000F000F, 000F000F) // Groups bits by 4 on, 12 off +NBL_MORTON_DECODE_MASK(4, 3, 000000FF, 000000FF) // Groups bits by 8 on, 24 off +NBL_MORTON_DECODE_MASK(4, 4, 00000000, 0000FFFF) // Groups bits by 16 on, 48 off + +#undef NBL_MORTON_DECODE_MASK +#undef NBL_MORTON_EMULATED_DECODE_MASK +#undef NBL_MORTON_GENERIC_DECODE_MASK + +// ----------------------------------------------------------------- MORTON ENCODERS --------------------------------------------------- + +template +struct MortonEncoder; + +template +struct MortonEncoder<2, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) + { + left_shift_operator leftShift; + encode_t encoded = _static_cast(decodedValue); + NBL_IF_CONSTEXPR(Bits > 16) + { + encoded = (encoded | leftShift(encoded, 16)) & morton_mask_2_4_v; + } + NBL_IF_CONSTEXPR(Bits > 8) + { + encoded = (encoded | leftShift(encoded, 8)) & morton_mask_2_3_v; + } + NBL_IF_CONSTEXPR(Bits > 4) + { + encoded = (encoded | leftShift(encoded, 4)) & morton_mask_2_2_v; + } + NBL_IF_CONSTEXPR(Bits > 2) + { + encoded = (encoded | leftShift(encoded, 2)) & morton_mask_2_1_v; + } + encoded = (encoded | leftShift(encoded, 1)) & morton_mask_2_0_v; + return encoded; + } +}; + +template +struct MortonEncoder<3, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) + { + left_shift_operator leftShift; + encode_t encoded = _static_cast(decodedValue); + NBL_IF_CONSTEXPR(Bits > 12) + { + encoded = (encoded | leftShift(encoded, 24)) & morton_mask_3_3_v; + } + NBL_IF_CONSTEXPR(Bits > 6) + { + encoded = (encoded | leftShift(encoded, 12)) & morton_mask_3_2_v; + } + NBL_IF_CONSTEXPR(Bits > 3) + { + encoded = (encoded | leftShift(encoded, 6)) & morton_mask_3_1_v; + } + encoded = (encoded | leftShift(encoded, 2) | leftShift(encoded, 4)) & morton_mask_3_0_v; + return encoded; + } +}; -// Decode masks are different for each dimension -// Decoder works with unsigned, cast to sign depends on the Morton class -// Bit width checks happen in Morton class as well +template +struct MortonEncoder<4, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC 
encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) + { + left_shift_operator leftShift; + encode_t encoded = _static_cast(decodedValue); + NBL_IF_CONSTEXPR(Bits > 8) + { + encoded = (encoded | leftShift(encoded, 24)) & morton_mask_4_3_v; + } + NBL_IF_CONSTEXPR(Bits > 4) + { + encoded = (encoded | leftShift(encoded, 12)) & morton_mask_4_2_v; + } + NBL_IF_CONSTEXPR(Bits > 2) + { + encoded = (encoded | leftShift(encoded, 6)) & morton_mask_4_1_v; + } + encoded = (encoded | leftShift(encoded, 3)) & morton_mask_4_0_v; + return encoded; + } +}; + +// ----------------------------------------------------------------- MORTON DECODERS --------------------------------------------------- template struct MortonDecoder; -// Specializations for lack of uint64_t +template +struct MortonDecoder<2, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + { + arithmetic_right_shift_operator rightShift; + encode_t decoded = encodedValue & morton_mask_2_0_v; + NBL_IF_CONSTEXPR(Bits > 1) + { + decoded = (decoded | rightShift(decoded, 1)) & morton_mask_2_1_v; + } + NBL_IF_CONSTEXPR(Bits > 2) + { + decoded = (decoded | rightShift(decoded, 2)) & morton_mask_2_2_v; + } + NBL_IF_CONSTEXPR(Bits > 4) + { + decoded = (decoded | rightShift(decoded, 4)) & morton_mask_2_3_v; + } + NBL_IF_CONSTEXPR(Bits > 8) + { + decoded = (decoded | rightShift(decoded, 8)) & morton_mask_2_4_v; + } + NBL_IF_CONSTEXPR(Bits > 16) + { + decoded = (decoded | rightShift(decoded, 16)) & morton_mask_2_5_v; + } + + return _static_cast(decoded); + } +}; + +template +struct MortonDecoder<3, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + { + arithmetic_right_shift_operator rightShift; + encode_t decoded = encodedValue & morton_mask_3_0_v; + NBL_IF_CONSTEXPR(Bits > 1) + { + decoded = (decoded | rightShift(decoded, 2) | rightShift(decoded, 4)) & morton_mask_3_1_v; + } + NBL_IF_CONSTEXPR(Bits > 3) + { + decoded = (decoded | rightShift(decoded, 6)) & morton_mask_3_2_v; + } + NBL_IF_CONSTEXPR(Bits > 6) + { + decoded = (decoded | rightShift(decoded, 12)) & morton_mask_3_3_v; + } + NBL_IF_CONSTEXPR(Bits > 12) + { + decoded = (decoded | rightShift(decoded, 24)) & morton_mask_3_4_v; + } + + return _static_cast(decoded); + } +}; +template +struct MortonDecoder<4, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + { + arithmetic_right_shift_operator rightShift; + encode_t decoded = encodedValue & morton_mask_4_0_v; + NBL_IF_CONSTEXPR(Bits > 1) + { + decoded = (decoded | rightShift(decoded, 3)) & morton_mask_4_1_v; + } + NBL_IF_CONSTEXPR(Bits > 2) + { + decoded = (decoded | rightShift(decoded, 6)) & morton_mask_4_2_v; + } + NBL_IF_CONSTEXPR(Bits > 4) + { + decoded = (decoded | rightShift(decoded, 12)) & morton_mask_4_3_v; + } + NBL_IF_CONSTEXPR(Bits > 8) + { + decoded = (decoded | rightShift(decoded, 24)) & morton_mask_4_4_v; + } + + return _static_cast(decoded); + } +}; + +/* template struct MortonDecoder<2, Bits, emulated_uint64_t> { @@ -55,12 +278,12 @@ struct MortonDecoder<2, Bits, emulated_uint64_t> { NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); - NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[6] = { emulated_uint64_t::create(uint32_t(0x55555555), uint32_t(0x55555555)), // Groups bits by 1 on, 1 off - emulated_uint64_t::create(uint32_t(0x33333333), uint32_t(0x33333333)), // Groups 
bits by 2 on, 2 off - emulated_uint64_t::create(uint32_t(0x0F0F0F0F), uint32_t(0x0F0F0F0F)), // Groups bits by 4 on, 4 off - emulated_uint64_t::create(uint32_t(0x00FF00FF), uint32_t(0x00FF00FF)), // Groups bits by 8 on, 8 off - emulated_uint64_t::create(uint32_t(0x0000FFFF), uint32_t(0x0000FFFF)), // Groups bits by 16 on, 16 off - emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0xFFFFFFFF)) };// Groups bits by 32 on, 32 off + NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[6] = { emulated_uint64_t::create(uint32_t(0x55555555), uint32_t(0x55555555)), + emulated_uint64_t::create(uint32_t(0x33333333), uint32_t(0x33333333)), + emulated_uint64_t::create(uint32_t(0x0F0F0F0F), uint32_t(0x0F0F0F0F)), + emulated_uint64_t::create(uint32_t(0x00FF00FF), uint32_t(0x00FF00FF)), + emulated_uint64_t::create(uint32_t(0x0000FFFF), uint32_t(0x0000FFFF)), + emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0xFFFFFFFF)) }; arithmetic_right_shift_operator rightShift; @@ -84,11 +307,11 @@ struct MortonDecoder<3, Bits, emulated_uint64_t> conditional_value<(Bits <= 6), uint16_t, uint16_t(2), conditional_value<(Bits <= 12), uint16_t, uint16_t(3), uint16_t(4)>::value>::value>::value; - NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[5] = { emulated_uint64_t::create(uint32_t(0x12492492), uint32_t(0x49249249)), // Groups bits by 1 on, 2 off (also only considers 21 bits) - emulated_uint64_t::create(uint32_t(0x01C0E070), uint32_t(0x381C0E07)), // Groups bits by 3 on, 6 off - emulated_uint64_t::create(uint32_t(0x0FC003F0), uint32_t(0x00FC003F)), // Groups bits by 6 on, 12 off - emulated_uint64_t::create(uint32_t(0x0000FFF0), uint32_t(0x00000FFF)), // Groups bits by 12 on, 24 off - emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0x00FFFFFF)) };// Groups bits by 24 on, 48 off (40 off if you're feeling pedantic) + NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[5] = { emulated_uint64_t::create(uint32_t(0x12492492), uint32_t(0x49249249)), (also only considers 21 bits) + emulated_uint64_t::create(uint32_t(0x01C0E070), uint32_t(0x381C0E07)), + emulated_uint64_t::create(uint32_t(0x0FC003F0), uint32_t(0x00FC003F)), + emulated_uint64_t::create(uint32_t(0x0000FFF0), uint32_t(0x00000FFF)), + emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0x00FFFFFF)) }; (40 off if you're feeling pedantic) arithmetic_right_shift_operator rightShift; @@ -112,11 +335,11 @@ struct MortonDecoder<4, Bits, emulated_uint64_t> { NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); - NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[5] = { emulated_uint64_t::create(uint32_t(0x11111111), uint32_t(0x11111111)), // Groups bits by 1 on, 3 off - emulated_uint64_t::create(uint32_t(0x03030303), uint32_t(0x03030303)), // Groups bits by 2 on, 6 off - emulated_uint64_t::create(uint32_t(0x000F000F), uint32_t(0x000F000F)), // Groups bits by 4 on, 12 off - emulated_uint64_t::create(uint32_t(0x000000FF), uint32_t(0x000000FF)), // Groups bits by 8 on, 24 off - emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0x0000FFFF)) };// Groups bits by 16 on, 48 off + NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[5] = { emulated_uint64_t::create(uint32_t(0x11111111), uint32_t(0x11111111)), + emulated_uint64_t::create(uint32_t(0x03030303), uint32_t(0x03030303)), + emulated_uint64_t::create(uint32_t(0x000F000F), uint32_t(0x000F000F)), + emulated_uint64_t::create(uint32_t(0x000000FF), uint32_t(0x000000FF)), + emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0x0000FFFF)) }; 
arithmetic_right_shift_operator rightShift; @@ -207,14 +430,9 @@ struct MortonDecoder<4, Bits, encode_t> } }; -} //namespace impl +*/ -// Up to D = 4 supported -#define NBL_HLSL_MORTON_MASKS(U, D) _static_cast > (vector< U , 4 >(impl::decode_mask_v< U , D >,\ - impl::decode_mask_v< U , D > << U (1),\ - impl::decode_mask_v< U , D > << U (2),\ - impl::decode_mask_v< U , D > << U (3)\ - )) +} //namespace impl // Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 // In particular, `Masks` should be a `const static` member field instead of appearing in every method using it @@ -227,10 +445,15 @@ struct code storage_t value; -}; -// Don't forget to delete this macro after usage -#undef NBL_HLSL_MORTON_MASKS + // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- + + #ifndef __HLSL_VERSION + + code() = default; + + #endif +}; } //namespace morton } //namespace hlsl From 799420e9dfa1f8bd8039fd724edea4ecf3133a87 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Fri, 4 Apr 2025 16:20:54 -0300 Subject: [PATCH 16/28] Fix conversion operators --- .../nbl/builtin/hlsl/emulated/uint64_t.hlsl | 33 ++- include/nbl/builtin/hlsl/morton.hlsl | 279 ++++++------------ 2 files changed, 116 insertions(+), 196 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl index 3794031c8e..ab08e1ff38 100644 --- a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl @@ -24,9 +24,6 @@ struct emulated_uint64_t emulated_uint64_t() = default; - // To immediately get compound operators and functional structs in CPP side - explicit emulated_uint64_t(const storage_t _data) : data(_data) {} - #endif /** @@ -52,6 +49,16 @@ struct emulated_uint64_t return create(storage_t(hi, lo)); } + /** + * @brief Creates an `emulated_uint64_t` from a `uint64_t`. Useful for compile-time encoding. 
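+     * For example (illustrative value): `create(0x0123456789ABCDEFull)` unpacks into `hi = 0x01234567` and `lo = 0x89ABCDEF`.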
+ * + * @param [in] _data `uint64_t` to be unpacked into high and low bits + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint64_t) u) + { + return create(_static_cast(u >> 32), _static_cast(u)); + } + // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC @@ -115,9 +122,11 @@ struct left_shift_operator NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) { + if (!bits) + return operand; const uint32_t _bits = uint32_t(bits); const uint32_t shift = ComponentBitWidth - _bits; - const uint32_t higherBitsMask = ~uint32_t(0) << shift; + const uint32_t higherBitsMask = (~uint32_t(0)) << shift; // We need the `x` component of the vector (which represents the higher bits of the emulated uint64) to get the `bits` higher bits of the `y` component const vector retValData = { (operand.data.x << _bits) | ((operand.data.y & higherBitsMask) >> shift), operand.data.y << _bits }; return emulated_uint64_t::create(retValData); @@ -132,6 +141,8 @@ struct arithmetic_right_shift_operator NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) { + if (!bits) + return operand; const uint32_t _bits = uint32_t(bits); const uint32_t shift = ComponentBitWidth - _bits; const uint32_t lowerBitsMask = ~uint32_t(0) >> shift; @@ -173,10 +184,10 @@ struct static_cast_helper NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) > sizeof(uint32_t))) -struct static_cast_helper && (sizeof(Unsigned) > sizeof(uint32_t))) > +template<> +struct static_cast_helper { - using To = Unsigned; + using To = uint64_t; using From = emulated_uint64_t; NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) @@ -199,15 +210,15 @@ struct static_cast_helper NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) > sizeof(uint32_t))) -struct static_cast_helper && (sizeof(Unsigned) > sizeof(uint32_t))) > +template<> +struct static_cast_helper { using To = emulated_uint64_t; - using From = Unsigned; + using From = uint64_t; NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) { - return emulated_uint64_t::create(_static_cast(u >> 32), _static_cast(u)); + return emulated_uint64_t::create(u); } }; diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index e2e1596587..07aa21b821 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -46,44 +46,41 @@ NBL_CONSTEXPR T decode_mask_v = decode_mask::value; #ifndef __HLSL_VERSION -#define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_HIGH_VALUE, HEX_LOW_VALUE) template<> struct morton_mask_##DIM##_##MASK##\ -{\ - NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t value = emulated_uint64_t::create(uint32_t(0x##HEX_HIGH_VALUE), uint32_t(0x##HEX_LOW_VALUE));\ -}; +#define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_VALUE) #else -#define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_HIGH_VALUE, HEX_LOW_VALUE) template<> struct morton_mask_##DIM##_##MASK##\ +#define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_VALUE) template<> struct morton_mask_##DIM##_##MASK##\ {\ NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t value;\ };\ -NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t morton_mask_##DIM##_##MASK##::value = emulated_uint64_t::create(uint32_t(0x##HEX_HIGH_VALUE), uint32_t(0x##HEX_LOW_VALUE)); +NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t 
morton_mask_##DIM##_##MASK##::value = _static_cast(HEX_VALUE); #endif -#define NBL_MORTON_DECODE_MASK(DIM, MASK, HEX_HIGH_VALUE, HEX_LOW_VALUE) template struct morton_mask_##DIM##_##MASK ;\ - NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_HIGH_VALUE, HEX_LOW_VALUE)\ - NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, 0x##HEX_HIGH_VALUE##HEX_LOW_VALUE)\ +#define NBL_MORTON_DECODE_MASK(DIM, MASK, HEX_VALUE) template struct morton_mask_##DIM##_##MASK ;\ + NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_VALUE)\ + NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, HEX_VALUE)\ template\ NBL_CONSTEXPR T morton_mask_##DIM##_##MASK##_v = morton_mask_##DIM##_##MASK##::value; -NBL_MORTON_DECODE_MASK(2, 0, 55555555, 55555555) // Groups bits by 1 on, 1 off -NBL_MORTON_DECODE_MASK(2, 1, 33333333, 33333333) // Groups bits by 2 on, 2 off -NBL_MORTON_DECODE_MASK(2, 2, 0F0F0F0F, 0F0F0F0F) // Groups bits by 4 on, 4 off -NBL_MORTON_DECODE_MASK(2, 3, 00FF00FF, 00FF00FF) // Groups bits by 8 on, 8 off -NBL_MORTON_DECODE_MASK(2, 4, 0000FFFF, 0000FFFF) // Groups bits by 16 on, 16 off -NBL_MORTON_DECODE_MASK(2, 5, 00000000, FFFFFFFF) // Groups bits by 32 on, 32 off - -NBL_MORTON_DECODE_MASK(3, 0, 12492492, 49249249) // Groups bits by 1 on, 2 off - also limits each dimension to 21 bits -NBL_MORTON_DECODE_MASK(3, 1, 01C0E070, 381C0E07) // Groups bits by 3 on, 6 off -NBL_MORTON_DECODE_MASK(3, 2, 0FC003F0, 00FC003F) // Groups bits by 6 on, 12 off -NBL_MORTON_DECODE_MASK(3, 3, 0000FFF0, 00000FFF) // Groups bits by 12 on, 24 off -NBL_MORTON_DECODE_MASK(3, 4, 00000000, 00FFFFFF) // Groups bits by 24 on, 48 off - -NBL_MORTON_DECODE_MASK(4, 0, 11111111, 11111111) // Groups bits by 1 on, 3 off -NBL_MORTON_DECODE_MASK(4, 1, 03030303, 03030303) // Groups bits by 2 on, 6 off -NBL_MORTON_DECODE_MASK(4, 2, 000F000F, 000F000F) // Groups bits by 4 on, 12 off -NBL_MORTON_DECODE_MASK(4, 3, 000000FF, 000000FF) // Groups bits by 8 on, 24 off -NBL_MORTON_DECODE_MASK(4, 4, 00000000, 0000FFFF) // Groups bits by 16 on, 48 off +NBL_MORTON_DECODE_MASK(2, 0, uint64_t(0x5555555555555555)) // Groups bits by 1 on, 1 off +NBL_MORTON_DECODE_MASK(2, 1, uint64_t(0x3333333333333333)) // Groups bits by 2 on, 2 off +NBL_MORTON_DECODE_MASK(2, 2, uint64_t(0x0F0F0F0F0F0F0F0F)) // Groups bits by 4 on, 4 off +NBL_MORTON_DECODE_MASK(2, 3, uint64_t(0x00FF00FF00FF00FF)) // Groups bits by 8 on, 8 off +NBL_MORTON_DECODE_MASK(2, 4, uint64_t(0x0000FFFF0000FFFF)) // Groups bits by 16 on, 16 off +NBL_MORTON_DECODE_MASK(2, 5, uint64_t(0x00000000FFFFFFFF)) // Groups bits by 32 on, 32 off + +NBL_MORTON_DECODE_MASK(3, 0, uint64_t(0x1249249249249249)) // Groups bits by 1 on, 2 off - also limits each dimension to 21 bits +NBL_MORTON_DECODE_MASK(3, 1, uint64_t(0x01C0E070381C0E07)) // Groups bits by 3 on, 6 off +NBL_MORTON_DECODE_MASK(3, 2, uint64_t(0x0FC003F000FC003F)) // Groups bits by 6 on, 12 off +NBL_MORTON_DECODE_MASK(3, 3, uint64_t(0x0000FFF000000FFF)) // Groups bits by 12 on, 24 off +NBL_MORTON_DECODE_MASK(3, 4, uint64_t(0x0000000000FFFFFF)) // Groups bits by 24 on, 48 off + +NBL_MORTON_DECODE_MASK(4, 0, uint64_t(0x1111111111111111)) // Groups bits by 1 on, 3 off +NBL_MORTON_DECODE_MASK(4, 1, uint64_t(0x0303030303030303)) // Groups bits by 2 on, 6 off +NBL_MORTON_DECODE_MASK(4, 2, uint64_t(0x000F000F000F000F)) // Groups bits by 4 on, 12 off +NBL_MORTON_DECODE_MASK(4, 3, uint64_t(0x000000FF000000FF)) // Groups bits by 8 on, 24 off +NBL_MORTON_DECODE_MASK(4, 4, uint64_t(0x000000000000FFFF)) // Groups bits by 16 on, 48 off #undef NBL_MORTON_DECODE_MASK #undef 
NBL_MORTON_EMULATED_DECODE_MASK @@ -269,193 +266,105 @@ struct MortonDecoder<4, Bits, encode_t> } }; -/* -template -struct MortonDecoder<2, Bits, emulated_uint64_t> -{ - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(emulated_uint64_t) encodedValue) - { - NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); +} //namespace impl - NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[6] = { emulated_uint64_t::create(uint32_t(0x55555555), uint32_t(0x55555555)), - emulated_uint64_t::create(uint32_t(0x33333333), uint32_t(0x33333333)), - emulated_uint64_t::create(uint32_t(0x0F0F0F0F), uint32_t(0x0F0F0F0F)), - emulated_uint64_t::create(uint32_t(0x00FF00FF), uint32_t(0x00FF00FF)), - emulated_uint64_t::create(uint32_t(0x0000FFFF), uint32_t(0x0000FFFF)), - emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0xFFFFFFFF)) }; +// Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 +// In particular, `Masks` should be a `const static` member field instead of appearing in every method using it +template && D * Bits <= 64) +struct code +{ + using this_t = code; + NBL_CONSTEXPR_STATIC uint16_t TotalBitWidth = D * Bits; + using storage_t = conditional_t<(TotalBitWidth > 16), conditional_t<(TotalBitWidth > 32), _uint64_t, uint32_t>, uint16_t>; - arithmetic_right_shift_operator rightShift; + + storage_t value; - emulated_uint64_t decoded = encodedValue & DecodeMasks[0]; - [[unroll]] - for (uint16_t i = 0, shift = 1; i < MaxIterations; i++, shift <<= 1) - { - decoded = (decoded | rightShift(decoded, shift)) & DecodeMasks[i + 1]; - } - return _static_cast(decoded.data.y); - } -}; + // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- -template -struct MortonDecoder<3, Bits, emulated_uint64_t> -{ - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(emulated_uint64_t) encodedValue) - { - NBL_CONSTEXPR_STATIC uint16_t MaxIterations = conditional_value<(Bits <= 3), uint16_t, uint16_t(1), - conditional_value<(Bits <= 6), uint16_t, uint16_t(2), - conditional_value<(Bits <= 12), uint16_t, uint16_t(3), uint16_t(4)>::value>::value>::value; + #ifndef __HLSL_VERSION - NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[5] = { emulated_uint64_t::create(uint32_t(0x12492492), uint32_t(0x49249249)), (also only considers 21 bits) - emulated_uint64_t::create(uint32_t(0x01C0E070), uint32_t(0x381C0E07)), - emulated_uint64_t::create(uint32_t(0x0FC003F0), uint32_t(0x00FC003F)), - emulated_uint64_t::create(uint32_t(0x0000FFF0), uint32_t(0x00000FFF)), - emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0x00FFFFFF)) }; (40 off if you're feeling pedantic) + code() = default; - arithmetic_right_shift_operator rightShift; + #endif - emulated_uint64_t decoded = encodedValue & DecodeMasks[0]; - // First iteration is special - decoded = (decoded | rightShift(decoded, 2) | rightShift(decoded, 4)) & DecodeMasks[1]; + /** + * @brief Creates a Morton code from a set of integral cartesian coordinates + * + * @param [in] cartesian Coordinates to encode. 
Signedness MUST match the signedness of this Morton code class + */ + template + NBL_CONSTEXPR_STATIC_FUNC enable_if_t && is_scalar_v && (is_signed_v == Signed), this_t> + create(NBL_CONST_REF_ARG(vector) cartesian) + { + using U = make_unsigned_t; + left_shift_operator leftShift; + storage_t encodedCartesian = _static_cast(uint64_t(0)); [[unroll]] - for (uint16_t i = 0, shift = 6; i < MaxIterations - 1; i++, shift <<= 1) + for (uint16_t i = 0; i < D; i++) { - decoded = (decoded | rightShift(decoded, shift)) & DecodeMasks[i + 2]; + encodedCartesian = encodedCartesian | leftShift(impl::MortonEncoder::encode(_static_cast(cartesian[i])), i); } - return _static_cast(decoded.data.y); + this_t retVal; + retVal.value = encodedCartesian; + return retVal; } -}; -template -struct MortonDecoder<4, Bits, emulated_uint64_t> -{ - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(emulated_uint64_t) encodedValue) - { - NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); - - NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[5] = { emulated_uint64_t::create(uint32_t(0x11111111), uint32_t(0x11111111)), - emulated_uint64_t::create(uint32_t(0x03030303), uint32_t(0x03030303)), - emulated_uint64_t::create(uint32_t(0x000F000F), uint32_t(0x000F000F)), - emulated_uint64_t::create(uint32_t(0x000000FF), uint32_t(0x000000FF)), - emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0x0000FFFF)) }; + // CPP can also have an actual constructor + #ifndef __HLSL_VERSION - arithmetic_right_shift_operator rightShift; + /** + * @brief Creates a Morton code from a set of cartesian coordinates + * + * @param [in] cartesian Coordinates to encode + */ - emulated_uint64_t decoded = encodedValue & DecodeMasks[0]; - [[unroll]] - for (uint16_t i = 0, shift = 3; i < MaxIterations; i++, shift <<= 1) - { - decoded = (decoded | rightShift(decoded, shift)) & DecodeMasks[i + 1]; - } - return _static_cast(decoded.data.y); - } -}; - -template -struct MortonDecoder<2, Bits, encode_t> -{ - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + template + explicit code(NBL_CONST_REF_ARG(vector) cartesian) { - NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); - - NBL_CONSTEXPR_STATIC encode_t DecodeMasks[6] = { _static_cast(0x5555555555555555), // Groups bits by 1 on, 1 off - _static_cast(0x3333333333333333), // Groups bits by 2 on, 2 off - _static_cast(0x0F0F0F0F0F0F0F0F), // Groups bits by 4 on, 4 off - _static_cast(0x00FF00FF00FF00FF), // Groups bits by 8 on, 8 off - _static_cast(0x0000FFFF0000FFFF), // Groups bits by 16 on, 16 off - _static_cast(0x00000000FFFFFFFF) };// Groups bits by 32 on, 32 off - - encode_t decoded = encodedValue & DecodeMasks[0]; - [[unroll]] - for (uint16_t i = 0, shift = 1; i < MaxIterations; i++, shift <<= 1) - { - decoded = (decoded | (decoded >> shift)) & DecodeMasks[i + 1]; - } - return _static_cast(decoded); + *this = create(cartesian); } -}; -template -struct MortonDecoder<3, Bits, encode_t> -{ - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + // This one is defined later since it requires `static_cast_helper` specialization + + /** + * @brief Decodes this Morton code back to a set of cartesian coordinates + */ + + template + explicit operator vector() const noexcept { - NBL_CONSTEXPR_STATIC uint16_t MaxIterations = conditional_value<(Bits <= 3), uint16_t, uint16_t(1), - 
conditional_value<(Bits <= 6), uint16_t, uint16_t(2), - conditional_value<(Bits <= 12), uint16_t, uint16_t(3), uint16_t(4)>::value>::value>::value; - - NBL_CONSTEXPR_STATIC encode_t DecodeMasks[5] = { _static_cast(0x1249249249249249), // Groups bits by 1 on, 2 off (also only considers 21 bits) - _static_cast(0x01C0E070381C0E07), // Groups bits by 3 on, 6 off - _static_cast(0x0FC003F000FC003F), // Groups bits by 6 on, 12 off - _static_cast(0x0000FFF000000FFF), // Groups bits by 12 on, 24 off - _static_cast(0x0000000000FFFFFF) };// Groups bits by 24 on, 48 off (40 off if you're feeling pedantic) - - encode_t decoded = encodedValue & DecodeMasks[0]; - // First iteration is special - decoded = (decoded | (decoded >> 2) | (decoded >> 4)) & DecodeMasks[1]; - [[unroll]] - for (uint16_t i = 0, shift = 6; i < MaxIterations - 1; i++, shift <<= 1) - { - decoded = (decoded | (decoded >> shift)) & DecodeMasks[i + 2]; - } - return _static_cast(decoded); + return _static_cast, morton::code, Bits, D>>(*this); } + + #endif }; -template -struct MortonDecoder<4, Bits, encode_t> +} //namespace morton + +// Specialize the `static_cast_helper` +namespace impl { - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) +// I must be of same signedness as the morton code, and be wide enough to hold each component +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (8 * sizeof(I) >= Bits)) +struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (8 * sizeof(I) >= Bits)) > +{ + NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(morton::code, Bits, D, _uint64_t>) val) { - NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); - - NBL_CONSTEXPR_STATIC encode_t DecodeMasks[5] = { _static_cast(0x1111111111111111), // Groups bits by 1 on, 3 off - _static_cast(0x0303030303030303), // Groups bits by 2 on, 6 off - _static_cast(0x000F000F000F000F), // Groups bits by 4 on, 12 off - _static_cast(0x000000FF000000FF), // Groups bits by 8 on, 24 off - _static_cast(0x000000000000FFFF) };// Groups bits by 16 on, 48 off - - encode_t decoded = encodedValue & DecodeMasks[0]; - [[unroll]] - for (uint16_t i = 0, shift = 3; i < MaxIterations; i++, shift <<= 1) + using U = make_unsigned_t; + using storage_t = typename morton::code, Bits, D, _uint64_t>::storage_t; + arithmetic_right_shift_operator rightShift; + vector cartesian; + for (uint16_t i = 0; i < D; i++) { - decoded = (decoded | (decoded >> shift)) & DecodeMasks[i + 1]; + cartesian[i] = _static_cast(morton::impl::MortonDecoder::template decode(rightShift(val.value, i))); } - return _static_cast(decoded); + return cartesian; } }; -*/ - -} //namespace impl +} // namespace impl -// Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 -// In particular, `Masks` should be a `const static` member field instead of appearing in every method using it -template && D * Bits <= 64) -struct code -{ - using this_t = code; - NBL_CONSTEXPR_STATIC uint16_t TotalBitWidth = D * Bits; - using storage_t = conditional_t<(TotalBitWidth > 16), conditional_t<(TotalBitWidth > 32), _uint64_t, uint32_t>, uint16_t>; - - - storage_t value; - - // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- - - #ifndef __HLSL_VERSION - - code() = default; - - #endif -}; - -} //namespace morton } //namespace hlsl } //namespace 
nbl From 52323bc1f67e58b547c65be11ae9ac9d08e8e4ed Mon Sep 17 00:00:00 2001 From: Fletterio Date: Fri, 4 Apr 2025 23:45:39 -0300 Subject: [PATCH 17/28] Finish the rest of comparison ops and we're done! --- .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 2 + include/nbl/builtin/hlsl/functional.hlsl | 23 +- include/nbl/builtin/hlsl/morton.hlsl | 231 +++++++++++++++++- .../nbl/builtin/hlsl/portable/uint64_t.hlsl | 30 +++ .../nbl/builtin/hlsl/portable/vector_t.hlsl | 18 ++ src/nbl/builtin/CMakeLists.txt | 1 + 6 files changed, 294 insertions(+), 11 deletions(-) create mode 100644 include/nbl/builtin/hlsl/portable/uint64_t.hlsl diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index 0053008aa4..a106cec440 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -2,6 +2,7 @@ #define _NBL_BUILTIN_HLSL_EMULATED_VECTOR_T_HLSL_INCLUDED_ #include +#include #include #include #include @@ -329,6 +330,7 @@ struct emulated_vector : CRTP DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) + DEFINE_OPERATORS_FOR_TYPE(emulated_uint64_t) DEFINE_OPERATORS_FOR_TYPE(float32_t) DEFINE_OPERATORS_FOR_TYPE(float64_t) DEFINE_OPERATORS_FOR_TYPE(uint16_t) diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 3cf24193a4..e5486e2727 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -195,7 +195,7 @@ struct maximum NBL_CONSTEXPR_STATIC_INLINE T identity = numeric_limits::lowest; // TODO: `all_components` }; -template +template struct ternary_operator { using type_t = T; @@ -206,7 +206,7 @@ struct ternary_operator } }; -template +template struct left_shift_operator { using type_t = T; @@ -217,7 +217,7 @@ struct left_shift_operator } }; -template +template struct arithmetic_right_shift_operator { using type_t = T; @@ -228,9 +228,20 @@ struct arithmetic_right_shift_operator } }; -// Declare template, but left unimplemented by default -template -struct logical_right_shift_operator; +template +struct logical_right_shift_operator +{ + using type_t = T; + using unsigned_type_t = make_unsigned_t; + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + arithmetic_right_shift_operator arithmeticRightShift; + return _static_cast(arithmeticRightShift(_static_cast(operand), _static_cast(bits))); + } +}; + + } //namespace nbl } //namespace hlsl diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 07aa21b821..499deb1db4 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -266,6 +266,47 @@ struct MortonDecoder<4, Bits, encode_t> } }; +// ---------------------------------------------------- COMPARISON OPERATORS --------------------------------------------------------------- +// Here because no partial specialization of methods + +template +struct Equals; + +template +struct Equals +{ + NBL_CONSTEXPR_INLINE_FUNC vector operator()(NBL_CONST_REF_ARG(storage_t) _value, NBL_CONST_REF_ARG(vector) rhs) + { + NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v; + vector retVal; + [[unroll]] + for (uint16_t i = 0; i < D; i++) + { + retVal[i] = (_value & rhs[i]) == rhs[i]; + } + return retVal; + } +}; + +template +struct Equals +{ + template + NBL_CONSTEXPR_INLINE_FUNC enable_if_t&& is_scalar_v && 
(is_signed_v == Signed), vector > + operator()(NBL_CONST_REF_ARG(storage_t) _value, NBL_CONST_REF_ARG(vector) rhs) + { + using U = make_unsigned_t; + vector interleaved; + [[unroll]] + for (uint16_t i = 0; i < D; i++) + { + interleaved[i] = impl::MortonEncoder::encode(_static_cast(rhs[i])); + } + Equals equals; + return equals(_value, interleaved); + } +}; + } //namespace impl // Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 @@ -274,10 +315,10 @@ template; + using this_signed_t = code; NBL_CONSTEXPR_STATIC uint16_t TotalBitWidth = D * Bits; using storage_t = conditional_t<(TotalBitWidth > 16), conditional_t<(TotalBitWidth > 32), _uint64_t, uint32_t>, uint16_t>; - storage_t value; // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- @@ -325,26 +366,205 @@ struct code *this = create(cartesian); } - // This one is defined later since it requires `static_cast_helper` specialization - /** * @brief Decodes this Morton code back to a set of cartesian coordinates */ - template - explicit operator vector() const noexcept + constexpr inline explicit operator vector() const noexcept { return _static_cast, morton::code, Bits, D>>(*this); } #endif + + // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value & rhs.value; + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value | rhs.value; + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value ^ rhs.value; + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = ~value; + return retVal; + } + + // Only valid in CPP + #ifndef __HLSL_VERSION + + constexpr inline this_t operator<<(uint16_t bits) const; + + constexpr inline this_t operator>>(uint16_t bits) const; + + #endif + + // ------------------------------------------------------- UNARY ARITHMETIC OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_signed_t operator-() NBL_CONST_MEMBER_FUNC + { + left_shift_operator leftShift; + // allOnes encodes a cartesian coordinate with all values set to 1 + this_t allOnes; + allOnes.value = leftShift(_static_cast(1), D) - _static_cast(1); + // Using 2's complement property that arithmetic negation can be obtained by bitwise negation then adding 1 + this_signed_t retVal; + retVal.value = (operator~() + allOnes).value; + return retVal; + } + + // ------------------------------------------------------- BINARY ARITHMETIC OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v; + left_shift_operator leftShift; + this_t retVal; + retVal.value = _static_cast(uint64_t(0)); + [[unroll]] + for (uint16_t i = 0; i < D; i++) + { + // put 1 bits everywhere in the bits the current axis is not using + // then extract just the axis bits for the right hand coordinate + // carry-1 will propagate the bits across the already set bits + 
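// (editor's worked example of the carry trick, assuming D == 2 and 4-bit lanes, so Mask == 0b0101:
+            //  an x-lane of 1 with y == 0 makes `value` 0b0001; 0b0001 | ~0b0101 == 0b1011; adding rhs's
+            //  x-lane 0b0001 yields 0b1100; masking with 0b0101 leaves 0b0100, i.e. x == 2 -- the filler
+            //  1 in bit 1 forwarded the carry from bit 0 straight to bit 2)
+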
// then clear out the bits not belonging to current axis
+            // Note: It's possible to clear on `this` and fill on `rhs` but that will
+            // disable optimizations, we expect the compiler to optimize a lot if the
+            // value of `rhs` is known at compile time, e.g. `static_cast>(glm::ivec3(1,0,0))`
+            retVal.value |= ((value | (~leftShift(Mask, i))) + (rhs.value & leftShift(Mask, i))) & leftShift(Mask, i);
+        }
+        return retVal;
+    }
+
+    NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    {
+        NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v;
+        left_shift_operator leftShift;
+        this_t retVal;
+        retVal.value = _static_cast(uint64_t(0));
+        [[unroll]]
+        for (uint16_t i = 0; i < D; i++)
+        {
+            // This is the dual trick of the one used for addition: set all other bits to 0 so borrows propagate
+            retVal.value |= ((value & leftShift(Mask, i)) - (rhs.value & leftShift(Mask, i))) & leftShift(Mask, i);
+        }
+        return retVal;
+    }
+
+    // ------------------------------------------------------- COMPARISON OPERATORS -------------------------------------------------
+
+    NBL_CONSTEXPR_INLINE_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    {
+        return value == rhs.value;
+    }
+
+    template
+    enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator==(NBL_CONST_REF_ARG(vector) rhs)
+    {
+        impl::Equals equals;
+        return equals(value, rhs);
+    }
+
+    NBL_CONSTEXPR_INLINE_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    {
+        return value != rhs.value;
+    }
+
+    template
+    enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator!=(NBL_CONST_REF_ARG(vector) rhs)
+    {
+        return !operator==(rhs);
+    }
 };
 
 } //namespace morton
 
+template
+struct left_shift_operator >
+{
+    using type_t = morton::code;
+    using storage_t = typename type_t::storage_t;
+
+    NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits)
+    {
+        left_shift_operator valueLeftShift;
+        type_t retVal;
+        // Shift every coordinate by `bits`
+        retVal.value = valueLeftShift(operand.value, bits * D);
+        return retVal;
+    }
+};
+
+template
+struct arithmetic_right_shift_operator >
+{
+    using type_t = morton::code;
+    using storage_t = typename type_t::storage_t;
+
+    NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits)
+    {
+        arithmetic_right_shift_operator valueArithmeticRightShift;
+        type_t retVal;
+        // Shift every coordinate by `bits`
+        retVal.value = valueArithmeticRightShift(operand.value, bits * D);
+        return retVal;
+    }
+};
+
+// This one's uglier - have to unpack to get the expected behaviour
+template
+struct arithmetic_right_shift_operator >
+{
+    using type_t = morton::code;
+    using scalar_t = conditional_t<(Bits > 16), int32_t, int16_t>;
+
+    NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits)
+    {
+        vector cartesian = _static_cast >(operand);
+        // assign the shifted result back - a bare `cartesian >> scalar_t(bits);` discards the result
+        cartesian = cartesian >> scalar_t(bits);
+        return type_t::create(cartesian);
+    }
+};
+
+#ifndef __HLSL_VERSION
+
+template&& D * Bits <= 64)
+constexpr inline morton::code morton::code::operator<<(uint16_t bits) const
+{
+    left_shift_operator> leftShift;
+    return leftShift(*this, bits);
+}
+
+template&& D * Bits <= 64)
+constexpr inline morton::code morton::code::operator>>(uint16_t bits) const
+{
+    arithmetic_right_shift_operator> rightShift;
+    return rightShift(*this, bits);
+}
+
+#endif
+
 // Specialize the `static_cast_helper`
 namespace impl
 {
+
 // I must be of
same signedness as the morton code, and be wide enough to hold each component template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (8 * sizeof(I) >= Bits)) struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (8 * sizeof(I) >= Bits)) > @@ -355,6 +575,7 @@ struct static_cast_helper, morton::code, Bits, D, _u using storage_t = typename morton::code, Bits, D, _uint64_t>::storage_t; arithmetic_right_shift_operator rightShift; vector cartesian; + [[unroll]] for (uint16_t i = 0; i < D; i++) { cartesian[i] = _static_cast(morton::impl::MortonDecoder::template decode(rightShift(val.value, i))); diff --git a/include/nbl/builtin/hlsl/portable/uint64_t.hlsl b/include/nbl/builtin/hlsl/portable/uint64_t.hlsl new file mode 100644 index 0000000000..ac081234ac --- /dev/null +++ b/include/nbl/builtin/hlsl/portable/uint64_t.hlsl @@ -0,0 +1,30 @@ +#ifndef _NBL_BUILTIN_HLSL_PORTABLE_UINT64_T_INCLUDED_ +#define _NBL_BUILTIN_HLSL_PORTABLE_UINT64_T_INCLUDED_ + +#include +#include + +// define NBL_FORCE_EMULATED_UINT_64 to force using emulated uint64 + +namespace nbl +{ +namespace hlsl +{ +template +#ifdef __HLSL_VERSION +#ifdef NBL_FORCE_EMULATED_UINT_64 +using portable_uint64_t = emulated_uint64_t; +#else +using portable_uint64_t = typename conditional::shaderInt64, uint64_t, emulated_uint64_t>::type; +#endif + +#else +using portable_uint64_t = uint64_t; +#endif + +//static_assert(sizeof(portable_uint64_t) == sizeof(uint64_t)); + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/portable/vector_t.hlsl b/include/nbl/builtin/hlsl/portable/vector_t.hlsl index ace199e20b..dcaea97739 100644 --- a/include/nbl/builtin/hlsl/portable/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/portable/vector_t.hlsl @@ -36,19 +36,37 @@ template using portable_vector_t4 = portable_vector_t; #ifdef __HLSL_VERSION +// Float template using portable_float64_t2 = portable_vector_t2 >; template using portable_float64_t3 = portable_vector_t3 >; template using portable_float64_t4 = portable_vector_t4 >; + +// Uint +template +using portable_uint64_t2 = portable_vector_t2 >; +template +using portable_uint64_t3 = portable_vector_t3 >; +template +using portable_uint64_t4 = portable_vector_t4 >; #else +// Float template using portable_float64_t2 = portable_vector_t2; template using portable_float64_t3 = portable_vector_t3; template using portable_float64_t4 = portable_vector_t4; + +// Uint +template +using portable_uint64_t2 = portable_vector_t2; +template +using portable_uint64_t3 = portable_vector_t3; +template +using portable_uint64_t4 = portable_vector_t4; #endif } diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index a11a26d69a..d7005a1ed6 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -219,6 +219,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/vector_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/matrix_t.hlsl") # portable LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/float64_t.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/uint64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/vector_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/matrix_t.hlsl") # ieee754 From b6b70030434018a9e70ea4c52c86d48c135cc94e Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 7 Apr 2025 19:41:08 -0300 Subject: [PATCH 18/28] Final Mortons --- .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 488 
++++++++++++++++++ .../nbl/builtin/hlsl/emulated/uint64_t.hlsl | 232 --------- .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 3 +- include/nbl/builtin/hlsl/morton.hlsl | 107 +++- .../nbl/builtin/hlsl/portable/int64_t.hlsl | 31 ++ .../nbl/builtin/hlsl/portable/uint64_t.hlsl | 30 -- .../nbl/builtin/hlsl/portable/vector_t.hlsl | 17 + src/nbl/builtin/CMakeLists.txt | 4 +- 8 files changed, 641 insertions(+), 271 deletions(-) create mode 100644 include/nbl/builtin/hlsl/emulated/int64_t.hlsl delete mode 100644 include/nbl/builtin/hlsl/emulated/uint64_t.hlsl create mode 100644 include/nbl/builtin/hlsl/portable/int64_t.hlsl delete mode 100644 include/nbl/builtin/hlsl/portable/uint64_t.hlsl diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl new file mode 100644 index 0000000000..f3269cc6ba --- /dev/null +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -0,0 +1,488 @@ +#ifndef _NBL_BUILTIN_HLSL_EMULATED_INT64_T_HLSL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_EMULATED_INT64_T_HLSL_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/functional.hlsl" +#include "nbl/builtin/hlsl/concepts/core.hlsl" + +namespace nbl +{ +namespace hlsl +{ + +struct emulated_uint64_t +{ + using storage_t = vector; + using this_t = emulated_uint64_t; + + storage_t data; + + // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- + + + #ifndef __HLSL_VERSION + + emulated_uint64_t() = default; + + #endif + + /** + * @brief Creates an `emulated_uint64_t` from a vector of two `uint32_t`s representing its bitpattern + * + * @param [in] _data Vector of `uint32_t` encoding the `uint64_t` being emulated + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) + { + this_t retVal; + retVal.data = _data; + return retVal; + } + + /** + * @brief Creates an `emulated_uint64_t` from two `uint32_t`s representing its bitpattern + * + * @param [in] hi Highest 32 bits of the `uint64` being emulated + * @param [in] lo Lowest 32 bits of the `uint64` being emulated + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) hi, NBL_CONST_REF_ARG(uint32_t) lo) + { + return create(storage_t(hi, lo)); + } + + /** + * @brief Creates an `emulated_uint64_t` from a `uint64_t`. Useful for compile-time encoding. 
+ * + * @param [in] _data `uint64_t` to be unpacked into high and low bits + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint64_t) u) + { + return create(_static_cast(u >> 32), _static_cast(u)); + } + + // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal = create(data & rhs.data); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal = create(data | rhs.data); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal = create(data ^ rhs.data); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC + { + this_t retVal = create(~data); + return retVal; + } + + // Only valid in CPP + #ifndef __HLSL_VERSION + + constexpr inline this_t operator<<(uint16_t bits) const; + + constexpr inline this_t operator>>(uint16_t bits) const; + + #endif + + // ------------------------------------------------------- ARITHMETIC OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + const spirv::AddCarryOutput lowerAddResult = addCarry(data.y, rhs.data.y); + const storage_t addResult = { data.x + rhs.data.x + lowerAddResult.carry, lowerAddResult.result }; + const this_t retVal = create(addResult); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + const spirv::SubBorrowOutput lowerSubResult = subBorrow(data.y, rhs.data.y); + const storage_t subResult = { data.x - rhs.data.x - lowerSubResult.borrow, lowerSubResult.result }; + const this_t retVal = create(subResult); + return retVal; + } + + // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- + NBL_CONSTEXPR_INLINE_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return data.x == rhs.data.x && data.y == rhs.data.y; + } + + NBL_CONSTEXPR_INLINE_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return data.x != rhs.data.x || data.y != rhs.data.y; + } + + NBL_CONSTEXPR_INLINE_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + if (data.x != rhs.data.x) + return data.x < rhs.data.x; + else + return data.y < rhs.data.y; + } + + NBL_CONSTEXPR_INLINE_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + if (data.x != rhs.data.x) + return data.x > rhs.data.x; + else + return data.y > rhs.data.y; + } + + NBL_CONSTEXPR_INLINE_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return !operator>(rhs); + } + + NBL_CONSTEXPR_INLINE_FUNC bool operator>=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return !operator<(rhs); + } +}; + +struct emulated_int64_t : emulated_uint64_t +{ + using base_t = emulated_uint64_t; + using base_t::storage_t; + using this_t = emulated_int64_t; + + // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- + + + #ifndef __HLSL_VERSION + + emulated_int64_t() = default; + + #endif + + /** + * @brief Creates an `emulated_int64_t` from a vector of two 
`uint32_t`s representing its bitpattern + * + * @param [in] _data Vector of `uint32_t` encoding the `int64_t` being emulated + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) + { + return _static_cast(base_t::create(_data)); + } + + /** + * @brief Creates an `emulated_int64_t` from two `uint32_t`s representing its bitpattern + * + * @param [in] hi Highest 32 bits of the `int64` being emulated + * @param [in] lo Lowest 32 bits of the `int64` being emulated + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) hi, NBL_CONST_REF_ARG(uint32_t) lo) + { + return _static_cast(base_t::create(hi, lo)); + } + + /** + * @brief Creates an `emulated_int64_t` from a `int64_t`. Useful for compile-time encoding. + * + * @param [in] _data `int64_t` to be unpacked into high and low bits + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(int64_t) i) + { + return _static_cast(base_t::create(_static_cast(i))); + } + + // Only valid in CPP + #ifndef __HLSL_VERSION + + // Only this one needs to be redefined since it's arithmetic + constexpr inline this_t operator>>(uint16_t bits) const; + + #endif + + // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- + + // Same as unsigned but the topmost bits are compared as signed + NBL_CONSTEXPR_INLINE_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + if (data.x != rhs.data.x) + return _static_cast(data.x) < _static_cast(rhs.data.x); + else + return data.y < rhs.data.y; + } + + NBL_CONSTEXPR_INLINE_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + if (data.x != rhs.data.x) + return _static_cast(data.x) > _static_cast(rhs.data.x); + else + return data.y > rhs.data.y; + } + + NBL_CONSTEXPR_INLINE_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return !operator>(rhs); + } + + NBL_CONSTEXPR_INLINE_FUNC bool operator>=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return !operator<(rhs); + } +}; + +template<> +struct left_shift_operator +{ + using type_t = emulated_uint64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + if (!bits) + return operand; + const uint32_t _bits = uint32_t(bits); + const uint32_t shift = ComponentBitWidth - _bits; + // We need the `x` component of the vector (which represents the higher bits of the emulated uint64) to get the `bits` higher bits of the `y` component + const vector retValData = { (operand.data.x << _bits) | (operand.data.y >> shift), operand.data.y << _bits }; + return emulated_uint64_t::create(retValData); + } +}; + +template<> +struct arithmetic_right_shift_operator +{ + using type_t = emulated_uint64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + if (!bits) + return operand; + const uint32_t _bits = uint32_t(bits); + const uint32_t shift = ComponentBitWidth - _bits; + // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component + const vector retValData = { operand.data.x >> _bits, (operand.data.x << shift) | (operand.data.y >> _bits) }; + return emulated_uint64_t::create(retValData); + } +}; + +template<> +struct 
left_shift_operator +{ + using type_t = emulated_int64_t; + + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + left_shift_operator leftShift; + return _static_cast(leftShift(_static_cast(operand), bits)); + } +}; + +template<> +struct arithmetic_right_shift_operator +{ + using type_t = emulated_int64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + if (!bits) + return operand; + const uint32_t _bits = uint32_t(bits); + const uint32_t shift = ComponentBitWidth - _bits; + // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component + // Also the right shift *only* in the top bits happens as a signed arithmetic right shift + const vector retValData = { _static_cast(_static_cast(operand.data.x)) >> _bits, (operand.data.x << shift) | (operand.data.y >> _bits) }; + return emulated_int64_t::create(retValData); + } +}; + +#ifndef __HLSL_VERSION + +constexpr inline emulated_uint64_t emulated_uint64_t::operator<<(uint16_t bits) const +{ + left_shift_operator leftShift; + return leftShift(*this, bits); +} + +constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(uint16_t bits) const +{ + arithmetic_right_shift_operator rightShift; + return rightShift(*this, bits); +} + +constexpr inline emulated_int64_t emulated_int64_t::operator>>(uint16_t bits) const +{ + arithmetic_right_shift_operator rightShift; + return rightShift(*this, bits); +} + +#endif + +namespace impl +{ + +template<> +struct static_cast_helper +{ + using To = emulated_uint64_t; + using From = emulated_int64_t; + + // Return only the lowest bits + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + { + To retVal; + retVal.data = i.data; + return retVal; + } +}; + +template<> +struct static_cast_helper +{ + using To = emulated_int64_t; + using From = emulated_uint64_t; + + // Return only the lowest bits + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + To retVal; + retVal.data = u.data; + return retVal; + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) <= sizeof(uint32_t))) +struct static_cast_helper && (sizeof(Unsigned) <= sizeof(uint32_t))) > +{ + using To = Unsigned; + using From = emulated_uint64_t; + + // Return only the lowest bits + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + return _static_cast(u.data.y); + } +}; + +template<> +struct static_cast_helper +{ + using To = uint64_t; + using From = emulated_uint64_t; + + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + const To highBits = _static_cast(u.data.x) << To(32); + return highBits | _static_cast(u.data.y); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) <= sizeof(uint32_t))) +struct static_cast_helper && (sizeof(Unsigned) <= sizeof(uint32_t))) > +{ + using To = emulated_uint64_t; + using From = Unsigned; + + // Set only lower bits + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + return emulated_uint64_t::create(uint32_t(0), _static_cast(u)); + } +}; + +template<> +struct static_cast_helper +{ + using To = emulated_uint64_t; + using From = uint64_t; + + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + return emulated_uint64_t::create(u); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::SignedIntegralScalar && (sizeof(Signed) <= sizeof(uint32_t))) +struct 
static_cast_helper && (sizeof(Signed) <= sizeof(uint32_t))) > +{ + using To = Signed; + using From = emulated_int64_t; + + // Return only the lowest bits + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + { + return _static_cast(i.data.y); + } +}; + +template<> +struct static_cast_helper +{ + using To = int64_t; + using From = emulated_int64_t; + + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + { + const To highBits = _static_cast(i.data.x) << To(32); + return highBits | _static_cast(i.data.y); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::SignedIntegralScalar && (sizeof(Signed) <= sizeof(uint32_t))) +struct static_cast_helper && (sizeof(Signed) <= sizeof(uint32_t))) > +{ + using To = emulated_int64_t; + using From = Signed; + + // Set only lower bits + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + { + return emulated_int64_t::create(uint32_t(0), _static_cast(i)); + } +}; + +template<> +struct static_cast_helper +{ + using To = emulated_int64_t; + using From = int64_t; + + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + { + return emulated_int64_t::create(i); + } +}; + +} //namespace impl + +} //namespace nbl +} //namespace hlsl + +#ifndef __HLSL_VERSION +#define NBL_ADD_STD std:: +#else +#define NBL_ADD_STD nbl::hlsl:: +#endif + +template<> +struct NBL_ADD_STD make_unsigned : type_identity {}; + +template<> +struct NBL_ADD_STD make_unsigned : type_identity {}; + +template<> +struct NBL_ADD_STD make_signed : type_identity {}; + +template<> +struct NBL_ADD_STD make_signed : type_identity {}; + +#undef NBL_ADD_STD + + + +#endif diff --git a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl deleted file mode 100644 index ab08e1ff38..0000000000 --- a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl +++ /dev/null @@ -1,232 +0,0 @@ -#ifndef _NBL_BUILTIN_HLSL_EMULATED_UINT64_T_HLSL_INCLUDED_ -#define _NBL_BUILTIN_HLSL_EMULATED_UINT64_T_HLSL_INCLUDED_ - -#include "nbl/builtin/hlsl/cpp_compat.hlsl" -#include "nbl/builtin/hlsl/functional.hlsl" -#include "nbl/builtin/hlsl/concepts/core.hlsl" - -namespace nbl -{ -namespace hlsl -{ - -struct emulated_uint64_t -{ - using storage_t = vector; - using this_t = emulated_uint64_t; - - storage_t data; - - // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- - - - #ifndef __HLSL_VERSION - - emulated_uint64_t() = default; - - #endif - - /** - * @brief Creates an `emulated_uint64_t` from a vector of two `uint32_t`s representing its bitpattern - * - * @param [in] _data Vector of `uint32_t` encoding the `uint64_t` being emulated - */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) - { - this_t retVal; - retVal.data = _data; - return retVal; - } - - /** - * @brief Creates an `emulated_uint64_t` from two `uint32_t`s representing its bitpattern - * - * @param [in] hi Highest 32 bits of the `uint64` being emulated - * @param [in] lo Lowest 32 bits of the `uint64` being emulated - */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) hi, NBL_CONST_REF_ARG(uint32_t) lo) - { - return create(storage_t(hi, lo)); - } - - /** - * @brief Creates an `emulated_uint64_t` from a `uint64_t`. Useful for compile-time encoding. 
- * - * @param [in] _data `uint64_t` to be unpacked into high and low bits - */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint64_t) u) - { - return create(_static_cast(u >> 32), _static_cast(u)); - } - - // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- - - NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - this_t retVal = create(data & rhs.data); - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - this_t retVal = create(data | rhs.data); - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - this_t retVal = create(data ^ rhs.data); - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC - { - this_t retVal = create(~data); - return retVal; - } - - // Only valid in CPP - #ifndef __HLSL_VERSION - - constexpr inline this_t operator<<(uint16_t bits) const; - - constexpr inline this_t operator>>(uint16_t bits) const; - - #endif - - // ------------------------------------------------------- ARITHMETIC OPERATORS ------------------------------------------------- - - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - const spirv::AddCarryOutput lowerAddResult = addCarry(data.y, rhs.data.y); - const storage_t addResult = { data.x + rhs.data.x + lowerAddResult.carry, lowerAddResult.result }; - const this_t retVal = create(addResult); - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - const spirv::SubBorrowOutput lowerSubResult = subBorrow(data.y, rhs.data.y); - const storage_t subResult = { data.x - rhs.data.x - lowerSubResult.borrow, lowerSubResult.result }; - const this_t retVal = create(subResult); - return retVal; - } - -}; - -template<> -struct left_shift_operator -{ - using type_t = emulated_uint64_t; - NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); - - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) - { - if (!bits) - return operand; - const uint32_t _bits = uint32_t(bits); - const uint32_t shift = ComponentBitWidth - _bits; - const uint32_t higherBitsMask = (~uint32_t(0)) << shift; - // We need the `x` component of the vector (which represents the higher bits of the emulated uint64) to get the `bits` higher bits of the `y` component - const vector retValData = { (operand.data.x << _bits) | ((operand.data.y & higherBitsMask) >> shift), operand.data.y << _bits }; - return emulated_uint64_t::create(retValData); - } -}; - -template<> -struct arithmetic_right_shift_operator -{ - using type_t = emulated_uint64_t; - NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); - - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) - { - if (!bits) - return operand; - const uint32_t _bits = uint32_t(bits); - const uint32_t shift = ComponentBitWidth - _bits; - const uint32_t lowerBitsMask = ~uint32_t(0) >> shift; - // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component - const vector retValData = { operand.data.x >> _bits, ((operand.data.x & lowerBitsMask) << shift) | (operand.data.y >> _bits) }; - return 
emulated_uint64_t::create(retValData); - } -}; - -#ifndef __HLSL_VERSION - -constexpr inline emulated_uint64_t emulated_uint64_t::operator<<(uint16_t bits) const -{ - left_shift_operator leftShift; - return leftShift(*this, bits); -} - -constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(uint16_t bits) const -{ - arithmetic_right_shift_operator rightShift; - return rightShift(*this, bits); -} - -#endif - -namespace impl -{ - -template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) <= sizeof(uint32_t))) -struct static_cast_helper && (sizeof(Unsigned) <= sizeof(uint32_t))) > -{ - using To = Unsigned; - using From = emulated_uint64_t; - - // Return only the lowest bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) - { - return _static_cast(u.data.y); - } -}; - -template<> -struct static_cast_helper -{ - using To = uint64_t; - using From = emulated_uint64_t; - - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) - { - const To highBits = _static_cast(u.data.x) << To(32); - return highBits | _static_cast(u.data.y); - } -}; - -template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) <= sizeof(uint32_t))) -struct static_cast_helper && (sizeof(Unsigned) <= sizeof(uint32_t))) > -{ - using To = emulated_uint64_t; - using From = Unsigned; - - // Set only lower bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) - { - return emulated_uint64_t::create(uint32_t(0), _static_cast(u)); - } -}; - -template<> -struct static_cast_helper -{ - using To = emulated_uint64_t; - using From = uint64_t; - - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) - { - return emulated_uint64_t::create(u); - } -}; - -} //namespace impl - -} //namespace nbl -} //namespace hlsl - - - -#endif diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index a106cec440..65a97bbe68 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -2,7 +2,7 @@ #define _NBL_BUILTIN_HLSL_EMULATED_VECTOR_T_HLSL_INCLUDED_ #include -#include +#include #include #include #include @@ -331,6 +331,7 @@ struct emulated_vector : CRTP DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_uint64_t) + DEFINE_OPERATORS_FOR_TYPE(emulated_int64_t) DEFINE_OPERATORS_FOR_TYPE(float32_t) DEFINE_OPERATORS_FOR_TYPE(float64_t) DEFINE_OPERATORS_FOR_TYPE(uint16_t) diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 499deb1db4..9c834424a8 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -5,7 +5,7 @@ #include "nbl/builtin/hlsl/concepts/core.hlsl" #include "nbl/builtin/hlsl/bit.hlsl" #include "nbl/builtin/hlsl/functional.hlsl" -#include "nbl/builtin/hlsl/emulated/uint64_t.hlsl" +#include "nbl/builtin/hlsl/emulated/int64_t.hlsl" #include "nbl/builtin/hlsl/mpl.hlsl" namespace nbl @@ -275,14 +275,15 @@ struct Equals; template struct Equals { - NBL_CONSTEXPR_INLINE_FUNC vector operator()(NBL_CONST_REF_ARG(storage_t) _value, NBL_CONST_REF_ARG(vector) rhs) + NBL_CONSTEXPR_INLINE_FUNC vector operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v; + left_shift_operator leftShift; vector retVal; [[unroll]] for (uint16_t i = 0; i < D; i++) { - retVal[i] = (_value & rhs[i]) == rhs[i]; + retVal[i] = (value & leftShift(Mask, i)) == leftShift(rhs[i], i); } 
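            // (editor's note) worked example, assuming D == 2 and a 16-bit storage_t, so Mask == 0x5555:
            // for i == 0, `value & leftShift(Mask, 0)` keeps only the x lane of the interleaved code and
            // `leftShift(rhs[i], i)` lines the already-spread x operand up with that lane, so the two
            // compare lane-for-lane; i == 1 repeats this on the y lane under mask 0xAAAA. No decode needed.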
return retVal; } @@ -293,7 +294,7 @@ struct Equals { template NBL_CONSTEXPR_INLINE_FUNC enable_if_t&& is_scalar_v && (is_signed_v == Signed), vector > - operator()(NBL_CONST_REF_ARG(storage_t) _value, NBL_CONST_REF_ARG(vector) rhs) + operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { using U = make_unsigned_t; vector interleaved; @@ -303,10 +304,77 @@ struct Equals interleaved[i] = impl::MortonEncoder::encode(_static_cast(rhs[i])); } Equals equals; - return equals(_value, interleaved); + return equals(value, interleaved); + } +}; + +template +struct BaseComparison; + +// Aux method for extracting highest bit, used by the comparison below +template +NBL_CONSTEXPR_INLINE_FUNC storage_t extractHighestBit(storage_t value, uint16_t coord) +{ + // Like above, if the number encoded in `coord` gets `bits(coord) = ceil((BitWidth - coord)/D)` bits for representation, then the highest index of these + // bits is `bits(coord) - 1` + const uint16_t coordHighestBitIdx = Bits / D - ((coord < Bits % D) ? uint16_t(0) : uint16_t(1)); + // This is the index of that bit as an index in the encoded value + const uint16_t shift = coordHighestBitIdx * D + coord; + left_shift_operator leftShift; + return value & leftShift(_static_cast(uint16_t(1)), shift); +} + +template +struct BaseComparison +{ + NBL_CONSTEXPR_INLINE_FUNC vector operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + { + NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v; + left_shift_operator leftShift; + vector retVal; + ComparisonOp comparison; + [[unroll]] + for (uint16_t i = 0; i < D; i++) + { + storage_t thisCoord = value & leftShift(Mask, i); + storage_t rhsCoord = leftShift(rhs[i], i); + // If coordinate is negative, we add 1s in every bit not corresponding to coord + if (extractHighestBit(thisCoord) != _static_cast(uint64_t(0))) + thisCoord = thisCoord | ~leftShift(Mask, i); + if (extractHighestBit(rhsCoord) != _static_cast(uint64_t(0))) + rhsCoord = rhsCoord | ~leftShift(Mask, i); + retVal[i] = comparison(thisCoord, rhsCoord); + } + return retVal; + } +}; + +template +struct BaseComparison +{ + template + NBL_CONSTEXPR_INLINE_FUNC enable_if_t&& is_scalar_v && (is_signed_v == Signed), vector > + operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + { + using U = make_unsigned_t; + vector interleaved; + [[unroll]] + for (uint16_t i = 0; i < D; i++) + { + interleaved[i] = impl::MortonEncoder::encode(_static_cast(rhs[i])); + } + BaseComparison baseComparison; + return baseComparison(value, interleaved); } }; +template +struct LessThan : BaseComparison > {}; + +template +struct LessEquals : BaseComparison > {}; + + } //namespace impl // Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 @@ -490,8 +558,35 @@ struct code template enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator!=(NBL_CONST_REF_ARG(vector) rhs) { - return !operator==(rhs); + return !operator== (rhs); + } + + template + enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator<(NBL_CONST_REF_ARG(vector) rhs) + { + impl::LessThan lessThan; + return lessThan(value, rhs); } + + template + enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator<=(NBL_CONST_REF_ARG(vector) rhs) + { + impl::LessEquals lessEquals; + return lessEquals(value, rhs); + } + + template + enable_if_t<(is_signed_v == Signed) || (is_same_v && 
BitsAlreadySpread), vector > operator>(NBL_CONST_REF_ARG(vector) rhs) + { + return !operator<= (rhs); + } + + template + enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator>=(NBL_CONST_REF_ARG(vector) rhs) + { + return !operator< (rhs); + } + }; } //namespace morton diff --git a/include/nbl/builtin/hlsl/portable/int64_t.hlsl b/include/nbl/builtin/hlsl/portable/int64_t.hlsl new file mode 100644 index 0000000000..6929e160fa --- /dev/null +++ b/include/nbl/builtin/hlsl/portable/int64_t.hlsl @@ -0,0 +1,31 @@ +#ifndef _NBL_BUILTIN_HLSL_PORTABLE_INT64_T_INCLUDED_ +#define _NBL_BUILTIN_HLSL_PORTABLE_INT64_T_INCLUDED_ + +#include +#include + +// define NBL_FORCE_EMULATED_INT_64 to force using emulated int64 types + +namespace nbl +{ +namespace hlsl +{ +template +#ifdef __HLSL_VERSION +#ifdef NBL_FORCE_EMULATED_INT_64 +using portable_uint64_t = emulated_uint64_t; +using portable_int64_t = emulated_int64_t; +#else +using portable_uint64_t = typename conditional::shaderInt64, uint64_t, emulated_uint64_t>::type; +using portable_int64_t = typename conditional::shaderInt64, int64_t, emulated_int64_t>::type; +#endif + +#else +using portable_uint64_t = uint64_t; +using portable_int64_t = int64_t; +#endif + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/portable/uint64_t.hlsl b/include/nbl/builtin/hlsl/portable/uint64_t.hlsl deleted file mode 100644 index ac081234ac..0000000000 --- a/include/nbl/builtin/hlsl/portable/uint64_t.hlsl +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef _NBL_BUILTIN_HLSL_PORTABLE_UINT64_T_INCLUDED_ -#define _NBL_BUILTIN_HLSL_PORTABLE_UINT64_T_INCLUDED_ - -#include -#include - -// define NBL_FORCE_EMULATED_UINT_64 to force using emulated uint64 - -namespace nbl -{ -namespace hlsl -{ -template -#ifdef __HLSL_VERSION -#ifdef NBL_FORCE_EMULATED_UINT_64 -using portable_uint64_t = emulated_uint64_t; -#else -using portable_uint64_t = typename conditional::shaderInt64, uint64_t, emulated_uint64_t>::type; -#endif - -#else -using portable_uint64_t = uint64_t; -#endif - -//static_assert(sizeof(portable_uint64_t) == sizeof(uint64_t)); - -} -} - -#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/portable/vector_t.hlsl b/include/nbl/builtin/hlsl/portable/vector_t.hlsl index dcaea97739..16d5b40f81 100644 --- a/include/nbl/builtin/hlsl/portable/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/portable/vector_t.hlsl @@ -3,6 +3,7 @@ #include #include +#include namespace nbl { @@ -51,6 +52,14 @@ template using portable_uint64_t3 = portable_vector_t3 >; template using portable_uint64_t4 = portable_vector_t4 >; + +//Int +template +using portable_int64_t2 = portable_vector_t2 >; +template +using portable_int64_t3 = portable_vector_t3 >; +template +using portable_int64_t4 = portable_vector_t4 >; #else // Float template @@ -67,6 +76,14 @@ template using portable_uint64_t3 = portable_vector_t3; template using portable_uint64_t4 = portable_vector_t4; + +// Int +template +using portable_int64_t2 = portable_vector_t2; +template +using portable_int64_t3 = portable_vector_t3; +template +using portable_int64_t4 = portable_vector_t4; #endif } diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index d7005a1ed6..f03d8ae22c 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -214,12 +214,12 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/macros.h") # emulated LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/float64_t.hlsl") 
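# (Editor's note, not part of the original patch) a header must appear in this list to be
# embedded as a builtin resource; the uint64_t -> int64_t renames just below keep it in sync
# with this patch's file renames.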
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/float64_t_impl.hlsl")
-LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/uint64_t.hlsl")
+LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/int64_t.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/vector_t.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/matrix_t.hlsl")
 # portable
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/float64_t.hlsl")
-LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/uint64_t.hlsl")
+LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/int64_t.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/vector_t.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/matrix_t.hlsl")
 # ieee754

From 60ff99a4dadfdecc5abf59e4fb2d95e62d6ed929 Mon Sep 17 00:00:00 2001
From: Fletterio 
Date: Mon, 7 Apr 2025 23:20:42 -0300
Subject: [PATCH 19/28] Clean up the emulated int code, fix some constant
 creation in the morton code

---
 .../nbl/builtin/hlsl/emulated/int64_t.hlsl    | 317 ++++++++----------
 include/nbl/builtin/hlsl/morton.hlsl          |  15 +-
 2 files changed, 161 insertions(+), 171 deletions(-)

diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl
index f3269cc6ba..cad10242f2 100644
--- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl
+++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl
@@ -5,31 +5,35 @@
 #include "nbl/builtin/hlsl/functional.hlsl"
 #include "nbl/builtin/hlsl/concepts/core.hlsl"
 
+// Didn't bother with operator* or operator/; implement them if you need them. Multiplication is pretty straightforward; division requires switching on signs
+// and whether the topmost bits of the divisor are equal to 0
+// - Francisco
+
 namespace nbl
 {
 namespace hlsl
 {
 
-struct emulated_uint64_t
+template
+struct emulated_int64_base
 {
-	using storage_t = vector;
-	using this_t = emulated_uint64_t;
+	using storage_t = vector;
+	using this_t = emulated_int64_base;
 
-	storage_t data;
+	storage_t data;
 
	// ---------------------------------------------------- CONSTRUCTORS ---------------------------------------------------------------
-
	#ifndef __HLSL_VERSION
 
-	emulated_uint64_t() = default;
+	emulated_int64_base() = default;
 
	#endif
 
	/**
-	 * @brief Creates an `emulated_uint64_t` from a vector of two `uint32_t`s representing its bitpattern
+	 * @brief Creates an `emulated_int64` from a vector of two `uint32_t`s representing its bitpattern
	 *
-	 * @param [in] _data Vector of `uint32_t` encoding the `uint64_t` being emulated
+	 * @param [in] _data Vector of `uint32_t` encoding the `uint64_t/int64_t` being emulated
	 */
	NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data)
	{
@@ -39,10 +43,10 @@ struct emulated_uint64_t
	}
 
	/**
-	 * @brief Creates an `emulated_uint64_t` from two `uint32_t`s representing its bitpattern
+	 * @brief Creates an `emulated_int64` from two `uint32_t`s representing its bitpattern
	 *
-	 * @param [in] hi Highest 32 bits of the `uint64` being emulated
-	 * @param [in] lo Lowest 32 bits of the `uint64` being emulated
+	 * @param [in] hi Highest 32 bits of the `uint64_t/int64_t` being emulated
+	 * @param [in] lo Lowest 32 bits of the `uint64_t/int64_t` being emulated
	 */
	NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) hi, NBL_CONST_REF_ARG(uint32_t) lo)
	{
		return create(storage_t(hi, lo));
	}
 
	/**
-	 * @brief Creates an `emulated_uint64_t` from a `uint64_t`. Useful for compile-time encoding.
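+	 * (Editor's example, not part of the original patch) given the unpacking below,
+	 * `create(0x0123456789ABCDEFull)` yields `hi = 0x01234567` and `lo = 0x89ABCDEF`.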
+ * @brief Creates an `emulated_int64_base` from a `uint64_t` with its bitpattern. Useful for compile-time encoding. * - * @param [in] _data `uint64_t` to be unpacked into high and low bits + * @param [in] u `uint64_t` to be unpacked into high and low bits */ NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint64_t) u) { @@ -126,7 +130,15 @@ struct emulated_uint64_t NBL_CONSTEXPR_INLINE_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { if (data.x != rhs.data.x) - return data.x < rhs.data.x; + { + // If signed, compare topmost bits as signed + NBL_IF_CONSTEXPR(Signed) + return _static_cast(data.x) < _static_cast(rhs.data.x); + // If unsigned, compare them as-is + else + return data.x < rhs.data.x; + } + // Lower bits are positive in both signed and unsigned else return data.y < rhs.data.y; } @@ -134,7 +146,14 @@ struct emulated_uint64_t NBL_CONSTEXPR_INLINE_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { if (data.x != rhs.data.x) - return data.x > rhs.data.x; + { + // If signed, compare topmost bits as signed + NBL_IF_CONSTEXPR(Signed) + return _static_cast(data.x) > _static_cast(rhs.data.x); + // If unsigned, compare them as-is + else + return data.x > rhs.data.x; + } else return data.y > rhs.data.y; } @@ -150,94 +169,15 @@ struct emulated_uint64_t } }; -struct emulated_int64_t : emulated_uint64_t -{ - using base_t = emulated_uint64_t; - using base_t::storage_t; - using this_t = emulated_int64_t; - - // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- - +using emulated_uint64_t = emulated_int64_base; +using emulated_int64_t = emulated_int64_base; - #ifndef __HLSL_VERSION - - emulated_int64_t() = default; - - #endif +// ---------------------- Functional operatos ------------------------ - /** - * @brief Creates an `emulated_int64_t` from a vector of two `uint32_t`s representing its bitpattern - * - * @param [in] _data Vector of `uint32_t` encoding the `int64_t` being emulated - */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) - { - return _static_cast(base_t::create(_data)); - } - - /** - * @brief Creates an `emulated_int64_t` from two `uint32_t`s representing its bitpattern - * - * @param [in] hi Highest 32 bits of the `int64` being emulated - * @param [in] lo Lowest 32 bits of the `int64` being emulated - */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) hi, NBL_CONST_REF_ARG(uint32_t) lo) - { - return _static_cast(base_t::create(hi, lo)); - } - - /** - * @brief Creates an `emulated_int64_t` from a `int64_t`. Useful for compile-time encoding. 
- * - * @param [in] _data `int64_t` to be unpacked into high and low bits - */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(int64_t) i) - { - return _static_cast(base_t::create(_static_cast(i))); - } - - // Only valid in CPP - #ifndef __HLSL_VERSION - - // Only this one needs to be redefined since it's arithmetic - constexpr inline this_t operator>>(uint16_t bits) const; - - #endif - - // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- - - // Same as unsigned but the topmost bits are compared as signed - NBL_CONSTEXPR_INLINE_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - if (data.x != rhs.data.x) - return _static_cast(data.x) < _static_cast(rhs.data.x); - else - return data.y < rhs.data.y; - } - - NBL_CONSTEXPR_INLINE_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - if (data.x != rhs.data.x) - return _static_cast(data.x) > _static_cast(rhs.data.x); - else - return data.y > rhs.data.y; - } - - NBL_CONSTEXPR_INLINE_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - return !operator>(rhs); - } - - NBL_CONSTEXPR_INLINE_FUNC bool operator>=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - return !operator<(rhs); - } -}; - -template<> -struct left_shift_operator +template +struct left_shift_operator > { - using type_t = emulated_uint64_t; + using type_t = emulated_int64_base; NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) @@ -248,7 +188,7 @@ struct left_shift_operator const uint32_t shift = ComponentBitWidth - _bits; // We need the `x` component of the vector (which represents the higher bits of the emulated uint64) to get the `bits` higher bits of the `y` component const vector retValData = { (operand.data.x << _bits) | (operand.data.y >> shift), operand.data.y << _bits }; - return emulated_uint64_t::create(retValData); + return type_t::create(retValData); } }; @@ -270,18 +210,6 @@ struct arithmetic_right_shift_operator } }; -template<> -struct left_shift_operator -{ - using type_t = emulated_int64_t; - - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) - { - left_shift_operator leftShift; - return _static_cast(leftShift(_static_cast(operand), bits)); - } -}; - template<> struct arithmetic_right_shift_operator { @@ -303,7 +231,8 @@ struct arithmetic_right_shift_operator #ifndef __HLSL_VERSION -constexpr inline emulated_uint64_t emulated_uint64_t::operator<<(uint16_t bits) const +template +constexpr inline emulated_int64_base emulated_int64_base::operator<<(uint16_t bits) const { left_shift_operator leftShift; return leftShift(*this, bits); @@ -356,113 +285,163 @@ struct static_cast_helper } }; -template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) <= sizeof(uint32_t))) -struct static_cast_helper && (sizeof(Unsigned) <= sizeof(uint32_t))) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t)) && (is_signed_v == Signed)) +struct static_cast_helper NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t))) > { - using To = Unsigned; - using From = emulated_uint64_t; + using To = I; + using From = emulated_int64_base; // Return only the lowest bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From val) 
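+	// (Editor's note, not part of the original patch) truncating cast: at this point in the series
+	// the storage order is still {MSB, LSB}, so the low word is `.y`; patch 20 later swaps the
+	// layout to little endian and this body becomes `val.data.x`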
{ - return _static_cast(u.data.y); + return _static_cast(val.data.y); } }; -template<> -struct static_cast_helper +template NBL_PARTIAL_REQ_TOP((is_same_v || is_same_v) && (is_signed_v == Signed)) +struct static_cast_helper NBL_PARTIAL_REQ_BOT((is_same_v || is_same_v) && (is_signed_v == Signed)) > { - using To = uint64_t; - using From = emulated_uint64_t; + using To = I; + using From = emulated_int64_base; - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From val) { - const To highBits = _static_cast(u.data.x) << To(32); - return highBits | _static_cast(u.data.y); + const To highBits = _static_cast(val.data.x) << To(32); + return highBits | _static_cast(val.data.y); } }; -template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) <= sizeof(uint32_t))) -struct static_cast_helper && (sizeof(Unsigned) <= sizeof(uint32_t))) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t)) && (is_signed_v == Signed)) +struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t)) && (is_signed_v == Signed)) > { - using To = emulated_uint64_t; - using From = Unsigned; + using To = emulated_int64_base; + using From = I; // Set only lower bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) { - return emulated_uint64_t::create(uint32_t(0), _static_cast(u)); + return To::create(uint32_t(0), _static_cast(i)); } }; -template<> -struct static_cast_helper +template NBL_PARTIAL_REQ_TOP((is_same_v || is_same_v) && (is_signed_v == Signed)) +struct static_cast_helper, I NBL_PARTIAL_REQ_BOT((is_same_v || is_same_v) && (is_signed_v == Signed)) > { - using To = emulated_uint64_t; - using From = uint64_t; + using To = emulated_int64_base; + using From = I; - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) { - return emulated_uint64_t::create(u); + return To::create(_static_cast(i)); } }; -template NBL_PARTIAL_REQ_TOP(concepts::SignedIntegralScalar && (sizeof(Signed) <= sizeof(uint32_t))) -struct static_cast_helper && (sizeof(Signed) <= sizeof(uint32_t))) > +} //namespace impl + +// ---------------------- STD arithmetic operators ------------------------ +// Specializations of the structs found in functional.hlsl +// These all have to be specialized because of the identity that can't be initialized inside the struct definition + +template +struct plus > { - using To = Signed; - using From = emulated_int64_t; + using type_t = emulated_int64_base; - // Return only the lowest bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + type_t operator()(NBL_CONST_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) { - return _static_cast(i.data.y); + return lhs + rhs; } + + #ifndef __HLSL_VERSION + NBL_CONSTEXPR_STATIC_INLINE type_t identity = _static_cast(uint64_t(0)); + #else + NBL_CONSTEXPR_STATIC_INLINE type_t identity; + #endif }; -template<> -struct static_cast_helper +template +struct minus > { - using To = int64_t; - using From = emulated_int64_t; + using type_t = emulated_int64_base; - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + type_t operator()(NBL_CONST_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) { - const To highBits = _static_cast(i.data.x) << To(32); - return highBits | _static_cast(i.data.y); + return lhs - rhs; } + + #ifndef __HLSL_VERSION + NBL_CONSTEXPR_STATIC_INLINE type_t identity = _static_cast(uint64_t(0)); + #else + 
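	// (Editor's note, not part of the original patch) DXC can't initialize static member fields
	// in-class, hence the explicit specializations of `identity` right below these structs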
NBL_CONSTEXPR_STATIC_INLINE type_t identity; + #endif }; -template NBL_PARTIAL_REQ_TOP(concepts::SignedIntegralScalar && (sizeof(Signed) <= sizeof(uint32_t))) -struct static_cast_helper && (sizeof(Signed) <= sizeof(uint32_t))) > -{ - using To = emulated_int64_t; - using From = Signed; +#ifdef __HLSL_VERSION +template<> +NBL_CONSTEXPR emulated_uint64_t plus::identity = _static_cast(uint64_t(0)); +template<> +NBL_CONSTEXPR emulated_int64_t plus::identity = _static_cast(int64_t(0)); +template<> +NBL_CONSTEXPR emulated_uint64_t minus::identity = _static_cast(uint64_t(0)); +template<> +NBL_CONSTEXPR emulated_int64_t minus::identity = _static_cast(int64_t(0)); +#endif - // Set only lower bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) +// --------------------------------- Compound assignment operators ------------------------------------------ +// Specializations of the structs found in functional.hlsl + +template +struct plus_assign > +{ + using type_t = emulated_int64_base; + using base_t = plus; + base_t baseOp; + void operator()(NBL_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) { - return emulated_int64_t::create(uint32_t(0), _static_cast(i)); + lhs = baseOp(lhs, rhs); } + + #ifndef __HLSL_VERSION + NBL_CONSTEXPR_STATIC_INLINE type_t identity = base_t::identity; + #else + NBL_CONSTEXPR_STATIC_INLINE type_t identity; + #endif }; -template<> -struct static_cast_helper +template +struct minus_assign > { - using To = emulated_int64_t; - using From = int64_t; - - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + using type_t = emulated_int64_base; + using base_t = minus; + base_t baseOp; + void operator()(NBL_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) { - return emulated_int64_t::create(i); + lhs = baseOp(lhs, rhs); } + + #ifndef __HLSL_VERSION + NBL_CONSTEXPR_STATIC_INLINE type_t identity = base_t::identity; + #else + NBL_CONSTEXPR_STATIC_INLINE type_t identity; + #endif }; -} //namespace impl +#ifdef __HLSL_VERSION +template<> +NBL_CONSTEXPR emulated_uint64_t plus_assign::identity = plus::identity; +template<> +NBL_CONSTEXPR emulated_int64_t plus_assign::identity = plus::identity; +template<> +NBL_CONSTEXPR emulated_uint64_t minus_assign::identity = minus::identity; +template<> +NBL_CONSTEXPR emulated_int64_t minus_assign::identity = minus::identity; +#endif } //namespace nbl } //namespace hlsl +// Declare them as signed/unsigned versions of each other + #ifndef __HLSL_VERSION #define NBL_ADD_STD std:: #else diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 9c834424a8..e2ae3d8b0a 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -39,17 +39,28 @@ NBL_CONSTEXPR T decode_mask_v = decode_mask::value; // --------------------------------------------------------- MORTON ENCODE/DECODE MASKS --------------------------------------------------- // Proper encode masks (either generic `T array[masksPerDImension]` or `morton_mask`) impossible to have until at best HLSL202y +#ifndef __HLSL_VERSION + #define NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, HEX_VALUE) template struct morton_mask_##DIM##_##MASK \ {\ NBL_CONSTEXPR_STATIC_INLINE T value = _static_cast(HEX_VALUE);\ }; -#ifndef __HLSL_VERSION - #define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_VALUE) #else +#define NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, HEX_VALUE) template struct morton_mask_##DIM##_##MASK \ +{\ + NBL_CONSTEXPR_STATIC_INLINE T value;\ +};\ +template<>\ +NBL_CONSTEXPR_STATIC_INLINE uint16_t 
morton_mask_##DIM##_##MASK##::value = _static_cast(HEX_VALUE);\
template<>\
NBL_CONSTEXPR_STATIC_INLINE uint32_t morton_mask_##DIM##_##MASK##::value = _static_cast(HEX_VALUE);\
template<>\
NBL_CONSTEXPR_STATIC_INLINE uint64_t morton_mask_##DIM##_##MASK##::value = _static_cast(HEX_VALUE);\
 
 #define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_VALUE) template<> struct morton_mask_##DIM##_##MASK##\
 {\
 	NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t value;\

From 55601628733ca20218f0c13d481e0c1df29bed1a Mon Sep 17 00:00:00 2001
From: Fletterio 
Date: Tue, 8 Apr 2025 19:44:15 -0300
Subject: [PATCH 20/28] Addressing latest PR review. Generic overloads of
 different functional structs blocked by
 https://github.com/microsoft/DirectXShaderCompiler/issues/7325

---
 .../nbl/builtin/hlsl/emulated/int64_t.hlsl    | 218 +++++++++---------
 include/nbl/builtin/hlsl/functional.hlsl      | 102 ++++++++
 include/nbl/builtin/hlsl/morton.hlsl          |   2 +
 .../nbl/builtin/hlsl/portable/int64_t.hlsl    |   7 +-
 4 files changed, 218 insertions(+), 111 deletions(-)

diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl
index cad10242f2..45cb82ed78 100644
--- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl
+++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl
@@ -4,6 +4,7 @@
 #include "nbl/builtin/hlsl/cpp_compat.hlsl"
 #include "nbl/builtin/hlsl/functional.hlsl"
 #include "nbl/builtin/hlsl/concepts/core.hlsl"
+#include "nbl/builtin/hlsl/bit.hlsl"
 
 // Didn't bother with operator* or operator/; implement them if you need them. Multiplication is pretty straightforward; division requires switching on signs
 // and whether the topmost bits of the divisor are equal to 0
@@ -35,7 +36,7 @@ struct emulated_int64_base
	 *
	 * @param [in] _data Vector of `uint32_t` encoding the `uint64_t/int64_t` being emulated
	 */
-	NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data)
+	NBL_CONSTEXPR_STATIC_INLINE_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data)
	{
		this_t retVal;
		retVal.data = _data;
@@ -48,19 +49,9 @@ struct emulated_int64_base
	 * @param [in] hi Highest 32 bits of the `uint64_t/int64_t` being emulated
	 * @param [in] lo Lowest 32 bits of the `uint64_t/int64_t` being emulated
	 */
-	NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) hi, NBL_CONST_REF_ARG(uint32_t) lo)
+	NBL_CONSTEXPR_STATIC_INLINE_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) lo, NBL_CONST_REF_ARG(uint32_t) hi)
	{
-		return create(storage_t(hi, lo));
+		return create(storage_t(lo, hi));
	}
-
-	/**
-	 * @brief Creates an `emulated_int64_base` from a `uint64_t` with its bitpattern. Useful for compile-time encoding.
- * - * @param [in] u `uint64_t` to be unpacked into high and low bits - */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint64_t) u) - { - return create(_static_cast(u >> 32), _static_cast(u)); + return create(storage_t(lo, hi)); } // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- @@ -92,9 +83,9 @@ struct emulated_int64_base // Only valid in CPP #ifndef __HLSL_VERSION - constexpr inline this_t operator<<(uint16_t bits) const; + constexpr inline this_t operator<<(this_t bits) const; - constexpr inline this_t operator>>(uint16_t bits) const; + constexpr inline this_t operator>>(this_t bits) const; #endif @@ -102,16 +93,16 @@ struct emulated_int64_base NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - const spirv::AddCarryOutput lowerAddResult = addCarry(data.y, rhs.data.y); - const storage_t addResult = { data.x + rhs.data.x + lowerAddResult.carry, lowerAddResult.result }; + const spirv::AddCarryOutput lowerAddResult = addCarry(data.x, rhs.data.x); + const storage_t addResult = { lowerAddResult.result, data.y + rhs.data.y + lowerAddResult.carry }; const this_t retVal = create(addResult); return retVal; } NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - const spirv::SubBorrowOutput lowerSubResult = subBorrow(data.y, rhs.data.y); - const storage_t subResult = { data.x - rhs.data.x - lowerSubResult.borrow, lowerSubResult.result }; + const spirv::SubBorrowOutput lowerSubResult = subBorrow(data.x, rhs.data.x); + const storage_t subResult = { lowerSubResult.result, data.y - rhs.data.y - lowerSubResult.borrow }; const this_t retVal = create(subResult); return retVal; } @@ -172,86 +163,6 @@ struct emulated_int64_base using emulated_uint64_t = emulated_int64_base; using emulated_int64_t = emulated_int64_base; -// ---------------------- Functional operatos ------------------------ - -template -struct left_shift_operator > -{ - using type_t = emulated_int64_base; - NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); - - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) - { - if (!bits) - return operand; - const uint32_t _bits = uint32_t(bits); - const uint32_t shift = ComponentBitWidth - _bits; - // We need the `x` component of the vector (which represents the higher bits of the emulated uint64) to get the `bits` higher bits of the `y` component - const vector retValData = { (operand.data.x << _bits) | (operand.data.y >> shift), operand.data.y << _bits }; - return type_t::create(retValData); - } -}; - -template<> -struct arithmetic_right_shift_operator -{ - using type_t = emulated_uint64_t; - NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); - - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) - { - if (!bits) - return operand; - const uint32_t _bits = uint32_t(bits); - const uint32_t shift = ComponentBitWidth - _bits; - // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component - const vector retValData = { operand.data.x >> _bits, (operand.data.x << shift) | (operand.data.y >> _bits) }; - return emulated_uint64_t::create(retValData); - } -}; - -template<> -struct arithmetic_right_shift_operator -{ - using type_t = emulated_int64_t; - NBL_CONSTEXPR_STATIC uint32_t 
ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); - - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) - { - if (!bits) - return operand; - const uint32_t _bits = uint32_t(bits); - const uint32_t shift = ComponentBitWidth - _bits; - // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component - // Also the right shift *only* in the top bits happens as a signed arithmetic right shift - const vector retValData = { _static_cast(_static_cast(operand.data.x)) >> _bits, (operand.data.x << shift) | (operand.data.y >> _bits) }; - return emulated_int64_t::create(retValData); - } -}; - -#ifndef __HLSL_VERSION - -template -constexpr inline emulated_int64_base emulated_int64_base::operator<<(uint16_t bits) const -{ - left_shift_operator leftShift; - return leftShift(*this, bits); -} - -constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(uint16_t bits) const -{ - arithmetic_right_shift_operator rightShift; - return rightShift(*this, bits); -} - -constexpr inline emulated_int64_t emulated_int64_t::operator>>(uint16_t bits) const -{ - arithmetic_right_shift_operator rightShift; - return rightShift(*this, bits); -} - -#endif - namespace impl { @@ -285,7 +196,7 @@ struct static_cast_helper } }; -template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t)) && (is_signed_v == Signed)) +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t))) struct static_cast_helper NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t))) > { using To = I; @@ -294,25 +205,24 @@ struct static_cast_helper NBL_PARTIAL_REQ_BOT(con // Return only the lowest bits NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From val) { - return _static_cast(val.data.y); + return _static_cast(val.data.x); } }; -template NBL_PARTIAL_REQ_TOP((is_same_v || is_same_v) && (is_signed_v == Signed)) -struct static_cast_helper NBL_PARTIAL_REQ_BOT((is_same_v || is_same_v) && (is_signed_v == Signed)) > +template NBL_PARTIAL_REQ_TOP(is_same_v || is_same_v) +struct static_cast_helper NBL_PARTIAL_REQ_BOT(is_same_v || is_same_v) > { using To = I; using From = emulated_int64_base; NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From val) { - const To highBits = _static_cast(val.data.x) << To(32); - return highBits | _static_cast(val.data.y); + return bit_cast(val.data); } }; -template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t)) && (is_signed_v == Signed)) -struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t)) && (is_signed_v == Signed)) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t))) +struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t))) > { using To = emulated_int64_base; using From = I; @@ -324,20 +234,108 @@ struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(con } }; -template NBL_PARTIAL_REQ_TOP((is_same_v || is_same_v) && (is_signed_v == Signed)) -struct static_cast_helper, I NBL_PARTIAL_REQ_BOT((is_same_v || is_same_v) && (is_signed_v == Signed)) > +template NBL_PARTIAL_REQ_TOP(is_same_v || is_same_v ) +struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(is_same_v || is_same_v) > { using To = emulated_int64_base; using From = I; NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) { - return To::create(_static_cast(i)); + To retVal; + retVal.data = 
bit_cast(i); + return retVal; } }; } //namespace impl +// ---------------------- Functional operators ------------------------ + +template +struct left_shift_operator > +{ + using type_t = emulated_int64_base; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + // Can only be defined with `_bits` being of `type_t`, see: + //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) + { + const uint32_t bits = _static_cast(_bits); + if (!bits) + return operand; + const uint32_t shift = ComponentBitWidth - bits; + // We need the `x` component of the vector (which represents the higher bits of the emulated uint64) to get the `bits` higher bits of the `y` component + const vector retValData = { (operand.data.x << bits) | (operand.data.y >> shift), operand.data.y << bits }; + return type_t::create(retValData); + } +}; + +template<> +struct arithmetic_right_shift_operator +{ + using type_t = emulated_uint64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + // Can only be defined with `_bits` being of `type_t`, see: + //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) + { + const uint32_t bits = _static_cast(_bits); + if (!bits) + return operand; + const uint32_t shift = ComponentBitWidth - bits; + // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component + const vector retValData = { operand.data.x >> bits, (operand.data.x << shift) | (operand.data.y >> bits) }; + return emulated_uint64_t::create(retValData); + } +}; + +template<> +struct arithmetic_right_shift_operator +{ + using type_t = emulated_int64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + // Can only be defined with `_bits` being of `type_t`, see: + //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) + { + const uint32_t bits = _static_cast(_bits); + if (!bits) + return operand; + const uint32_t shift = ComponentBitWidth - bits; + // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component + // Also the right shift *only* in the top bits happens as a signed arithmetic right shift + const vector retValData = { _static_cast(_static_cast(operand.data.x)) >> bits, (operand.data.x << shift) | (operand.data.y >> bits) }; + return emulated_int64_t::create(retValData); + } +}; + +#ifndef __HLSL_VERSION + +template +constexpr inline emulated_int64_base emulated_int64_base::operator<<(this_t bits) const +{ + left_shift_operator leftShift; + return leftShift(*this, bits); +} + +constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(this_t bits) const +{ + arithmetic_right_shift_operator rightShift; + return rightShift(*this, bits); +} + +constexpr inline emulated_int64_t emulated_int64_t::operator>>(this_t bits) const +{ + arithmetic_right_shift_operator rightShift; + return rightShift(*this, bits); +} + +#endif + // ---------------------- STD arithmetic operators ------------------------ // Specializations of the structs found in functional.hlsl // These all have to be specialized because of the identity that can't be 
initialized inside the struct definition diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index e5486e2727..cc95633f44 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -7,6 +7,7 @@ #include "nbl/builtin/hlsl/glsl_compat/core.hlsl" #include "nbl/builtin/hlsl/limits.hlsl" +#include "nbl/builtin/hlsl/concepts/vector.hlsl" namespace nbl @@ -217,6 +218,56 @@ struct left_shift_operator } }; +template NBL_PARTIAL_REQ_TOP(concepts::IntVector) +struct left_shift_operator) > +{ + using type_t = T; + using scalar_t = scalar_type_t; + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + return operand << bits; + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + { + return operand << bits; + } +}; + +template NBL_PARTIAL_REQ_TOP(! (concepts::IntVector) && concepts::Vectorial) +struct left_shift_operator) && concepts::Vectorial) > +{ + using type_t = T; + using scalar_t = typename vector_traits::scalar_type; + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + left_shift_operator leftShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + shifted.setComponent(i, leftShift(operand.getComponent(i), bits.getComponent(i))); + } + return shifted; + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + { + NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + left_shift_operator leftShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + shifted.setComponent(i, leftShift(operand.getComponent(i), bits)); + } + return shifted; + } +}; + template struct arithmetic_right_shift_operator { @@ -228,6 +279,57 @@ struct arithmetic_right_shift_operator } }; +template NBL_PARTIAL_REQ_TOP(concepts::IntVector) +struct arithmetic_right_shift_operator) > +{ + using type_t = T; + using scalar_t = scalar_type_t; + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + return operand >> bits; + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + { + return operand >> bits; + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::Vectorial) +struct arithmetic_right_shift_operator) > +{ + using type_t = T; + using scalar_t = typename vector_traits::scalar_type; + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + arithmetic_right_shift_operator rightShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + shifted.setComponent(i, rightShift(operand.getComponent(i), bits.getComponent(i))); + } + return shifted; + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + { + NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + arithmetic_right_shift_operator rightShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + shifted.setComponent(i, rightShift(operand.getComponent(i), bits)); + } + return shifted; + } +}; + +// Left unimplemented for vectorial types by default template struct logical_right_shift_operator { diff --git 
a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index e2ae3d8b0a..ea583fddfa 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -8,6 +8,8 @@ #include "nbl/builtin/hlsl/emulated/int64_t.hlsl" #include "nbl/builtin/hlsl/mpl.hlsl" +// TODO: mega macro to get functional plus, minus, plus_assign, minus_assign + namespace nbl { namespace hlsl diff --git a/include/nbl/builtin/hlsl/portable/int64_t.hlsl b/include/nbl/builtin/hlsl/portable/int64_t.hlsl index 6929e160fa..2dffa40a2d 100644 --- a/include/nbl/builtin/hlsl/portable/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/portable/int64_t.hlsl @@ -10,18 +10,23 @@ namespace nbl { namespace hlsl { -template #ifdef __HLSL_VERSION #ifdef NBL_FORCE_EMULATED_INT_64 +template using portable_uint64_t = emulated_uint64_t; +template using portable_int64_t = emulated_int64_t; #else +template using portable_uint64_t = typename conditional::shaderInt64, uint64_t, emulated_uint64_t>::type; +template using portable_int64_t = typename conditional::shaderInt64, int64_t, emulated_int64_t>::type; #endif #else +template using portable_uint64_t = uint64_t; +template using portable_int64_t = int64_t; #endif From e50c56b52e873da965804153eba64b3cb133c4a3 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Wed, 9 Apr 2025 00:23:55 -0300 Subject: [PATCH 21/28] Bunch of emulated int64 fixes regarding creation, comparison operators and left/right shifts --- .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 95 +++++++++---------- 1 file changed, 44 insertions(+), 51 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 45cb82ed78..98fcf2835b 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -34,7 +34,7 @@ struct emulated_int64_base /** * @brief Creates an `emulated_int64` from a vector of two `uint32_t`s representing its bitpattern * - * @param [in] _data Vector of `uint32_t` encoding the `uint64_t/int64_t` being emulated + * @param [in] _data Vector of `uint32_t` encoding the `uint64_t/int64_t` being emulated. 
Stored as little endian (first component are the lower 32 bits) */ NBL_CONSTEXPR_STATIC_INLINE_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) { @@ -54,6 +54,18 @@ struct emulated_int64_base return create(storage_t(lo, hi)); } + // ------------------------------------------------------- INTERNAL GETTERS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC uint32_t __getLSB() NBL_CONST_MEMBER_FUNC + { + return data.x; + } + + NBL_CONSTEXPR_INLINE_FUNC uint32_t __getMSB() NBL_CONST_MEMBER_FUNC + { + return data.y; + } + // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC @@ -93,60 +105,42 @@ struct emulated_int64_base NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - const spirv::AddCarryOutput lowerAddResult = addCarry(data.x, rhs.data.x); - const storage_t addResult = { lowerAddResult.result, data.y + rhs.data.y + lowerAddResult.carry }; - const this_t retVal = create(addResult); + const spirv::AddCarryOutput lowerAddResult = addCarry(__getLSB(), rhs.__getLSB()); + const this_t retVal = create(lowerAddResult.result, __getMSB() + rhs.__getMSB() + lowerAddResult.carry); return retVal; } NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - const spirv::SubBorrowOutput lowerSubResult = subBorrow(data.x, rhs.data.x); - const storage_t subResult = { lowerSubResult.result, data.y - rhs.data.y - lowerSubResult.borrow }; - const this_t retVal = create(subResult); + const spirv::SubBorrowOutput lowerSubResult = subBorrow(__getLSB(), rhs.__getLSB()); + const this_t retVal = create(lowerSubResult.result, __getMSB() - rhs.__getMSB() - lowerSubResult.borrow); return retVal; } // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- NBL_CONSTEXPR_INLINE_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - return data.x == rhs.data.x && data.y == rhs.data.y; + return all(data == rhs.data); } NBL_CONSTEXPR_INLINE_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - return data.x != rhs.data.x || data.y != rhs.data.y; + return any(data != rhs.data); } NBL_CONSTEXPR_INLINE_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - if (data.x != rhs.data.x) - { - // If signed, compare topmost bits as signed - NBL_IF_CONSTEXPR(Signed) - return _static_cast(data.x) < _static_cast(rhs.data.x); - // If unsigned, compare them as-is - else - return data.x < rhs.data.x; - } - // Lower bits are positive in both signed and unsigned - else - return data.y < rhs.data.y; + // Either the topmost bits, when interpreted with correct sign, are less than those of `rhs`, or they're equal and the lower bits are less + // (lower bits are always positive in both unsigned and 2's complement so comparison can happen as-is) + const bool MSB = Signed ? 
(_static_cast(__getMSB()) < _static_cast(rhs.__getMSB())) : (__getMSB() < rhs.__getMSB()); + return any(vector(MSB, (__getMSB() == rhs.__getMSB()) && (__getLSB() < rhs.__getLSB()))); } NBL_CONSTEXPR_INLINE_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - if (data.x != rhs.data.x) - { - // If signed, compare topmost bits as signed - NBL_IF_CONSTEXPR(Signed) - return _static_cast(data.x) > _static_cast(rhs.data.x); - // If unsigned, compare them as-is - else - return data.x > rhs.data.x; - } - else - return data.y > rhs.data.y; + // Same reasoning as above + const bool MSB = Signed ? (_static_cast(__getMSB()) > _static_cast(rhs.__getMSB())) : (__getMSB() > rhs.__getMSB()); + return any(vector(MSB, (__getMSB() == rhs.__getMSB()) && (__getLSB() > rhs.__getLSB()))); } NBL_CONSTEXPR_INLINE_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC @@ -260,15 +254,15 @@ struct left_shift_operator > // Can only be defined with `_bits` being of `type_t`, see: //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + + // If `_bits > 63` the result is undefined (current impl returns `0` in LSB and the result of `uint32_t(1) << 32` in your architecture in MSB) NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) { const uint32_t bits = _static_cast(_bits); - if (!bits) - return operand; - const uint32_t shift = ComponentBitWidth - bits; - // We need the `x` component of the vector (which represents the higher bits of the emulated uint64) to get the `bits` higher bits of the `y` component - const vector retValData = { (operand.data.x << bits) | (operand.data.y >> shift), operand.data.y << bits }; - return type_t::create(retValData); + const uint32_t shift = bits >= ComponentBitWidth ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bits >= ComponentBitWidth ? vector(0, operand.__getLSB() << shift) + : vector(operand.__getLSB() << bits, (operand.__getMSB() << bits) | (operand.__getLSB() >> shift))); + return bits ? shifted : operand; } }; @@ -280,15 +274,15 @@ struct arithmetic_right_shift_operator // Can only be defined with `_bits` being of `type_t`, see: //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + + // If `_bits > 63` the result is undefined (current impl returns `0` in MSB and the result of `~uint32_t(0) >> 32` in your architecture in LSB) NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) { const uint32_t bits = _static_cast(_bits); - if (!bits) - return operand; - const uint32_t shift = ComponentBitWidth - bits; - // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component - const vector retValData = { operand.data.x >> bits, (operand.data.x << shift) | (operand.data.y >> bits) }; - return emulated_uint64_t::create(retValData); + const uint32_t shift = bits >= ComponentBitWidth ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bits >= ComponentBitWidth ? vector(operand.__getMSB() >> shift, 0) + : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), operand.__getMSB() >> bits)); + return bits ? 
shifted : operand; } }; @@ -300,16 +294,15 @@ struct arithmetic_right_shift_operator // Can only be defined with `_bits` being of `type_t`, see: //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + + // If `_bits > 63` the result is undefined (current impl returns `0xFFFFFFFF` in MSB and the result of `~uint32_t(0) >> 32` in your architecture in LSB) NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) { const uint32_t bits = _static_cast(_bits); - if (!bits) - return operand; - const uint32_t shift = ComponentBitWidth - bits; - // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component - // Also the right shift *only* in the top bits happens as a signed arithmetic right shift - const vector retValData = { _static_cast(_static_cast(operand.data.x)) >> bits, (operand.data.x << shift) | (operand.data.y >> bits) }; - return emulated_int64_t::create(retValData); + const uint32_t shift = bits >= ComponentBitWidth ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bits >= ComponentBitWidth ? vector(uint32_t(int32_t(operand.__getMSB()) >> bits), ~uint32_t(0)) + : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), uint32_t(int32_t(operand.__getMSB()) >> bits))); + return bits ? shifted : operand; } }; From b1de9c37b2e2572ea13163f241e9fab0a044bb8e Mon Sep 17 00:00:00 2001 From: Fletterio Date: Wed, 9 Apr 2025 16:24:21 -0300 Subject: [PATCH 22/28] Fix automatic specialize macro in cpp compat intrinsics, add intrinsic and generic ternary operator that should work for all compatible types, address PR review comments --- include/nbl/builtin/hlsl/complex.hlsl | 16 -------- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 38 +++++++++++++++++-- .../builtin/hlsl/cpp_compat/intrinsics.hlsl | 6 +++ .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 36 +++++++++++------- include/nbl/builtin/hlsl/functional.hlsl | 21 +++++++++- .../builtin/hlsl/spirv_intrinsics/core.hlsl | 6 +++ 6 files changed, 89 insertions(+), 34 deletions(-) diff --git a/include/nbl/builtin/hlsl/complex.hlsl b/include/nbl/builtin/hlsl/complex.hlsl index 6728a9bf3d..a3a9f387d0 100644 --- a/include/nbl/builtin/hlsl/complex.hlsl +++ b/include/nbl/builtin/hlsl/complex.hlsl @@ -427,22 +427,6 @@ complex_t rotateRight(NBL_CONST_REF_ARG(complex_t) value) return retVal; } -template -struct ternary_operator< complex_t > -{ - using type_t = complex_t; - - complex_t operator()(bool condition, NBL_CONST_REF_ARG(complex_t) lhs, NBL_CONST_REF_ARG(complex_t) rhs) - { - const vector lhsVector = vector(lhs.real(), lhs.imag()); - const vector rhsVector = vector(rhs.real(), rhs.imag()); - const vector resultVector = condition ? 
lhsVector : rhsVector; - const complex_t result = { resultVector.x, resultVector.y }; - return result; - } -}; - - } } diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 92fc9e929b..e1ba823b9b 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -75,6 +75,8 @@ template struct all_helper; template struct any_helper; +template +struct select_helper; template struct bitReverseAs_helper; template @@ -121,8 +123,8 @@ struct subBorrow_helper; // the template<> needs to be written ourselves // return type is __VA_ARGS__ to protect against `,` in templated return types #define AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(HELPER_NAME, SPIRV_FUNCTION_NAME, ARG_TYPE_LIST, ARG_TYPE_SET, ...)\ -NBL_PARTIAL_REQ_TOP(is_same_v(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) \ -struct HELPER_NAME(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) >\ +NBL_PARTIAL_REQ_TOP(is_same_v(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) \ +struct HELPER_NAME(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) >\ {\ using return_t = __VA_ARGS__;\ static inline return_t __call( BOOST_PP_SEQ_FOR_EACH_I(DECL_ARG, _, ARG_TYPE_SET) )\ @@ -143,8 +145,9 @@ template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(length_helper, length, template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(normalize_helper, normalize, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(rsqrt_helper, inverseSqrt, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(fract_helper, fract, (T), (T), T) -template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(all_helper, any, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(all_helper, all, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(any_helper, any, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(select_helper, select, (B)(T), (B)(T)(T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sign_helper, fSign, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sign_helper, sSign, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(radians_helper, radians, (T), (T), T) @@ -633,6 +636,35 @@ struct subBorrow_helper } }; +template +NBL_PARTIAL_REQ_TOP(concepts::BooleanScalar) +struct select_helper) > +{ + NBL_CONSTEXPR_STATIC_INLINE_FUNC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) + { + return condition ? object1 : object2; + } +}; + +template +NBL_PARTIAL_REQ_TOP(concepts::Boolean&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) +struct select_helper&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) > +{ + NBL_CONSTEXPR_STATIC_INLINE_FUNC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) + { + using traits = hlsl::vector_traits; + array_get conditionGetter; + array_get objectGetter; + array_set setter; + + T selected; + for (uint32_t i = 0; i < traits::Dimension; ++i) + setter(selected, i, conditionGetter(condition, i) ? 
objectGetter(object1, i) : objectGetter(object2, i)); + + return selected; + } +}; + #endif // C++ only specializations // C++ and HLSL specializations diff --git a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl index 1f1957dbbd..284ba564d7 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl @@ -150,6 +150,12 @@ inline bool any(Vector vec) return cpp_compat_intrinsics_impl::any_helper::__call(vec); } +template +NBL_CONSTEXPR_INLINE_FUNC ResultType select(Condition condition, ResultType object1, ResultType object2) +{ + return cpp_compat_intrinsics_impl::select_helper::__call(condition, object1, object2); +} + /** * @brief Returns x - floor(x). * diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 98fcf2835b..53881423e9 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -132,15 +132,19 @@ struct emulated_int64_base { // Either the topmost bits, when interpreted with correct sign, are less than those of `rhs`, or they're equal and the lower bits are less // (lower bits are always positive in both unsigned and 2's complement so comparison can happen as-is) + const bool MSBEqual = __getMSB() == rhs.__getMSB(); const bool MSB = Signed ? (_static_cast(__getMSB()) < _static_cast(rhs.__getMSB())) : (__getMSB() < rhs.__getMSB()); - return any(vector(MSB, (__getMSB() == rhs.__getMSB()) && (__getLSB() < rhs.__getLSB()))); + const bool LSB = __getLSB() < rhs.__getLSB(); + return MSBEqual ? LSB : MSB; } NBL_CONSTEXPR_INLINE_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { // Same reasoning as above + const bool MSBEqual = __getMSB() == rhs.__getMSB(); const bool MSB = Signed ? (_static_cast(__getMSB()) > _static_cast(rhs.__getMSB())) : (__getMSB() > rhs.__getMSB()); - return any(vector(MSB, (__getMSB() == rhs.__getMSB()) && (__getLSB() > rhs.__getLSB()))); + const bool LSB = __getLSB() > rhs.__getLSB(); + return MSBEqual ? LSB : MSB; } NBL_CONSTEXPR_INLINE_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC @@ -259,10 +263,12 @@ struct left_shift_operator > NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) { const uint32_t bits = _static_cast(_bits); - const uint32_t shift = bits >= ComponentBitWidth ? bits - ComponentBitWidth : ComponentBitWidth - bits; - const type_t shifted = type_t::create(bits >= ComponentBitWidth ? vector(0, operand.__getLSB() << shift) - : vector(operand.__getLSB() << bits, (operand.__getMSB() << bits) | (operand.__getLSB() >> shift))); - return bits ? shifted : operand; + const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites LSB + const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bigShift ? vector(0, operand.__getLSB() << shift) + : vector(operand.__getLSB() << bits, (operand.__getMSB() << bits) | (operand.__getLSB() >> shift))); + ternary_operator ternary; + return ternary(bool(bits), shifted, operand); } }; @@ -279,10 +285,12 @@ struct arithmetic_right_shift_operator NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) { const uint32_t bits = _static_cast(_bits); - const uint32_t shift = bits >= ComponentBitWidth ? 
bits - ComponentBitWidth : ComponentBitWidth - bits; - const type_t shifted = type_t::create(bits >= ComponentBitWidth ? vector(operand.__getMSB() >> shift, 0) - : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), operand.__getMSB() >> bits)); - return bits ? shifted : operand; + const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites MSB + const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bigShift ? vector(operand.__getMSB() >> shift, 0) + : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), operand.__getMSB() >> bits)); + ternary_operator ternary; + return ternary(bool(bits), shifted, operand); } }; @@ -299,10 +307,12 @@ struct arithmetic_right_shift_operator NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) { const uint32_t bits = _static_cast(_bits); - const uint32_t shift = bits >= ComponentBitWidth ? bits - ComponentBitWidth : ComponentBitWidth - bits; - const type_t shifted = type_t::create(bits >= ComponentBitWidth ? vector(uint32_t(int32_t(operand.__getMSB()) >> bits), ~uint32_t(0)) + const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites MSB + const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bigShift ? vector(uint32_t(int32_t(operand.__getMSB()) >> shift), ~uint32_t(0)) : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), uint32_t(int32_t(operand.__getMSB()) >> bits))); - return bits ? shifted : operand; + ternary_operator ternary; + return ternary(bool(bits), shifted, operand); } }; diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index cc95633f44..51ee4f4829 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -201,9 +201,26 @@ struct ternary_operator { using type_t = T; - NBL_CONSTEXPR_INLINE_FUNC T operator()(bool condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(bool) condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) { - return condition ? lhs : rhs; + return select(condition, lhs, rhs); + } +}; + +template +struct ternary_operator > +{ + using type_t = T; + using traits = hlsl::vector_traits; + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(bool) condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) + { + return select(condition, lhs, rhs); + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(vector) condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) + { + return select, T>(condition, lhs, rhs); } }; diff --git a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl index d8d90de726..8add7a9ed3 100644 --- a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl +++ b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl @@ -12,6 +12,7 @@ #include #include #include +#include namespace nbl { @@ -335,6 +336,11 @@ template [[vk::ext_instruction(spv::OpAny)]] enable_if_t&& is_same_v::scalar_type, bool>, BooleanVector> any(BooleanVector vec); +// If Condition is a vector, ResultType must be a vector with the same number of components. Using (p -> q) = (~p v q) +template && (! 
concepts::Vector || (concepts::Vector && (extent_v == extent_v)))) +[[vk::ext_instruction(spv::OpSelect)]] +ResultType select(Condition condition, ResultType object1, ResultType object2); + template) [[vk::ext_instruction(spv::OpIAddCarry)]] AddCarryOutput addCarry(T operand1, T operand2); From ea8cd43756146225058dcfbc1ddf4d254b0fd579 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Fri, 11 Apr 2025 12:39:16 -0300 Subject: [PATCH 23/28] Checkpoint: adding a bunch of operators to emulated vector types --- include/nbl/builtin/hlsl/concepts/core.hlsl | 10 + include/nbl/builtin/hlsl/concepts/vector.hlsl | 2 + include/nbl/builtin/hlsl/cpp_compat/basic.h | 2 + .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 108 ++++--- .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 4 +- include/nbl/builtin/hlsl/functional.hlsl | 101 ++++-- include/nbl/builtin/hlsl/morton.hlsl | 290 +++++------------- include/nbl/builtin/hlsl/type_traits.hlsl | 2 + 8 files changed, 236 insertions(+), 283 deletions(-) diff --git a/include/nbl/builtin/hlsl/concepts/core.hlsl b/include/nbl/builtin/hlsl/concepts/core.hlsl index dcbafae8a5..4a8b848cb8 100644 --- a/include/nbl/builtin/hlsl/concepts/core.hlsl +++ b/include/nbl/builtin/hlsl/concepts/core.hlsl @@ -74,12 +74,22 @@ struct is_emulating_floating_point_scalar { NBL_CONSTEXPR_STATIC_INLINE bool value = FloatingPointScalar; }; + +template +struct is_emulating_integral_scalar +{ + NBL_CONSTEXPR_STATIC_INLINE bool value = IntegralScalar; +}; } //! Floating point types are native floating point types or types that imitate native floating point types (for example emulated_float64_t) template NBL_BOOL_CONCEPT FloatingPointLikeScalar = impl::is_emulating_floating_point_scalar::value; +//! Integral-like types are native integral types or types that imitate native integral types (for example emulated_uint64_t) +template +NBL_BOOL_CONCEPT IntegralLikeScalar = impl::is_emulating_integral_scalar::value; + } } } diff --git a/include/nbl/builtin/hlsl/concepts/vector.hlsl b/include/nbl/builtin/hlsl/concepts/vector.hlsl index 468838730a..3ea3199951 100644 --- a/include/nbl/builtin/hlsl/concepts/vector.hlsl +++ b/include/nbl/builtin/hlsl/concepts/vector.hlsl @@ -40,6 +40,8 @@ NBL_BOOL_CONCEPT FloatingPointLikeVectorial = concepts::Vectorial && concepts template NBL_BOOL_CONCEPT IntVectorial = concepts::Vectorial && (is_integral_v::scalar_type>); template +NBL_BOOL_CONCEPT IntegralLikeVectorial = concepts::Vectorial && concepts::IntegralLikeScalar::scalar_type>; +template NBL_BOOL_CONCEPT SignedIntVectorial = concepts::Vectorial && concepts::SignedIntegralScalar::scalar_type>; } diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index 77d9d887bd..81bdf32c19 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -11,6 +11,7 @@ #define NBL_CONSTEXPR constexpr // TODO: rename to NBL_CONSTEXPR_VAR #define NBL_CONSTEXPR_FUNC constexpr #define NBL_CONSTEXPR_STATIC constexpr static +#define NBL_CONSTEXPR_INLINE constexpr inline #define NBL_CONSTEXPR_STATIC_INLINE constexpr static inline #define NBL_CONSTEXPR_STATIC_FUNC constexpr static #define NBL_CONSTEXPR_INLINE_FUNC constexpr inline @@ -45,6 +46,7 @@ namespace nbl::hlsl #define NBL_CONSTEXPR const static // TODO: rename to NBL_CONSTEXPR_VAR #define NBL_CONSTEXPR_FUNC #define NBL_CONSTEXPR_STATIC const static +#define NBL_CONSTEXPR_INLINE const static #define NBL_CONSTEXPR_STATIC_INLINE const static #define NBL_CONSTEXPR_STATIC_FUNC static #define 
NBL_CONSTEXPR_INLINE_FUNC inline diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 53881423e9..ca51b0060a 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -94,10 +94,8 @@ struct emulated_int64_base // Only valid in CPP #ifndef __HLSL_VERSION - - constexpr inline this_t operator<<(this_t bits) const; - - constexpr inline this_t operator>>(this_t bits) const; + constexpr inline this_t operator<<(uint32_t bits) const; + constexpr inline this_t operator>>(uint32_t bits) const; #endif @@ -256,13 +254,12 @@ struct left_shift_operator > using type_t = emulated_int64_base; NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); - // Can only be defined with `_bits` being of `type_t`, see: + // Can't do generic templated definition, see: //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 - // If `_bits > 63` the result is undefined (current impl returns `0` in LSB and the result of `uint32_t(1) << 32` in your architecture in MSB) - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) { - const uint32_t bits = _static_cast(_bits); const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites LSB const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; const type_t shifted = type_t::create(bigShift ? vector(0, operand.__getLSB() << shift) @@ -270,6 +267,12 @@ struct left_shift_operator > ternary_operator ternary; return ternary(bool(bits), shifted, operand); } + + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + { + return operator()(operand, _static_cast(bits)); + } }; template<> @@ -278,13 +281,12 @@ struct arithmetic_right_shift_operator using type_t = emulated_uint64_t; NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); - // Can only be defined with `_bits` being of `type_t`, see: + // Can't do generic templated definition, see: //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 - // If `_bits > 63` the result is undefined (current impl returns `0` in MSB and the result of `~uint32_t(0) >> 32` in your architecture in LSB) - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) + // If `_bits > 63` the result is undefined + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) { - const uint32_t bits = _static_cast(_bits); const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites MSB const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; const type_t shifted = type_t::create(bigShift ? 
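// Worked example of the two branches above, for operand MSB = 0x11223344 and LSB = 0xAABBCCDD, i.e. the 64-bit value 0x11223344AABBCCDD:
//  - bits = 40 (bigShift): shift = 8, giving LSB = MSB >> 8 = 0x00112233 and MSB = 0, i.e. 0x0000000000112233
//  - bits = 8: shift = 24, giving LSB = (MSB << 24) | (LSB >> 8) = 0x44AABBCC and MSB = MSB >> 8 = 0x00112233, i.e. 0x0011223344AABBCC
// both of which agree with shifting a native uint64_t right by the same amounts.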
vector(operand.__getMSB() >> shift, 0) @@ -292,6 +294,12 @@ struct arithmetic_right_shift_operator ternary_operator ternary; return ternary(bool(bits), shifted, operand); } + + // If `_bits > 63` the result is undefined + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + { + return operator()(operand, _static_cast(bits)); + } }; template<> @@ -300,13 +308,12 @@ struct arithmetic_right_shift_operator using type_t = emulated_int64_t; NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); - // Can only be defined with `_bits` being of `type_t`, see: + // Can't do generic templated definition, see: //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 - // If `_bits > 63` the result is undefined (current impl returns `0xFFFFFFFF` in MSB and the result of `~uint32_t(0) >> 32` in your architecture in LSB) - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) { - const uint32_t bits = _static_cast(_bits); const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites MSB const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; const type_t shifted = type_t::create(bigShift ? vector(uint32_t(int32_t(operand.__getMSB()) >> shift), ~uint32_t(0)) @@ -314,24 +321,30 @@ struct arithmetic_right_shift_operator ternary_operator ternary; return ternary(bool(bits), shifted, operand); } + + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + { + return operator()(operand, _static_cast(bits)); + } }; #ifndef __HLSL_VERSION template -constexpr inline emulated_int64_base emulated_int64_base::operator<<(this_t bits) const +constexpr inline emulated_int64_base emulated_int64_base::operator<<(uint32_t bits) const { left_shift_operator leftShift; return leftShift(*this, bits); } -constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(this_t bits) const +constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(uint32_t bits) const { arithmetic_right_shift_operator rightShift; return rightShift(*this, bits); } -constexpr inline emulated_int64_t emulated_int64_t::operator>>(this_t bits) const +constexpr inline emulated_int64_t emulated_int64_t::operator>>(uint32_t bits) const { arithmetic_right_shift_operator rightShift; return rightShift(*this, bits); @@ -353,11 +366,7 @@ struct plus > return lhs + rhs; } - #ifndef __HLSL_VERSION - NBL_CONSTEXPR_STATIC_INLINE type_t identity = _static_cast(uint64_t(0)); - #else - NBL_CONSTEXPR_STATIC_INLINE type_t identity; - #endif + const static type_t identity; }; template @@ -370,23 +379,17 @@ struct minus > return lhs - rhs; } - #ifndef __HLSL_VERSION - NBL_CONSTEXPR_STATIC_INLINE type_t identity = _static_cast(uint64_t(0)); - #else - NBL_CONSTEXPR_STATIC_INLINE type_t identity; - #endif + const static type_t identity; }; -#ifdef __HLSL_VERSION template<> -NBL_CONSTEXPR emulated_uint64_t plus::identity = _static_cast(uint64_t(0)); +NBL_CONSTEXPR_INLINE emulated_uint64_t plus::identity = _static_cast(uint64_t(0)); template<> -NBL_CONSTEXPR emulated_int64_t plus::identity = _static_cast(int64_t(0)); +NBL_CONSTEXPR_INLINE emulated_int64_t plus::identity = _static_cast(int64_t(0)); template<> -NBL_CONSTEXPR emulated_uint64_t minus::identity = 
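// `identity` here is the neutral element of the corresponding operation (a convention these functional structs
// follow, assuming the usual reduction/scan usage): adding plus::identity to any emulated 64-bit value leaves it
// unchanged, and likewise subtracting minus::identity, so for both operations it is the all-zero bit pattern.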
_static_cast(uint64_t(0)); +NBL_CONSTEXPR_INLINE emulated_uint64_t minus::identity = _static_cast(uint64_t(0)); template<> -NBL_CONSTEXPR emulated_int64_t minus::identity = _static_cast(int64_t(0)); -#endif +NBL_CONSTEXPR_INLINE emulated_int64_t minus::identity = _static_cast(int64_t(0)); // --------------------------------- Compound assignment operators ------------------------------------------ // Specializations of the structs found in functional.hlsl @@ -402,11 +405,7 @@ struct plus_assign > lhs = baseOp(lhs, rhs); } - #ifndef __HLSL_VERSION - NBL_CONSTEXPR_STATIC_INLINE type_t identity = base_t::identity; - #else - NBL_CONSTEXPR_STATIC_INLINE type_t identity; - #endif + const static type_t identity; }; template @@ -420,23 +419,30 @@ struct minus_assign > lhs = baseOp(lhs, rhs); } - #ifndef __HLSL_VERSION - NBL_CONSTEXPR_STATIC_INLINE type_t identity = base_t::identity; - #else - NBL_CONSTEXPR_STATIC_INLINE type_t identity; - #endif + const static type_t identity; }; -#ifdef __HLSL_VERSION template<> -NBL_CONSTEXPR emulated_uint64_t plus_assign::identity = plus::identity; +NBL_CONSTEXPR_INLINE emulated_uint64_t plus_assign::identity = plus::identity; template<> -NBL_CONSTEXPR emulated_int64_t plus_assign::identity = plus::identity; +NBL_CONSTEXPR_INLINE emulated_int64_t plus_assign::identity = plus::identity; template<> -NBL_CONSTEXPR emulated_uint64_t minus_assign::identity = minus::identity; +NBL_CONSTEXPR_INLINE emulated_uint64_t minus_assign::identity = minus::identity; template<> -NBL_CONSTEXPR emulated_int64_t minus_assign::identity = minus::identity; -#endif +NBL_CONSTEXPR_INLINE emulated_int64_t minus_assign::identity = minus::identity; + +// --------------------------------------------------- CONCEPTS SATISFIED ----------------------------------------------------- +namespace concepts +{ +namespace impl +{ +template +struct is_emulating_integral_scalar > +{ + NBL_CONSTEXPR_STATIC_INLINE bool value = true; +}; +} +} } //namespace nbl } //namespace hlsl diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index 65a97bbe68..4d7c3839d9 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -330,8 +330,8 @@ struct emulated_vector : CRTP DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) - DEFINE_OPERATORS_FOR_TYPE(emulated_uint64_t) - DEFINE_OPERATORS_FOR_TYPE(emulated_int64_t) + //DEFINE_OPERATORS_FOR_TYPE(emulated_uint64_t) + //DEFINE_OPERATORS_FOR_TYPE(emulated_int64_t) DEFINE_OPERATORS_FOR_TYPE(float32_t) DEFINE_OPERATORS_FOR_TYPE(float64_t) DEFINE_OPERATORS_FOR_TYPE(uint16_t) diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 51ee4f4829..93687bdb6a 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -207,23 +207,6 @@ struct ternary_operator } }; -template -struct ternary_operator > -{ - using type_t = T; - using traits = hlsl::vector_traits; - - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(bool) condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) - { - return select(condition, lhs, rhs); - } - - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(vector) condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) - { - return select, T>(condition, lhs, rhs); - } -}; - template struct left_shift_operator { @@ -252,34 +235,68 @@ struct 
left_shift_operator) > } }; -template NBL_PARTIAL_REQ_TOP(! (concepts::IntVector) && concepts::Vectorial) -struct left_shift_operator) && concepts::Vectorial) > +template NBL_PARTIAL_REQ_TOP(!concepts::Vector && concepts::IntegralLikeVectorial) +struct left_shift_operator && concepts::IntegralLikeVectorial) > { using type_t = T; using scalar_t = typename vector_traits::scalar_type; NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { + array_get getter; + array_set setter; NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; [[unroll]] for (uint16_t i = 0; i < extent; i++) { - shifted.setComponent(i, leftShift(operand.getComponent(i), bits.getComponent(i))); + setter(shifted, i, leftShift(getter(operand, i), getter(bits, i))); } return shifted; } NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) { + array_get getter; + array_set setter; + NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + left_shift_operator leftShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, leftShift(getter(operand, i), bits)); + } + return shifted; + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(vector::Dimension>) bits) + { + array_get getter; + array_set setter; + NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + left_shift_operator leftShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, leftShift(getter(operand, i), bits[i])); + } + return shifted; + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(uint32_t) bits) + { + array_get getter; + array_set setter; NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; [[unroll]] for (uint16_t i = 0; i < extent; i++) { - shifted.setComponent(i, leftShift(operand.getComponent(i), bits)); + setter(shifted, i, leftShift(getter(operand, i), bits)); } return shifted; } @@ -313,34 +330,68 @@ struct arithmetic_right_shift_operator NBL_PARTIAL_REQ_TOP(concepts::Vectorial) -struct arithmetic_right_shift_operator) > +template NBL_PARTIAL_REQ_TOP(!concepts::Vector&& concepts::IntegralLikeVectorial) +struct arithmetic_right_shift_operator&& concepts::IntegralLikeVectorial) > { using type_t = T; using scalar_t = typename vector_traits::scalar_type; NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { + array_get getter; + array_set setter; NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] for (uint16_t i = 0; i < extent; i++) { - shifted.setComponent(i, rightShift(operand.getComponent(i), bits.getComponent(i))); + setter(shifted, i, rightShift(getter(operand, i), getter(bits, i))); } return shifted; } NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) { + array_get getter; + array_set setter; + NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + arithmetic_right_shift_operator rightShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, rightShift(getter(operand, i), bits)); + } + return shifted; + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(vector::Dimension>) bits) + { + array_get 
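// Going through array_get/array_set instead of member access keeps this loop generic: the same body works for
// native vectors and emulated vector types alike, shifting component i of `operand` left by component i of `bits`.
// A usage sketch (names hypothetical), assuming V is an integral-like vectorial type:
//   left_shift_operator<V> shiftOp;
//   V shifted = shiftOp(operand, bits); // shifted[i] == operand[i] << bits[i]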
getter; + array_set setter; + NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + arithmetic_right_shift_operator rightShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, rightShift(getter(operand, i), bits[i])); + } + return shifted; + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(uint32_t) bits) + { + array_get getter; + array_set setter; NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] for (uint16_t i = 0; i < extent; i++) { - shifted.setComponent(i, rightShift(operand.getComponent(i), bits)); + setter(shifted, i, rightShift(getter(operand, i), bits)); } return shifted; } diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index ea583fddfa..9e62e40c2a 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -7,6 +7,7 @@ #include "nbl/builtin/hlsl/functional.hlsl" #include "nbl/builtin/hlsl/emulated/int64_t.hlsl" #include "nbl/builtin/hlsl/mpl.hlsl" +#include "nbl/builtin/hlsl/portable/vector_t.hlsl" // TODO: mega macro to get functional plus, minus, plus_assign, minus_assign @@ -22,90 +23,67 @@ namespace impl // Valid dimension for a morton code template -NBL_BOOL_CONCEPT MortonDimension = 1 < D && D < 5; +NBL_BOOL_CONCEPT Dimension = 1 < D && D < 5; -// Basic decode masks - -template -struct decode_mask; +// --------------------------------------------------------- MORTON ENCODE/DECODE MASKS --------------------------------------------------- -template -struct decode_mask : integral_constant {}; +NBL_CONSTEXPR uint16_t CodingStages = 5; -template -struct decode_mask : integral_constant::value << Dim) | T(1)> {}; +template +struct coding_mask; -template -NBL_CONSTEXPR T decode_mask_v = decode_mask::value; +template +NBL_CONSTEXPR uint64_t coding_mask_v = coding_mask::value; -// --------------------------------------------------------- MORTON ENCODE/DECODE MASKS --------------------------------------------------- -// Proper encode masks (either generic `T array[masksPerDImension]` or `morton_mask`) impossible to have until at best HLSL202y +// 0th stage will be special: to avoid masking twice during encode/decode, and to get a proper mask that only gets the relevant bits out of a morton code, the 0th stage +// mask also considers the total number of bits we're considering for a code (all other masks operate on a bit-agnostic basis). +#define NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(DIM, BASE_VALUE) template struct coding_mask\ {\ + enum : uint64_t { _Bits = Bits };\ + NBL_CONSTEXPR_STATIC_INLINE uint64_t KilloffMask = _Bits * DIM < 64 ? 
(uint64_t(1) << (_Bits * DIM)) - 1 : ~uint64_t(0);\ + NBL_CONSTEXPR_STATIC_INLINE uint64_t value = uint64_t(BASE_VALUE) & KilloffMask;\ +}; -#ifndef __HLSL_VERSION +#define NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(DIM, STAGE, BASE_VALUE) template struct coding_mask\ +{\ + NBL_CONSTEXPR_STATIC_INLINE uint64_t value = uint64_t(BASE_VALUE);\ +}; -#define NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, HEX_VALUE) template struct morton_mask_##DIM##_##MASK \ {\ - NBL_CONSTEXPR_STATIC_INLINE T value = _static_cast(HEX_VALUE);\ +// Final stage mask also counts exact number of bits, although maybe it's not necessary +#define NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS template struct coding_mask\ {\ + enum : uint64_t { _Bits = Bits };\ + NBL_CONSTEXPR_STATIC_INLINE uint64_t value = (uint64_t(1) << _Bits) - 1;\ }; -#define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_VALUE) +NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(2, 0x5555555555555555) // Groups bits by 1 on, 1 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 1, uint64_t(0x3333333333333333)) // Groups bits by 2 on, 2 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 2, uint64_t(0x0F0F0F0F0F0F0F0F)) // Groups bits by 4 on, 4 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 3, uint64_t(0x00FF00FF00FF00FF)) // Groups bits by 8 on, 8 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 4, uint64_t(0x0000FFFF0000FFFF)) // Groups bits by 16 on, 16 off -#else +NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(3, 0x9249249249249249) // Groups bits by 1 on, 2 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 1, uint64_t(0x30C30C30C30C30C3)) // Groups bits by 2 on, 4 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 2, uint64_t(0xF00F00F00F00F00F)) // Groups bits by 4 on, 8 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 3, uint64_t(0x00FF0000FF0000FF)) // Groups bits by 8 on, 16 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 4, uint64_t(0xFFFF00000000FFFF)) // Groups bits by 16 on, 32 off -#define NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, HEX_VALUE) template struct morton_mask_##DIM##_##MASK \ -{\ - NBL_CONSTEXPR_STATIC_INLINE T value;\ -};\ -template<>\ -NBL_CONSTEXPR_STATIC_INLINE uint16_t morton_mask_##DIM##_##MASK##::value = _static_cast(HEX_VALUE);\ -template<>\ -NBL_CONSTEXPR_STATIC_INLINE uint32_t morton_mask_##DIM##_##MASK##::value = _static_cast(HEX_VALUE);\ -template<>\ -NBL_CONSTEXPR_STATIC_INLINE uint64_t morton_mask_##DIM##_##MASK##::value = _static_cast(HEX_VALUE);\ - -#define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_VALUE) template<> struct morton_mask_##DIM##_##MASK##\ -{\ - NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t value;\ -};\ -NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t morton_mask_##DIM##_##MASK##::value = _static_cast(HEX_VALUE); -#endif +NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(4, 0x1111111111111111) // Groups bits by 1 on, 3 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 1, uint64_t(0x0303030303030303)) // Groups bits by 2 on, 6 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 2, uint64_t(0x000F000F000F000F)) // Groups bits by 4 on, 12 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 3, uint64_t(0x000000FF000000FF)) // Groups bits by 8 on, 24 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 4, uint64_t(0x000000000000FFFF)) // Groups bits by 16 on, 48 off (unused but here for completeness; likely keeps the compiler from complaining) -#define NBL_MORTON_DECODE_MASK(DIM, MASK, HEX_VALUE) template struct morton_mask_##DIM##_##MASK ;\ - NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_VALUE)\ - NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, HEX_VALUE)\ - template\ - 
NBL_CONSTEXPR T morton_mask_##DIM##_##MASK##_v = morton_mask_##DIM##_##MASK##::value; - -NBL_MORTON_DECODE_MASK(2, 0, uint64_t(0x5555555555555555)) // Groups bits by 1 on, 1 off -NBL_MORTON_DECODE_MASK(2, 1, uint64_t(0x3333333333333333)) // Groups bits by 2 on, 2 off -NBL_MORTON_DECODE_MASK(2, 2, uint64_t(0x0F0F0F0F0F0F0F0F)) // Groups bits by 4 on, 4 off -NBL_MORTON_DECODE_MASK(2, 3, uint64_t(0x00FF00FF00FF00FF)) // Groups bits by 8 on, 8 off -NBL_MORTON_DECODE_MASK(2, 4, uint64_t(0x0000FFFF0000FFFF)) // Groups bits by 16 on, 16 off -NBL_MORTON_DECODE_MASK(2, 5, uint64_t(0x00000000FFFFFFFF)) // Groups bits by 32 on, 32 off - -NBL_MORTON_DECODE_MASK(3, 0, uint64_t(0x1249249249249249)) // Groups bits by 1 on, 2 off - also limits each dimension to 21 bits -NBL_MORTON_DECODE_MASK(3, 1, uint64_t(0x01C0E070381C0E07)) // Groups bits by 3 on, 6 off -NBL_MORTON_DECODE_MASK(3, 2, uint64_t(0x0FC003F000FC003F)) // Groups bits by 6 on, 12 off -NBL_MORTON_DECODE_MASK(3, 3, uint64_t(0x0000FFF000000FFF)) // Groups bits by 12 on, 24 off -NBL_MORTON_DECODE_MASK(3, 4, uint64_t(0x0000000000FFFFFF)) // Groups bits by 24 on, 48 off - -NBL_MORTON_DECODE_MASK(4, 0, uint64_t(0x1111111111111111)) // Groups bits by 1 on, 3 off -NBL_MORTON_DECODE_MASK(4, 1, uint64_t(0x0303030303030303)) // Groups bits by 2 on, 6 off -NBL_MORTON_DECODE_MASK(4, 2, uint64_t(0x000F000F000F000F)) // Groups bits by 4 on, 12 off -NBL_MORTON_DECODE_MASK(4, 3, uint64_t(0x000000FF000000FF)) // Groups bits by 8 on, 24 off -NBL_MORTON_DECODE_MASK(4, 4, uint64_t(0x000000000000FFFF)) // Groups bits by 16 on, 48 off - -#undef NBL_MORTON_DECODE_MASK -#undef NBL_MORTON_EMULATED_DECODE_MASK -#undef NBL_MORTON_GENERIC_DECODE_MASK - -// ----------------------------------------------------------------- MORTON ENCODERS --------------------------------------------------- - -template -struct MortonEncoder; - -template -struct MortonEncoder<2, Bits, encode_t> +NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS + +#undef NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS +#undef NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK +#undef NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK + +// ----------------------------------------------------------------- MORTON ENCODER --------------------------------------------------- + +template && (Dim * Bits <= 64) && (sizeof(encode_t) * 8 >= Dim * Bits)) struct MortonEncoder { template NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t 
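// The masked shifts in encode() below spread the bits of each coordinate apart; a worked example for Dim == 2,
// Bits == 4 and x == 0b1011 (masks shown truncated to 8 bits):
//   x = x | (x << 2)    -> 0b101111   (the 0x0F0F... mask applied first is a no-op at this width)
//   x = x & 0b00110011  -> 0b100011
//   x = x | (x << 1)    -> 0b1100111
//   x = x & 0b01010101  -> 0b01000101 (final mask applied at the return)
// leaving one zero between consecutive bits of x, ready to be interleaved with the other coordinate.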
encode(NBL_CONST_REF_ARG(decode_t) decodedValue) - { - left_shift_operator leftShift; - encode_t encoded = _static_cast(decodedValue); - NBL_IF_CONSTEXPR(Bits > 12) - { - encoded = (encoded | leftShift(encoded, 24)) & morton_mask_3_3_v; - } - NBL_IF_CONSTEXPR(Bits > 6) - { - encoded = (encoded | leftShift(encoded, 12)) & morton_mask_3_2_v; - } - NBL_IF_CONSTEXPR(Bits > 3) - { - encoded = (encoded | leftShift(encoded, 6)) & morton_mask_3_1_v; - } - encoded = (encoded | leftShift(encoded, 2) | leftShift(encoded, 4)) & morton_mask_3_0_v; - return encoded; - } -}; - -template -struct MortonEncoder<4, Bits, encode_t> -{ - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) - { - left_shift_operator leftShift; - encode_t encoded = _static_cast(decodedValue); - NBL_IF_CONSTEXPR(Bits > 8) - { - encoded = (encoded | leftShift(encoded, 24)) & morton_mask_4_3_v; - } - NBL_IF_CONSTEXPR(Bits > 4) - { - encoded = (encoded | leftShift(encoded, 12)) & morton_mask_4_2_v; - } - NBL_IF_CONSTEXPR(Bits > 2) + NBL_IF_CONSTEXPR(Bits > 1) { - encoded = (encoded | leftShift(encoded, 6)) & morton_mask_4_1_v; + encoded = encoded & _static_cast(coding_mask_v); + encoded = encoded | leftShift(encoded, 1 * (Dim - 1)); } - encoded = (encoded | leftShift(encoded, 3)) & morton_mask_4_0_v; - return encoded; + return encoded & _static_cast(coding_mask_v); } }; -// ----------------------------------------------------------------- MORTON DECODERS --------------------------------------------------- +// ----------------------------------------------------------------- MORTON DECODER --------------------------------------------------- -template -struct MortonDecoder; - -template -struct MortonDecoder<2, Bits, encode_t> +template && (Dim* Bits <= 64) && (sizeof(encode_t) * 8 >= Dim * Bits)) +struct MortonDecoder { - template + template 16), uint32_t, uint16_t> + NBL_FUNC_REQUIRES(concepts::IntVector && sizeof(vector_traits::scalar_type) * 8 >= Bits) NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) { - arithmetic_right_shift_operator rightShift; - encode_t decoded = encodedValue & morton_mask_2_0_v; + arithmetic_right_shift_operator > rightShift; + portable_vector_t decoded; NBL_IF_CONSTEXPR(Bits > 1) { - decoded = (decoded | rightShift(decoded, 1)) & morton_mask_2_1_v; + decoded = decoded & _static_cast(coding_mask_v); + decoded = decoded | rightShift(decoded, 1 * (Dim - 1)); } NBL_IF_CONSTEXPR(Bits > 2) { - decoded = (decoded | rightShift(decoded, 2)) & morton_mask_2_2_v; + decoded = decoded & _static_cast(coding_mask_v); + decoded = decoded | rightShift(decoded, 2 * (Dim - 1)); } NBL_IF_CONSTEXPR(Bits > 4) { - decoded = (decoded | rightShift(decoded, 4)) & morton_mask_2_3_v; + decoded = decoded & _static_cast(coding_mask_v); + decoded = decoded | rightShift(decoded, 4 * (Dim - 1)); } NBL_IF_CONSTEXPR(Bits > 8) { - decoded = (decoded | rightShift(decoded, 8)) & morton_mask_2_4_v; + decoded = decoded & _static_cast(coding_mask_v); + decoded = decoded | rightShift(decoded, 8 * (Dim - 1)); } NBL_IF_CONSTEXPR(Bits > 16) { - decoded = (decoded | rightShift(decoded, 16)) & morton_mask_2_5_v; - } - - return _static_cast(decoded); - } -}; - -template -struct MortonDecoder<3, Bits, encode_t> -{ - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) - { - arithmetic_right_shift_operator rightShift; - encode_t decoded = encodedValue & morton_mask_3_0_v; - NBL_IF_CONSTEXPR(Bits > 1) - { - decoded = 
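// (in 3D a coordinate's bits sit 3 positions apart, so a single shift cannot gather them: OR-ing in copies
// shifted right by 2 and by 4 brings the bit at position 3 down to position 1 and the bit at position 6 down
// to position 2, after which the "3 on, 6 off" mask keeps the gathered triple and discards the duplicates)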
(decoded | rightShift(decoded, 2) | rightShift(decoded, 4)) & morton_mask_3_1_v; - } - NBL_IF_CONSTEXPR(Bits > 3) - { - decoded = (decoded | rightShift(decoded, 6)) & morton_mask_3_2_v; - } - NBL_IF_CONSTEXPR(Bits > 6) - { - decoded = (decoded | rightShift(decoded, 12)) & morton_mask_3_3_v; - } - NBL_IF_CONSTEXPR(Bits > 12) - { - decoded = (decoded | rightShift(decoded, 24)) & morton_mask_3_4_v; - } - - return _static_cast(decoded); - } -}; - -template -struct MortonDecoder<4, Bits, encode_t> -{ - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) - { - arithmetic_right_shift_operator rightShift; - encode_t decoded = encodedValue & morton_mask_4_0_v; - NBL_IF_CONSTEXPR(Bits > 1) - { - decoded = (decoded | rightShift(decoded, 3)) & morton_mask_4_1_v; - } - NBL_IF_CONSTEXPR(Bits > 2) - { - decoded = (decoded | rightShift(decoded, 6)) & morton_mask_4_2_v; - } - NBL_IF_CONSTEXPR(Bits > 4) - { - decoded = (decoded | rightShift(decoded, 12)) & morton_mask_4_3_v; - } - NBL_IF_CONSTEXPR(Bits > 8) - { - decoded = (decoded | rightShift(decoded, 24)) & morton_mask_4_4_v; + decoded = decoded & _static_cast(coding_mask_v); + decoded = decoded | rightShift(decoded, 16 * (Dim - 1)); } - return _static_cast(decoded); + return _static_cast(decoded & _static_cast(coding_mask_v)); } }; @@ -290,7 +170,7 @@ struct Equals { NBL_CONSTEXPR_INLINE_FUNC vector operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { - NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v; + NBL_CONSTEXPR_STATIC storage_t Mask = _static_cast(impl::coding_mask_v); left_shift_operator leftShift; vector retVal; [[unroll]] @@ -342,7 +222,7 @@ struct BaseComparison { NBL_CONSTEXPR_INLINE_FUNC vector operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { - NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v; + NBL_CONSTEXPR_STATIC storage_t Mask = _static_cast(impl::coding_mask_v); left_shift_operator leftShift; vector retVal; ComparisonOp comparison; @@ -392,7 +272,7 @@ struct LessEquals : BaseComparison && D * Bits <= 64) +template && D * Bits <= 64) struct code { using this_t = code; @@ -515,7 +395,7 @@ struct code NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v; + NBL_CONSTEXPR_STATIC storage_t Mask = _static_cast(impl::coding_mask_v); left_shift_operator leftShift; this_t retVal; retVal.value = _static_cast(uint64_t(0)); @@ -536,7 +416,7 @@ struct code NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v; + NBL_CONSTEXPR_STATIC storage_t Mask = _static_cast(impl::coding_mask_v); left_shift_operator leftShift; this_t retVal; retVal.value = _static_cast(uint64_t(0)); @@ -653,14 +533,14 @@ struct arithmetic_right_shift_operator > #ifndef __HLSL_VERSION -template&& D* Bits <= 64) +template&& D* Bits <= 64) constexpr inline morton::code morton::code::operator<<(uint16_t bits) const { left_shift_operator> leftShift; return leftShift(*this, bits); } -template&& D* Bits <= 64) +template&& D* Bits <= 64) constexpr inline morton::code morton::code::operator>>(uint16_t bits) const { arithmetic_right_shift_operator> rightShift; diff --git a/include/nbl/builtin/hlsl/type_traits.hlsl b/include/nbl/builtin/hlsl/type_traits.hlsl index 5bfc7ca89b..bc160de788 100644 --- a/include/nbl/builtin/hlsl/type_traits.hlsl +++ 
b/include/nbl/builtin/hlsl/type_traits.hlsl @@ -664,6 +664,8 @@ using conditional_t = typename conditional::type; // Template Variables +template +NBL_CONSTEXPR T integral_constant_v = integral_constant::value; template NBL_CONSTEXPR bool is_same_v = is_same::value; template From 53a5f6a8cd4c19718694ff701c3723bbfffcf0f5 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Fri, 11 Apr 2025 17:04:15 -0300 Subject: [PATCH 24/28] Vectorized encode/decode for better pipelining --- .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 152 ++++++++++-------- include/nbl/builtin/hlsl/morton.hlsl | 29 +++- 2 files changed, 106 insertions(+), 75 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index 4d7c3839d9..c4938fc9c2 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -147,93 +147,107 @@ struct emulated_vector : CRTP return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(component_t val) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, this_t::getComponent(i) + val); - - return output; + #define NBL_EMULATED_VECTOR_DEFINE_OPERATOR(OP)\ + NBL_CONSTEXPR_INLINE_FUNC this_t operator##OP (component_t val)\ + {\ + this_t output;\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP val);\ + return output;\ + }\ + NBL_CONSTEXPR_INLINE_FUNC this_t operator##OP (this_t other)\ + {\ + this_t output;\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP other.getComponent(i));\ + return output;\ + }\ + NBL_CONSTEXPR_INLINE_FUNC this_t operator##OP (vector other)\ + {\ + this_t output;\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP other[i]);\ + return output;\ } - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(this_t other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, this_t::getComponent(i) + other.getComponent(i)); - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(vector other) - { - this_t output; + NBL_EMULATED_VECTOR_DEFINE_OPERATOR(&) + NBL_EMULATED_VECTOR_DEFINE_OPERATOR(|) + NBL_EMULATED_VECTOR_DEFINE_OPERATOR(^) + NBL_EMULATED_VECTOR_DEFINE_OPERATOR(+) + NBL_EMULATED_VECTOR_DEFINE_OPERATOR(-) + NBL_EMULATED_VECTOR_DEFINE_OPERATOR(*) + NBL_EMULATED_VECTOR_DEFINE_OPERATOR(/) - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, this_t::getComponent(i) + other[i]); + #undef NBL_EMULATED_VECTOR_DEFINE_OPERATOR - return output; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(component_t val) + NBL_CONSTEXPR_INLINE_FUNC component_t calcComponentSum() { - this_t output; - + component_t sum = 0; for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) - val); + sum = sum + CRTP::getComponent(i); - return output; + return sum; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(this_t other) - { - this_t output; +}; - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) - other.getComponent(i)); +template +struct emulated_vector : CRTP +{ + using component_t = ComponentType; + using this_t = emulated_vector; - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(vector other) + NBL_CONSTEXPR_STATIC_INLINE this_t create(this_t other) { this_t output; - + [[unroll]] for (uint32_t i = 0u; i < 
CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) - other[i]); + output.setComponent(i, other.getComponent(i)); return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(component_t val) + template + NBL_CONSTEXPR_STATIC_INLINE this_t create(vector other) { this_t output; - + [[unroll]] for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) * val); + output.setComponent(i, ComponentType::create(other[i])); return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(this_t other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) * other.getComponent(i)); - return output; + #define NBL_EMULATED_VECTOR_OPERATOR(OP, ENABLE_CONDITION) NBL_CONSTEXPR_INLINE_FUNC enable_if_t< ENABLE_CONDITION , this_t> operator##OP (component_t val)\ + {\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, CRTP::getComponent(i) + val);\ + return output;\ + }\ + NBL_CONSTEXPR_INLINE_FUNC enable_if_t< ENABLE_CONDITION , this_t> operator##OP (this_t other)\ + {\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, CRTP::getComponent(i) + other.getComponent(i));\ + return output;\ } - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(vector other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) * other[i]); - return output; - } + NBL_EMULATED_VECTOR_OPERATOR(&, concepts::IntegralLikeScalar) + NBL_EMULATED_VECTOR_OPERATOR(|, concepts::IntegralLikeScalar) + NBL_EMULATED_VECTOR_OPERATOR(^, concepts::IntegralLikeScalar) + NBL_EMULATED_VECTOR_OPERATOR(+, true) + NBL_EMULATED_VECTOR_OPERATOR(-, true) + NBL_EMULATED_VECTOR_OPERATOR(*, true) + NBL_EMULATED_VECTOR_OPERATOR(/, true) + + #undef NBL_EMULATED_VECTOR_OPERATOR - NBL_CONSTEXPR_INLINE_FUNC component_t calcComponentSum() + NBL_CONSTEXPR_INLINE_FUNC ComponentType calcComponentSum() { - component_t sum = 0; + ComponentType sum = ComponentType::create(0); + [[unroll]] for (uint32_t i = 0u; i < CRTP::Dimension; ++i) sum = sum + CRTP::getComponent(i); @@ -241,6 +255,7 @@ struct emulated_vector : CRTP } }; + #define DEFINE_OPERATORS_FOR_TYPE(...)\ NBL_CONSTEXPR_INLINE_FUNC this_t operator+(__VA_ARGS__ val)\ {\ @@ -270,12 +285,13 @@ NBL_CONSTEXPR_INLINE_FUNC this_t operator*(__VA_ARGS__ val)\ }\ \ -// TODO: some of code duplication could be avoided -template -struct emulated_vector : CRTP +// ----------------------------------------------------- EMULATED FLOAT SPECIALIZATION -------------------------------------------------------------------- + +template +struct emulated_vector, CRTP, false> : CRTP { - using component_t = ComponentType; - using this_t = emulated_vector; + using component_t = emulated_float64_t; + using this_t = emulated_vector; NBL_CONSTEXPR_STATIC_INLINE this_t create(this_t other) { @@ -293,7 +309,7 @@ struct emulated_vector : CRTP this_t output; for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, ComponentType::create(other[i])); + output.setComponent(i, component_t::create(other[i])); return output; } @@ -330,8 +346,6 @@ struct emulated_vector : CRTP DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) - //DEFINE_OPERATORS_FOR_TYPE(emulated_uint64_t) - //DEFINE_OPERATORS_FOR_TYPE(emulated_int64_t) DEFINE_OPERATORS_FOR_TYPE(float32_t) 
DEFINE_OPERATORS_FOR_TYPE(float64_t) DEFINE_OPERATORS_FOR_TYPE(uint16_t) @@ -341,9 +355,9 @@ struct emulated_vector : CRTP DEFINE_OPERATORS_FOR_TYPE(int32_t) DEFINE_OPERATORS_FOR_TYPE(int64_t) - NBL_CONSTEXPR_INLINE_FUNC ComponentType calcComponentSum() + NBL_CONSTEXPR_INLINE_FUNC component_t calcComponentSum() { - ComponentType sum = ComponentType::create(0); + component_t sum = component_t::create(0); for (uint32_t i = 0u; i < CRTP::Dimension; ++i) sum = sum + CRTP::getComponent(i); diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 9e62e40c2a..e8cb2b73bf 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -85,11 +85,12 @@ NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS template && (Dim * Bits <= 64) && (sizeof(encode_t) * 8 >= Dim * Bits)) struct MortonEncoder { - template + template 16), vector, vector > + NBL_FUNC_REQUIRES(concepts::IntVector && sizeof(typename vector_traits::scalar_type) * 8 >= Bits) NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) { - left_shift_operator leftShift; - encode_t encoded = _static_cast(decodedValue); + left_shift_operator > leftShift; + portable_vector_t encoded = _static_cast >(decodedValue); NBL_IF_CONSTEXPR(Bits > 16) { encoded = encoded | leftShift(encoded, 16 * (Dim - 1)); } @@ -114,7 +115,16 @@ struct MortonEncoder encoded = encoded & _static_cast(coding_mask_v); encoded = encoded | leftShift(encoded, 1 * (Dim - 1)); } - return encoded & _static_cast(coding_mask_v); + encoded = encoded & _static_cast(coding_mask_v); + encoded = leftShift(encoded, _static_cast >(vector(0, 1, 2, 3))); + // The `encoded` above is vectorized for each coord, here we collapse all coords into a single element + encode_t actualEncoded = _static_cast(uint64_t(0)); + array_get, encode_t> getter; + [[unroll]] + for (uint16_t i = 0; i < Dim; i++) + actualEncoded = actualEncoded | getter(encoded, i); + + return actualEncoded; } }; @@ -123,12 +133,19 @@ struct MortonEncoder template && (Dim* Bits <= 64) && (sizeof(encode_t) * 8 >= Dim * Bits)) struct MortonDecoder { - template 16), uint32_t, uint16_t> - NBL_FUNC_REQUIRES(concepts::IntVector && sizeof(vector_traits::scalar_type) * 8 >= Bits) + template 16), vector, vector > + NBL_FUNC_REQUIRES(concepts::IntVector && sizeof(typename vector_traits::scalar_type) * 8 >= Bits) NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) { arithmetic_right_shift_operator > rightShift; portable_vector_t decoded; + array_set, encode_t> setter; + // Write initial values into decoded + [[unroll]] + for (uint16_t i = 0; i < Dim; i++) + setter(decoded, i, encodedValue); + decoded = rightShift(decoded, _static_cast >(vector(0, 1, 2, 3))); + NBL_IF_CONSTEXPR(Bits > 1) { decoded = decoded & _static_cast(coding_mask_v); decoded = decoded | rightShift(decoded, 1 * (Dim - 1)); } From cf52d9cbf2d99e3ceb16495ef9049511cbde2096 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 14 Apr 2025 16:02:17 -0300 Subject: [PATCH 25/28] Address the last of PR review changes: vectorize more operators, add a bunch of operators and functional structs for vectorial types --- include/nbl/builtin/hlsl/cpp_compat/basic.h | 20 +- .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 8 + .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 113 ++++++- include/nbl/builtin/hlsl/functional.hlsl | 28 +- include/nbl/builtin/hlsl/morton.hlsl | 319 ++++++++---------- include/nbl/builtin/hlsl/mpl.hlsl | 28 +- 6 files changed, 310 insertions(+), 206 deletions(-) diff --git 
a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index 81bdf32c19..f01d2d78ec 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -90,7 +90,7 @@ namespace impl template struct static_cast_helper { - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(NBL_CONST_REF_ARG(From) u) { #ifndef __HLSL_VERSION return static_cast(u); @@ -99,10 +99,26 @@ struct static_cast_helper #endif } }; + +// CPP-side, this can invoke the copy constructor if the copy is non-trivial in generic code +// HLSL-side, this enables generic conversion code between types, contemplating the case where no conversion is needed +template +struct static_cast_helper +{ + NBL_CONSTEXPR_STATIC_INLINE_FUNC Same cast(NBL_CONST_REF_ARG(Same) s) + { +#ifndef __HLSL_VERSION + return static_cast(s); +#else + return s; +#endif + } +}; + } template -NBL_CONSTEXPR_INLINE_FUNC To _static_cast(From v) +NBL_CONSTEXPR_INLINE_FUNC To _static_cast(NBL_CONST_REF_ARG(From) v) { return impl::static_cast_helper::cast(v); } diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index ca51b0060a..4f354c900e 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -431,6 +431,14 @@ NBL_CONSTEXPR_INLINE emulated_uint64_t minus_assign::identity template<> NBL_CONSTEXPR_INLINE emulated_int64_t minus_assign::identity = minus::identity; +// ------------------------------------------------ TYPE TRAITS SATISFIED ----------------------------------------------------- + +template<> +struct is_signed : bool_constant {}; + +template<> +struct is_unsigned : bool_constant {}; + // --------------------------------------------------- CONCEPTS SATISFIED ----------------------------------------------------- namespace concepts { diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index c4938fc9c2..fd5f5e3c34 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -147,7 +147,7 @@ struct emulated_vector : CRTP return output; } - #define NBL_EMULATED_VECTOR_DEFINE_OPERATOR(OP)\ + #define NBL_EMULATED_VECTOR_OPERATOR(OP)\ NBL_CONSTEXPR_INLINE_FUNC this_t operator##OP (component_t val)\ {\ this_t output;\ @@ -170,15 +170,33 @@ struct emulated_vector : CRTP return output;\ } - NBL_EMULATED_VECTOR_DEFINE_OPERATOR(&) - NBL_EMULATED_VECTOR_DEFINE_OPERATOR(|) - NBL_EMULATED_VECTOR_DEFINE_OPERATOR(^) - NBL_EMULATED_VECTOR_DEFINE_OPERATOR(+) - NBL_EMULATED_VECTOR_DEFINE_OPERATOR(-) - NBL_EMULATED_VECTOR_DEFINE_OPERATOR(*) - NBL_EMULATED_VECTOR_DEFINE_OPERATOR(/) + NBL_EMULATED_VECTOR_OPERATOR(&) + NBL_EMULATED_VECTOR_OPERATOR(|) + NBL_EMULATED_VECTOR_OPERATOR(^) + NBL_EMULATED_VECTOR_OPERATOR(+) + NBL_EMULATED_VECTOR_OPERATOR(-) + NBL_EMULATED_VECTOR_OPERATOR(*) + NBL_EMULATED_VECTOR_OPERATOR(/) - #undef NBL_EMULATED_VECTOR_DEFINE_OPERATOR + #undef NBL_EMULATED_VECTOR_OPERATOR + + #define NBL_EMULATED_VECTOR_COMPARISON(OP) NBL_CONSTEXPR_INLINE_FUNC vector operator##OP (this_t other)\ + {\ + vector output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output[i] = CRTP::getComponent(i) OP other.getComponent(i);\ + return output;\ + } + + NBL_EMULATED_VECTOR_COMPARISON(==) + NBL_EMULATED_VECTOR_COMPARISON(!=) + NBL_EMULATED_VECTOR_COMPARISON(<) + NBL_EMULATED_VECTOR_COMPARISON(<=) + 
NBL_EMULATED_VECTOR_COMPARISON(>) + NBL_EMULATED_VECTOR_COMPARISON(>=) + + #undef NBL_EMULATED_VECTOR_COMPARISON NBL_CONSTEXPR_INLINE_FUNC component_t calcComponentSum() { @@ -222,7 +240,7 @@ struct emulated_vector : CRTP this_t output;\ [[unroll]]\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, CRTP::getComponent(i) + val);\ + output.setComponent(i, CRTP::getComponent(i) OP val);\ return output;\ }\ NBL_CONSTEXPR_INLINE_FUNC enable_if_t< ENABLE_CONDITION , this_t> operator##OP (this_t other)\ @@ -230,7 +248,7 @@ struct emulated_vector : CRTP this_t output;\ [[unroll]]\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, CRTP::getComponent(i) + other.getComponent(i));\ + output.setComponent(i, CRTP::getComponent(i) OP other.getComponent(i));\ return output;\ } @@ -244,6 +262,24 @@ struct emulated_vector : CRTP #undef NBL_EMULATED_VECTOR_OPERATOR + #define NBL_EMULATED_VECTOR_COMPARISON(OP) NBL_CONSTEXPR_INLINE_FUNC vector operator##OP (this_t other)\ + {\ + vector output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output[i] = CRTP::getComponent(i) OP other.getComponent(i);\ + return output;\ + } + + NBL_EMULATED_VECTOR_COMPARISON(==) + NBL_EMULATED_VECTOR_COMPARISON(!=) + NBL_EMULATED_VECTOR_COMPARISON(<) + NBL_EMULATED_VECTOR_COMPARISON(<=) + NBL_EMULATED_VECTOR_COMPARISON(>) + NBL_EMULATED_VECTOR_COMPARISON(>=) + + #undef NBL_EMULATED_VECTOR_COMPARISON + NBL_CONSTEXPR_INLINE_FUNC ComponentType calcComponentSum() { ComponentType sum = ComponentType::create(0); @@ -442,7 +478,7 @@ namespace impl template struct static_cast_helper, vector, void> { - static inline emulated_vector_t2 cast(vector vec) + NBL_CONSTEXPR_STATIC_INLINE emulated_vector_t2 cast(vector vec) { emulated_vector_t2 output; output.x = _static_cast(vec.x); @@ -455,7 +491,7 @@ struct static_cast_helper, vector, void> template struct static_cast_helper, vector, void> { - static inline emulated_vector_t3 cast(vector vec) + NBL_CONSTEXPR_STATIC_INLINE emulated_vector_t3 cast(vector vec) { emulated_vector_t3 output; output.x = _static_cast(vec.x); @@ -469,7 +505,7 @@ struct static_cast_helper, vector, void> template struct static_cast_helper, vector, void> { - static inline emulated_vector_t4 cast(vector vec) + NBL_CONSTEXPR_STATIC_INLINE emulated_vector_t4 cast(vector vec) { emulated_vector_t4 output; output.x = _static_cast(vec.x); @@ -487,7 +523,7 @@ struct static_cast_helper, emulated_vector_t; using InputVecType = emulated_vector_t; - static inline OutputVecType cast(InputVecType vec) + NBL_CONSTEXPR_STATIC_INLINE OutputVecType cast(InputVecType vec) { array_get getter; array_set setter; @@ -500,6 +536,53 @@ struct static_cast_helper, emulated_vector_t\ +struct static_cast_helper, emulated_vector_t##N , void>\ +{\ + using OutputVecType = emulated_vector_t##N ;\ + using InputVecType = emulated_vector_t##N ;\ + NBL_CONSTEXPR_STATIC_INLINE OutputVecType cast(InputVecType vec)\ + {\ + array_get getter;\ + array_set setter;\ + OutputVecType output;\ + for (int i = 0; i < N; ++i)\ + setter(output, i, _static_cast(getter(vec, i)));\ + return output;\ + }\ +}; + +NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST(2) +NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST(3) +NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST(4) + +#undef NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST + +#define NBL_EMULATED_VEC_TRUNCATION(N, M) template\ +struct static_cast_helper, emulated_vector_t##M , void>\ +{\ + using OutputVecType = emulated_vector_t##N ;\ + using InputVecType = 
emulated_vector_t##M ;\ + NBL_CONSTEXPR_STATIC_INLINE OutputVecType cast(InputVecType vec)\ + {\ + array_get getter;\ + array_set setter;\ + OutputVecType output;\ + for (int i = 0; i < N; ++i)\ + setter(output, i, getter(vec, i));\ + return output;\ + }\ +}; + +NBL_EMULATED_VEC_TRUNCATION(2, 2) +NBL_EMULATED_VEC_TRUNCATION(2, 3) +NBL_EMULATED_VEC_TRUNCATION(2, 4) +NBL_EMULATED_VEC_TRUNCATION(3, 3) +NBL_EMULATED_VEC_TRUNCATION(3, 4) +NBL_EMULATED_VEC_TRUNCATION(4, 4) + +#undef NBL_EMULATED_VEC_TRUNCATION + } } diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 93687bdb6a..45198cbe7a 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -80,7 +80,7 @@ struct reference_wrapper : enable_if_t< // TODO: partial specializations for T being a special SPIR-V type for image ops, etc. -#define ALIAS_STD(NAME,OP) template struct NAME { \ +#define ALIAS_STD(NAME,OP) template struct NAME { \ using type_t = T; \ \ T operator()(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) \ @@ -92,7 +92,7 @@ struct reference_wrapper : enable_if_t< #else // CPP -#define ALIAS_STD(NAME,OP) template struct NAME : std::NAME { \ +#define ALIAS_STD(NAME,OP) template struct NAME : std::NAME { \ using type_t = T; #endif @@ -136,13 +136,35 @@ ALIAS_STD(divides,/) }; +ALIAS_STD(equal_to,==) }; +ALIAS_STD(not_equal_to,!=) }; ALIAS_STD(greater,>) }; ALIAS_STD(less,<) }; ALIAS_STD(greater_equal,>=) }; -ALIAS_STD(less_equal,<=) }; +ALIAS_STD(less_equal, <= ) }; #undef ALIAS_STD +// The above comparison operators return bool on STD. Here's a specialization so that they return `vector` for vectorial types +#define NBL_COMPARISON_VECTORIAL_SPECIALIZATION(NAME, OP) template NBL_PARTIAL_REQ_TOP(concepts::Vectorial)\ +struct NAME ) >\ +{\ + using type_t = T;\ + vector::Dimension> operator()(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs)\ + {\ + return lhs OP rhs;\ + }\ +}; + +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(equal_to, ==) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(not_equal_to, !=) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(greater, >) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less, <) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(greater_equal, >=) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less_equal, <=) + +#undef NBL_COMPARISON_VECTORIAL_SPECIALIZATION + // ------------------------ Compound assignment operators ---------------------- #define COMPOUND_ASSIGN(NAME) template struct NAME##_assign { \ diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index e8cb2b73bf..d2fca1165f 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -8,6 +8,7 @@ #include "nbl/builtin/hlsl/emulated/int64_t.hlsl" #include "nbl/builtin/hlsl/mpl.hlsl" #include "nbl/builtin/hlsl/portable/vector_t.hlsl" +#include "nbl/builtin/hlsl/mpl.hlsl" // TODO: mega macro to get functional plus, minus, plus_assign, minus_assign @@ -82,61 +83,65 @@ NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS // ----------------------------------------------------------------- MORTON ENCODER --------------------------------------------------- -template && (Dim * Bits <= 64) && (sizeof(encode_t) * 8 >= Dim * Bits)) +template && Dim * Bits <= 64 && 8 * sizeof(encode_t) == mpl::round_up_to_pot_v) struct MortonEncoder { template 16), vector, vector > - NBL_FUNC_REQUIRES(concepts::IntVector && sizeof(typename vector_traits::scalar_type) * 8 >= Bits) - NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t 
encode(NBL_CONST_REF_ARG(decode_t) decodedValue) + NBL_FUNC_REQUIRES(concepts::IntVector && 8 * sizeof(typename vector_traits::scalar_type) >= Bits) + /** + * @brief Interleaves each coordinate with `Dim - 1` zeros in between each bit, and left-shifts each by its coordinate index + * + * @param [in] decodedValue Cartesian coordinates to interleave and shift + */ + NBL_CONSTEXPR_STATIC_INLINE_FUNC portable_vector_t interleaveShift(NBL_CONST_REF_ARG(decode_t) decodedValue) + { + NBL_CONSTEXPR_STATIC encode_t EncodeMasks[CodingStages + 1] = { _static_cast(coding_mask_v), _static_cast(coding_mask_v), _static_cast(coding_mask_v) , _static_cast(coding_mask_v) , _static_cast(coding_mask_v) , _static_cast(coding_mask_v) }; left_shift_operator > leftShift; portable_vector_t interleaved = _static_cast >(decodedValue)& EncodeMasks[CodingStages]; + + NBL_CONSTEXPR_STATIC uint16_t Stages = mpl::log2_ceil_v; + [[unroll]] + for (uint16_t i = Stages; i > 0; i--) { + interleaved = interleaved | leftShift(interleaved, (uint32_t(1) << (i - 1)) * (Dim - 1)); + interleaved = interleaved & EncodeMasks[i - 1]; } + + // After interleaving, shift each coordinate left by its index + return leftShift(interleaved, _static_cast >(vector(0, 1, 2, 3))); + } + + template 16), vector, vector > + NBL_FUNC_REQUIRES(concepts::IntVector && 8 * sizeof(typename vector_traits::scalar_type) >= Bits) + /** + * @brief Encodes a vector of Cartesian coordinates as a Morton code + * + * @param [in] decodedValue Cartesian coordinates to encode + */ + NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) + { + portable_vector_t interleaveShifted = interleaveShift(decodedValue); + + encode_t encoded = _static_cast(uint64_t(0)); array_get, encode_t> getter; [[unroll]] for (uint16_t i = 0; i < Dim; i++) + encoded = encoded | getter(interleaveShifted, i); + + return encoded; } }; // ----------------------------------------------------------------- MORTON DECODER --------------------------------------------------- template && Dim * Bits <= 64 && 8 * sizeof(encode_t) == mpl::round_up_to_pot_v) struct MortonDecoder { template 16), vector, vector > NBL_FUNC_REQUIRES(concepts::IntVector && 8 * sizeof(typename vector_traits::scalar_type) >= Bits) 
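// The loop below inverts interleaveShift: Stages == mpl::log2_ceil_v<Bits> passes, where pass i masks with
// DecodeMasks[i] and then ORs in a copy shifted right by (1 << i) * (Dim - 1) bits. A worked example for
// Dim == 2, Bits == 4 on the already-deinterleaved pattern x == 0b1000101:
//   pass 0: x & 0x55... = 0b1000101, x | (x >> 1) = 0b1100111
//   pass 1: x & 0x33... = 0b0100011, x | (x >> 2) = 0b0101011
//   final mask & 0xF    = 0b1011, recovering the original coordinate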
     NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue)
     {
+        NBL_CONSTEXPR_STATIC encode_t DecodeMasks[CodingStages + 1] = { _static_cast(coding_mask_v), _static_cast(coding_mask_v), _static_cast(coding_mask_v) , _static_cast(coding_mask_v) , _static_cast(coding_mask_v) , _static_cast(coding_mask_v) };
         arithmetic_right_shift_operator > rightShift;
         portable_vector_t decoded;
         array_set, encode_t> setter;
@@ -146,38 +151,28 @@ struct MortonDecoder
             setter(decoded, i, encodedValue);
 
         decoded = rightShift(decoded, _static_cast >(vector(0, 1, 2, 3)));
-        NBL_IF_CONSTEXPR(Bits > 1)
-        {
-            decoded = decoded & _static_cast(coding_mask_v);
-            decoded = decoded | rightShift(decoded, 1 * (Dim - 1));
-        }
-        NBL_IF_CONSTEXPR(Bits > 2)
-        {
-            decoded = decoded & _static_cast(coding_mask_v);
-            decoded = decoded | rightShift(decoded, 2 * (Dim - 1));
-        }
-        NBL_IF_CONSTEXPR(Bits > 4)
-        {
-            decoded = decoded & _static_cast(coding_mask_v);
-            decoded = decoded | rightShift(decoded, 4 * (Dim - 1));
-        }
-        NBL_IF_CONSTEXPR(Bits > 8)
-        {
-            decoded = decoded & _static_cast(coding_mask_v);
-            decoded = decoded | rightShift(decoded, 8 * (Dim - 1));
-        }
-        NBL_IF_CONSTEXPR(Bits > 16)
+        NBL_CONSTEXPR_STATIC uint16_t Stages = mpl::log2_ceil_v;
+        [[unroll]]
+        for (uint16_t i = 0; i < Stages; i++)
         {
-            decoded = decoded & _static_cast(coding_mask_v);
-            decoded = decoded | rightShift(decoded, 16 * (Dim - 1));
+            decoded = decoded & DecodeMasks[i];
+            decoded = decoded | rightShift(decoded, (uint32_t(1) << i) * (Dim - 1));
         }
 
-        return _static_cast(decoded & _static_cast(coding_mask_v));
+        // If `Bits` is greater than half the bitwidth of the decode type, we can avoid `&`ing against the last mask since the duplicated MSBs get truncated
+        NBL_IF_CONSTEXPR(Bits > 4 * sizeof(typename vector_traits::scalar_type))
+            return _static_cast(decoded);
+        else
+            return _static_cast(decoded & DecodeMasks[CodingStages]);
     }
 };
 
 // ---------------------------------------------------- COMPARISON OPERATORS ---------------------------------------------------------------
 // Here because no partial specialization of methods
+// `BitsAlreadySpread` assumes the operand is both pre-interleaved and pre-shifted
+
+template
+NBL_BOOL_CONCEPT Comparable = concepts::IntegralLikeScalar && is_signed_v == Signed && ((BitsAlreadySpread && sizeof(I) == sizeof(storage_t)) || (!BitsAlreadySpread && 8 * sizeof(I) == mpl::round_up_to_pot_v));
 
 template
 struct Equals;
 
 template
 struct Equals
 {
-    NBL_CONSTEXPR_INLINE_FUNC vector operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs)
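+    // With the bits already spread out (`BitsAlreadySpread == true`) no decode step is needed:
+    // XOR the pre-interleaved, pre-shifted coordinates against the code and compare each lane of the result to zero
+    template)
+    NBL_CONSTEXPR_STATIC_INLINE_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs)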
     {
-        NBL_CONSTEXPR_STATIC storage_t Mask = _static_cast(impl::coding_mask_v);
-        left_shift_operator leftShift;
-        vector retVal;
-        [[unroll]]
-        for (uint16_t i = 0; i < D; i++)
-        {
-            retVal[i] = (value & leftShift(Mask, i)) == leftShift(rhs[i], i);
-        }
-        return retVal;
+        NBL_CONSTEXPR portable_vector_t zeros = _static_cast >(_static_cast >(vector(0,0,0,0)));
+
+        portable_vector_t rhsCasted = _static_cast >(rhs);
+        portable_vector_t xored = rhsCasted ^ value;
+        return xored == zeros;
     }
 };
 
 template
 struct Equals
 {
-    template
-    NBL_CONSTEXPR_INLINE_FUNC enable_if_t&& is_scalar_v && (is_signed_v == Signed), vector >
-    operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs)
+    template)
+    NBL_CONSTEXPR_STATIC_INLINE_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs)
     {
-        using U = make_unsigned_t;
-        vector interleaved;
-        [[unroll]]
-        for (uint16_t i = 0; i < D; i++)
-        {
-            interleaved[i] = impl::MortonEncoder::encode(_static_cast(rhs[i]));
-        }
-        Equals equals;
-        return equals(value, interleaved);
+        const portable_vector_t interleaved = MortonEncoder::interleaveShift(rhs);
+        return Equals::__call(value, interleaved);
     }
 };
 
 template
 struct BaseComparison;
 
-// Aux method for extracting highest bit, used by the comparison below
-template
-NBL_CONSTEXPR_INLINE_FUNC storage_t extractHighestBit(storage_t value, uint16_t coord)
-{
-    // Like above, if the number encoded in `coord` gets `bits(coord) = ceil((BitWidth - coord)/D)` bits for representation, then the highest index of these
-    // bits is `bits(coord) - 1`
-    const uint16_t coordHighestBitIdx = Bits / D - ((coord < Bits % D) ? uint16_t(0) : uint16_t(1));
-    // This is the index of that bit as an index in the encoded value
-    const uint16_t shift = coordHighestBitIdx * D + coord;
-    left_shift_operator leftShift;
-    return value & leftShift(_static_cast(uint16_t(1)), shift);
-}
+// Aux variable that has only the sign bit for the first of D dimensions
+template
+NBL_CONSTEXPR uint64_t SignMask = uint64_t(1) << (D * (Bits - 1));
 
 template
 struct BaseComparison
 {
-    NBL_CONSTEXPR_INLINE_FUNC vector operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs)
+    template)
+    NBL_CONSTEXPR_STATIC_INLINE_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs)
     {
-        NBL_CONSTEXPR_STATIC storage_t Mask = _static_cast(impl::coding_mask_v);
-        left_shift_operator leftShift;
-        vector retVal;
+        NBL_CONSTEXPR_STATIC portable_vector_t InterleaveMasks = _static_cast >(_static_cast >(vector(coding_mask_v, coding_mask_v << 1, coding_mask_v << 2, coding_mask_v << 3)));
+        NBL_CONSTEXPR_STATIC portable_vector_t SignMasks = _static_cast >(_static_cast >(vector(SignMask, SignMask << 1, SignMask << 2, SignMask << 3)));
         ComparisonOp comparison;
-        [[unroll]]
-        for (uint16_t i = 0; i < D; i++)
-        {
-            storage_t thisCoord = value & leftShift(Mask, i);
-            storage_t rhsCoord = leftShift(rhs[i], i);
-            // If coordinate is negative, we add 1s in every bit not corresponding to coord
-            if (extractHighestBit(thisCoord) != _static_cast(uint64_t(0)))
-                thisCoord = thisCoord | ~leftShift(Mask, i);
-            if (extractHighestBit(rhsCoord) != _static_cast(uint64_t(0)))
-                rhsCoord = rhsCoord | ~leftShift(Mask, i);
-            retVal[i] = comparison(thisCoord, rhsCoord);
-        }
-        return retVal;
+        // Isolate each coordinate's (still interleaved) bits and flip its sign bit
+        const portable_vector_t thisCoord = (InterleaveMasks & value) ^ SignMasks;
+        // rhs is already interleaved and shifted, so just cast the type and flip the sign
+        const portable_vector_t rhsCoord = _static_cast >(rhs) ^ SignMasks;
+
+        return comparison(thisCoord, rhsCoord);
     }
 };
 
 template
 struct BaseComparison
 {
-    template
-    NBL_CONSTEXPR_INLINE_FUNC enable_if_t&& is_scalar_v && (is_signed_v == Signed), vector >
-    operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs)
+    template)
+    NBL_CONSTEXPR_STATIC_INLINE_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs)
     {
-        using U = make_unsigned_t;
-        vector interleaved;
-        [[unroll]]
-        for (uint16_t i = 0; i < D; i++)
-        {
-            interleaved[i] = impl::MortonEncoder::encode(_static_cast(rhs[i]));
-        }
+        const portable_vector_t interleaved = MortonEncoder::interleaveShift(rhs);
-        BaseComparison baseComparison;
-        return baseComparison(value, interleaved);
+        return BaseComparison::__call(value,
interleaved); } }; template -struct LessThan : BaseComparison > {}; +struct LessThan : BaseComparison > > {}; template -struct LessEquals : BaseComparison > {}; +struct LessEquals : BaseComparison > > {}; +template +struct GreaterThan : BaseComparison > > {}; + +template +struct GreaterEquals : BaseComparison > > {}; } //namespace impl @@ -313,19 +279,11 @@ struct code * @param [in] cartesian Coordinates to encode. Signedness MUST match the signedness of this Morton code class */ template - NBL_CONSTEXPR_STATIC_FUNC enable_if_t && is_scalar_v && (is_signed_v == Signed), this_t> + NBL_CONSTEXPR_STATIC_FUNC enable_if_t && is_scalar_v && (is_signed_v == Signed) && (8 * sizeof(I) >= Bits), this_t> create(NBL_CONST_REF_ARG(vector) cartesian) { - using U = make_unsigned_t; - left_shift_operator leftShift; - storage_t encodedCartesian = _static_cast(uint64_t(0)); - [[unroll]] - for (uint16_t i = 0; i < D; i++) - { - encodedCartesian = encodedCartesian | leftShift(impl::MortonEncoder::encode(_static_cast(cartesian[i])), i); - } this_t retVal; - retVal.value = encodedCartesian; + retVal.value = impl::MortonEncoder::encode(cartesian); return retVal; } @@ -337,8 +295,7 @@ struct code * * @param [in] cartesian Coordinates to encode */ - - template + template= Bits) explicit code(NBL_CONST_REF_ARG(vector) cartesian) { *this = create(cartesian); @@ -347,11 +304,8 @@ struct code /** * @brief Decodes this Morton code back to a set of cartesian coordinates */ - template - constexpr inline explicit operator vector() const noexcept - { - return _static_cast, morton::code, Bits, D>>(*this); - } + template= Bits) + constexpr inline explicit operator vector() const noexcept; #endif @@ -398,14 +352,13 @@ struct code NBL_CONSTEXPR_INLINE_FUNC this_signed_t operator-() NBL_CONST_MEMBER_FUNC { - left_shift_operator leftShift; - // allOnes encodes a cartesian coordinate with all values set to 1 - this_t allOnes; - allOnes.value = leftShift(_static_cast(1), D) - _static_cast(1); - // Using 2's complement property that arithmetic negation can be obtained by bitwise negation then adding 1 - this_signed_t retVal; - retVal.value = (operator~() + allOnes).value; - return retVal; + this_t zero; + zero.value = _static_cast(0); + #ifndef __HLSL_VERSION + return zero - *this; + #else + return zero - this; + #endif } // ------------------------------------------------------- BINARY ARITHMETIC OPERATORS ------------------------------------------------- @@ -453,48 +406,51 @@ struct code return value == rhs.value; } - template - enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator==(NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_INLINE_FUNC vector equals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - impl::Equals equals; - return equals(value, rhs); - } + return impl::Equals::__call(value, rhs); + } NBL_CONSTEXPR_INLINE_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { return value != rhs.value; } - template - enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator!=(NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_INLINE_FUNC vector notEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return !operator== (rhs); + return !equals(rhs); } - template - enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator<(NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_INLINE_FUNC vector less(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - 
impl::LessThan lessThan; - return lessThan(value, rhs); + return impl::LessThan::__call(value, rhs); } - template - enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator<=(NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_INLINE_FUNC vector lessEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - impl::LessEquals lessEquals; - return lessEquals(value, rhs); + return impl::LessEquals::__call(value, rhs); } - template - enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator>(NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_INLINE_FUNC vector greater(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return !operator<= (rhs); + return impl::GreaterThan::__call(value, rhs); } - template - enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator>=(NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_INLINE_FUNC vector greaterEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return !operator< (rhs); + return impl::GreaterEquals::__call(value, rhs); } }; @@ -571,26 +527,29 @@ namespace impl { // I must be of same signedness as the morton code, and be wide enough to hold each component -template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (8 * sizeof(I) >= Bits)) -struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (8 * sizeof(I) >= Bits)) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) +struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) > { NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(morton::code, Bits, D, _uint64_t>) val) { - using U = make_unsigned_t; using storage_t = typename morton::code, Bits, D, _uint64_t>::storage_t; - arithmetic_right_shift_operator rightShift; - vector cartesian; - [[unroll]] - for (uint16_t i = 0; i < D; i++) - { - cartesian[i] = _static_cast(morton::impl::MortonDecoder::template decode(rightShift(val.value, i))); - } - return cartesian; + return morton::impl::MortonDecoder::decode(val.value); } }; } // namespace impl +#ifndef __HLSL_VERSION + +template && D* Bits <= 64) +template = Bits) +constexpr inline morton::code::operator vector() const noexcept +{ + return _static_cast, morton::code, Bits, D>>(*this); +} + +#endif + } //namespace hlsl } //namespace nbl diff --git a/include/nbl/builtin/hlsl/mpl.hlsl b/include/nbl/builtin/hlsl/mpl.hlsl index 2015b05b3d..67f6445324 100644 --- a/include/nbl/builtin/hlsl/mpl.hlsl +++ b/include/nbl/builtin/hlsl/mpl.hlsl @@ -43,13 +43,23 @@ struct countl_zero : impl::countl_zero template NBL_CONSTEXPR T countl_zero_v = countl_zero::value; +template +struct is_pot : bool_constant< (N > 0 && !(N & (N - 1))) > {}; +template +NBL_CONSTEXPR bool is_pot_v = is_pot::value; + template struct log2 { NBL_CONSTEXPR_STATIC_INLINE uint16_t value = X ? 
(1ull<<6)-countl_zero::value-1 : -1ull;
 };
 template
-NBL_CONSTEXPR uint64_t log2_v = log2::value;
+NBL_CONSTEXPR uint16_t log2_v = log2::value;
+
+template
+struct log2_ceil : integral_constant + uint16_t(!is_pot_v)> {};
+template
+NBL_CONSTEXPR uint16_t log2_ceil_v = log2_ceil::value;
 
 template
 struct rotl
@@ -79,11 +89,6 @@ struct align_up
 template
 NBL_CONSTEXPR uint64_t align_up_v = align_up::value;
 
-template
-struct is_pot : bool_constant< (N > 0 && !(N & (N - 1))) > {};
-template
-NBL_CONSTEXPR bool is_pot_v = is_pot::value;
-
 template
 struct max
 {
@@ -99,6 +104,17 @@ struct min
 };
 template
 NBL_CONSTEXPR T min_v = min::value;
+
+template
+struct round_up_to_pot : integral_constant > {};
+template
+NBL_CONSTEXPR uint64_t round_up_to_pot_v = round_up_to_pot::value;
+
+template
+struct round_down_to_pot : integral_constant > {};
+template
+NBL_CONSTEXPR uint64_t round_down_to_pot_v = round_down_to_pot::value;
+
 }
 }
 }

From f954522001947a4f7f4c74696b71571924a5c590 Mon Sep 17 00:00:00 2001
From: Fletterio
Date: Thu, 24 Apr 2025 15:57:18 -0300
Subject: [PATCH 26/28] Removed the `NBL_CONSTEXPR_INLINE_FUNC` macro, replacing all usages with `NBL_CONSTEXPR_FUNC`

Added `OpUndef` to the SPIR-V `intrinsics.hlsl` and to `cpp_compat.hlsl`
Added an explicit `truncate` function for vectors and emulated vectors
Added several specializations for vectorial types in `functional.hlsl`
Bugfixes and changes to Morton codes; these are very close to working properly with emulated ints
---
 include/nbl/builtin/hlsl/algorithm.hlsl | 18 +-
 include/nbl/builtin/hlsl/cpp_compat.hlsl | 3 +-
 include/nbl/builtin/hlsl/cpp_compat/basic.h | 27 +-
 .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 17 +-
 .../hlsl/cpp_compat/impl/vector_impl.hlsl | 35 --
 .../builtin/hlsl/cpp_compat/intrinsics.hlsl | 12 +-
 .../nbl/builtin/hlsl/cpp_compat/promote.hlsl | 12 +-
 .../nbl/builtin/hlsl/cpp_compat/truncate.hlsl | 76 ++++
 .../nbl/builtin/hlsl/emulated/float64_t.hlsl | 16 +-
 .../builtin/hlsl/emulated/float64_t_impl.hlsl | 16 +-
 .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 160 ++++---
 .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 423 ++++++++++--------
 include/nbl/builtin/hlsl/functional.hlsl | 144 ++++--
 include/nbl/builtin/hlsl/ieee754.hlsl | 16 +-
 include/nbl/builtin/hlsl/ieee754/impl.hlsl | 16 +-
 include/nbl/builtin/hlsl/morton.hlsl | 358 ++++++++-------
 .../builtin/hlsl/spirv_intrinsics/core.hlsl | 7 +-
 include/nbl/builtin/hlsl/type_traits.hlsl | 2 +
 src/nbl/builtin/CMakeLists.txt | 2 +-
 19 files changed, 798 insertions(+), 562 deletions(-)
 delete mode 100644 include/nbl/builtin/hlsl/cpp_compat/impl/vector_impl.hlsl
 create mode 100644 include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl

diff --git a/include/nbl/builtin/hlsl/algorithm.hlsl b/include/nbl/builtin/hlsl/algorithm.hlsl
index 3a7c4963c2..0178673f4e 100644
--- a/include/nbl/builtin/hlsl/algorithm.hlsl
+++ b/include/nbl/builtin/hlsl/algorithm.hlsl
@@ -18,7 +18,7 @@ namespace impl
 
     // TODO: use structs
     template
-    NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs)
+    NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs)
     {
         T tmp = lhs;
         lhs = rhs;
@@ -26,7 +26,7 @@ namespace impl
     }
 
     template<>
-    NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(uint16_t) lhs, NBL_REF_ARG(uint16_t) rhs)
+    NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(uint16_t) lhs, NBL_REF_ARG(uint16_t) rhs)
     {
         lhs ^= rhs;
         rhs ^= lhs;
@@ -34,7 +34,7 @@ namespace impl
     }
 
     template<>
-    NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(uint32_t) lhs, NBL_REF_ARG(uint32_t) rhs)
+    NBL_CONSTEXPR_FUNC void
swap(NBL_REF_ARG(uint32_t) lhs, NBL_REF_ARG(uint32_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -42,7 +42,7 @@ namespace impl } template<> - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(uint64_t) lhs, NBL_REF_ARG(uint64_t) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(uint64_t) lhs, NBL_REF_ARG(uint64_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -50,7 +50,7 @@ namespace impl } template<> - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(int16_t) lhs, NBL_REF_ARG(int16_t) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(int16_t) lhs, NBL_REF_ARG(int16_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -58,7 +58,7 @@ namespace impl } template<> - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(int32_t) lhs, NBL_REF_ARG(int32_t) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(int32_t) lhs, NBL_REF_ARG(int32_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -66,7 +66,7 @@ namespace impl } template<> - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(int64_t) lhs, NBL_REF_ARG(int64_t) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(int64_t) lhs, NBL_REF_ARG(int64_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -74,7 +74,7 @@ namespace impl } #else template - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) { std::swap(lhs, rhs); } @@ -82,7 +82,7 @@ namespace impl } template -NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) +NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) { impl::swap(lhs, rhs); } diff --git a/include/nbl/builtin/hlsl/cpp_compat.hlsl b/include/nbl/builtin/hlsl/cpp_compat.hlsl index cb06447aa1..03d47864fb 100644 --- a/include/nbl/builtin/hlsl/cpp_compat.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat.hlsl @@ -5,8 +5,9 @@ // it includes vector and matrix #include #include +#include // Had to push some stuff here to avoid circular dependencies -#include +#include #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index f01d2d78ec..0985af6eb3 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -14,8 +14,6 @@ #define NBL_CONSTEXPR_INLINE constexpr inline #define NBL_CONSTEXPR_STATIC_INLINE constexpr static inline #define NBL_CONSTEXPR_STATIC_FUNC constexpr static -#define NBL_CONSTEXPR_INLINE_FUNC constexpr inline -#define NBL_CONSTEXPR_STATIC_INLINE_FUNC constexpr static inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE constexpr #define NBL_CONST_MEMBER_FUNC const #define NBL_IF_CONSTEXPR(...) if constexpr (__VA_ARGS__) @@ -44,13 +42,11 @@ namespace nbl::hlsl #define ARROW .arrow(). #define NBL_CONSTEXPR const static // TODO: rename to NBL_CONSTEXPR_VAR -#define NBL_CONSTEXPR_FUNC +#define NBL_CONSTEXPR_FUNC inline #define NBL_CONSTEXPR_STATIC const static #define NBL_CONSTEXPR_INLINE const static #define NBL_CONSTEXPR_STATIC_INLINE const static -#define NBL_CONSTEXPR_STATIC_FUNC static -#define NBL_CONSTEXPR_INLINE_FUNC inline -#define NBL_CONSTEXPR_STATIC_INLINE_FUNC static inline +#define NBL_CONSTEXPR_STATIC_FUNC static inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC inline #define NBL_CONST_MEMBER_FUNC #define NBL_IF_CONSTEXPR(...) 
if (__VA_ARGS__) @@ -90,7 +86,7 @@ namespace impl template struct static_cast_helper { - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(NBL_CONST_REF_ARG(From) u) + NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) u) { #ifndef __HLSL_VERSION return static_cast(u); @@ -100,25 +96,10 @@ struct static_cast_helper } }; -// CPP-side, this can invoke the copy constructor if the copy is non-trivial in generic code -// HLSL-side, this enables generic conversion code between types, contemplating the case where no conversion is needed -template -struct static_cast_helper -{ - NBL_CONSTEXPR_STATIC_INLINE_FUNC Same cast(NBL_CONST_REF_ARG(Same) s) - { -#ifndef __HLSL_VERSION - return static_cast(s); -#else - return s; -#endif - } -}; - } template -NBL_CONSTEXPR_INLINE_FUNC To _static_cast(NBL_CONST_REF_ARG(From) v) +NBL_CONSTEXPR_FUNC To _static_cast(NBL_CONST_REF_ARG(From) v) { return impl::static_cast_helper::cast(v); } diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index e1ba823b9b..4f7c7370bc 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -109,6 +109,8 @@ template struct addCarry_helper; template struct subBorrow_helper; +template +struct undef_helper; #ifdef __HLSL_VERSION // HLSL only specializations @@ -172,6 +174,7 @@ template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(nClamp_helper, nClamp, // Can use trivial case and not worry about restricting `T` with a concept since `spirv::AddCarryOutput / SubBorrowOutput` already take care of that template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(addCarry_helper, addCarry, (T), (T)(T), spirv::AddCarryOutput) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(subBorrow_helper, subBorrow, (T), (T)(T), spirv::SubBorrowOutput) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(undef_helper, undef, (T), , T) #define BITCOUNT_HELPER_RETRUN_TYPE conditional_t, vector::Dimension>, int32_t> template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(bitCount_helper, bitCount, (T), (T), BITCOUNT_HELPER_RETRUN_TYPE) @@ -640,7 +643,7 @@ template NBL_PARTIAL_REQ_TOP(concepts::BooleanScalar) struct select_helper) > { - NBL_CONSTEXPR_STATIC_INLINE_FUNC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) + NBL_CONSTEXPR_STATIC_FUNC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) { return condition ? 
object1 : object2; } @@ -650,7 +653,7 @@ template NBL_PARTIAL_REQ_TOP(concepts::Boolean&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) struct select_helper&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) > { - NBL_CONSTEXPR_STATIC_INLINE_FUNC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) + NBL_CONSTEXPR_STATIC_FUNC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) { using traits = hlsl::vector_traits; array_get conditionGetter; @@ -665,6 +668,16 @@ struct select_helper&& concepts::V } }; +template +struct undef_helper +{ + NBL_CONSTEXPR_STATIC_FUNC T __call() + { + T t; + return t; + } +}; + #endif // C++ only specializations // C++ and HLSL specializations diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/vector_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/vector_impl.hlsl deleted file mode 100644 index 524d1fa45e..0000000000 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/vector_impl.hlsl +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef _NBL_BUILTIN_HLSL_CPP_COMPAT_IMPL_VECTOR_IMPL_INCLUDED_ -#define _NBL_BUILTIN_HLSL_CPP_COMPAT_IMPL_VECTOR_IMPL_INCLUDED_ - -#include -#include -#include - -// To prevent implicit truncation warnings -namespace nbl -{ -namespace hlsl -{ -namespace impl -{ - -template NBL_PARTIAL_REQ_TOP(N <= M) -struct static_cast_helper, vector NBL_PARTIAL_REQ_BOT(N <= M) > -{ - NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(vector) val) - { - vector retVal; - [[unroll]] - for (uint16_t i = 0; i < N; i++) - { - retVal[i] = val[i]; - } - return retVal; - } -}; - -} -} -} - -#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl index 284ba564d7..c511042c27 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl @@ -23,6 +23,12 @@ namespace nbl namespace hlsl { +template +NBL_CONSTEXPR_FUNC T undef() +{ + return cpp_compat_intrinsics_impl::undef_helper::__call(); +} + template inline typename cpp_compat_intrinsics_impl::bitCount_helper::return_t bitCount(NBL_CONST_REF_ARG(T) val) { @@ -151,7 +157,7 @@ inline bool any(Vector vec) } template -NBL_CONSTEXPR_INLINE_FUNC ResultType select(Condition condition, ResultType object1, ResultType object2) +NBL_CONSTEXPR_FUNC ResultType select(Condition condition, ResultType object1, ResultType object2) { return cpp_compat_intrinsics_impl::select_helper::__call(condition, object1, object2); } @@ -224,13 +230,13 @@ inline T refract(NBL_CONST_REF_ARG(T) I, NBL_CONST_REF_ARG(T) N, NBL_CONST_REF_A } template -NBL_CONSTEXPR_INLINE_FUNC spirv::AddCarryOutput addCarry(NBL_CONST_REF_ARG(T) operand1, NBL_CONST_REF_ARG(T) operand2) +NBL_CONSTEXPR_FUNC spirv::AddCarryOutput addCarry(NBL_CONST_REF_ARG(T) operand1, NBL_CONST_REF_ARG(T) operand2) { return cpp_compat_intrinsics_impl::addCarry_helper::__call(operand1, operand2); } template -NBL_CONSTEXPR_INLINE_FUNC spirv::SubBorrowOutput subBorrow(NBL_CONST_REF_ARG(T) operand1, NBL_CONST_REF_ARG(T) operand2) +NBL_CONSTEXPR_FUNC spirv::SubBorrowOutput subBorrow(NBL_CONST_REF_ARG(T) operand1, NBL_CONST_REF_ARG(T) operand2) { return cpp_compat_intrinsics_impl::subBorrow_helper::__call(operand1, operand2); } diff --git a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl index 51ca73f6d3..0afe214de7 100644 --- 
a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl @@ -15,7 +15,7 @@ namespace impl template struct Promote { - T operator()(U v) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(U) v) { return T(v); } @@ -26,7 +26,7 @@ struct Promote template struct Promote, U> { - enable_if_t::value && is_scalar::value, vector > operator()(U v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(NBL_CONST_REF_ARG(U) v) { vector promoted = {Scalar(v)}; return promoted; @@ -36,7 +36,7 @@ struct Promote, U> template struct Promote, U> { - enable_if_t::value && is_scalar::value, vector > operator()(U v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(NBL_CONST_REF_ARG(U) v) { vector promoted = {Scalar(v), Scalar(v)}; return promoted; @@ -46,7 +46,7 @@ struct Promote, U> template struct Promote, U> { - enable_if_t::value && is_scalar::value, vector > operator()(U v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(NBL_CONST_REF_ARG(U) v) { vector promoted = {Scalar(v), Scalar(v), Scalar(v)}; return promoted; @@ -56,7 +56,7 @@ struct Promote, U> template struct Promote, U> { - enable_if_t::value && is_scalar::value, vector > operator()(U v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(NBL_CONST_REF_ARG(U) v) { vector promoted = {Scalar(v), Scalar(v), Scalar(v), Scalar(v)}; return promoted; @@ -68,7 +68,7 @@ struct Promote, U> } template -T promote(const U v) // TODO: use NBL_CONST_REF_ARG(U) instead of U v (circular ref) +NBL_CONSTEXPR_FUNC T promote(const U v) // TODO: use NBL_CONST_REF_ARG(U) instead of U v (circular ref) { impl::Promote _promote; return _promote(v); diff --git a/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl new file mode 100644 index 0000000000..a95df183be --- /dev/null +++ b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl @@ -0,0 +1,76 @@ +#ifndef _NBL_BUILTIN_HLSL_CPP_COMPAT_TRUNCATE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_CPP_COMPAT_TRUNCATE_INCLUDED_ + +#include "nbl/builtin/hlsl/type_traits.hlsl" +#include "nbl/builtin/hlsl/concepts/core.hlsl" + +namespace nbl +{ +namespace hlsl +{ + +namespace impl +{ + +template +struct Truncate +{ + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(U) v) + { + return T(v); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::Scalar) +struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar) > +{ + NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + { + vector truncated = { v[0] }; + return truncated; + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::Scalar && N >= 2) +struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar && N >= 2) > +{ + NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + { + vector truncated = { v[0], v[1]}; + return truncated; + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 3) +struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar&& N >= 3) > +{ + NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + { + vector truncated = { v[0], v[1], v[2] }; + return truncated; + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 4) +struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar&& N >= 4) > +{ + NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + { + vector truncated = { v[0], v[1], v[2], v[3] }; + return truncated; + } +}; + +} //namespace impl + +template +NBL_CONSTEXPR_FUNC T 
truncate(NBL_CONST_REF_ARG(U) v) +{ + impl::Truncate _truncate; + return _truncate(v); +} + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl index a0cde90df9..2dfc52c957 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl @@ -412,25 +412,25 @@ inline int extractExponent(__VA_ARGS__ x)\ }\ \ template<>\ -NBL_CONSTEXPR_INLINE_FUNC __VA_ARGS__ replaceBiasedExponent(__VA_ARGS__ x, typename unsigned_integer_of_size::type biasedExp)\ +NBL_CONSTEXPR_FUNC __VA_ARGS__ replaceBiasedExponent(__VA_ARGS__ x, typename unsigned_integer_of_size::type biasedExp)\ {\ return __VA_ARGS__(replaceBiasedExponent(x.data, biasedExp));\ }\ \ template <>\ -NBL_CONSTEXPR_INLINE_FUNC __VA_ARGS__ fastMulExp2(__VA_ARGS__ x, int n)\ +NBL_CONSTEXPR_FUNC __VA_ARGS__ fastMulExp2(__VA_ARGS__ x, int n)\ {\ return __VA_ARGS__(replaceBiasedExponent(x.data, extractBiasedExponent(x) + uint32_t(n)));\ }\ \ template <>\ -NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size::type extractMantissa(__VA_ARGS__ x)\ +NBL_CONSTEXPR_FUNC unsigned_integer_of_size::type extractMantissa(__VA_ARGS__ x)\ {\ return extractMantissa(x.data);\ }\ \ template <>\ -NBL_CONSTEXPR_INLINE_FUNC uint64_t extractNormalizeMantissa(__VA_ARGS__ x)\ +NBL_CONSTEXPR_FUNC uint64_t extractNormalizeMantissa(__VA_ARGS__ x)\ {\ return extractNormalizeMantissa(x.data);\ }\ @@ -577,10 +577,10 @@ namespace ieee754 { namespace impl { -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } } IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); diff --git a/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl index 44b881345d..df785e3e8f 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl @@ -41,7 +41,7 @@ namespace hlsl { namespace emulated_float64_t_impl { -NBL_CONSTEXPR_INLINE_FUNC uint64_t2 shiftMantissaLeftBy53(uint64_t mantissa64) +NBL_CONSTEXPR_FUNC uint64_t2 shiftMantissaLeftBy53(uint64_t mantissa64) { uint64_t2 output; output.x = mantissa64 >> (64 - ieee754::traits::mantissaBitCnt); @@ -74,7 +74,7 @@ inline uint64_t castFloat32ToStorageType(float32_t val) } }; -NBL_CONSTEXPR_INLINE_FUNC bool isZero(uint64_t val) +NBL_CONSTEXPR_FUNC bool isZero(uint64_t val) { return (val << 1) == 0ull; } @@ -137,18 +137,18 @@ inline uint64_t reinterpretAsFloat64BitPattern(int64_t val) return sign | reinterpretAsFloat64BitPattern(absVal); }; -NBL_CONSTEXPR_INLINE_FUNC uint64_t flushDenormToZero(uint64_t value) +NBL_CONSTEXPR_FUNC uint64_t flushDenormToZero(uint64_t value) { const uint64_t biasBits = value & 
ieee754::traits::exponentMask; return biasBits ? value : (value & ieee754::traits::signMask); } -NBL_CONSTEXPR_INLINE_FUNC uint64_t assembleFloat64(uint64_t signShifted, uint64_t expShifted, uint64_t mantissa) +NBL_CONSTEXPR_FUNC uint64_t assembleFloat64(uint64_t signShifted, uint64_t expShifted, uint64_t mantissa) { return signShifted | expShifted | mantissa; } -NBL_CONSTEXPR_INLINE_FUNC bool areBothInfinity(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool areBothInfinity(uint64_t lhs, uint64_t rhs) { lhs &= ~ieee754::traits::signMask; rhs &= ~ieee754::traits::signMask; @@ -156,18 +156,18 @@ NBL_CONSTEXPR_INLINE_FUNC bool areBothInfinity(uint64_t lhs, uint64_t rhs) return lhs == rhs && lhs == ieee754::traits::inf; } -NBL_CONSTEXPR_INLINE_FUNC bool areBothZero(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool areBothZero(uint64_t lhs, uint64_t rhs) { return !bool((lhs | rhs) << 1); } -NBL_CONSTEXPR_INLINE_FUNC bool areBothSameSignZero(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool areBothSameSignZero(uint64_t lhs, uint64_t rhs) { return !bool((lhs) << 1) && (lhs == rhs); } template -NBL_CONSTEXPR_INLINE_FUNC bool operatorLessAndGreaterCommonImplementation(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool operatorLessAndGreaterCommonImplementation(uint64_t lhs, uint64_t rhs) { if (!FastMath) { diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 4f354c900e..8a3fd42faf 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -20,6 +20,7 @@ struct emulated_int64_base { using storage_t = vector; using this_t = emulated_int64_base; + using this_signed_t = emulated_int64_base; storage_t data; @@ -29,6 +30,12 @@ struct emulated_int64_base emulated_int64_base() = default; + // GLM requires these to cast vectors because it uses a native `static_cast` + template + constexpr explicit emulated_int64_base(const I& toEmulate); + + constexpr explicit emulated_int64_base(const emulated_int64_base& other) : data(other.data) {} + #endif /** @@ -36,7 +43,7 @@ struct emulated_int64_base * * @param [in] _data Vector of `uint32_t` encoding the `uint64_t/int64_t` being emulated. 
Stored as little endian (the first component holds the lower 32 bits)
     */
-    NBL_CONSTEXPR_STATIC_INLINE_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data)
+    NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data)
     {
         this_t retVal;
         retVal.data = _data;
@@ -46,47 +53,57 @@ struct emulated_int64_base
     /**
     * @brief Creates an `emulated_int64` from two `uint32_t`s representing its bitpattern
     *
-    * @param [in] hi Highest 32 bits of the `uint64_t/int64_t` being emulated
     * @param [in] lo Lowest 32 bits of the `uint64_t/int64_t` being emulated
+    * @param [in] hi Highest 32 bits of the `uint64_t/int64_t` being emulated
     */
-    NBL_CONSTEXPR_STATIC_INLINE_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) lo, NBL_CONST_REF_ARG(uint32_t) hi)
+    NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) lo, NBL_CONST_REF_ARG(uint32_t) hi)
     {
         return create(storage_t(lo, hi));
     }
 
+    // ------------------------------------------------------- CONVERSION OPERATORS ---------------------------------------------------------------
+    // GLM requires these for vector casts
+
+    #ifndef __HLSL_VERSION
+
+    template
+    constexpr explicit operator I() const noexcept;
+
+    #endif
+
     // ------------------------------------------------------- INTERNAL GETTERS -------------------------------------------------
 
-    NBL_CONSTEXPR_INLINE_FUNC uint32_t __getLSB() NBL_CONST_MEMBER_FUNC
+    NBL_CONSTEXPR_FUNC uint32_t __getLSB() NBL_CONST_MEMBER_FUNC
     {
         return data.x;
     }
 
-    NBL_CONSTEXPR_INLINE_FUNC uint32_t __getMSB() NBL_CONST_MEMBER_FUNC
+    NBL_CONSTEXPR_FUNC uint32_t __getMSB() NBL_CONST_MEMBER_FUNC
     {
         return data.y;
     }
 
     // ------------------------------------------------------- BITWISE OPERATORS -------------------------------------------------
 
-    NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    NBL_CONSTEXPR_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
     {
         this_t retVal = create(data & rhs.data);
         return retVal;
     }
 
-    NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    NBL_CONSTEXPR_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
     {
         this_t retVal = create(data | rhs.data);
         return retVal;
     }
 
-    NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    NBL_CONSTEXPR_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
     {
         this_t retVal = create(data ^ rhs.data);
         return retVal;
     }
 
-    NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC
+    NBL_CONSTEXPR_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC
     {
         this_t retVal = create(~data);
         return retVal;
@@ -101,56 +118,62 @@ struct emulated_int64_base
 
     // ------------------------------------------------------- ARITHMETIC OPERATORS -------------------------------------------------
 
-    NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    NBL_CONSTEXPR_FUNC this_signed_t operator-() NBL_CONST_MEMBER_FUNC
+    {
+        // Two's complement negation: flip all bits then add 1, letting the carry out of the low half propagate into the high half
+        // (a lane-wise `-data` would drop the borrow between the two 32-bit halves)
+        const spirv::AddCarryOutput lowerNegationResult = addCarry(~__getLSB(), uint32_t(1));
+        return this_signed_t::create(lowerNegationResult.result, ~__getMSB() + lowerNegationResult.carry);
+    }
+
+    NBL_CONSTEXPR_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
     {
         const spirv::AddCarryOutput lowerAddResult = addCarry(__getLSB(), rhs.__getLSB());
-        const this_t retVal = create(lowerAddResult.result, __getMSB() + rhs.__getMSB() + lowerAddResult.carry);
-        return retVal;
+        return create(lowerAddResult.result, __getMSB() + rhs.__getMSB() + lowerAddResult.carry);
     }
 
-    NBL_CONSTEXPR_INLINE_FUNC this_t
operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { const spirv::SubBorrowOutput lowerSubResult = subBorrow(__getLSB(), rhs.__getLSB()); - const this_t retVal = create(lowerSubResult.result, __getMSB() - rhs.__getMSB() - lowerSubResult.borrow); - return retVal; + return create(lowerSubResult.result, __getMSB() - rhs.__getMSB() - lowerSubResult.borrow); } // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- - NBL_CONSTEXPR_INLINE_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - return all(data == rhs.data); + equal_to equals; + return all(equals(data, rhs.data)); } - NBL_CONSTEXPR_INLINE_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - return any(data != rhs.data); + not_equal_to notEquals; + return any(notEquals(data, rhs.data)); } - NBL_CONSTEXPR_INLINE_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { // Either the topmost bits, when interpreted with correct sign, are less than those of `rhs`, or they're equal and the lower bits are less // (lower bits are always positive in both unsigned and 2's complement so comparison can happen as-is) const bool MSBEqual = __getMSB() == rhs.__getMSB(); - const bool MSB = Signed ? (_static_cast(__getMSB()) < _static_cast(rhs.__getMSB())) : (__getMSB() < rhs.__getMSB()); + const bool MSB = Signed ? (bit_cast(__getMSB()) < bit_cast(rhs.__getMSB())) : (__getMSB() < rhs.__getMSB()); const bool LSB = __getLSB() < rhs.__getLSB(); return MSBEqual ? LSB : MSB; } - NBL_CONSTEXPR_INLINE_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { // Same reasoning as above const bool MSBEqual = __getMSB() == rhs.__getMSB(); - const bool MSB = Signed ? (_static_cast(__getMSB()) > _static_cast(rhs.__getMSB())) : (__getMSB() > rhs.__getMSB()); + const bool MSB = Signed ? (bit_cast(__getMSB()) > bit_cast(rhs.__getMSB())) : (__getMSB() > rhs.__getMSB()); const bool LSB = __getLSB() > rhs.__getLSB(); return MSBEqual ? 
LSB : MSB; } - NBL_CONSTEXPR_INLINE_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { return !operator>(rhs); } - NBL_CONSTEXPR_INLINE_FUNC bool operator>=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC bool operator>=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { return !operator<(rhs); } @@ -162,32 +185,16 @@ using emulated_int64_t = emulated_int64_base; namespace impl { -template<> -struct static_cast_helper -{ - using To = emulated_uint64_t; - using From = emulated_int64_t; - - // Return only the lowest bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) - { - To retVal; - retVal.data = i.data; - return retVal; - } -}; - -template<> -struct static_cast_helper +template +struct static_cast_helper, emulated_int64_base > { - using To = emulated_int64_t; - using From = emulated_uint64_t; + using To = emulated_int64_base; + using From = emulated_int64_base; - // Return only the lowest bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) other) { To retVal; - retVal.data = u.data; + retVal.data = other.data; return retVal; } }; @@ -199,19 +206,19 @@ struct static_cast_helper NBL_PARTIAL_REQ_BOT(con using From = emulated_int64_base; // Return only the lowest bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From val) + NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) val) { return _static_cast(val.data.x); } }; -template NBL_PARTIAL_REQ_TOP(is_same_v || is_same_v) -struct static_cast_helper NBL_PARTIAL_REQ_BOT(is_same_v || is_same_v) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) > sizeof(uint32_t))) +struct static_cast_helper NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) > sizeof(uint32_t))) > { using To = I; using From = emulated_int64_base; - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From val) + NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) val) { return bit_cast(val.data); } @@ -224,28 +231,53 @@ struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(con using From = I; // Set only lower bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) i) { - return To::create(uint32_t(0), _static_cast(i)); + return To::create(_static_cast(i), uint32_t(0)); } }; -template NBL_PARTIAL_REQ_TOP(is_same_v || is_same_v ) -struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(is_same_v || is_same_v) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) > sizeof(uint32_t))) +struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) > sizeof(uint32_t))) > { using To = emulated_int64_base; using From = I; - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) i) { + // `bit_cast` blocked by GLM vectors using a union + #ifndef __HLSL_VERSION + return To::create(_static_cast(i), _static_cast(i >> 32)); + #else To retVal; - retVal.data = bit_cast(i); + retVal.data = bit_cast >(i); return retVal; + #endif } }; } //namespace impl +// Define constructor and conversion operators + +#ifndef __HLSL_VERSION + +template +template +constexpr emulated_int64_base::emulated_int64_base(const I& toEmulate) +{ + *this = _static_cast>(toEmulate); +} + +template +template +constexpr emulated_int64_base::operator I() const noexcept +{ + return _static_cast(*this); +} + +#endif + // ---------------------- 
Functional operators ------------------------ template @@ -258,7 +290,7 @@ struct left_shift_operator > //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 // If `_bits > 63` or `_bits < 0` the result is undefined - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) { const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites LSB const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; @@ -269,7 +301,7 @@ struct left_shift_operator > } // If `_bits > 63` or `_bits < 0` the result is undefined - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) { return operator()(operand, _static_cast(bits)); } @@ -285,7 +317,7 @@ struct arithmetic_right_shift_operator //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 // If `_bits > 63` the result is undefined - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) { const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites MSB const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; @@ -296,7 +328,7 @@ struct arithmetic_right_shift_operator } // If `_bits > 63` the result is undefined - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) { return operator()(operand, _static_cast(bits)); } @@ -312,18 +344,18 @@ struct arithmetic_right_shift_operator //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 // If `_bits > 63` or `_bits < 0` the result is undefined - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) { const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites MSB const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; - const type_t shifted = type_t::create(bigShift ? vector(uint32_t(int32_t(operand.__getMSB()) >> shift), ~uint32_t(0)) + const type_t shifted = type_t::create(bigShift ? vector(uint32_t(int32_t(operand.__getMSB()) >> shift), int32_t(operand.__getMSB()) < 0 ? 
~uint32_t(0) : uint32_t(0)) : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), uint32_t(int32_t(operand.__getMSB()) >> bits))); ternary_operator ternary; return ternary(bool(bits), shifted, operand); } // If `_bits > 63` or `_bits < 0` the result is undefined - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) { return operator()(operand, _static_cast(bits)); } diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index fd5f5e3c34..3780ce001b 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -24,7 +24,7 @@ struct _2_component_vec static_assert(sizeof(T) <= 8); - NBL_CONSTEXPR_INLINE_FUNC void setComponent(uint32_t componentIdx, T val) + NBL_CONSTEXPR_FUNC void setComponent(uint32_t componentIdx, T val) { if (componentIdx == 0) x = val; @@ -32,7 +32,7 @@ struct _2_component_vec y = val; } - NBL_CONSTEXPR_INLINE_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC { if (componentIdx == 0) return x; @@ -40,9 +40,10 @@ struct _2_component_vec return y; // TODO: avoid code duplication, make it constexpr - using TAsUint = typename unsigned_integer_of_size::type; - TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull); - return nbl::hlsl::bit_cast(invalidComponentValue); + //using TAsUint = typename unsigned_integer_of_size::type; + //TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull); + //return nbl::hlsl::bit_cast(invalidComponentValue); + return nbl::hlsl::undef(); } NBL_CONSTEXPR_STATIC uint32_t Dimension = 2; @@ -56,7 +57,7 @@ struct _3_component_vec T z; - NBL_CONSTEXPR_INLINE_FUNC void setComponent(uint32_t componentIdx, T val) + NBL_CONSTEXPR_FUNC void setComponent(uint32_t componentIdx, T val) { if (componentIdx == 0) x = val; @@ -66,7 +67,7 @@ struct _3_component_vec z = val; } - NBL_CONSTEXPR_INLINE_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC { if (componentIdx == 0) return x; @@ -76,9 +77,10 @@ struct _3_component_vec return z; // TODO: avoid code duplication, make it constexpr - using TAsUint = typename unsigned_integer_of_size::type; - TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); - return nbl::hlsl::bit_cast(invalidComponentValue); + //using TAsUint = typename unsigned_integer_of_size::type; + //TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); + //return nbl::hlsl::bit_cast(invalidComponentValue); + return nbl::hlsl::undef(); } NBL_CONSTEXPR_STATIC uint32_t Dimension = 3; @@ -92,7 +94,7 @@ struct _4_component_vec T z; T w; - NBL_CONSTEXPR_INLINE_FUNC void setComponent(uint32_t componentIdx, T val) + NBL_CONSTEXPR_FUNC void setComponent(uint32_t componentIdx, T val) { if (componentIdx == 0) x = val; @@ -104,7 +106,7 @@ struct _4_component_vec w = val; } - NBL_CONSTEXPR_INLINE_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC { if (componentIdx == 0) return x; @@ -116,184 +118,210 @@ struct _4_component_vec return w; // TODO: avoid code duplication, make it constexpr 
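+        // Out-of-range component index: yield an unspecified value via `undef()` (maps to OpUndef in SPIR-V) instead of a fixed poison constant (assumed rationale)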
- using TAsUint = typename unsigned_integer_of_size::type; - uint64_t invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); - return nbl::hlsl::bit_cast(invalidComponentValue); + //using TAsUint = typename unsigned_integer_of_size::type; + //uint64_t invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); + //return nbl::hlsl::bit_cast(invalidComponentValue); + return nbl::hlsl::undef(); } NBL_CONSTEXPR_STATIC uint32_t Dimension = 4; }; -template ::value> -struct emulated_vector : CRTP -{ - using this_t = emulated_vector; - using component_t = ComponentType; +template +struct emulated_vector; - NBL_CONSTEXPR_STATIC_INLINE this_t create(this_t other) - { - CRTP output; +// Generic ComponentType vectors still have to be partial specialized based on whether they're fundamental and/or integral - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, other.getComponent(i)); - } - NBL_CONSTEXPR_STATIC_INLINE this_t create(vector other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, other[i]); - - return output; - } - - #define NBL_EMULATED_VECTOR_OPERATOR(OP)\ - NBL_CONSTEXPR_INLINE_FUNC this_t operator##OP (component_t val)\ - {\ - this_t output;\ - for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, this_t::getComponent(i) OP val);\ - return output;\ - }\ - NBL_CONSTEXPR_INLINE_FUNC this_t operator##OP (this_t other)\ - {\ - this_t output;\ - for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, this_t::getComponent(i) OP other.getComponent(i));\ - return output;\ - }\ - NBL_CONSTEXPR_INLINE_FUNC this_t operator##OP (vector other)\ - {\ - this_t output;\ - for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, this_t::getComponent(i) OP other[i]);\ - return output;\ - } +#define NBL_EMULATED_VECTOR_UNARY_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC this_t operator##OP() NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i).operator##OP());\ + return output;\ +} - NBL_EMULATED_VECTOR_OPERATOR(&) - NBL_EMULATED_VECTOR_OPERATOR(|) - NBL_EMULATED_VECTOR_OPERATOR(^) - NBL_EMULATED_VECTOR_OPERATOR(+) - NBL_EMULATED_VECTOR_OPERATOR(-) - NBL_EMULATED_VECTOR_OPERATOR(*) - NBL_EMULATED_VECTOR_OPERATOR(/) +#define NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC this_t operator##OP (component_t val) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP val);\ + return output;\ +}\ +NBL_CONSTEXPR_FUNC this_t operator##OP (this_t other) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP other.getComponent(i));\ + return output;\ +} - #undef NBL_EMULATED_VECTOR_OPERATOR +#define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(OP) NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC this_t operator##OP(vector other) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP other[i]);\ + return output;\ +} - #define NBL_EMULATED_VECTOR_COMPARISON(OP) NBL_CONSTEXPR_INLINE_FUNC vector operator##OP (this_t other)\ - {\ - vector output;\ - [[unroll]]\ - 
for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\
+        output[i] = CRTP::getComponent(i) OP other.getComponent(i);\
+    return output;\
+}

+#define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(OP) NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(OP)\
+NBL_CONSTEXPR_FUNC vector operator##OP (vector other) NBL_CONST_MEMBER_FUNC \
+{\
+    vector output;\
+    [[unroll]]\
+    for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\
+        output[i] = CRTP::getComponent(i) OP other[i];\
+    return output;\
+}
+
+#define NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM \
+using this_t = emulated_vector;\
+using component_t = ComponentType;\
+NBL_CONSTEXPR_STATIC_FUNC this_t create(this_t other)\
+{\
+    this_t output;\
+    [[unroll]]\
+    for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\
+        output.setComponent(i, other.getComponent(i));\
+    return output;\
+}\
+NBL_CONSTEXPR_FUNC component_t calcComponentSum() NBL_CONST_MEMBER_FUNC \
+{\
+    component_t sum = CRTP::getComponent(0);\
+    [[unroll]]\
+    for (uint32_t i = 1u; i < CRTP::Dimension; ++i)\
+        sum = sum + CRTP::getComponent(i);\
+    return sum;\
+}
+
+#define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM \
+NBL_CONSTEXPR_STATIC_FUNC this_t create(vector other)\
+{\
+    this_t output;\
+    [[unroll]]\
+    for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\
+        output.setComponent(i, other[i]);\
+    return output;\
+}
+
+// Fundamental, integral
+template NBL_PARTIAL_REQ_TOP(is_fundamental_v && concepts::IntegralLikeScalar)
+struct emulated_vector&& concepts::IntegralLikeScalar) > : CRTP
+{
+    // Creation for fundamental type
+    NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM
+    // Operators, including integral
+    NBL_EMULATED_VECTOR_UNARY_OPERATOR(~)
+    NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(&)
+    NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(|)
+    NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(^)
+    NBL_EMULATED_VECTOR_UNARY_OPERATOR(-)
+    NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(+)
+    NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(-)
+    NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(*)
+    NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(/)
+    // Comparison operators
+    NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(==)
+    NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(!=)
+    NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<)
+    NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<=)
+    NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>)
+    NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>=)
+};
+
+// Fundamental, not integral
+template NBL_PARTIAL_REQ_TOP(is_fundamental_v && !concepts::IntegralLikeScalar)
+struct emulated_vector && 
!concepts::IntegralLikeScalar) > : CRTP { - using component_t = ComponentType; - using this_t = emulated_vector; - - NBL_CONSTEXPR_STATIC_INLINE this_t create(this_t other) - { - this_t output; - [[unroll]] - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, other.getComponent(i)); - - return output; - } - - template - NBL_CONSTEXPR_STATIC_INLINE this_t create(vector other) - { - this_t output; - [[unroll]] - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, ComponentType::create(other[i])); - - return output; - } - - #define NBL_EMULATED_VECTOR_OPERATOR(OP, ENABLE_CONDITION) NBL_CONSTEXPR_INLINE_FUNC enable_if_t< ENABLE_CONDITION , this_t> operator##OP (component_t val)\ - {\ - this_t output;\ - [[unroll]]\ - for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, CRTP::getComponent(i) OP val);\ - return output;\ - }\ - NBL_CONSTEXPR_INLINE_FUNC enable_if_t< ENABLE_CONDITION , this_t> operator##OP (this_t other)\ - {\ - this_t output;\ - [[unroll]]\ - for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, CRTP::getComponent(i) OP other.getComponent(i));\ - return output;\ - } - - NBL_EMULATED_VECTOR_OPERATOR(&, concepts::IntegralLikeScalar) - NBL_EMULATED_VECTOR_OPERATOR(|, concepts::IntegralLikeScalar) - NBL_EMULATED_VECTOR_OPERATOR(^, concepts::IntegralLikeScalar) - NBL_EMULATED_VECTOR_OPERATOR(+, true) - NBL_EMULATED_VECTOR_OPERATOR(-, true) - NBL_EMULATED_VECTOR_OPERATOR(*, true) - NBL_EMULATED_VECTOR_OPERATOR(/, true) - - #undef NBL_EMULATED_VECTOR_OPERATOR - - #define NBL_EMULATED_VECTOR_COMPARISON(OP) NBL_CONSTEXPR_INLINE_FUNC vector operator##OP (this_t other)\ - {\ - vector output;\ - [[unroll]]\ - for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output[i] = CRTP::getComponent(i) OP other.getComponent(i);\ - return output;\ - } - - NBL_EMULATED_VECTOR_COMPARISON(==) - NBL_EMULATED_VECTOR_COMPARISON(!=) - NBL_EMULATED_VECTOR_COMPARISON(<) - NBL_EMULATED_VECTOR_COMPARISON(<=) - NBL_EMULATED_VECTOR_COMPARISON(>) - NBL_EMULATED_VECTOR_COMPARISON(>=) - - #undef NBL_EMULATED_VECTOR_COMPARISON + // Creation for fundamental type + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM + // Operators + NBL_EMULATED_VECTOR_UNARY_OPERATOR(-) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(+) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(-) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(*) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(/) + // Comparison operators + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(==) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(!=) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<=) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>=) +}; - NBL_CONSTEXPR_INLINE_FUNC ComponentType calcComponentSum() - { - ComponentType sum = ComponentType::create(0); - [[unroll]] - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - sum = sum + CRTP::getComponent(i); +// Not fundamental, integral +template NBL_PARTIAL_REQ_TOP(!is_fundamental_v && concepts::IntegralLikeScalar) +struct emulated_vector && concepts::IntegralLikeScalar) > : CRTP +{ + // Creation + NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM + // Operators, including integral + NBL_EMULATED_VECTOR_UNARY_OPERATOR(~) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(&) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(|) + 
NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(^) + NBL_EMULATED_VECTOR_UNARY_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(+) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(*) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(/) + // Comparison operators + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(==) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(!=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>=) +}; - return sum; - } +// Not fundamental, not integral +template NBL_PARTIAL_REQ_TOP(!is_fundamental_v && !concepts::IntegralLikeScalar) +struct emulated_vector && !concepts::IntegralLikeScalar) > : CRTP +{ + // Creation + NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM + // Operators + NBL_EMULATED_VECTOR_UNARY_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(+) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(*) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(/) + // Comparison operators + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(==) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(!=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>=) }; +#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM +#undef NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM +#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR +#undef NBL_EMULATED_VECTOR_COMPARISON_OPERATOR +#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR +#undef NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR +#undef NBL_EMULATED_VECTOR_UNARY_OPERATOR + +// ----------------------------------------------------- EMULATED FLOAT SPECIALIZATION -------------------------------------------------------------------- #define DEFINE_OPERATORS_FOR_TYPE(...)\ -NBL_CONSTEXPR_INLINE_FUNC this_t operator+(__VA_ARGS__ val)\ +NBL_CONSTEXPR_FUNC this_t operator+(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ @@ -302,7 +330,7 @@ NBL_CONSTEXPR_INLINE_FUNC this_t operator+(__VA_ARGS__ val)\ return output;\ }\ \ -NBL_CONSTEXPR_INLINE_FUNC this_t operator-(__VA_ARGS__ val)\ +NBL_CONSTEXPR_FUNC this_t operator-(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ @@ -311,7 +339,7 @@ NBL_CONSTEXPR_INLINE_FUNC this_t operator-(__VA_ARGS__ val)\ return output;\ }\ \ -NBL_CONSTEXPR_INLINE_FUNC this_t operator*(__VA_ARGS__ val)\ +NBL_CONSTEXPR_FUNC this_t operator*(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ @@ -321,15 +349,14 @@ NBL_CONSTEXPR_INLINE_FUNC this_t operator*(__VA_ARGS__ val)\ }\ \ -// ----------------------------------------------------- EMULATED FLOAT SPECIALIZATION -------------------------------------------------------------------- template -struct emulated_vector, CRTP, false> : CRTP +struct emulated_vector, CRTP> : CRTP { using component_t = emulated_float64_t; - using this_t = emulated_vector; + using this_t = emulated_vector; - NBL_CONSTEXPR_STATIC_INLINE this_t create(this_t other) + NBL_CONSTEXPR_STATIC_FUNC this_t create(this_t other) { this_t output; @@ -340,7 +367,7 @@ struct emulated_vector, CRTP, fa } template - NBL_CONSTEXPR_STATIC_INLINE this_t create(vector other) + NBL_CONSTEXPR_STATIC_FUNC this_t create(vector other) { 
this_t output; @@ -350,7 +377,7 @@ struct emulated_vector, CRTP, fa return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(this_t other) + NBL_CONSTEXPR_FUNC this_t operator+(this_t other) NBL_CONST_MEMBER_FUNC { this_t output; @@ -359,7 +386,7 @@ struct emulated_vector, CRTP, fa return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(this_t other) + NBL_CONSTEXPR_FUNC this_t operator-(this_t other) NBL_CONST_MEMBER_FUNC { this_t output; @@ -368,7 +395,7 @@ struct emulated_vector, CRTP, fa return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(this_t other) + NBL_CONSTEXPR_FUNC this_t operator*(this_t other) NBL_CONST_MEMBER_FUNC { this_t output; @@ -391,7 +418,7 @@ struct emulated_vector, CRTP, fa DEFINE_OPERATORS_FOR_TYPE(int32_t) DEFINE_OPERATORS_FOR_TYPE(int64_t) - NBL_CONSTEXPR_INLINE_FUNC component_t calcComponentSum() + NBL_CONSTEXPR_FUNC component_t calcComponentSum() NBL_CONST_MEMBER_FUNC { component_t sum = component_t::create(0); for (uint32_t i = 0u; i < CRTP::Dimension; ++i) @@ -478,7 +505,7 @@ namespace impl template struct static_cast_helper, vector, void> { - NBL_CONSTEXPR_STATIC_INLINE emulated_vector_t2 cast(vector vec) + NBL_CONSTEXPR_STATIC_FUNC emulated_vector_t2 cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t2 output; output.x = _static_cast(vec.x); @@ -491,7 +518,7 @@ struct static_cast_helper, vector, void> template struct static_cast_helper, vector, void> { - NBL_CONSTEXPR_STATIC_INLINE emulated_vector_t3 cast(vector vec) + NBL_CONSTEXPR_STATIC_FUNC emulated_vector_t3 cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t3 output; output.x = _static_cast(vec.x); @@ -505,7 +532,7 @@ struct static_cast_helper, vector, void> template struct static_cast_helper, vector, void> { - NBL_CONSTEXPR_STATIC_INLINE emulated_vector_t4 cast(vector vec) + NBL_CONSTEXPR_STATIC_FUNC emulated_vector_t4 cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t4 output; output.x = _static_cast(vec.x); @@ -523,12 +550,13 @@ struct static_cast_helper, emulated_vector_t; using InputVecType = emulated_vector_t; - NBL_CONSTEXPR_STATIC_INLINE OutputVecType cast(InputVecType vec) + NBL_CONSTEXPR_STATIC_FUNC OutputVecType cast(NBL_CONST_REF_ARG(InputVecType) vec) { array_get getter; array_set setter; OutputVecType output; + [[unroll]] for (int i = 0; i < N; ++i) setter(output, i, _static_cast(getter(vec, i))); @@ -541,11 +569,12 @@ struct static_cast_helper, emulated_vecto {\ using OutputVecType = emulated_vector_t##N ;\ using InputVecType = emulated_vector_t##N ;\ - NBL_CONSTEXPR_STATIC_INLINE OutputVecType cast(InputVecType vec)\ + NBL_CONSTEXPR_STATIC_FUNC OutputVecType cast(NBL_CONST_REF_ARG(InputVecType) vec)\ {\ array_get getter;\ array_set setter;\ OutputVecType output;\ + [[unroll]]\ for (int i = 0; i < N; ++i)\ setter(output, i, _static_cast(getter(vec, i)));\ return output;\ @@ -558,16 +587,38 @@ NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST(4) #undef NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST +#define NBL_EMULATED_VEC_PROMOTION(N) template\ +struct Promote, ComponentType>\ +{\ + using VecType = emulated_vector_t##N ;\ + NBL_CONSTEXPR_FUNC VecType operator()(NBL_CONST_REF_ARG(ComponentType) v)\ + {\ + array_set setter;\ + VecType promoted;\ + [[unroll]]\ + for (int i = 0; i < N; ++i)\ + setter(promoted, i, v);\ + return promoted;\ + }\ +}; + +NBL_EMULATED_VEC_PROMOTION(2) +NBL_EMULATED_VEC_PROMOTION(3) +NBL_EMULATED_VEC_PROMOTION(4) + +#undef NBL_EMULATED_VEC_PROMOTION + #define NBL_EMULATED_VEC_TRUNCATION(N, M) template\ -struct static_cast_helper, 
emulated_vector_t##M , void>\ +struct Truncate, emulated_vector_t##M >\ {\ using OutputVecType = emulated_vector_t##N ;\ using InputVecType = emulated_vector_t##M ;\ - NBL_CONSTEXPR_STATIC_INLINE OutputVecType cast(InputVecType vec)\ + NBL_CONSTEXPR_FUNC OutputVecType operator()(NBL_CONST_REF_ARG(InputVecType) vec)\ {\ array_get getter;\ array_set setter;\ OutputVecType output;\ + [[unroll]]\ for (int i = 0; i < N; ++i)\ setter(output, i, getter(vec, i));\ return output;\ @@ -583,7 +634,7 @@ NBL_EMULATED_VEC_TRUNCATION(4, 4) #undef NBL_EMULATED_VEC_TRUNCATION -} +} //namespace impl } } diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 45198cbe7a..76b527f6bd 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -91,7 +91,6 @@ struct reference_wrapper : enable_if_t< #else // CPP - #define ALIAS_STD(NAME,OP) template struct NAME : std::NAME { \ using type_t = T; @@ -135,18 +134,69 @@ ALIAS_STD(divides,/) NBL_CONSTEXPR_STATIC_INLINE T identity = T(1); }; +#ifndef __HLSL_VERSION + +template +struct bit_not : std::bit_not +{ + using type_t = T; +}; + +#else + +template +struct bit_not +{ + using type_t = T; + + T operator()(NBL_CONST_REF_ARG(T) operand) + { + return ~operand; + } +}; + +// The default version above only works for fundamental scalars, vectors and matrices. This is because you can't call `~x` unless `x` is one of the former. +// Similarly, calling `x.operator~()` is not valid for the aforementioned, and only for types overriding this operator. So, we need a specialization. +template NBL_PARTIAL_REQ_TOP(!(concepts::Scalar || concepts::Vector || concepts::Matrix)) +struct bit_not || concepts::Vector || concepts::Matrix)) > +{ + using type_t = T; + + T operator()(NBL_CONST_REF_ARG(T) operand) + { + return operand.operator~(); + } +}; + +#endif -ALIAS_STD(equal_to,==) }; -ALIAS_STD(not_equal_to,!=) }; -ALIAS_STD(greater,>) }; -ALIAS_STD(less,<) }; -ALIAS_STD(greater_equal,>=) }; -ALIAS_STD(less_equal, <= ) }; +ALIAS_STD(equal_to, ==) }; +ALIAS_STD(not_equal_to, !=) }; +ALIAS_STD(greater, >) }; +ALIAS_STD(less, <) }; +ALIAS_STD(greater_equal, >=) }; +ALIAS_STD(less_equal, <=) }; #undef ALIAS_STD -// The above comparison operators return bool on STD. 
Here's a specialization so that they return `vector` for vectorial types -#define NBL_COMPARISON_VECTORIAL_SPECIALIZATION(NAME, OP) template NBL_PARTIAL_REQ_TOP(concepts::Vectorial)\ +// The above comparison operators return bool on STD, but in HLSL they're supposed to yield bool vectors, so here's a specialization so that they return `vector` for vectorial types + +// GLM doesn't have operators on vectors +#ifndef __HLSL_VERSION + +#define NBL_COMPARISON_VECTORIAL_SPECIALIZATION(NAME, OP, GLM_OP) template NBL_PARTIAL_REQ_TOP(concepts::Vectorial)\ +struct NAME ) >\ +{\ + using type_t = T;\ + vector::Dimension> operator()(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs)\ + {\ + return glm::GLM_OP (lhs, rhs);\ + }\ +}; + +#else + +#define NBL_COMPARISON_VECTORIAL_SPECIALIZATION(NAME, OP, GLM_OP) template NBL_PARTIAL_REQ_TOP(concepts::Vectorial)\ struct NAME ) >\ {\ using type_t = T;\ @@ -156,16 +206,18 @@ struct NAME ) >\ }\ }; -NBL_COMPARISON_VECTORIAL_SPECIALIZATION(equal_to, ==) -NBL_COMPARISON_VECTORIAL_SPECIALIZATION(not_equal_to, !=) -NBL_COMPARISON_VECTORIAL_SPECIALIZATION(greater, >) -NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less, <) -NBL_COMPARISON_VECTORIAL_SPECIALIZATION(greater_equal, >=) -NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less_equal, <=) +#endif + +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(equal_to, ==, equal) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(not_equal_to, !=, notEqual) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(greater, >, greaterThan) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less, <, lessThan) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(greater_equal, >=, greaterThanEqual) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less_equal, <=, lessThanEqual) #undef NBL_COMPARISON_VECTORIAL_SPECIALIZATION -// ------------------------ Compound assignment operators ---------------------- +// ------------------------------------------------------------- COMPOUND ASSIGNMENT OPERATORS -------------------------------------------------------------------- #define COMPOUND_ASSIGN(NAME) template struct NAME##_assign { \ using type_t = T; \ @@ -186,7 +238,7 @@ COMPOUND_ASSIGN(divides) #undef COMPOUND_ASSIGN -// ----------------- End of compound assignment ops ---------------- +// ---------------------------------------------------------------- MIN, MAX, TERNARY ------------------------------------------------------------------------- // Min, Max, and Ternary and Shift operators don't use ALIAS_STD because they don't exist in STD // TODO: implement as mix(rhs(condition, lhs, rhs); } }; +// ----------------------------------------------------------------- SHIFT OPERATORS -------------------------------------------------------------------- + template struct left_shift_operator { using type_t = T; - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { return operand << bits; } @@ -246,28 +300,28 @@ struct left_shift_operator) > using type_t = T; using scalar_t = scalar_type_t; - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { return operand << bits; } - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) { return operand << bits; } }; -template 
NBL_PARTIAL_REQ_TOP(!concepts::Vector && concepts::IntegralLikeVectorial) -struct left_shift_operator && concepts::IntegralLikeVectorial) > +template NBL_PARTIAL_REQ_TOP(!concepts::IntVector && concepts::IntegralLikeVectorial) +struct left_shift_operator && concepts::IntegralLikeVectorial) > { using type_t = T; using scalar_t = typename vector_traits::scalar_type; - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { array_get getter; array_set setter; - NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; [[unroll]] @@ -278,11 +332,11 @@ struct left_shift_operator && concept return shifted; } - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) { array_get getter; array_set setter; - NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; [[unroll]] @@ -293,11 +347,11 @@ struct left_shift_operator && concept return shifted; } - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(vector::Dimension>) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(vector::Dimension>) bits) { array_get getter; array_set setter; - NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; [[unroll]] @@ -308,11 +362,11 @@ struct left_shift_operator && concept return shifted; } - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(uint32_t) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(uint16_t) bits) { array_get getter; array_set setter; - NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; [[unroll]] @@ -329,7 +383,7 @@ struct arithmetic_right_shift_operator { using type_t = T; - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { return operand >> bits; } @@ -341,28 +395,28 @@ struct arithmetic_right_shift_operator; - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { return operand >> bits; } - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) { return operand >> bits; } }; -template NBL_PARTIAL_REQ_TOP(!concepts::Vector&& concepts::IntegralLikeVectorial) -struct arithmetic_right_shift_operator&& concepts::IntegralLikeVectorial) > +template NBL_PARTIAL_REQ_TOP(!concepts::IntVector&& concepts::IntegralLikeVectorial) +struct arithmetic_right_shift_operator&& concepts::IntegralLikeVectorial) > { using type_t = T; using scalar_t = typename vector_traits::scalar_type; - NBL_CONSTEXPR_INLINE_FUNC T 
operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { array_get getter; array_set setter; - NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] @@ -373,11 +427,11 @@ struct arithmetic_right_shift_operator getter; array_set setter; - NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] @@ -388,11 +442,11 @@ struct arithmetic_right_shift_operator::Dimension>) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(vector::Dimension>) bits) { array_get getter; array_set setter; - NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] @@ -403,11 +457,11 @@ struct arithmetic_right_shift_operator getter; array_set setter; - NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] @@ -426,7 +480,7 @@ struct logical_right_shift_operator using type_t = T; using unsigned_type_t = make_unsigned_t; - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { arithmetic_right_shift_operator arithmeticRightShift; return _static_cast(arithmeticRightShift(_static_cast(operand), _static_cast(bits))); diff --git a/include/nbl/builtin/hlsl/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754.hlsl index 8d9c78a9f0..e81ff08c7b 100644 --- a/include/nbl/builtin/hlsl/ieee754.hlsl +++ b/include/nbl/builtin/hlsl/ieee754.hlsl @@ -89,7 +89,7 @@ inline int extractExponent(T x) } template -NBL_CONSTEXPR_INLINE_FUNC T replaceBiasedExponent(T x, typename unsigned_integer_of_size::type biasedExp) +NBL_CONSTEXPR_FUNC T replaceBiasedExponent(T x, typename unsigned_integer_of_size::type biasedExp) { using AsFloat = typename float_of_size::type; return impl::castBackToFloatType(glsl::bitfieldInsert(ieee754::impl::bitCastToUintType(x), biasedExp, traits::mantissaBitCnt, traits::exponentBitCnt)); @@ -97,20 +97,20 @@ NBL_CONSTEXPR_INLINE_FUNC T replaceBiasedExponent(T x, typename unsigned_integer // performs no overflow tests, returns x*exp2(n) template -NBL_CONSTEXPR_INLINE_FUNC T fastMulExp2(T x, int n) +NBL_CONSTEXPR_FUNC T fastMulExp2(T x, int n) { return replaceBiasedExponent(x, extractBiasedExponent(x) + uint32_t(n)); } template -NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractMantissa(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractMantissa(T x) { using AsUint = typename unsigned_integer_of_size::type; return ieee754::impl::bitCastToUintType(x) & traits::type>::mantissaMask; } template -NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractNormalizeMantissa(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractNormalizeMantissa(T x) { using AsUint = typename unsigned_integer_of_size::type; using AsFloat = typename float_of_size::type; @@ -118,21 +118,21 @@ NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type ext } 
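 
 // Aside: a minimal standalone C++ sketch (not part of this header; the helper name and the
 // binary32 constants are assumptions) of the trick `fastMulExp2` above relies on: adding `n`
 // to the biased exponent field multiplies a normal float by exp2(n), with no handling of
 // overflow, underflow, zero or denormals, exactly as the "performs no overflow tests" note says.
 //
 //   #include <bit>
 //   #include <cassert>
 //   #include <cmath>
 //   #include <cstdint>
 //
 //   float fastMulExp2Sketch(float x, int n)
 //   {
 //       uint32_t bits = std::bit_cast<uint32_t>(x);
 //       bits += uint32_t(n) << 23u; // 23 == mantissaBitCnt for binary32; unsigned wraparound handles negative n
 //       return std::bit_cast<float>(bits);
 //   }
 //
 //   int main()
 //   {
 //       assert(fastMulExp2Sketch(1.5f, 4) == std::ldexp(1.5f, 4)); // 24.0f
 //       assert(fastMulExp2Sketch(-3.0f, -2) == -0.75f);
 //   }
 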
template -NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractSign(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractSign(T x) { using AsFloat = typename float_of_size::type; return (ieee754::impl::bitCastToUintType(x) & traits::signMask) >> ((sizeof(T) * 8) - 1); } template -NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractSignPreserveBitPattern(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractSignPreserveBitPattern(T x) { using AsFloat = typename float_of_size::type; return ieee754::impl::bitCastToUintType(x) & traits::signMask; } template ) -NBL_CONSTEXPR_INLINE_FUNC FloatingPoint copySign(FloatingPoint to, FloatingPoint from) +NBL_CONSTEXPR_FUNC FloatingPoint copySign(FloatingPoint to, FloatingPoint from) { using AsUint = typename unsigned_integer_of_size::type; @@ -143,7 +143,7 @@ NBL_CONSTEXPR_INLINE_FUNC FloatingPoint copySign(FloatingPoint to, FloatingPoint } template ) -NBL_CONSTEXPR_INLINE_FUNC FloatingPoint flipSign(FloatingPoint val, bool flip = true) +NBL_CONSTEXPR_FUNC FloatingPoint flipSign(FloatingPoint val, bool flip = true) { using AsFloat = typename float_of_size::type; using AsUint = typename unsigned_integer_of_size::type; diff --git a/include/nbl/builtin/hlsl/ieee754/impl.hlsl b/include/nbl/builtin/hlsl/ieee754/impl.hlsl index ad8a3f9228..69fba9795f 100644 --- a/include/nbl/builtin/hlsl/ieee754/impl.hlsl +++ b/include/nbl/builtin/hlsl/ieee754/impl.hlsl @@ -15,25 +15,25 @@ namespace ieee754 namespace impl { template -NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t bitCastToUintType(T x) +NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t bitCastToUintType(T x) { using AsUint = unsigned_integer_of_size_t; return bit_cast(x); } // to avoid bit cast from uintN_t to uintN_t -template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t<2> bitCastToUintType(uint16_t x) { return x; } -template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t<4> bitCastToUintType(uint32_t x) { return x; } -template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t<8> bitCastToUintType(uint64_t x) { return x; } +template <> NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t<2> bitCastToUintType(uint16_t x) { return x; } +template <> NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t<4> bitCastToUintType(uint32_t x) { return x; } +template <> NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t<8> bitCastToUintType(uint64_t x) { return x; } template -NBL_CONSTEXPR_INLINE_FUNC T castBackToFloatType(T x) +NBL_CONSTEXPR_FUNC T castBackToFloatType(T x) { using AsFloat = typename float_of_size::type; return bit_cast(x); } -template<> NBL_CONSTEXPR_INLINE_FUNC uint16_t castBackToFloatType(uint16_t x) { return x; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint32_t castBackToFloatType(uint32_t x) { return x; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t castBackToFloatType(uint64_t x) { return x; } +template<> NBL_CONSTEXPR_FUNC uint16_t castBackToFloatType(uint16_t x) { return x; } +template<> NBL_CONSTEXPR_FUNC uint32_t castBackToFloatType(uint32_t x) { return x; } +template<> NBL_CONSTEXPR_FUNC uint64_t castBackToFloatType(uint64_t x) { return x; } } } diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index d2fca1165f..650d9ce6ba 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -8,7 +8,6 @@ #include "nbl/builtin/hlsl/emulated/int64_t.hlsl" #include "nbl/builtin/hlsl/mpl.hlsl" #include "nbl/builtin/hlsl/portable/vector_t.hlsl" 
-#include "nbl/builtin/hlsl/mpl.hlsl" // TODO: mega macro to get functional plus, minus, plus_assign, minus_assign @@ -33,8 +32,30 @@ NBL_CONSTEXPR uint16_t CodingStages = 5; template struct coding_mask; -template -NBL_CONSTEXPR uint64_t coding_mask_v = coding_mask::value; +template +NBL_CONSTEXPR T coding_mask_v = _static_cast(coding_mask::value); + +template +NBL_CONSTEXPR portable_vector_t InterleaveMasks = _static_cast >( + truncate >( + vector(coding_mask_v, + coding_mask_v << 1, + coding_mask_v << 2, + coding_mask_v << 3))); + +template +struct sign_mask : integral_constant {}; + +template +NBL_CONSTEXPR T sign_mask_v = _static_cast(sign_mask::value); + +template +NBL_CONSTEXPR portable_vector_t SignMasks = _static_cast >( + truncate >( + vector(sign_mask_v, + sign_mask_v << 1, + sign_mask_v << 2, + sign_mask_v << 3))); // 0th stage will be special: to avoid masking twice during encode/decode, and to get a proper mask that only gets the relevant bits out of a morton code, the 0th stage // mask also considers the total number of bits we're cnsidering for a code (all other masks operate on a bit-agnostic basis). @@ -57,23 +78,23 @@ NBL_CONSTEXPR uint64_t coding_mask_v = coding_mask::value; NBL_CONSTEXPR_STATIC_INLINE uint64_t value = (uint64_t(1) << _Bits) - 1;\ }; -NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(2, 0x5555555555555555) // Groups bits by 1 on, 1 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 1, uint64_t(0x3333333333333333)) // Groups bits by 2 on, 2 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 2, uint64_t(0x0F0F0F0F0F0F0F0F)) // Groups bits by 4 on, 4 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 3, uint64_t(0x00FF00FF00FF00FF)) // Groups bits by 8 on, 8 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 4, uint64_t(0x0000FFFF0000FFFF)) // Groups bits by 16 on, 16 off +NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(2, 0x5555555555555555ull) // Groups bits by 1 on, 1 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 1, 0x3333333333333333ull) // Groups bits by 2 on, 2 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 2, 0x0F0F0F0F0F0F0F0Full) // Groups bits by 4 on, 4 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 3, 0x00FF00FF00FF00FFull) // Groups bits by 8 on, 8 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 4, 0x0000FFFF0000FFFFull) // Groups bits by 16 on, 16 off -NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(3, 0x9249249249249249) // Groups bits by 1 on, 2 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 1, uint64_t(0x30C30C30C30C30C3)) // Groups bits by 2 on, 4 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 2, uint64_t(0xF00F00F00F00F00F)) // Groups bits by 4 on, 8 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 3, uint64_t(0x00FF0000FF0000FF)) // Groups bits by 8 on, 16 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 4, uint64_t(0xFFFF00000000FFFF)) // Groups bits by 16 on, 32 off +NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(3, 0x9249249249249249ull) // Groups bits by 1 on, 2 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 1, 0x30C30C30C30C30C3ull) // Groups bits by 2 on, 4 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 2, 0xF00F00F00F00F00Full) // Groups bits by 4 on, 8 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 3, 0x00FF0000FF0000FFull) // Groups bits by 8 on, 16 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 4, 0xFFFF00000000FFFFull) // Groups bits by 16 on, 32 off -NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(4, 0x1111111111111111) // Groups bits by 1 on, 3 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 1, uint64_t(0x0303030303030303)) // Groups bits by 2 on, 6 off 
-NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 2, uint64_t(0x000F000F000F000F)) // Groups bits by 4 on, 12 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 3, uint64_t(0x000000FF000000FF)) // Groups bits by 8 on, 24 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 4, uint64_t(0x000000000000FFFF)) // Groups bits by 16 on, 48 off (unused but here for completion + likely keeps compiler from complaining) +NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(4, 0x1111111111111111ull) // Groups bits by 1 on, 3 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 1, 0x0303030303030303ull) // Groups bits by 2 on, 6 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 2, 0x000F000F000F000Full) // Groups bits by 4 on, 12 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 3, 0x000000FF000000FFull) // Groups bits by 8 on, 24 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 4, 0x000000000000FFFFull) // Groups bits by 16 on, 48 off (unused but here for completion + likely keeps compiler from complaining) NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS @@ -81,10 +102,9 @@ NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS #undef NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK #undef NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK -// ----------------------------------------------------------------- MORTON ENCODER --------------------------------------------------- - -template && Dim * Bits <= 64 && 8 * sizeof(encode_t) == mpl::round_up_to_pot_v) -struct MortonEncoder +// ----------------------------------------------------------------- MORTON TRANSCODER --------------------------------------------------- +template && Dim * Bits <= 64 && 8 * sizeof(encode_t) == mpl::max_v, uint64_t(16)>) +struct Transcoder { template 16), vector, vector > NBL_FUNC_REQUIRES(concepts::IntVector && 8 * sizeof(typename vector_traits::scalar_type) >= Bits) @@ -93,22 +113,26 @@ struct MortonEncoder * * @param [in] decodedValue Cartesian coordinates to interleave and shift */ - NBL_CONSTEXPR_STATIC_INLINE_FUNC portable_vector_t interleaveShift(NBL_CONST_REF_ARG(decode_t) decodedValue) + NBL_CONSTEXPR_STATIC_FUNC portable_vector_t interleaveShift(NBL_CONST_REF_ARG(decode_t) decodedValue) { - NBL_CONSTEXPR_STATIC encode_t EncodeMasks[CodingStages + 1] = { _static_cast(coding_mask_v), _static_cast(coding_mask_v), _static_cast(coding_mask_v) , _static_cast(coding_mask_v) , _static_cast(coding_mask_v) , _static_cast(coding_mask_v) }; left_shift_operator > leftShift; - portable_vector_t interleaved = _static_cast >(decodedValue)& EncodeMasks[CodingStages]; + portable_vector_t interleaved = _static_cast >(decodedValue) & coding_mask_v; - NBL_CONSTEXPR_STATIC uint16_t Stages = mpl::log2_ceil_v; - [[unroll]] - for (uint16_t i = Stages; i > 0; i--) - { - interleaved = interleaved | leftShift(interleaved, (uint32_t(1) << (i - 1)) * (Dim - 1)); - interleaved = interleaved & EncodeMasks[i - 1]; + #define ENCODE_LOOP_ITERATION(I) NBL_IF_CONSTEXPR(Bits > (uint16_t(1) << I))\ + {\ + interleaved = interleaved | leftShift(interleaved, (uint16_t(1) << I) * (Dim - 1));\ + interleaved = interleaved & coding_mask_v;\ } + ENCODE_LOOP_ITERATION(4) + ENCODE_LOOP_ITERATION(3) + ENCODE_LOOP_ITERATION(2) + ENCODE_LOOP_ITERATION(1) + ENCODE_LOOP_ITERATION(0) + + #undef ENCODE_LOOP_ITERATION // After interleaving, shift each coordinate left by their index - return leftShift(interleaved, _static_cast >(vector(0, 1, 2, 3))); + return leftShift(interleaved, truncate >(vector(0, 1, 2, 3))); } template 16), vector, vector > @@ -118,52 +142,58 @@ struct MortonEncoder * * @param [in] decodedValue Cartesian coordinates 
to encode */ - NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) + NBL_CONSTEXPR_STATIC_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) { - portable_vector_t interleaveShifted = interleaveShift(decodedValue); + const portable_vector_t interleaveShifted = interleaveShift(decodedValue); - encode_t encoded = _static_cast(uint64_t(0)); array_get, encode_t> getter; + encode_t encoded = getter(interleaveShifted, 0); + [[unroll]] - for (uint16_t i = 0; i < Dim; i++) + for (uint16_t i = 1; i < Dim; i++) encoded = encoded | getter(interleaveShifted, i); return encoded; } -}; - -// ----------------------------------------------------------------- MORTON DECODER --------------------------------------------------- -template && Dim * Bits <= 64 && 8 * sizeof(encode_t) == mpl::round_up_to_pot_v) -struct MortonDecoder -{ template 16), vector, vector > NBL_FUNC_REQUIRES(concepts::IntVector && 8 * sizeof(typename vector_traits::scalar_type) >= Bits) - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + /** + * @brief Decodes a Morton code back to a vector of cartesian coordinates + * + * @param [in] encodedValue Representation of a Morton code (binary code, not the morton class defined below) + */ + NBL_CONSTEXPR_STATIC_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) { - NBL_CONSTEXPR_STATIC encode_t DecodeMasks[CodingStages + 1] = { _static_cast(coding_mask_v), _static_cast(coding_mask_v), _static_cast(coding_mask_v) , _static_cast(coding_mask_v) , _static_cast(coding_mask_v) , _static_cast(coding_mask_v) }; - arithmetic_right_shift_operator > rightShift; + arithmetic_right_shift_operator encodedRightShift; portable_vector_t decoded; array_set, encode_t> setter; // Write initial values into decoded [[unroll]] for (uint16_t i = 0; i < Dim; i++) - setter(decoded, i, encodedValue); - decoded = rightShift(decoded, _static_cast >(vector(0, 1, 2, 3))); + setter(decoded, i, encodedRightShift(encodedValue, i)); - NBL_CONSTEXPR_STATIC uint16_t Stages = mpl::log2_ceil_v; - [[unroll]] - for (uint16_t i = 0; i < Stages; i++) - { - decoded = decoded & DecodeMasks[i]; - decoded = decoded | rightShift(decoded, (uint32_t(1) << i) * (Dim - 1)); + arithmetic_right_shift_operator > rightShift; + + #define DECODE_LOOP_ITERATION(I) NBL_IF_CONSTEXPR(Bits > (uint16_t(1) << I))\ + {\ + decoded = decoded & coding_mask_v;\ + decoded = decoded | rightShift(decoded, (uint16_t(1) << I) * (Dim - 1));\ } + DECODE_LOOP_ITERATION(0) + DECODE_LOOP_ITERATION(1) + DECODE_LOOP_ITERATION(2) + DECODE_LOOP_ITERATION(3) + DECODE_LOOP_ITERATION(4) + + #undef DECODE_LOOP_ITERATION + // If `Bits` is greater than half the bitwidth of the decode type, then we can avoid `&`ing against the last mask since duplicated MSB get truncated NBL_IF_CONSTEXPR(Bits > 4 * sizeof(typename vector_traits::scalar_type)) return _static_cast(decoded); else - return _static_cast(decoded & DecodeMasks[CodingStages]); + return _static_cast(decoded & coding_mask_v); } }; @@ -172,7 +202,7 @@ struct MortonDecoder // `BitsAlreadySpread` assumes both pre-interleaved and pre-shifted template -NBL_BOOL_CONCEPT Comparable = concepts::IntegralLikeScalar && is_signed_v == Signed && ((BitsAlreadySpread && sizeof(I) == sizeof(storage_t)) || (!BitsAlreadySpread && 8 * sizeof(I) == mpl::round_up_to_pot_v)); +NBL_BOOL_CONCEPT Comparable = concepts::IntegralLikeScalar && is_signed_v == Signed && ((BitsAlreadySpread && sizeof(I) == sizeof(storage_t)) || (!BitsAlreadySpread && 8 
* sizeof(I) == mpl::max_v, uint64_t(16)>)); template struct Equals; @@ -181,13 +211,14 @@ template struct Equals { template) - NBL_CONSTEXPR_STATIC_INLINE_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) + NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { - NBL_CONSTEXPR portable_vector_t zeros = _static_cast >(_static_cast >(vector(0,0,0,0))); + const portable_vector_t zeros = _static_cast >(truncate >(vector(0,0,0,0))); - portable_vector_t rhsCasted = _static_cast >(rhs); - portable_vector_t xored = rhsCasted ^ value; - return xored == zeros; + const portable_vector_t rhsCasted = _static_cast >(rhs); + const portable_vector_t xored = rhsCasted ^ value; + equal_to > equal; + return equal(xored, zeros); } }; @@ -195,10 +226,11 @@ template struct Equals { template) - NBL_CONSTEXPR_STATIC_INLINE_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) + NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { - const portable_vector_t interleaved = MortonEncoder::interleaveShift(rhs); - return Equals::__call(value, interleaved); + using right_sign_t = conditional_t, make_unsigned_t >; + const portable_vector_t interleaved = _static_cast >(Transcoder::interleaveShift(rhs)); + return Equals::template __call(value, interleaved); } }; @@ -213,17 +245,28 @@ template { template) - NBL_CONSTEXPR_STATIC_INLINE_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) + NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { - NBL_CONSTEXPR_STATIC portable_vector_t InterleaveMasks = _static_cast >(_static_cast >(vector(coding_mask_v, coding_mask_v << 1, coding_mask_v << 2, coding_mask_v << 3))); - NBL_CONSTEXPR_STATIC portable_vector_t SignMasks = _static_cast >(_static_cast >(vector(SignMask, SignMask << 1, SignMask << 2, SignMask << 3))); ComparisonOp comparison; - // Obtain a vector of deinterleaved coordinates and flip their sign bits - const portable_vector_t thisCoord = (InterleaveMasks & value) ^ SignMasks; - // rhs already deinterleaved, just have to cast type and flip sign - const portable_vector_t rhsCoord = _static_cast >(rhs) ^ SignMasks; + NBL_IF_CONSTEXPR(Signed) + { + // Obtain a vector of deinterleaved coordinates and flip their sign bits + portable_vector_t thisCoord = (InterleaveMasks & value) ^ SignMasks; + // rhs already deinterleaved, just have to cast type and flip sign + const portable_vector_t rhsCoord = _static_cast >(rhs) ^ SignMasks; + + return comparison(thisCoord, rhsCoord); + } + else + { + // Obtain a vector of deinterleaved coordinates + portable_vector_t thisCoord = InterleaveMasks & value; + // rhs already deinterleaved, just have to cast type + const portable_vector_t rhsCoord = _static_cast >(rhs); - return comparison(thisCoord, rhsCoord); + return comparison(thisCoord, rhsCoord); + } + } }; @@ -231,11 +274,11 @@ template { template) - NBL_CONSTEXPR_STATIC_INLINE_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) + NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { - const vector interleaved = MortonEncoder::interleaveShift(rhs); - BaseComparison baseComparison; - return baseComparison(value, interleaved); + using right_sign_t = conditional_t, 
make_unsigned_t >; + const portable_vector_t interleaved = _static_cast >(Transcoder::interleaveShift(rhs)); + return BaseComparison::template __call(value, interleaved); } }; @@ -283,7 +326,7 @@ struct code create(NBL_CONST_REF_ARG(vector) cartesian) { this_t retVal; - retVal.value = impl::MortonEncoder::encode(cartesian); + retVal.value = impl::Transcoder::encode(cartesian); return retVal; } @@ -296,7 +339,7 @@ struct code * @param [in] cartesian Coordinates to encode */ template= Bits) - explicit code(NBL_CONST_REF_ARG(vector) cartesian) + inline explicit code(NBL_CONST_REF_ARG(vector) cartesian) { *this = create(cartesian); } @@ -304,35 +347,35 @@ struct code /** * @brief Decodes this Morton code back to a set of cartesian coordinates */ - template= Bits) - constexpr inline explicit operator vector() const noexcept; + template= Bits && is_signed_v == Signed) + constexpr explicit operator vector() const noexcept; #endif // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- - NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { this_t retVal; retVal.value = value & rhs.value; return retVal; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { this_t retVal; retVal.value = value | rhs.value; return retVal; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { this_t retVal; retVal.value = value ^ rhs.value; return retVal; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC { this_t retVal; retVal.value = ~value; @@ -342,15 +385,15 @@ struct code // Only valid in CPP #ifndef __HLSL_VERSION - constexpr inline this_t operator<<(uint16_t bits) const; + constexpr this_t operator<<(uint16_t bits) const; - constexpr inline this_t operator>>(uint16_t bits) const; + constexpr this_t operator>>(uint16_t bits) const; #endif // ------------------------------------------------------- UNARY ARITHMETIC OPERATORS ------------------------------------------------- - NBL_CONSTEXPR_INLINE_FUNC this_signed_t operator-() NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_signed_t operator-() NBL_CONST_MEMBER_FUNC { this_t zero; zero.value = _static_cast(0); @@ -363,107 +406,135 @@ struct code // ------------------------------------------------------- BINARY ARITHMETIC OPERATORS ------------------------------------------------- - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + // put 1 bits everywhere in the bits the current axis is not using + // then extract just the axis bits for the right hand coordinate + // carry-1 will propagate the bits across the already set bits + // then clear out the bits not belonging to current axis + // Note: Its possible to clear on `this` and fill on `rhs` but that will + // disable optimizations, we expect the compiler to optimize a lot if the + // value of `rhs` is known at compile time, e.g. 
`static_cast>(glm::ivec3(1,0,0))` + NBL_CONSTEXPR_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - NBL_CONSTEXPR_STATIC storage_t Mask = _static_cast(impl::coding_mask_v); - left_shift_operator leftShift; + bit_not > bitnot; + // For each coordinate, leave its bits intact and turn every other bit ON + const portable_vector_t counterMaskedValue = bitnot(impl::InterleaveMasks) | value; + // For each coordinate in rhs, leave its bits intact and turn every other bit OFF + const portable_vector_t maskedRhsValue = impl::InterleaveMasks & rhs.value; + // Add these coordinate-wise, then turn all bits not belonging to the current coordinate OFF + const portable_vector_t interleaveShiftedResult = (counterMaskedValue + maskedRhsValue) & impl::InterleaveMasks; + // Re-encode the result + array_get, storage_t> getter; this_t retVal; - retVal.value = _static_cast(uint64_t(0)); + retVal.value = getter(interleaveShiftedResult, 0); [[unroll]] - for (uint16_t i = 0; i < D; i++) - { - // put 1 bits everywhere in the bits the current axis is not using - // then extract just the axis bits for the right hand coordinate - // carry-1 will propagate the bits across the already set bits - // then clear out the bits not belonging to current axis - // Note: Its possible to clear on `this` and fill on `rhs` but that will - // disable optimizations, we expect the compiler to optimize a lot if the - // value of `rhs` is known at compile time, e.g. `static_cast>(glm::ivec3(1,0,0))` - retVal.value |= ((value | (~leftShift(Mask, i))) + (rhs.value & leftShift(Mask, i))) & leftShift(Mask, i); - } + for (uint16_t i = 1; i < D; i++) + retVal.value = retVal.value | getter(interleaveShiftedResult, i); + return retVal; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + // This is the dual trick of the one used for addition: set all other bits to 0 so borrows propagate + NBL_CONSTEXPR_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - NBL_CONSTEXPR_STATIC storage_t Mask = _static_cast(impl::coding_mask_v); - left_shift_operator leftShift; + // For each coordinate, leave its bits intact and turn every other bit OFF + const portable_vector_t maskedValue = impl::InterleaveMasks & value; + // Do the same for each coordinate in rhs + const portable_vector_t maskedRhsValue = impl::InterleaveMasks & rhs.value; + // Subtract these coordinate-wise, then turn all bits not belonging to the current coordinate OFF + const portable_vector_t interleaveShiftedResult = (maskedValue - maskedRhsValue) & impl::InterleaveMasks; + // Re-encode the result + array_get, storage_t> getter; this_t retVal; - retVal.value = _static_cast(uint64_t(0)); + retVal.value = getter(interleaveShiftedResult, 0); [[unroll]] - for (uint16_t i = 0; i < D; i++) - { - // This is the dual trick of the one used for addition: set all other bits to 0 so borrows propagate - retVal.value |= ((value & leftShift(Mask, i)) - (rhs.value & leftShift(Mask, i))) & leftShift(Mask, i); - } + for (uint16_t i = 1; i < D; i++) + retVal.value = retVal.value | getter(interleaveShiftedResult, i); + return retVal; } // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- - NBL_CONSTEXPR_INLINE_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { return value == rhs.value; } template) - 
NBL_CONSTEXPR_INLINE_FUNC vector equals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC vector equal(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return impl::Equals::__call(value, rhs); + return impl::Equals::template __call(value, rhs); } - NBL_CONSTEXPR_INLINE_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { return value != rhs.value; } template) - NBL_CONSTEXPR_INLINE_FUNC vector notEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC vector notEqual(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return !equals(rhs); + return !equal(rhs); } template) - NBL_CONSTEXPR_INLINE_FUNC vector less(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC vector lessThan(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return impl::LessThan::__call(value, rhs); + return impl::LessThan::template __call(value, rhs); } template) - NBL_CONSTEXPR_INLINE_FUNC vector lessEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC vector lessThanEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return impl::LessEquals::__call(value, rhs); + return impl::LessEquals::template __call(value, rhs); } template) - NBL_CONSTEXPR_INLINE_FUNC vector greater(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC vector greaterThan(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return impl::GreaterThan::__call(value, rhs); + return impl::GreaterThan::template __call(value, rhs); } template) - NBL_CONSTEXPR_INLINE_FUNC vector greaterEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC vector greaterThanEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return impl::GreaterEquals::__call(value, rhs); + return impl::GreaterEquals::template __call(value, rhs); } }; } //namespace morton +// Specialize the `static_cast_helper` +namespace impl +{ + +// I must be of same signedness as the morton code, and be wide enough to hold each component +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) +struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) > +{ + NBL_CONSTEXPR_STATIC_FUNC vector cast(NBL_CONST_REF_ARG(morton::code, Bits, D, _uint64_t>) val) + { + using storage_t = typename morton::code, Bits, D, _uint64_t>::storage_t; + return morton::impl::Transcoder::decode(val.value); + } +}; + +} // namespace impl + template struct left_shift_operator > { using type_t = morton::code; using storage_t = typename type_t::storage_t; - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) { left_shift_operator valueLeftShift; type_t retVal; @@ -479,7 +550,7 @@ struct arithmetic_right_shift_operator > using type_t = morton::code; using storage_t = typename type_t::storage_t; - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) { arithmetic_right_shift_operator valueArithmeticRightShift; type_t retVal; @@ -496,10 +567,10 @@ struct arithmetic_right_shift_operator > using type_t = morton::code; using scalar_t = conditional_t<(Bits > 16), int32_t, int16_t>; - 
NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) { vector cartesian = _static_cast >(operand); - cartesian >> scalar_t(bits); + cartesian >>= scalar_t(bits); return type_t::create(cartesian); } }; @@ -507,45 +578,24 @@ struct arithmetic_right_shift_operator > #ifndef __HLSL_VERSION template&& D* Bits <= 64) -constexpr inline morton::code morton::code::operator<<(uint16_t bits) const +constexpr morton::code morton::code::operator<<(uint16_t bits) const { left_shift_operator> leftShift; return leftShift(*this, bits); } template&& D* Bits <= 64) -constexpr inline morton::code morton::code::operator>>(uint16_t bits) const +constexpr morton::code morton::code::operator>>(uint16_t bits) const { arithmetic_right_shift_operator> rightShift; return rightShift(*this, bits); } -#endif - -// Specialize the `static_cast_helper` -namespace impl -{ - -// I must be of same signedness as the morton code, and be wide enough to hold each component -template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) -struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) > -{ - NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(morton::code, Bits, D, _uint64_t>) val) - { - using storage_t = typename morton::code, Bits, D, _uint64_t>::storage_t; - return morton::impl::MortonDecoder::decode(val.value); - } -}; - -} // namespace impl - -#ifndef __HLSL_VERSION - template && D* Bits <= 64) -template = Bits) -constexpr inline morton::code::operator vector() const noexcept +template = Bits && is_signed_v == Signed) +constexpr morton::code::operator vector() const noexcept { - return _static_cast, morton::code, Bits, D>>(*this); + return _static_cast, morton::code>(*this); } #endif diff --git a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl index 8add7a9ed3..901a8e419a 100644 --- a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl +++ b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl @@ -114,7 +114,12 @@ NBL_CONSTEXPR_STATIC_INLINE bool is_bda_pointer_v = is_bda_pointer::value; //! General Operations - + +//! 
Miscellaneous Instructions +template +[[vk::ext_instruction(spv::OpUndef)]] +T undef(); + // template [[vk::ext_instruction(spv::OpAccessChain)]] diff --git a/include/nbl/builtin/hlsl/type_traits.hlsl b/include/nbl/builtin/hlsl/type_traits.hlsl index bc160de788..a6b3db6708 100644 --- a/include/nbl/builtin/hlsl/type_traits.hlsl +++ b/include/nbl/builtin/hlsl/type_traits.hlsl @@ -684,6 +684,8 @@ template NBL_CONSTEXPR uint32_t alignment_of_v = alignment_of::value; template NBL_CONSTEXPR uint64_t extent_v = extent::value; +template +NBL_CONSTEXPR bool is_fundamental_v = is_fundamental::value; // Overlapping definitions diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index f03d8ae22c..c57eec4e61 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -248,9 +248,9 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/basic.h") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/intrinsics.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/matrix.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/promote.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/truncate.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/vector.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/impl/intrinsics_impl.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/impl/vector_impl.hlsl") #glsl compat LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/glsl_compat/core.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/glsl_compat/subgroup_arithmetic.hlsl") From 2d0ffbadf914f84e4f7d5bfc8fec3b860121f655 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 28 Apr 2025 15:16:08 -0300 Subject: [PATCH 27/28] Fix the last of the operators --- include/nbl/builtin/hlsl/morton.hlsl | 70 +++++++++++++++++----------- 1 file changed, 44 insertions(+), 26 deletions(-) diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 650d9ce6ba..d570e249c8 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -35,13 +35,16 @@ struct coding_mask; template NBL_CONSTEXPR T coding_mask_v = _static_cast(coding_mask::value); -template -NBL_CONSTEXPR portable_vector_t InterleaveMasks = _static_cast >( - truncate >( - vector(coding_mask_v, - coding_mask_v << 1, - coding_mask_v << 2, - coding_mask_v << 3))); +// It's a complete coin toss whether template variables work or not, since it's a C++14 feature (not supported in HLSL2021). Most of the ones we use in Nabla work, +// but this one will only work for some parameters and not for others. 
Therefore, this was made into a macro to inline where used + +#define NBL_MORTON_INTERLEAVE_MASKS(STORAGE_T, DIM, BITS, NAMESPACE_PREFIX) _static_cast >(\ + truncate >(\ + vector(NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0>,\ + NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0> << 1,\ + NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0> << 2,\ + NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0> << 3))) + template struct sign_mask : integral_constant {}; @@ -49,13 +52,12 @@ struct sign_mask : integral_constant NBL_CONSTEXPR T sign_mask_v = _static_cast(sign_mask::value); -template -NBL_CONSTEXPR portable_vector_t SignMasks = _static_cast >( - truncate >( - vector(sign_mask_v, - sign_mask_v << 1, - sign_mask_v << 2, - sign_mask_v << 3))); +#define NBL_MORTON_SIGN_MASKS(STORAGE_T, DIM, BITS) _static_cast >(\ + truncate >(\ + vector(sign_mask_v< DIM, BITS >,\ + sign_mask_v< DIM, BITS > << 1,\ + sign_mask_v< DIM, BITS > << 2,\ + sign_mask_v< DIM, BITS > << 3))) // 0th stage will be special: to avoid masking twice during encode/decode, and to get a proper mask that only gets the relevant bits out of a morton code, the 0th stage // mask also considers the total number of bits we're cnsidering for a code (all other masks operate on a bit-agnostic basis). @@ -213,10 +215,11 @@ struct Equals template) NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { + const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, ); const portable_vector_t zeros = _static_cast >(truncate >(vector(0,0,0,0))); const portable_vector_t rhsCasted = _static_cast >(rhs); - const portable_vector_t xored = rhsCasted ^ value; + const portable_vector_t xored = rhsCasted ^ (InterleaveMasks & value); equal_to > equal; return equal(xored, zeros); } @@ -247,20 +250,22 @@ struct BaseComparison template) NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { + const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, ); + const portable_vector_t SignMasks = NBL_MORTON_SIGN_MASKS(storage_t, D, Bits); ComparisonOp comparison; NBL_IF_CONSTEXPR(Signed) { // Obtain a vector of deinterleaved coordinates and flip their sign bits - portable_vector_t thisCoord = (InterleaveMasks & value) ^ SignMasks; + portable_vector_t thisCoord = (InterleaveMasks & value) ^ SignMasks; // rhs already deinterleaved, just have to cast type and flip sign - const portable_vector_t rhsCoord = _static_cast >(rhs) ^ SignMasks; + const portable_vector_t rhsCoord = _static_cast >(rhs) ^ SignMasks; return comparison(thisCoord, rhsCoord); } else { // Obtain a vector of deinterleaved coordinates - portable_vector_t thisCoord = InterleaveMasks & value; + portable_vector_t thisCoord = InterleaveMasks & value; // rhs already deinterleaved, just have to cast type const portable_vector_t rhsCoord = _static_cast >(rhs); @@ -415,13 +420,14 @@ struct code // value of `rhs` is known at compile time, e.g. 
@@ -415,13 +420,14 @@ struct code
     // value of `rhs` is known at compile time, e.g.
     // `static_cast<...>(glm::ivec3(1,0,0))`
     NBL_CONSTEXPR_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
     {
+        const portable_vector_t<storage_t, D> InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, impl::);
         bit_not<portable_vector_t<storage_t, D> > bitnot;
         // For each coordinate, leave its bits intact and turn every other bit ON
-        const portable_vector_t<storage_t, D> counterMaskedValue = bitnot(impl::InterleaveMasks<storage_t, D, Bits>) | value;
+        const portable_vector_t<storage_t, D> counterMaskedValue = bitnot(InterleaveMasks) | value;
         // For each coordinate in rhs, leave its bits intact and turn every other bit OFF
-        const portable_vector_t<storage_t, D> maskedRhsValue = impl::InterleaveMasks<storage_t, D, Bits> & rhs.value;
+        const portable_vector_t<storage_t, D> maskedRhsValue = InterleaveMasks & rhs.value;
         // Add these coordinate-wise, then turn all bits not belonging to the current coordinate OFF
-        const portable_vector_t<storage_t, D> interleaveShiftedResult = (counterMaskedValue + maskedRhsValue) & impl::InterleaveMasks<storage_t, D, Bits>;
+        const portable_vector_t<storage_t, D> interleaveShiftedResult = (counterMaskedValue + maskedRhsValue) & InterleaveMasks;
         // Re-encode the result
         array_get<portable_vector_t<storage_t, D>, storage_t> getter;
         this_t retVal;
@@ -429,19 +435,19 @@ struct code
         [[unroll]]
         for (uint16_t i = 1; i < D; i++)
             retVal.value = retVal.value | getter(interleaveShiftedResult, i);
-
         return retVal;
     }
 
     // This is the dual trick of the one used for addition: set all other bits to 0 so borrows propagate
     NBL_CONSTEXPR_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
     {
+        const portable_vector_t<storage_t, D> InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, impl::);
         // For each coordinate, leave its bits intact and turn every other bit OFF
-        const portable_vector_t<storage_t, D> maskedValue = impl::InterleaveMasks<storage_t, D, Bits> & value;
+        const portable_vector_t<storage_t, D> maskedValue = InterleaveMasks & value;
         // Do the same for each coordinate in rhs
-        const portable_vector_t<storage_t, D> maskedRhsValue = impl::InterleaveMasks<storage_t, D, Bits> & rhs.value;
+        const portable_vector_t<storage_t, D> maskedRhsValue = InterleaveMasks & rhs.value;
         // Subtract these coordinate-wise, then turn all bits not belonging to the current coordinate OFF
-        const portable_vector_t<storage_t, D> interleaveShiftedResult = (maskedValue - maskedRhsValue) & impl::InterleaveMasks<storage_t, D, Bits>;
+        const portable_vector_t<storage_t, D> interleaveShiftedResult = (maskedValue - maskedRhsValue) & InterleaveMasks;
         // Re-encode the result
         array_get<portable_vector_t<storage_t, D>, storage_t> getter;
         this_t retVal;
@@ -540,6 +546,10 @@ struct left_shift_operator<morton::code<...> >
         type_t retVal;
         // Shift every coordinate by `bits`
         retVal.value = valueLeftShift(operand.value, bits * D);
+        // The previous shift might move bits into positions that the storage type has available but the morton code does not use.
+        // Decoding the resulting morton is still fine and produces expected results, but some operations such as equality expect these unused bits to be 0, so we mask them off
+        const uint64_t UsedBitsMask = Bits * D < 64 ? (uint64_t(1) << (Bits * D)) - 1 : ~uint64_t(0);
+        retVal.value = retVal.value & _static_cast<storage_t>(UsedBitsMask);
         return retVal;
     }
 };
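The masked add in `operator+` above can be sanity-checked in isolation: ORing the left operand with the complement of a coordinate's mask means every carry produced inside that coordinate ripples straight through the other coordinates' bit positions and lands on that coordinate's next own bit; the final AND discards the foreign bits again. A small standalone C++ sketch for a 2D code (mask values and helper names are illustrative, not Nabla's):

    #include <cstdint>

    constexpr uint32_t MaskX = 0x55555555u; // even bits hold x
    constexpr uint32_t MaskY = 0xAAAAAAAAu; // odd bits hold y

    // Add one coordinate of two 2D morton codes: the |~mask makes carries skip foreign bits
    constexpr uint32_t addCoord(uint32_t a, uint32_t b, uint32_t mask)
    {
        return ((a | ~mask) + (b & mask)) & mask;
    }

    // Add both coordinates independently and re-merge
    constexpr uint32_t mortonAdd(uint32_t a, uint32_t b)
    {
        return addCoord(a, b, MaskX) | addCoord(a, b, MaskY);
    }

    // (x=1,y=1) encodes as 0b11 and (x=1,y=0) as 0b01; the sum (x=2,y=1) encodes as 0b110
    static_assert(mortonAdd(0b11u, 0b01u) == 0b110u, "masked carry propagation");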
@@ -570,7 +580,12 @@ struct arithmetic_right_shift_operator<morton::code<...> >
     NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits)
     {
         vector<scalar_t, D> cartesian = _static_cast<vector<scalar_t, D> >(operand);
-        cartesian >>= scalar_t(bits);
+        // To avoid branching, we left-shift each coordinate to put the MSB (of the encoded Morton) at the position of the MSB (of the `scalar_t` used for the decoded coordinate),
+        // then right-shift again to get correct sign on each coordinate.
+        // The number of bits we shift by to put MSB of Morton at MSB of `scalar_t` is the difference between the bitwidth of `scalar_t` and Bits
+        const scalar_t ShiftFactor = scalar_t(8 * sizeof(scalar_t) - Bits);
+        cartesian <<= ShiftFactor;
+        cartesian >>= ShiftFactor + scalar_t(bits);
         return type_t::create(cartesian);
     }
 };
@@ -600,6 +615,9 @@ constexpr morton::code<...>::operator vector<...>() const
 
 #endif
 
+#undef NBL_MORTON_INTERLEAVE_MASKS
+#undef NBL_MORTON_SIGN_MASKS
+
 } //namespace hlsl
 } //namespace nbl

From 68edc322f2ba9c19ab0bd8068da2bae2390d7182 Mon Sep 17 00:00:00 2001
From: Fletterio
Date: Mon, 28 Apr 2025 15:19:48 -0300
Subject: [PATCH 28/28] Change examples test submodule for master merge

---
 examples_tests | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples_tests b/examples_tests
index f2ea51d0b3..f4cc4cd22e 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit f2ea51d0b3e3388c0f9bae03602ec3b1f658c124
+Subproject commit f4cc4cd22ee4bd5506d794e63caafddf974ed7a4
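As a closing note on patch 27's arithmetic_right_shift_operator: the two-shift trick is plain branchless sign extension of a narrow two's-complement field. A standalone C++ sketch of the same idea for a single 10-bit coordinate (assumes arithmetic right shifts on signed types, which C++20 guarantees and earlier implementations provide in practice; names are illustrative):

    #include <cassert>
    #include <cstdint>

    int main()
    {
        const unsigned Bits = 10;    // bits per decoded coordinate
        const uint32_t raw = 0x3FFu; // a 10-bit two's-complement -1, zero-extended
        const int shiftFactor = int(8 * sizeof(int32_t) - Bits); // 32 - 10 = 22
        // move the field's MSB up to the int32_t MSB (shift unsigned to dodge overflow UB)...
        int32_t coord = int32_t(raw << shiftFactor);
        // ...then a single arithmetic right shift both sign-extends and applies the requested shift (1 here)
        coord >>= shiftFactor + 1;
        assert(coord == -1); // (-1) >> 1 == -1 under arithmetic shifting
        return 0;
    }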