From be5ef6b4f99545a20dfb7107d796370e55840fb0 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 20 Mar 2025 18:42:53 +0700 Subject: [PATCH 1/4] use spirv intrinsic for dot --- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 29 ++++++++++--------- .../builtin/hlsl/spirv_intrinsics/core.hlsl | 4 +++ 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 1d43d9b14a..99c753a0fe 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -134,6 +134,7 @@ template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(find_lsb_helper, findIL #undef FIND_MSB_LSB_RETURN_TYPE template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(bitReverse_helper, bitReverse, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(dot_helper, dot, (T), (T)(T), typename vector_traits::scalar_type) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(transpose_helper, transpose, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(length_helper, length, (T), (T), typename vector_traits::scalar_type) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(normalize_helper, normalize, (T), (T), T) @@ -599,20 +600,6 @@ struct nClamp_helper } }; -#endif // C++ only specializations - -// C++ and HLSL specializations - -template -NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar) -struct bitReverseAs_helper) > -{ - static T __call(NBL_CONST_REF_ARG(T) val, uint16_t bits) - { - return bitReverse_helper::__call(val) >> promote >(scalar_type_t (sizeof(T) * 8 - bits)); - } -}; - template NBL_PARTIAL_REQ_TOP(concepts::Vectorial) struct dot_helper) > @@ -632,6 +619,20 @@ struct dot_helper) } }; +#endif // C++ only specializations + +// C++ and HLSL specializations + +template +NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar) +struct bitReverseAs_helper) > +{ + static T __call(NBL_CONST_REF_ARG(T) val, uint16_t bits) + { + return bitReverse_helper::__call(val) >> promote >(scalar_type_t (sizeof(T) * 8 - bits)); + } +}; + #ifdef __HLSL_VERSION // SPIR-V already defines specializations for builtin vector types #define VECTOR_SPECIALIZATION_CONCEPT concepts::Vectorial && !is_vector_v diff --git a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl index d351cab07d..d1a50079aa 100644 --- a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl +++ b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl @@ -318,6 +318,10 @@ template && is_vector_v) [[vk::ext_instruction(spv::OpIsInf)]] vector::Dimension> isInf(T val); +template) +[[vk::ext_instruction( spv::OpDot )]] +typename vector_traits::scalar_type dot(Vector lhs, Vector rhs); + template [[vk::ext_instruction( spv::OpTranspose )]] Matrix transpose(Matrix mat); From cc5644c858d482eb01937f898a1001d560060927 Mon Sep 17 00:00:00 2001 From: Przemek Date: Thu, 20 Mar 2025 13:43:07 +0100 Subject: [PATCH 2/4] Added dot HLSL specialization --- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 46 +++++++++++-------- .../builtin/hlsl/spirv_intrinsics/core.hlsl | 2 +- 2 files changed, 28 insertions(+), 20 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 99c753a0fe..62cc9abc6a 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -600,25 +600,6 @@ struct nClamp_helper } }; -template -NBL_PARTIAL_REQ_TOP(concepts::Vectorial) -struct dot_helper) > -{ - using scalar_type = typename vector_traits::scalar_type; - - static inline scalar_type __call(NBL_CONST_REF_ARG(Vectorial) lhs, NBL_CONST_REF_ARG(Vectorial) rhs) - { - static const uint32_t ArrayDim = vector_traits::Dimension; - static array_get getter; - - scalar_type retval = getter(lhs, 0) * getter(rhs, 0); - for (uint32_t i = 1; i < ArrayDim; ++i) - retval = retval + getter(lhs, i) * getter(rhs, i); - - return retval; - } -}; - #endif // C++ only specializations // C++ and HLSL specializations @@ -889,6 +870,33 @@ struct mix_helper && concepts::B } }; +#ifdef __HLSL_VERSION +#define DOT_HELPER_REQUIREMENT (concepts::Vectorial && !is_vector_v) +#else +#define DOT_HELPER_REQUIREMENT concepts::Vectorial +#endif + +template +NBL_PARTIAL_REQ_TOP(DOT_HELPER_REQUIREMENT) +struct dot_helper +{ + using scalar_type = typename vector_traits::scalar_type; + + static inline scalar_type __call(NBL_CONST_REF_ARG(Vectorial) lhs, NBL_CONST_REF_ARG(Vectorial) rhs) + { + static const uint32_t ArrayDim = vector_traits::Dimension; + static array_get getter; + + scalar_type retval = getter(lhs, 0) * getter(rhs, 0); + for (uint32_t i = 1; i < ArrayDim; ++i) + retval = retval + getter(lhs, i) * getter(rhs, i); + + return retval; + } +}; + +#undef DOT_HELPER_REQUIREMENT + } } } diff --git a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl index d1a50079aa..7da69c4a55 100644 --- a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl +++ b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl @@ -318,7 +318,7 @@ template && is_vector_v) [[vk::ext_instruction(spv::OpIsInf)]] vector::Dimension> isInf(T val); -template) +template) [[vk::ext_instruction( spv::OpDot )]] typename vector_traits::scalar_type dot(Vector lhs, Vector rhs); From 95287fdb66ec5d80727e5fc48571b2b84be4b086 Mon Sep 17 00:00:00 2001 From: Przemek Date: Thu, 20 Mar 2025 15:43:16 +0100 Subject: [PATCH 3/4] Moved fma function --- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 33 ++++++++++++++++++- .../builtin/hlsl/cpp_compat/intrinsics.hlsl | 6 ++++ include/nbl/builtin/hlsl/tgmath.hlsl | 8 ++--- include/nbl/builtin/hlsl/tgmath/impl.hlsl | 32 ------------------ 4 files changed, 40 insertions(+), 39 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 62cc9abc6a..d0075c0d17 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -103,7 +103,8 @@ template struct nMax_helper; template struct nClamp_helper; - +template +struct fma_helper; #ifdef __HLSL_VERSION // HLSL only specializations @@ -163,6 +164,7 @@ template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(refract_hel template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(nMax_helper, nMax, (T), (T)(T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(nMin_helper, nMin, (T), (T)(T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(nClamp_helper, nClamp, (T), (T)(T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(fma_helper, fma, (T), (T)(T)(T), T) #define BITCOUNT_HELPER_RETRUN_TYPE conditional_t, vector::Dimension>, int32_t> template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(bitCount_helper, bitCount, (T), (T), BITCOUNT_HELPER_RETRUN_TYPE) @@ -600,6 +602,16 @@ struct nClamp_helper } }; +template +requires concepts::FloatingPointScalar +struct fma_helper +{ + static FloatingPoint __call(NBL_CONST_REF_ARG(FloatingPoint) x, NBL_CONST_REF_ARG(FloatingPoint) y, NBL_CONST_REF_ARG(FloatingPoint) z) + { + return std::fma(x, y, z); + } +}; + #endif // C++ only specializations // C++ and HLSL specializations @@ -897,6 +909,25 @@ struct dot_helper #undef DOT_HELPER_REQUIREMENT +template +NBL_PARTIAL_REQ_TOP(VECTOR_SPECIALIZATION_CONCEPT) +struct fma_helper +{ + using return_t = T; + static return_t __call(NBL_CONST_REF_ARG(T) x, NBL_CONST_REF_ARG(T) y, NBL_CONST_REF_ARG(T) z) + { + using traits = hlsl::vector_traits; + array_get getter; + array_set setter; + + return_t output; + for (uint32_t i = 0; i < traits::Dimension; ++i) + setter(output, i, fma_helper::__call(getter(x, i), getter(y, i), getter(z, i))); + + return output; + } +}; + } } } diff --git a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl index b695c4b82b..a5747a5fb7 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl @@ -295,6 +295,12 @@ inline int32_t2 unpackDouble2x32(T val) return NAMESPACE::unpackDouble2x32(val); } +template +inline T fma(NBL_CONST_REF_ARG(T) x, NBL_CONST_REF_ARG(T) y, NBL_CONST_REF_ARG(T) z) +{ + return cpp_compat_intrinsics_impl::fma_helper::__call(x, y, z); +} + #undef NAMESPACE } diff --git a/include/nbl/builtin/hlsl/tgmath.hlsl b/include/nbl/builtin/hlsl/tgmath.hlsl index c05696e5ab..c2f784cfc0 100644 --- a/include/nbl/builtin/hlsl/tgmath.hlsl +++ b/include/nbl/builtin/hlsl/tgmath.hlsl @@ -13,6 +13,8 @@ #include #include #include +#include + // C++ headers #ifndef __HLSL_VERSION #include @@ -211,12 +213,6 @@ inline T ceil(NBL_CONST_REF_ARG(T) val) return tgmath_impl::ceil_helper::__call(val); } -template -inline T fma(NBL_CONST_REF_ARG(T) x, NBL_CONST_REF_ARG(T) y, NBL_CONST_REF_ARG(T) z) -{ - return tgmath_impl::fma_helper::__call(x, y, z); -} - template inline T ldexp(NBL_CONST_REF_ARG(T) arg, NBL_CONST_REF_ARG(U) exp) { diff --git a/include/nbl/builtin/hlsl/tgmath/impl.hlsl b/include/nbl/builtin/hlsl/tgmath/impl.hlsl index 6e80ef2fd6..af52104629 100644 --- a/include/nbl/builtin/hlsl/tgmath/impl.hlsl +++ b/include/nbl/builtin/hlsl/tgmath/impl.hlsl @@ -83,8 +83,6 @@ template struct trunc_helper; template struct ceil_helper; -template -struct fma_helper; template struct ldexp_helper; template @@ -138,7 +136,6 @@ template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(roundEven_helper, round template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(trunc_helper, trunc, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(ceil_helper, ceil, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(pow_helper, pow, (T), (T)(T), T) -template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(fma_helper, fma, (T), (T)(T)(T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(ldexp_helper, ldexp, (T)(U), (T)(U), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(modfStruct_helper, modfStruct, (T), (T), ModfOutput) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(frexpStruct_helper, frexpStruct, (T), (T), FrexpOutput) @@ -337,16 +334,6 @@ struct roundEven_helper -NBL_PARTIAL_REQ_TOP(concepts::FloatingPointScalar) -struct fma_helper) > -{ - static FloatingPoint __call(NBL_CONST_REF_ARG(FloatingPoint) x, NBL_CONST_REF_ARG(FloatingPoint) y, NBL_CONST_REF_ARG(FloatingPoint) z) - { - return std::fma(x, y, z); - } -}; - template NBL_PARTIAL_REQ_TOP(concepts::FloatingPointScalar && concepts::IntegralScalar) struct ldexp_helper && concepts::IntegralScalar) > @@ -510,25 +497,6 @@ struct pow_helper } }; -template -NBL_PARTIAL_REQ_TOP(VECTOR_SPECIALIZATION_CONCEPT) -struct fma_helper -{ - using return_t = T; - static return_t __call(NBL_CONST_REF_ARG(T) x, NBL_CONST_REF_ARG(T) y, NBL_CONST_REF_ARG(T) z) - { - using traits = hlsl::vector_traits; - array_get getter; - array_set setter; - - return_t output; - for (uint32_t i = 0; i < traits::Dimension; ++i) - setter(output, i, fma_helper::__call(getter(x, i), getter(y, i), getter(z, i))); - - return output; - } -}; - template NBL_PARTIAL_REQ_TOP(VECTOR_SPECIALIZATION_CONCEPT && (vector_traits::Dimension == vector_traits::Dimension)) struct ldexp_helper::Dimension == vector_traits::Dimension)) > From f6a69fe0689a319fa8ad81deed3815c7152a167d Mon Sep 17 00:00:00 2001 From: Mateusz Kielan Date: Tue, 1 Apr 2025 16:28:55 +0200 Subject: [PATCH 4/4] Use FMA for unspecialized dot product in intrinsics_impl.hlsl --- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index d0075c0d17..c5502faa0b 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -882,6 +882,25 @@ struct mix_helper && concepts::B } }; +template +NBL_PARTIAL_REQ_TOP(VECTOR_SPECIALIZATION_CONCEPT) +struct fma_helper +{ + using return_t = T; + static return_t __call(NBL_CONST_REF_ARG(T) x, NBL_CONST_REF_ARG(T) y, NBL_CONST_REF_ARG(T) z) + { + using traits = hlsl::vector_traits; + array_get getter; + array_set setter; + + return_t output; + for (uint32_t i = 0; i < traits::Dimension; ++i) + setter(output, i, fma_helper::__call(getter(x, i), getter(y, i), getter(z, i))); + + return output; + } +}; + #ifdef __HLSL_VERSION #define DOT_HELPER_REQUIREMENT (concepts::Vectorial && !is_vector_v) #else @@ -901,7 +920,7 @@ struct dot_helper scalar_type retval = getter(lhs, 0) * getter(rhs, 0); for (uint32_t i = 1; i < ArrayDim; ++i) - retval = retval + getter(lhs, i) * getter(rhs, i); + retval = fma_helper::__call(getter(lhs, i), getter(rhs, i), retval); return retval; } @@ -909,27 +928,8 @@ struct dot_helper #undef DOT_HELPER_REQUIREMENT -template -NBL_PARTIAL_REQ_TOP(VECTOR_SPECIALIZATION_CONCEPT) -struct fma_helper -{ - using return_t = T; - static return_t __call(NBL_CONST_REF_ARG(T) x, NBL_CONST_REF_ARG(T) y, NBL_CONST_REF_ARG(T) z) - { - using traits = hlsl::vector_traits; - array_get getter; - array_set setter; - - return_t output; - for (uint32_t i = 0; i < traits::Dimension; ++i) - setter(output, i, fma_helper::__call(getter(x, i), getter(y, i), getter(z, i))); - - return output; - } -}; - } } } -#endif \ No newline at end of file +#endif