diff --git a/examples_tests b/examples_tests index 0f230e1f18..071b862dc7 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 0f230e1f1834c04101957f862688ccc4d2002922 +Subproject commit 071b862dc71f6ad61d191924d72dcc00d313e9d0 diff --git a/include/ICameraSceneNode.h b/include/ICameraSceneNode.h index e3975e3802..014c063bf2 100644 --- a/include/ICameraSceneNode.h +++ b/include/ICameraSceneNode.h @@ -46,17 +46,17 @@ class ICameraSceneNode : public ISceneNode The function will figure it out if you've set an orthogonal matrix. \param projection The new projection matrix of the camera. */ - virtual void setProjectionMatrix(const core::matrix4SIMD& projection) =0; + virtual void setProjectionMatrix(const hlsl::float32_t4x4& projection) =0; //! Gets the current projection matrix of the camera. /** \return The current projection matrix of the camera. */ - inline const core::matrix4SIMD& getProjectionMatrix() const { return projMatrix; } + inline const hlsl::float32_t4x4& getProjectionMatrix() const { return projMatrix; } //! Gets the current view matrix of the camera. /** \return The current view matrix of the camera. */ - virtual const core::matrix3x4SIMD& getViewMatrix() const =0; + virtual const hlsl::float32_t3x4& getViewMatrix() const =0; - virtual const core::matrix4SIMD& getConcatenatedMatrix() const =0; + virtual const hlsl::float32_t4x4& getConcatenatedMatrix() const =0; #if 0 //! It is possible to send mouse and key events to the camera. /** Most cameras may ignore this input, but camera scene nodes diff --git a/include/matrix3x4SIMD.h b/include/matrix3x4SIMD.h deleted file mode 100644 index d52f305cec..0000000000 --- a/include/matrix3x4SIMD.h +++ /dev/null @@ -1,263 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". 
-// For conditions of distribution and use, see copyright notice in nabla.h - -#ifndef __NBL_MATRIX3X4SIMD_H_INCLUDED__ -#define __NBL_MATRIX3X4SIMD_H_INCLUDED__ - -#include "vectorSIMD.h" -#include "quaternion.h" - -namespace nbl::core -{ - -class matrix4x3; - -#define _NBL_MATRIX_ALIGNMENT _NBL_SIMD_ALIGNMENT -static_assert(_NBL_MATRIX_ALIGNMENT>=_NBL_VECTOR_ALIGNMENT,"Matrix must be equally or more aligned than vector!"); - -//! Equivalent of GLSL's mat4x3 -class matrix3x4SIMD// : private AllocationOverrideBase<_NBL_MATRIX_ALIGNMENT> EBO inheritance problem w.r.t `rows[3]` -{ - public: - _NBL_STATIC_INLINE_CONSTEXPR uint32_t VectorCount = 3u; - vectorSIMDf rows[VectorCount]; - - explicit matrix3x4SIMD( const vectorSIMDf& _r0 = vectorSIMDf(1.f, 0.f, 0.f, 0.f), - const vectorSIMDf& _r1 = vectorSIMDf(0.f, 1.f, 0.f, 0.f), - const vectorSIMDf& _r2 = vectorSIMDf(0.f, 0.f, 1.f, 0.f)) : rows{_r0, _r1, _r2} - { - } - - matrix3x4SIMD( float _a00, float _a01, float _a02, float _a03, - float _a10, float _a11, float _a12, float _a13, - float _a20, float _a21, float _a22, float _a23) - : matrix3x4SIMD(vectorSIMDf(_a00, _a01, _a02, _a03), - vectorSIMDf(_a10, _a11, _a12, _a13), - vectorSIMDf(_a20, _a21, _a22, _a23)) - { - } - - explicit matrix3x4SIMD(const float* const _data) - { - if (!_data) - return; - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] = vectorSIMDf(_data + 4*i); - } - matrix3x4SIMD(const float* const _data, bool ALIGNED) - { - if (!_data) - return; - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] = vectorSIMDf(_data + 4*i, ALIGNED); - } - - float* pointer() { return rows[0].pointer; } - const float* pointer() const { return rows[0].pointer; } - - inline matrix3x4SIMD& set(const matrix4x3& _retarded); - inline matrix4x3 getAsRetardedIrrlichtMatrix() const; - - static inline matrix3x4SIMD concatenateBFollowedByA(const matrix3x4SIMD& _a, const matrix3x4SIMD& _b); - - static inline matrix3x4SIMD concatenateBFollowedByAPrecisely(const matrix3x4SIMD& _a, 
const matrix3x4SIMD& _b); - - inline matrix3x4SIMD& concatenateAfter(const matrix3x4SIMD& _other) - { - return *this = concatenateBFollowedByA(*this, _other); - } - - inline matrix3x4SIMD& concatenateBefore(const matrix3x4SIMD& _other) - { - return *this = concatenateBFollowedByA(_other, *this); - } - - inline matrix3x4SIMD& concatenateAfterPrecisely(const matrix3x4SIMD& _other) - { - return *this = concatenateBFollowedByAPrecisely(*this, _other); - } - - inline matrix3x4SIMD& concatenateBeforePrecisely(const matrix3x4SIMD& _other) - { - return *this = concatenateBFollowedByAPrecisely(_other, *this); - } - - inline bool operator==(const matrix3x4SIMD& _other) - { - return !(*this != _other); - } - - inline bool operator!=(const matrix3x4SIMD& _other); - - - inline matrix3x4SIMD operator-() const - { - matrix3x4SIMD retval; - retval.rows[0] = -rows[0]; - retval.rows[1] = -rows[1]; - retval.rows[2] = -rows[2]; - return retval; - } - - - inline matrix3x4SIMD& operator+=(const matrix3x4SIMD& _other); - inline matrix3x4SIMD operator+(const matrix3x4SIMD& _other) const - { - matrix3x4SIMD retval(*this); - return retval += _other; - } - - inline matrix3x4SIMD& operator-=(const matrix3x4SIMD& _other); - inline matrix3x4SIMD operator-(const matrix3x4SIMD& _other) const - { - matrix3x4SIMD retval(*this); - return retval -= _other; - } - - inline matrix3x4SIMD& operator*=(float _scalar); - inline matrix3x4SIMD operator*(float _scalar) const - { - matrix3x4SIMD retval(*this); - return retval *= _scalar; - } - - inline matrix3x4SIMD& setTranslation(const vectorSIMDf& _translation) - { - // no faster way of doing it? 
- rows[0].w = _translation.x; - rows[1].w = _translation.y; - rows[2].w = _translation.z; - return *this; - } - inline vectorSIMDf getTranslation() const; - inline vectorSIMDf getTranslation3D() const; - - inline matrix3x4SIMD& setScale(const vectorSIMDf& _scale); - - inline vectorSIMDf getScale() const; - - inline void transformVect(vectorSIMDf& _out, const vectorSIMDf& _in) const; - inline void transformVect(vectorSIMDf& _in_out) const - { - transformVect(_in_out, _in_out); - } - - inline void pseudoMulWith4x1(vectorSIMDf& _out, const vectorSIMDf& _in) const; - inline void pseudoMulWith4x1(vectorSIMDf& _in_out) const - { - pseudoMulWith4x1(_in_out,_in_out); - } - - inline void mulSub3x3WithNx1(vectorSIMDf& _out, const vectorSIMDf& _in) const; - inline void mulSub3x3WithNx1(vectorSIMDf& _in_out) const - { - mulSub3x3WithNx1(_in_out, _in_out); - } - - inline static matrix3x4SIMD buildCameraLookAtMatrixLH( - const vectorSIMDf& position, - const vectorSIMDf& target, - const vectorSIMDf& upVector); - inline static matrix3x4SIMD buildCameraLookAtMatrixRH( - const vectorSIMDf& position, - const vectorSIMDf& target, - const vectorSIMDf& upVector); - - inline matrix3x4SIMD& setRotation(const quaternion& _quat); - - inline matrix3x4SIMD& setScaleRotationAndTranslation( const vectorSIMDf& _scale, - const quaternion& _quat, - const vectorSIMDf& _translation); - - inline vectorSIMDf getPseudoDeterminant() const - { - vectorSIMDf tmp; - return determinant_helper(tmp); - } - - inline bool getInverse(matrix3x4SIMD& _out) const; - bool makeInverse() - { - matrix3x4SIMD tmp; - - if (getInverse(tmp)) - { - *this = tmp; - return true; - } - return false; - } - - // - inline bool getSub3x3InverseTranspose(matrix3x4SIMD& _out) const; - - // - inline bool getSub3x3InverseTransposePacked(float outRows[9]) const - { - matrix3x4SIMD tmp; - if (!getSub3x3InverseTranspose(tmp)) - return false; - - float* _out = outRows; - for (auto i=0; i<3; i++) - { - const auto& row = tmp.rows[i]; - for 
(auto j=0; j<3; j++) - *(_out++) = row[j]; - } - - return true; - } - - // - inline core::matrix3x4SIMD getSub3x3TransposeCofactors() const; - - // - inline void setTransformationCenter(const vectorSIMDf& _center, const vectorSIMDf& _translation); - - // - static inline matrix3x4SIMD buildAxisAlignedBillboard( - const vectorSIMDf& camPos, - const vectorSIMDf& center, - const vectorSIMDf& translation, - const vectorSIMDf& axis, - const vectorSIMDf& from); - - - // - float& operator()(size_t _i, size_t _j) { return rows[_i].pointer[_j]; } - const float& operator()(size_t _i, size_t _j) const { return rows[_i].pointer[_j]; } - - // - inline const vectorSIMDf& operator[](size_t _rown) const { return rows[_rown]; } - inline vectorSIMDf& operator[](size_t _rown) { return rows[_rown]; } - - private: - static inline vectorSIMDf doJob(const __m128& a, const matrix3x4SIMD& _mtx); - - // really need that dvec<2> or wider - inline __m128d halfRowAsDouble(size_t _n, bool _0) const; - static inline __m128d doJob_d(const __m128d& _a0, const __m128d& _a1, const matrix3x4SIMD& _mtx, bool _xyHalf); - - vectorSIMDf determinant_helper(vectorSIMDf& r1crossr2) const - { - r1crossr2 = core::cross(rows[1], rows[2]); - return core::dot(rows[0], r1crossr2); - } -}; - -inline matrix3x4SIMD concatenateBFollowedByA(const matrix3x4SIMD& _a, const matrix3x4SIMD& _b) -{ - return matrix3x4SIMD::concatenateBFollowedByA(_a, _b); -} -/* -inline matrix3x4SIMD concatenateBFollowedByAPrecisely(const matrix3x4SIMD& _a, const matrix3x4SIMD& _b) -{ - return matrix3x4SIMD::concatenateBFollowedByAPrecisely(_a, _b); -} -*/ - -} - -#endif diff --git a/include/matrix3x4SIMD_impl.h b/include/matrix3x4SIMD_impl.h deleted file mode 100644 index 0e9022efd0..0000000000 --- a/include/matrix3x4SIMD_impl.h +++ /dev/null @@ -1,470 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". 
-// For conditions of distribution and use, see copyright notice in nabla.h - -#ifndef _NBL_MATRIX3X4SIMD_IMPL_H_INCLUDED_ -#define _NBL_MATRIX3X4SIMD_IMPL_H_INCLUDED_ - -#include "matrix3x4SIMD.h" -#include "nbl/core/math/glslFunctions.tcc" - -namespace nbl::core -{ - -// TODO: move to another implementation header -inline quaternion::quaternion(const matrix3x4SIMD& m) -{ - const vectorSIMDf one(1.f); - auto Qx = m.rows[0].xxxx()^vectorSIMDu32(0,0,0x80000000u,0x80000000u); - auto Qy = m.rows[1].yyyy()^vectorSIMDu32(0,0x80000000u,0,0x80000000u); - auto Qz = m.rows[2].zzzz()^vectorSIMDu32(0,0x80000000u,0x80000000u,0); - - auto tmp = one+Qx+Qy+Qz; - auto invscales = inversesqrt(tmp)*0.5f; - auto scales = tmp*invscales*0.5f; - - // TODO: speed this up - if (tmp.x > 0.0f) - { - X = (m(2, 1) - m(1, 2)) * invscales.x; - Y = (m(0, 2) - m(2, 0)) * invscales.x; - Z = (m(1, 0) - m(0, 1)) * invscales.x; - W = scales.x; - } - else - { - if (tmp.y>0.f) - { - X = scales.y; - Y = (m(0, 1) + m(1, 0)) * invscales.y; - Z = (m(2, 0) + m(0, 2)) * invscales.y; - W = (m(2, 1) - m(1, 2)) * invscales.y; - } - else if (tmp.z>0.f) - { - X = (m(0, 1) + m(1, 0)) * invscales.z; - Y = scales.z; - Z = (m(1, 2) + m(2, 1)) * invscales.z; - W = (m(0, 2) - m(2, 0)) * invscales.z; - } - else - { - X = (m(0, 2) + m(2, 0)) * invscales.w; - Y = (m(1, 2) + m(2, 1)) * invscales.w; - Z = scales.w; - W = (m(1, 0) - m(0, 1)) * invscales.w; - } - } - - *this = normalize(*this); -} - -inline bool matrix3x4SIMD::operator!=(const matrix3x4SIMD& _other) -{ - for (size_t i = 0u; i < VectorCount; ++i) - if ((rows[i] != _other.rows[i]).any()) - return true; - return false; -} - -inline matrix3x4SIMD& matrix3x4SIMD::operator+=(const matrix3x4SIMD& _other) -{ - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] += _other.rows[i]; - return *this; -} -inline matrix3x4SIMD& matrix3x4SIMD::operator-=(const matrix3x4SIMD& _other) -{ - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] -= _other.rows[i]; - return *this; 
-} -inline matrix3x4SIMD& matrix3x4SIMD::operator*=(float _scalar) -{ - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] *= _scalar; - return *this; -} - -#ifdef __NBL_COMPILE_WITH_SSE3 -#define BROADCAST32(fpx) _MM_SHUFFLE(fpx, fpx, fpx, fpx) -#define BUILD_XORMASKF(_x_, _y_, _z_, _w_) _mm_setr_epi32(_x_ ? 0x80000000u:0x0u, _y_ ? 0x80000000u:0x0u, _z_ ? 0x80000000u:0x0u, _w_ ? 0x80000000u:0x0u) -#define BUILD_MASKF(_x_, _y_, _z_, _w_) _mm_setr_epi32(_x_*0xffffffff, _y_*0xffffffff, _z_*0xffffffff, _w_*0xffffffff) - -inline matrix3x4SIMD matrix3x4SIMD::concatenateBFollowedByA(const matrix3x4SIMD& _a, const matrix3x4SIMD& _b) -{ -#ifdef _NBL_DEBUG - assert(is_aligned_to(&_a, _NBL_SIMD_ALIGNMENT)); - assert(is_aligned_to(&_b, _NBL_SIMD_ALIGNMENT)); -#endif // _NBL_DEBUG - __m128 r0 = _a.rows[0].getAsRegister(); - __m128 r1 = _a.rows[1].getAsRegister(); - __m128 r2 = _a.rows[2].getAsRegister(); - - matrix3x4SIMD out; - out.rows[0] = matrix3x4SIMD::doJob(r0, _b); - out.rows[1] = matrix3x4SIMD::doJob(r1, _b); - out.rows[2] = matrix3x4SIMD::doJob(r2, _b); - - return out; -} - -inline matrix3x4SIMD matrix3x4SIMD::concatenateBFollowedByAPrecisely(const matrix3x4SIMD& _a, const matrix3x4SIMD& _b) -{ - __m128d r00 = _a.halfRowAsDouble(0u, true); - __m128d r01 = _a.halfRowAsDouble(0u, false); - __m128d r10 = _a.halfRowAsDouble(1u, true); - __m128d r11 = _a.halfRowAsDouble(1u, false); - __m128d r20 = _a.halfRowAsDouble(2u, true); - __m128d r21 = _a.halfRowAsDouble(2u, false); - - matrix3x4SIMD out; - - const __m128i mask0011 = BUILD_MASKF(0, 0, 1, 1); - - __m128 second = _mm_cvtpd_ps(matrix3x4SIMD::doJob_d(r00, r01, _b, false)); - out.rows[0] = vectorSIMDf(_mm_cvtpd_ps(matrix3x4SIMD::doJob_d(r00, r01, _b, true))) | _mm_castps_si128((vectorSIMDf(_mm_movelh_ps(second, second)) & mask0011).getAsRegister()); - - second = _mm_cvtpd_ps(matrix3x4SIMD::doJob_d(r10, r11, _b, false)); - out.rows[1] = vectorSIMDf(_mm_cvtpd_ps(matrix3x4SIMD::doJob_d(r10, r11, _b, true))) | 
_mm_castps_si128((vectorSIMDf(_mm_movelh_ps(second, second)) & mask0011).getAsRegister()); - - second = _mm_cvtpd_ps(matrix3x4SIMD::doJob_d(r20, r21, _b, false)); - out.rows[2] = vectorSIMDf(_mm_cvtpd_ps(matrix3x4SIMD::doJob_d(r20, r21, _b, true))) | _mm_castps_si128((vectorSIMDf(_mm_movelh_ps(second, second)) & mask0011).getAsRegister()); - - return out; -} - -inline vectorSIMDf matrix3x4SIMD::getTranslation() const -{ - __m128 xmm0 = _mm_unpackhi_ps(rows[0].getAsRegister(), rows[1].getAsRegister()); // (0z,1z,0w,1w) - __m128 xmm1 = _mm_unpackhi_ps(rows[2].getAsRegister(), _mm_setr_ps(0.f, 0.f, 0.f, 1.f)); // (2z,3z,2w,3w) - __m128 xmm2 = _mm_movehl_ps(xmm1, xmm0);// (0w,1w,2w,3w) - - return xmm2; -} -inline vectorSIMDf matrix3x4SIMD::getTranslation3D() const -{ - __m128 xmm0 = _mm_unpackhi_ps(rows[0].getAsRegister(), rows[1].getAsRegister()); // (0z,1z,0w,1w) - __m128 xmm1 = _mm_unpackhi_ps(rows[2].getAsRegister(), _mm_setzero_ps()); // (2z,0,2w,0) - __m128 xmm2 = _mm_movehl_ps(xmm1, xmm0);// (0w,1w,2w,0) - - return xmm2; -} - -inline matrix3x4SIMD& matrix3x4SIMD::setScale(const core::vectorSIMDf& _scale) -{ - const vectorSIMDu32 mask0001 = vectorSIMDu32(BUILD_MASKF(0, 0, 0, 1)); - const vectorSIMDu32 mask0010 = vectorSIMDu32(BUILD_MASKF(0, 0, 1, 0)); - const vectorSIMDu32 mask0100 = vectorSIMDu32(BUILD_MASKF(0, 1, 0, 0)); - const vectorSIMDu32 mask1000 = vectorSIMDu32(BUILD_MASKF(1, 0, 0, 0)); - - const vectorSIMDu32& scaleAlias = reinterpret_cast(_scale); - - vectorSIMDu32& rowAlias0 = reinterpret_cast(rows[0]); - vectorSIMDu32& rowAlias1 = reinterpret_cast(rows[1]); - vectorSIMDu32& rowAlias2 = reinterpret_cast(rows[2]); - rowAlias0 = (scaleAlias & reinterpret_cast(mask1000)) | (rowAlias0 & reinterpret_cast(mask0001)); - rowAlias1 = (scaleAlias & reinterpret_cast(mask0100)) | (rowAlias1 & reinterpret_cast(mask0001)); - rowAlias2 = (scaleAlias & reinterpret_cast(mask0010)) | (rowAlias2 & reinterpret_cast(mask0001)); - - return *this; -} - -inline 
core::vectorSIMDf matrix3x4SIMD::getScale() const -{ - // xmm4-7 will now become columuns of B - __m128 xmm4 = rows[0].getAsRegister(); - __m128 xmm5 = rows[1].getAsRegister(); - __m128 xmm6 = rows[2].getAsRegister(); - __m128 xmm7 = _mm_setzero_ps(); - // g==0 - __m128 xmm0 = _mm_unpacklo_ps(xmm4, xmm5); - __m128 xmm1 = _mm_unpacklo_ps(xmm6, xmm7); // (2x,g,2y,g) - __m128 xmm2 = _mm_unpackhi_ps(xmm4, xmm5); - __m128 xmm3 = _mm_unpackhi_ps(xmm6, xmm7); // (2z,g,2w,g) - xmm4 = _mm_movelh_ps(xmm1, xmm0); //(0x,1x,2x,g) - xmm5 = _mm_movehl_ps(xmm1, xmm0); - xmm6 = _mm_movelh_ps(xmm3, xmm2); //(0z,1z,2z,g) - - // See http://www.robertblum.com/articles/2005/02/14/decomposing-matrices - // We have to do the full calculation. - xmm0 = _mm_mul_ps(xmm4, xmm4);// column 0 squared - xmm1 = _mm_mul_ps(xmm5, xmm5);// column 1 squared - xmm2 = _mm_mul_ps(xmm6, xmm6);// column 2 squared - xmm4 = _mm_hadd_ps(xmm0, xmm1); - xmm5 = _mm_hadd_ps(xmm2, xmm7); - xmm6 = _mm_hadd_ps(xmm4, xmm5); - - return _mm_sqrt_ps(xmm6); -} - -inline void matrix3x4SIMD::transformVect(vectorSIMDf& _out, const vectorSIMDf& _in) const -{ - vectorSIMDf r0 = rows[0] * _in, - r1 = rows[1] * _in, - r2 = rows[2] * _in; - - _out = - _mm_hadd_ps( - _mm_hadd_ps(r0.getAsRegister(), r1.getAsRegister()), - _mm_hadd_ps(r2.getAsRegister(), _mm_set1_ps(0.25f)) - ); -} - -inline void matrix3x4SIMD::pseudoMulWith4x1(vectorSIMDf& _out, const vectorSIMDf& _in) const -{ - __m128i mask1110 = BUILD_MASKF(1, 1, 1, 0); - _out = (_in & mask1110) | _mm_castps_si128(vectorSIMDf(0.f, 0.f, 0.f, 1.f).getAsRegister()); - transformVect(_out); -} - -inline void matrix3x4SIMD::mulSub3x3WithNx1(vectorSIMDf& _out, const vectorSIMDf& _in) const -{ - auto maskedIn = _in & BUILD_MASKF(1, 1, 1, 0); - vectorSIMDf r0 = rows[0] * maskedIn, - r1 = rows[1] * maskedIn, - r2 = rows[2] * maskedIn; - - _out = - _mm_hadd_ps( - _mm_hadd_ps(r0.getAsRegister(), r1.getAsRegister()), - _mm_hadd_ps(r2.getAsRegister(), _mm_setzero_ps()) - ); -} - - -inline 
matrix3x4SIMD matrix3x4SIMD::buildCameraLookAtMatrixLH( - const core::vectorSIMDf& position, - const core::vectorSIMDf& target, - const core::vectorSIMDf& upVector) -{ - const core::vectorSIMDf zaxis = core::normalize(target - position); - const core::vectorSIMDf xaxis = core::normalize(core::cross(upVector, zaxis)); - const core::vectorSIMDf yaxis = core::cross(zaxis, xaxis); - - matrix3x4SIMD r; - r.rows[0] = xaxis; - r.rows[1] = yaxis; - r.rows[2] = zaxis; - r.rows[0].w = -dot(xaxis, position)[0]; - r.rows[1].w = -dot(yaxis, position)[0]; - r.rows[2].w = -dot(zaxis, position)[0]; - - return r; -} -inline matrix3x4SIMD matrix3x4SIMD::buildCameraLookAtMatrixRH( - const core::vectorSIMDf& position, - const core::vectorSIMDf& target, - const core::vectorSIMDf& upVector) -{ - const core::vectorSIMDf zaxis = core::normalize(position - target); - const core::vectorSIMDf xaxis = core::normalize(core::cross(upVector, zaxis)); - const core::vectorSIMDf yaxis = core::cross(zaxis, xaxis); - - matrix3x4SIMD r; - r.rows[0] = xaxis; - r.rows[1] = yaxis; - r.rows[2] = zaxis; - r.rows[0].w = -dot(xaxis, position)[0]; - r.rows[1].w = -dot(yaxis, position)[0]; - r.rows[2].w = -dot(zaxis, position)[0]; - - return r; -} - -inline matrix3x4SIMD& matrix3x4SIMD::setRotation(const core::quaternion& _quat) -{ - const vectorSIMDu32 mask0001 = vectorSIMDu32(BUILD_MASKF(0, 0, 0, 1)); - const __m128i mask1110 = BUILD_MASKF(1, 1, 1, 0); - - const core::vectorSIMDf& quat = reinterpret_cast(_quat); - rows[0] = ((quat.yyyy() * ((quat.yxwx() & mask1110) * vectorSIMDf(2.f))) + (quat.zzzz() * (quat.zwxx() & mask1110) * vectorSIMDf(2.f, -2.f, 2.f, 0.f))) | (reinterpret_cast(rows[0]) & (mask0001)); - rows[0].x = 1.f - rows[0].x; - - rows[1] = ((quat.zzzz() * ((quat.wzyx() & mask1110) * vectorSIMDf(2.f))) + (quat.xxxx() * (quat.yxwx() & mask1110) * vectorSIMDf(2.f, 2.f, -2.f, 0.f))) | (reinterpret_cast(rows[1]) & (mask0001)); - rows[1].y = 1.f - rows[1].y; - - rows[2] = ((quat.xxxx() * ((quat.zwxx() & 
mask1110) * vectorSIMDf(2.f))) + (quat.yyyy() * (quat.wzyx() & mask1110) * vectorSIMDf(-2.f, 2.f, 2.f, 0.f))) | (reinterpret_cast(rows[2]) & (mask0001)); - rows[2].z = 1.f - rows[2].z; - - return *this; -} - -inline matrix3x4SIMD& matrix3x4SIMD::setScaleRotationAndTranslation(const vectorSIMDf& _scale, const core::quaternion& _quat, const vectorSIMDf& _translation) -{ - const __m128i mask1110 = BUILD_MASKF(1, 1, 1, 0); - - const vectorSIMDf& quat = reinterpret_cast(_quat); - const vectorSIMDf dblScale = (_scale * 2.f) & mask1110; - - vectorSIMDf mlt = dblScale ^ BUILD_XORMASKF(0, 1, 0, 0); - rows[0] = ((quat.yyyy() * ((quat.yxwx() & mask1110) * dblScale)) + (quat.zzzz() * (quat.zwxx() & mask1110) * mlt)); - rows[0].x = _scale.x - rows[0].x; - - mlt = dblScale ^ BUILD_XORMASKF(0, 0, 1, 0); - rows[1] = ((quat.zzzz() * ((quat.wzyx() & mask1110) * dblScale)) + (quat.xxxx() * (quat.yxwx() & mask1110) * mlt)); - rows[1].y = _scale.y - rows[1].y; - - mlt = dblScale ^ BUILD_XORMASKF(1, 0, 0, 0); - rows[2] = ((quat.xxxx() * ((quat.zwxx() & mask1110) * dblScale)) + (quat.yyyy() * (quat.wzyx() & mask1110) * mlt)); - rows[2].z = _scale.z - rows[2].z; - - setTranslation(_translation); - - return *this; -} - - -inline bool matrix3x4SIMD::getInverse(matrix3x4SIMD& _out) const //! SUBOPTIMAL - OPTIMIZE! 
-{ - auto translation = getTranslation(); - // `tmp` will have columns in its `rows` - core::matrix4SIMD tmp; - auto* cols = tmp.rows; - if (!getSub3x3InverseTranspose(reinterpret_cast(tmp))) - return false; - - // find inverse post-translation - cols[3] = -cols[0]*translation.xxxx()-cols[1]*translation.yyyy()-cols[2]*translation.zzzz(); - - // columns into rows - _out = transpose(tmp).extractSub3x4(); - - return true; -} - -inline bool matrix3x4SIMD::getSub3x3InverseTranspose(core::matrix3x4SIMD& _out) const -{ - vectorSIMDf r1crossr2; - const vectorSIMDf d = determinant_helper(r1crossr2); - if (core::iszero(d.x, FLT_MIN)) - return false; - auto rcp = core::reciprocal(d); - - // matrix of cofactors * 1/det - _out = getSub3x3TransposeCofactors(); - _out.rows[0] *= rcp; - _out.rows[1] *= rcp; - _out.rows[2] *= rcp; - - return true; -} - -inline core::matrix3x4SIMD matrix3x4SIMD::getSub3x3TransposeCofactors() const -{ - core::matrix3x4SIMD _out; - _out.rows[0] = core::cross(rows[1], rows[2]); - _out.rows[1] = core::cross(rows[2], rows[0]); - _out.rows[2] = core::cross(rows[0], rows[1]); - return _out; -} - -// TODO: Double check this!- -inline void matrix3x4SIMD::setTransformationCenter(const core::vectorSIMDf& _center, const core::vectorSIMDf& _translation) -{ - core::vectorSIMDf r0 = rows[0] * _center; - core::vectorSIMDf r1 = rows[1] * _center; - core::vectorSIMDf r2 = rows[2] * _center; - core::vectorSIMDf r3(0.f, 0.f, 0.f, 1.f); - - __m128 col3 = _mm_hadd_ps(_mm_hadd_ps(r0.getAsRegister(), r1.getAsRegister()), _mm_hadd_ps(r2.getAsRegister(), r3.getAsRegister())); - const vectorSIMDf vcol3 = _center - _translation - col3; - - for (size_t i = 0u; i < VectorCount; ++i) - rows[i].w = vcol3.pointer[i]; -} - - -// TODO: Double check this! 
-inline matrix3x4SIMD matrix3x4SIMD::buildAxisAlignedBillboard( - const core::vectorSIMDf& camPos, - const core::vectorSIMDf& center, - const core::vectorSIMDf& translation, - const core::vectorSIMDf& axis, - const core::vectorSIMDf& from) -{ - // axis of rotation - const core::vectorSIMDf up = core::normalize(axis); - const core::vectorSIMDf forward = core::normalize(camPos - center); - const core::vectorSIMDf right = core::normalize(core::cross(up, forward)); - - // correct look vector - const core::vectorSIMDf look = core::cross(right, up); - - // rotate from to - // axis multiplication by sin - const core::vectorSIMDf vs = core::cross(look, from); - - // cosinus angle - const core::vectorSIMDf ca = core::cross(from, look); - - const core::vectorSIMDf vt(up * (core::vectorSIMDf(1.f) - ca)); - const core::vectorSIMDf wt = vt * up.yzxx(); - const core::vectorSIMDf vtuppca = vt * up + ca; - - matrix3x4SIMD mat; - core::vectorSIMDf& row0 = mat.rows[0]; - core::vectorSIMDf& row1 = mat.rows[1]; - core::vectorSIMDf& row2 = mat.rows[2]; - - row0 = vtuppca & BUILD_MASKF(1, 0, 0, 0); - row1 = vtuppca & BUILD_MASKF(0, 1, 0, 0); - row2 = vtuppca & BUILD_MASKF(0, 0, 1, 0); - - row0 += (wt.xxzx() + vs.xzyx() * core::vectorSIMDf(1.f, 1.f, -1.f, 1.f)) & BUILD_MASKF(0, 1, 1, 0); - row1 += (wt.xxyx() + vs.zxxx() * core::vectorSIMDf(-1.f, 1.f, 1.f, 1.f)) & BUILD_MASKF(1, 0, 1, 0); - row2 += (wt.zyxx() + vs.yxxx() * core::vectorSIMDf(1.f, -1.f, 1.f, 1.f)) & BUILD_MASKF(1, 1, 0, 0); - - mat.setTransformationCenter(center, translation); - return mat; -} - - - -inline vectorSIMDf matrix3x4SIMD::doJob(const __m128& a, const matrix3x4SIMD& _mtx) -{ - __m128 r0 = _mtx.rows[0].getAsRegister(); - __m128 r1 = _mtx.rows[1].getAsRegister(); - __m128 r2 = _mtx.rows[2].getAsRegister(); - - const __m128i mask = _mm_setr_epi32(0, 0, 0, 0xffffffff); - - vectorSIMDf res; - res = _mm_mul_ps(_mm_shuffle_ps(a, a, BROADCAST32(0)), r0); - res += _mm_mul_ps(_mm_shuffle_ps(a, a, BROADCAST32(1)), r1); - 
res += _mm_mul_ps(_mm_shuffle_ps(a, a, BROADCAST32(2)), r2); - res += vectorSIMDf(a) & mask; // always 0 0 0 a3 -- no shuffle needed - return res; - } - -inline __m128d matrix3x4SIMD::halfRowAsDouble(size_t _n, bool _0) const -{ - return _mm_cvtps_pd(_0 ? rows[_n].xyxx().getAsRegister() : rows[_n].zwxx().getAsRegister()); -} -inline __m128d matrix3x4SIMD::doJob_d(const __m128d& _a0, const __m128d& _a1, const matrix3x4SIMD& _mtx, bool _xyHalf) -{ - __m128d r0 = _mtx.halfRowAsDouble(0u, _xyHalf); - __m128d r1 = _mtx.halfRowAsDouble(1u, _xyHalf); - __m128d r2 = _mtx.halfRowAsDouble(2u, _xyHalf); - - const __m128d mask01 = _mm_castsi128_pd(_mm_setr_epi32(0, 0, 0xffffffff, 0xffffffff)); - - __m128d res; - res = _mm_mul_pd(_mm_shuffle_pd(_a0, _a0, 0), r0); - res = _mm_add_pd(res, _mm_mul_pd(_mm_shuffle_pd(_a0, _a0, 3), r1)); - res = _mm_add_pd(res, _mm_mul_pd(_mm_shuffle_pd(_a1, _a1, 0), r2)); - if (!_xyHalf) - res = _mm_add_pd(res, _mm_and_pd(_a1, mask01)); - return res; -} - -#undef BUILD_MASKF -#undef BUILD_XORMASKF -#undef BROADCAST32 -#else -#error "no implementation" -#endif - -} // nbl::core - -#endif diff --git a/include/matrix4SIMD.h b/include/matrix4SIMD.h deleted file mode 100644 index 03126c61f7..0000000000 --- a/include/matrix4SIMD.h +++ /dev/null @@ -1,385 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". 
-// For conditions of distribution and use, see copyright notice in nabla.h - -#ifndef __NBL_MATRIX4SIMD_H_INCLUDED__ -#define __NBL_MATRIX4SIMD_H_INCLUDED__ - -#include "matrix3x4SIMD.h" - -namespace nbl -{ -namespace core -{ - -template -class aabbox3d; - - -class matrix4SIMD// : public AlignedBase<_NBL_SIMD_ALIGNMENT> don't inherit from AlignedBase (which is empty) because member `rows[4]` inherits from it as well -{ - public: - _NBL_STATIC_INLINE_CONSTEXPR uint32_t VectorCount = 4u; - vectorSIMDf rows[VectorCount]; - - inline explicit matrix4SIMD(const vectorSIMDf& _r0 = vectorSIMDf(1.f, 0.f, 0.f, 0.f), - const vectorSIMDf& _r1 = vectorSIMDf(0.f, 1.f, 0.f, 0.f), - const vectorSIMDf& _r2 = vectorSIMDf(0.f, 0.f, 1.f, 0.f), - const vectorSIMDf& _r3 = vectorSIMDf(0.f, 0.f, 0.f, 1.f)) - : rows{ _r0, _r1, _r2, _r3 } - { - } - - inline matrix4SIMD( float _a00, float _a01, float _a02, float _a03, - float _a10, float _a11, float _a12, float _a13, - float _a20, float _a21, float _a22, float _a23, - float _a30, float _a31, float _a32, float _a33) - : matrix4SIMD( vectorSIMDf(_a00, _a01, _a02, _a03), - vectorSIMDf(_a10, _a11, _a12, _a13), - vectorSIMDf(_a20, _a21, _a22, _a23), - vectorSIMDf(_a30, _a31, _a32, _a33)) - { - } - - inline explicit matrix4SIMD(const float* const _data) - { - if (!_data) - return; - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] = vectorSIMDf(_data + 4 * i); - } - inline matrix4SIMD(const float* const _data, bool ALIGNED) - { - if (!_data) - return; - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] = vectorSIMDf(_data + 4 * i, ALIGNED); - } - - inline explicit matrix4SIMD(const matrix3x4SIMD& smallMat) - { - *reinterpret_cast(this) = smallMat; - rows[3].set(0.f,0.f,0.f,1.f); - } - - inline matrix3x4SIMD extractSub3x4() const - { - return matrix3x4SIMD(rows[0],rows[1],rows[2]); - } - - //! 
Access by row - inline const vectorSIMDf& getRow(size_t _rown) const{ return rows[_rown]; } - inline vectorSIMDf& getRow(size_t _rown) { return rows[_rown]; } - - //! Access by element - inline float operator()(size_t _i, size_t _j) const { return rows[_i].pointer[_j]; } - inline float& operator()(size_t _i, size_t _j) { return rows[_i].pointer[_j]; } - - //! Access for memory - inline const float* pointer() const {return rows[0].pointer;} - inline float* pointer() {return rows[0].pointer;} - - - inline bool operator==(const matrix4SIMD& _other) const - { - return !(*this != _other); - } - inline bool operator!=(const matrix4SIMD& _other) const; - - inline matrix4SIMD& operator+=(const matrix4SIMD& _other); - inline matrix4SIMD operator+(const matrix4SIMD& _other) const - { - matrix4SIMD r{*this}; - return r += _other; - } - - inline matrix4SIMD& operator-=(const matrix4SIMD& _other); - inline matrix4SIMD operator-(const matrix4SIMD& _other) const - { - matrix4SIMD r{*this}; - return r -= _other; - } - - inline matrix4SIMD& operator*=(float _scalar); - inline matrix4SIMD operator*(float _scalar) const - { - matrix4SIMD r{*this}; - return r *= _scalar; - } - - static inline matrix4SIMD concatenateBFollowedByA(const matrix4SIMD& _a, const matrix4SIMD& _b); - static inline matrix4SIMD concatenateBFollowedByAPrecisely(const matrix4SIMD& _a, const matrix4SIMD& _b); - - inline bool isIdentity() const - { - return *this == matrix4SIMD(); - } - inline bool isIdentity(float _tolerance) const; - - inline bool isOrthogonal() const - { - return concatenateBFollowedByA(transpose(*this), *this).isIdentity(); - } - inline bool isOrthogonal(float _tolerance) const - { - return concatenateBFollowedByA(transpose(*this), *this).isIdentity(_tolerance); - } - - inline matrix4SIMD& setScale(const core::vectorSIMDf& _scale); - inline matrix4SIMD& setScale(float _scale) - { - return setScale(vectorSIMDf(_scale)); - } - - inline void setTranslation(const float* _t) - { - for (size_t i = 
0u; i < 3u; ++i) - rows[i].w = _t[i]; - } - //! Takes into account only x,y,z components of _t - inline void setTranslation(const vectorSIMDf& _t) - { - setTranslation(_t.pointer); - } - inline void setTranslation(const vector3d& _t) - { - setTranslation(&_t.X); - } - - //! Returns last column of the matrix. - inline vectorSIMDf getTranslation() const; - - //! Returns translation part of the matrix (w component is always 0). - inline vectorSIMDf getTranslation3D() const; - - enum class E_MATRIX_INVERSE_PRECISION - { - EMIP_FAST_RECIPROCAL, - EMIP_32BIT, - EMIP_64BBIT - }; - - template - inline bool getInverseTransform(matrix4SIMD& _out) const - { - if constexpr (precision == E_MATRIX_INVERSE_PRECISION::EMIP_64BBIT) - { - double a = rows[0][0], b = rows[0][1], c = rows[0][2], d = rows[0][3]; - double e = rows[1][0], f = rows[1][1], g = rows[1][2], h = rows[1][3]; - double i = rows[2][0], j = rows[2][1], k = rows[2][2], l = rows[2][3]; - double m = rows[3][0], n = rows[3][1], o = rows[3][2], p = rows[3][3]; - - double kp_lo = k * p - l * o; - double jp_ln = j * p - l * n; - double jo_kn = j * o - k * n; - double ip_lm = i * p - l * m; - double io_km = i * o - k * m; - double in_jm = i * n - j * m; - - double a11 = +(f * kp_lo - g * jp_ln + h * jo_kn); - double a12 = -(e * kp_lo - g * ip_lm + h * io_km); - double a13 = +(e * jp_ln - f * ip_lm + h * in_jm); - double a14 = -(e * jo_kn - f * io_km + g * in_jm); - - double det = a * a11 + b * a12 + c * a13 + d * a14; - - if (core::iszero(det, DBL_MIN)) - return false; - - double invDet = 1.0 / det; - - _out.rows[0][0] = a11 * invDet; - _out.rows[1][0] = a12 * invDet; - _out.rows[2][0] = a13 * invDet; - _out.rows[3][0] = a14 * invDet; - - _out.rows[0][1] = -(b * kp_lo - c * jp_ln + d * jo_kn) * invDet; - _out.rows[1][1] = +(a * kp_lo - c * ip_lm + d * io_km) * invDet; - _out.rows[2][1] = -(a * jp_ln - b * ip_lm + d * in_jm) * invDet; - _out.rows[3][1] = +(a * jo_kn - b * io_km + c * in_jm) * invDet; - - double gp_ho = g * 
p - h * o; - double fp_hn = f * p - h * n; - double fo_gn = f * o - g * n; - double ep_hm = e * p - h * m; - double eo_gm = e * o - g * m; - double en_fm = e * n - f * m; - - _out.rows[0][2] = +(b * gp_ho - c * fp_hn + d * fo_gn) * invDet; - _out.rows[1][2] = -(a * gp_ho - c * ep_hm + d * eo_gm) * invDet; - _out.rows[2][2] = +(a * fp_hn - b * ep_hm + d * en_fm) * invDet; - _out.rows[3][2] = -(a * fo_gn - b * eo_gm + c * en_fm) * invDet; - - double gl_hk = g * l - h * k; - double fl_hj = f * l - h * j; - double fk_gj = f * k - g * j; - double el_hi = e * l - h * i; - double ek_gi = e * k - g * i; - double ej_fi = e * j - f * i; - - _out.rows[0][3] = -(b * gl_hk - c * fl_hj + d * fk_gj) * invDet; - _out.rows[1][3] = +(a * gl_hk - c * el_hi + d * ek_gi) * invDet; - _out.rows[2][3] = -(a * fl_hj - b * el_hi + d * ej_fi) * invDet; - _out.rows[3][3] = +(a * fk_gj - b * ek_gi + c * ej_fi) * invDet; - - return true; - } - else - { - auto mat2mul = [](vectorSIMDf _A, vectorSIMDf _B) - { - return _A*_B.xwxw()+_A.yxwz()*_B.zyzy(); - }; - auto mat2adjmul = [](vectorSIMDf _A, vectorSIMDf _B) - { - return _A.wwxx()*_B-_A.yyzz()*_B.zwxy(); - }; - auto mat2muladj = [](vectorSIMDf _A, vectorSIMDf _B) - { - return _A*_B.wxwx()-_A.yxwz()*_B.zyzy(); - }; - - vectorSIMDf A = _mm_movelh_ps(rows[0].getAsRegister(), rows[1].getAsRegister()); - vectorSIMDf B = _mm_movehl_ps(rows[1].getAsRegister(), rows[0].getAsRegister()); - vectorSIMDf C = _mm_movelh_ps(rows[2].getAsRegister(), rows[3].getAsRegister()); - vectorSIMDf D = _mm_movehl_ps(rows[3].getAsRegister(), rows[2].getAsRegister()); - - vectorSIMDf allDets = vectorSIMDf(_mm_shuffle_ps(rows[0].getAsRegister(),rows[2].getAsRegister(),_MM_SHUFFLE(2,0,2,0)))* - vectorSIMDf(_mm_shuffle_ps(rows[1].getAsRegister(),rows[3].getAsRegister(),_MM_SHUFFLE(3,1,3,1))) - - - vectorSIMDf(_mm_shuffle_ps(rows[0].getAsRegister(),rows[2].getAsRegister(),_MM_SHUFFLE(3,1,3,1)))* - 
vectorSIMDf(_mm_shuffle_ps(rows[1].getAsRegister(),rows[3].getAsRegister(),_MM_SHUFFLE(2,0,2,0))); - - auto detA = allDets.xxxx(); - auto detB = allDets.yyyy(); - auto detC = allDets.zzzz(); - auto detD = allDets.wwww(); - - // https://lxjk.github.io/2017/09/03/Fast-4x4-Matrix-Inverse-with-SSE-SIMD-Explained.html - auto D_C = mat2adjmul(D, C); - // A#B - auto A_B = mat2adjmul(A, B); - // X# = |D|A - B(D#C) - auto X_ = detD*A - mat2mul(B, D_C); - // W# = |A|D - C(A#B) - auto W_ = detA*D - mat2mul(C, A_B); - - // |M| = |A|*|D| + ... (continue later) - auto detM = detA*detD; - - // Y# = |B|C - D(A#B)# - auto Y_ = detB*C - mat2muladj(D, A_B); - // Z# = |C|B - A(D#C)# - auto Z_ = detC*B - mat2muladj(A, D_C); - - // |M| = |A|*|D| + |B|*|C| ... (continue later) - detM += detB*detC; - - // tr((A#B)(D#C)) - __m128 tr = (A_B*D_C.xzyw()).getAsRegister(); - tr = _mm_hadd_ps(tr, tr); - tr = _mm_hadd_ps(tr, tr); - // |M| = |A|*|D| + |B|*|C| - tr((A#B)(D#C) - detM -= tr; - - if (core::iszero(detM.x, FLT_MIN)) - return false; - - vectorSIMDf rDetM; - - // (1/|M|, -1/|M|, -1/|M|, 1/|M|) - if constexpr (precision == E_MATRIX_INVERSE_PRECISION::EMIP_FAST_RECIPROCAL) - rDetM = vectorSIMDf(1.f, -1.f, -1.f, 1.f)*core::reciprocal(detM); - else if constexpr (precision == E_MATRIX_INVERSE_PRECISION::EMIP_32BIT) - rDetM = vectorSIMDf(1.f, -1.f, -1.f, 1.f).preciseDivision(detM); - - X_ *= rDetM; - Y_ *= rDetM; - Z_ *= rDetM; - W_ *= rDetM; - - // apply adjugate and store, here we combine adjugate shuffle and store shuffle - _out.rows[0] = _mm_shuffle_ps(X_.getAsRegister(), Y_.getAsRegister(), _MM_SHUFFLE(1, 3, 1, 3)); - _out.rows[1] = _mm_shuffle_ps(X_.getAsRegister(), Y_.getAsRegister(), _MM_SHUFFLE(0, 2, 0, 2)); - _out.rows[2] = _mm_shuffle_ps(Z_.getAsRegister(), W_.getAsRegister(), _MM_SHUFFLE(1, 3, 1, 3)); - _out.rows[3] = _mm_shuffle_ps(Z_.getAsRegister(), W_.getAsRegister(), _MM_SHUFFLE(0, 2, 0, 2)); - - return true; - } - } - - inline vectorSIMDf sub3x3TransformVect(const vectorSIMDf& 
_in) const; - - inline void transformVect(vectorSIMDf& _out, const vectorSIMDf& _in) const; - inline void transformVect(vectorSIMDf& _vector) const - { - transformVect(_vector, _vector); - } - - inline void translateVect(vectorSIMDf& _vect) const - { - _vect += getTranslation(); - } - - bool isBoxInFrustum(const aabbox3d& bbox); - - bool perspectiveTransformVect(core::vectorSIMDf& inOutVec) - { - transformVect(inOutVec); - const bool inFront = inOutVec[3] > 0.f; - inOutVec /= inOutVec.wwww(); - return inFront; - } - - core::vector2di fragCoordTransformVect(const core::vectorSIMDf& _in, const core::dimension2du& viewportDimensions) - { - core::vectorSIMDf pos(_in); - pos.w = 1.f; - if (perspectiveTransformVect(pos)) - core::vector2di(-0x80000000, -0x80000000); - - pos[0] *= 0.5f; - pos[1] *= 0.5f; - pos[0] += 0.5f; - pos[1] += 0.5f; - - return core::vector2di(pos[0] * float(viewportDimensions.Width), pos[1] * float(viewportDimensions.Height)); - } - - static inline matrix4SIMD buildProjectionMatrixPerspectiveFovRH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar); - static inline matrix4SIMD buildProjectionMatrixPerspectiveFovLH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar); - - static inline matrix4SIMD buildProjectionMatrixOrthoRH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar); - static inline matrix4SIMD buildProjectionMatrixOrthoLH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar); - - //! Access by row - inline const vectorSIMDf& operator[](size_t _rown) const { return rows[_rown]; } - //! Access by row - inline vectorSIMDf& operator[](size_t _rown) { return rows[_rown]; } - - private: - //! 
TODO: implement a dvec<2> - inline __m128d halfRowAsDouble(size_t _n, bool _firstHalf) const; - static inline __m128d concat64_helper(const __m128d& _a0, const __m128d& _a1, const matrix4SIMD& _mtx, bool _firstHalf); -}; - -inline matrix4SIMD operator*(float _scalar, const matrix4SIMD& _mtx) -{ - return _mtx * _scalar; -} - -inline matrix4SIMD concatenateBFollowedByA(const matrix4SIMD& _a, const matrix4SIMD& _b) -{ - return matrix4SIMD::concatenateBFollowedByA(_a, _b); -} -/* -inline matrix4SIMD concatenateBFollowedByAPrecisely(const matrix4SIMD& _a, const matrix4SIMD& _b) -{ - return matrix4SIMD::concatenateBFollowedByAPrecisely(_a, _b); -} -*/ - - -}} // nbl::core - -#endif diff --git a/include/matrix4SIMD_impl.h b/include/matrix4SIMD_impl.h deleted file mode 100644 index 02484e7a4c..0000000000 --- a/include/matrix4SIMD_impl.h +++ /dev/null @@ -1,299 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#ifndef __NBL_MATRIX4SIMD_IMPL_H_INCLUDED__ -#define __NBL_MATRIX4SIMD_IMPL_H_INCLUDED__ - -#include "matrix4SIMD.h" -#include "nbl/core/math/glslFunctions.tcc" -#include "aabbox3d.h" - -namespace nbl -{ -namespace core -{ - - -inline bool matrix4SIMD::operator!=(const matrix4SIMD& _other) const -{ - for (size_t i = 0u; i < VectorCount; ++i) - if ((rows[i] != _other.rows[i]).any()) - return true; - return false; -} - -inline matrix4SIMD& matrix4SIMD::operator+=(const matrix4SIMD& _other) -{ - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] += _other.rows[i]; - return *this; -} - -inline matrix4SIMD& matrix4SIMD::operator-=(const matrix4SIMD& _other) -{ - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] -= _other.rows[i]; - return *this; -} - -inline matrix4SIMD& matrix4SIMD::operator*=(float _scalar) -{ - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] *= _scalar; - return *this; -} - -inline bool 
matrix4SIMD::isIdentity(float _tolerance) const -{ - return core::equals(*this, matrix4SIMD(), core::ROUNDING_ERROR()); -} - -#ifdef __NBL_COMPILE_WITH_SSE3 -#define BROADCAST32(fpx) _MM_SHUFFLE(fpx, fpx, fpx, fpx) -#define BUILD_MASKF(_x_, _y_, _z_, _w_) _mm_setr_epi32(_x_*0xffffffff, _y_*0xffffffff, _z_*0xffffffff, _w_*0xffffffff) -inline matrix4SIMD matrix4SIMD::concatenateBFollowedByA(const matrix4SIMD& _a, const matrix4SIMD& _b) -{ - auto calcRow = [](const __m128& _row, const matrix4SIMD& _mtx) - { - __m128 r0 = _mtx.rows[0].getAsRegister(); - __m128 r1 = _mtx.rows[1].getAsRegister(); - __m128 r2 = _mtx.rows[2].getAsRegister(); - __m128 r3 = _mtx.rows[3].getAsRegister(); - - __m128 res; - res = _mm_mul_ps(_mm_shuffle_ps(_row, _row, BROADCAST32(0)), r0); - res = _mm_add_ps(res, _mm_mul_ps(_mm_shuffle_ps(_row, _row, BROADCAST32(1)), r1)); - res = _mm_add_ps(res, _mm_mul_ps(_mm_shuffle_ps(_row, _row, BROADCAST32(2)), r2)); - res = _mm_add_ps(res, _mm_mul_ps(_mm_shuffle_ps(_row, _row, BROADCAST32(3)), r3)); - return res; - }; - - matrix4SIMD r; - for (size_t i = 0u; i < 4u; ++i) - r.rows[i] = calcRow(_a.rows[i].getAsRegister(), _b); - - return r; -} -inline matrix4SIMD matrix4SIMD::concatenateBFollowedByAPrecisely(const matrix4SIMD& _a, const matrix4SIMD& _b) -{ - matrix4SIMD out; - - __m128i mask0011 = BUILD_MASKF(0, 0, 1, 1); - __m128 second; - - { - __m128d r00 = _a.halfRowAsDouble(0u, true); - __m128d r01 = _a.halfRowAsDouble(0u, false); - second = _mm_cvtpd_ps(concat64_helper(r00, r01, _b, false)); - out.rows[0] = vectorSIMDf(_mm_cvtpd_ps(concat64_helper(r00, r01, _b, true))) | _mm_castps_si128((vectorSIMDf(_mm_movelh_ps(second, second)) & mask0011).getAsRegister()); - } - - { - __m128d r10 = _a.halfRowAsDouble(1u, true); - __m128d r11 = _a.halfRowAsDouble(1u, false); - second = _mm_cvtpd_ps(concat64_helper(r10, r11, _b, false)); - out.rows[1] = vectorSIMDf(_mm_cvtpd_ps(concat64_helper(r10, r11, _b, true))) | 
_mm_castps_si128((vectorSIMDf(_mm_movelh_ps(second, second)) & mask0011).getAsRegister()); - } - - { - __m128d r20 = _a.halfRowAsDouble(2u, true); - __m128d r21 = _a.halfRowAsDouble(2u, false); - second = _mm_cvtpd_ps(concat64_helper(r20, r21, _b, false)); - out.rows[2] = vectorSIMDf(_mm_cvtpd_ps(concat64_helper(r20, r21, _b, true))) | _mm_castps_si128((vectorSIMDf(_mm_movelh_ps(second, second)) & mask0011).getAsRegister()); - } - - { - __m128d r30 = _a.halfRowAsDouble(3u, true); - __m128d r31 = _a.halfRowAsDouble(3u, false); - second = _mm_cvtpd_ps(concat64_helper(r30, r31, _b, false)); - out.rows[3] = vectorSIMDf(_mm_cvtpd_ps(concat64_helper(r30, r31, _b, true))) | _mm_castps_si128((vectorSIMDf(_mm_movelh_ps(second, second)) & mask0011).getAsRegister()); - } - - return out; -} - -inline matrix4SIMD& matrix4SIMD::setScale(const core::vectorSIMDf& _scale) -{ - const __m128i mask0001 = BUILD_MASKF(0, 0, 0, 1); - - rows[0] = (_scale & BUILD_MASKF(1, 0, 0, 0)) | _mm_castps_si128((rows[0] & mask0001).getAsRegister()); - rows[1] = (_scale & BUILD_MASKF(0, 1, 0, 0)) | _mm_castps_si128((rows[1] & mask0001).getAsRegister()); - rows[2] = (_scale & BUILD_MASKF(0, 0, 1, 0)) | _mm_castps_si128((rows[2] & mask0001).getAsRegister()); - rows[3] = vectorSIMDf(0.f, 0.f, 0.f, 1.f); - - return *this; -} - -//! Returns last column of the matrix. -inline vectorSIMDf matrix4SIMD::getTranslation() const -{ - __m128 tmp1 = _mm_unpackhi_ps(rows[0].getAsRegister(), rows[1].getAsRegister()); // (0z,1z,0w,1w) - __m128 tmp2 = _mm_unpackhi_ps(rows[2].getAsRegister(), rows[3].getAsRegister()); // (2z,3z,2w,3w) - __m128 col3 = _mm_movehl_ps(tmp1, tmp2);// (0w,1w,2w,3w) - - return col3; -} -//! Returns translation part of the matrix (w component is always 0). 
-inline vectorSIMDf matrix4SIMD::getTranslation3D() const -{ - __m128 tmp1 = _mm_unpackhi_ps(rows[0].getAsRegister(), rows[1].getAsRegister()); // (0z,1z,0w,1w) - __m128 tmp2 = _mm_unpackhi_ps(rows[2].getAsRegister(), _mm_setzero_ps()); // (2z,0,2w,0) - __m128 transl = _mm_movehl_ps(tmp1, tmp2);// (0w,1w,2w,0) - - return transl; -} - -inline vectorSIMDf matrix4SIMD::sub3x3TransformVect(const vectorSIMDf& _in) const -{ - matrix4SIMD cp{*this}; - vectorSIMDf out = _in & BUILD_MASKF(1, 1, 1, 0); - transformVect(out); - return out; -} - -inline void matrix4SIMD::transformVect(vectorSIMDf& _out, const vectorSIMDf& _in) const -{ - vectorSIMDf r[4]; - for (size_t i = 0u; i < VectorCount; ++i) - r[i] = rows[i] * _in; - - _out = _mm_hadd_ps( - _mm_hadd_ps(r[0].getAsRegister(), r[1].getAsRegister()), - _mm_hadd_ps(r[2].getAsRegister(), r[3].getAsRegister()) - ); -} - -inline matrix4SIMD matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar) -{ - const float h = core::reciprocal(tanf(fieldOfViewRadians*0.5f)); - _NBL_DEBUG_BREAK_IF(aspectRatio == 0.f); //division by zero - const float w = h / aspectRatio; - - _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero - - matrix4SIMD m; - m.rows[0] = vectorSIMDf(w, 0.f, 0.f, 0.f); - m.rows[1] = vectorSIMDf(0.f, -h, 0.f, 0.f); - m.rows[2] = vectorSIMDf(0.f, 0.f, -zFar/(zFar-zNear), -zNear*zFar/(zFar-zNear)); - m.rows[3] = vectorSIMDf(0.f, 0.f, -1.f, 0.f); - - return m; -} -inline matrix4SIMD matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar) -{ - const float h = core::reciprocal(tanf(fieldOfViewRadians*0.5f)); - _NBL_DEBUG_BREAK_IF(aspectRatio == 0.f); //division by zero - const float w = h / aspectRatio; - - _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero - - matrix4SIMD m; - m.rows[0] = vectorSIMDf(w, 0.f, 0.f, 0.f); - m.rows[1] = vectorSIMDf(0.f, -h, 0.f, 0.f); - m.rows[2] = 
vectorSIMDf(0.f, 0.f, zFar/(zFar-zNear), -zNear*zFar/(zFar-zNear)); - m.rows[3] = vectorSIMDf(0.f, 0.f, 1.f, 0.f); - - return m; -} - -inline matrix4SIMD matrix4SIMD::buildProjectionMatrixOrthoRH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar) -{ - _NBL_DEBUG_BREAK_IF(widthOfViewVolume == 0.f); //division by zero - _NBL_DEBUG_BREAK_IF(heightOfViewVolume == 0.f); //division by zero - _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero - - matrix4SIMD m; - m.rows[0] = vectorSIMDf(2.f/widthOfViewVolume, 0.f, 0.f, 0.f); - m.rows[1] = vectorSIMDf(0.f, -2.f/heightOfViewVolume, 0.f, 0.f); - m.rows[2] = vectorSIMDf(0.f, 0.f, -1.f/(zFar-zNear), -zNear/(zFar-zNear)); - m.rows[3] = vectorSIMDf(0.f, 0.f, 0.f, 1.f); - - return m; -} -inline matrix4SIMD matrix4SIMD::buildProjectionMatrixOrthoLH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar) -{ - _NBL_DEBUG_BREAK_IF(widthOfViewVolume == 0.f); //division by zero - _NBL_DEBUG_BREAK_IF(heightOfViewVolume == 0.f); //division by zero - _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero - - matrix4SIMD m; - m.rows[0] = vectorSIMDf(2.f/widthOfViewVolume, 0.f, 0.f, 0.f); - m.rows[1] = vectorSIMDf(0.f, -2.f/heightOfViewVolume, 0.f, 0.f); - m.rows[2] = vectorSIMDf(0.f, 0.f, 1.f/(zFar-zNear), -zNear/(zFar-zNear)); - m.rows[3] = vectorSIMDf(0.f, 0.f, 0.f, 1.f); - - return m; -} - - - -inline __m128d matrix4SIMD::halfRowAsDouble(size_t _n, bool _firstHalf) const -{ - return _mm_cvtps_pd(_firstHalf ? 
rows[_n].xyxx().getAsRegister() : rows[_n].zwxx().getAsRegister()); -} -inline __m128d matrix4SIMD::concat64_helper(const __m128d& _a0, const __m128d& _a1, const matrix4SIMD& _mtx, bool _firstHalf) -{ - __m128d r0 = _mtx.halfRowAsDouble(0u, _firstHalf); - __m128d r1 = _mtx.halfRowAsDouble(1u, _firstHalf); - __m128d r2 = _mtx.halfRowAsDouble(2u, _firstHalf); - __m128d r3 = _mtx.halfRowAsDouble(3u, _firstHalf); - - //const __m128d mask01 = _mm_castsi128_pd(_mm_setr_epi32(0, 0, 0xffffffff, 0xffffffff)); - - __m128d res; - res = _mm_mul_pd(_mm_shuffle_pd(_a0, _a0, 0), r0); - res = _mm_add_pd(res, _mm_mul_pd(_mm_shuffle_pd(_a0, _a0, 3/*0b11*/), r1)); - res = _mm_add_pd(res, _mm_mul_pd(_mm_shuffle_pd(_a1, _a1, 0), r2)); - res = _mm_add_pd(res, _mm_mul_pd(_mm_shuffle_pd(_a1, _a1, 3/*0b11*/), r3)); - return res; -} - -#undef BUILD_MASKF -#undef BROADCAST32 -#else -#error "no implementation" -#endif - -inline bool matrix4SIMD::isBoxInFrustum(const aabbox3d& bbox) -{ - vectorSIMDf MinEdge, MaxEdge; - MinEdge.set(bbox.MinEdge); - MaxEdge.set(bbox.MaxEdge); - MinEdge.w = 1.f; - MaxEdge.w = 1.f; - - - auto getClosestDP = [&MinEdge,&MaxEdge](const vectorSIMDf& toDot) -> float - { - return dot(mix(MaxEdge,MinEdge,toDot #include "nbl/builtin/hlsl/cpp_compat/matrix.hlsl" +#include "nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl" #include "nbl/asset/ECommonEnums.h" #include "nbl/asset/IDescriptor.h" @@ -181,7 +182,7 @@ class ITopLevelAccelerationStructure : public AccelerationStructure FORCE_OPACITY_MICROMAP_2_STATE_BIT = 0x1u<<4u, FORCE_DISABLE_OPACITY_MICROMAPS_BIT = 0x1u<<5u, }; - // Note: `core::matrix3x4SIMD` is equvalent to VkTransformMatrixKHR, 4x3 row_major matrix + // Note: `hlsl::float32_t3x4` is equvalent to VkTransformMatrixKHR, 4x3 row_major matrix template struct Instance final { @@ -197,18 +198,18 @@ class ITopLevelAccelerationStructure : public AccelerationStructure template struct StaticInstance final { - core::matrix3x4SIMD transform = 
core::matrix3x4SIMD(); + hlsl::float32_t3x4 transform = hlsl::diagonal(1.0f); Instance base = {}; }; template struct MatrixMotionInstance final { - core::matrix3x4SIMD transform[2] = {core::matrix3x4SIMD(),core::matrix3x4SIMD()}; + hlsl::float32_t3x4 transform[2] = { hlsl::diagonal(1.0f), hlsl::diagonal(1.0f) }; Instance base = {}; }; struct SRT { - // TODO: some operators to convert back and forth from `core::matrix3x4SIMD + // TODO: some operators to convert back and forth from `hlsl::float32_t3x4 float sx; float a; diff --git a/include/nbl/asset/IAnimationLibrary.h b/include/nbl/asset/IAnimationLibrary.h index 9665349103..d650cb25d9 100644 --- a/include/nbl/asset/IAnimationLibrary.h +++ b/include/nbl/asset/IAnimationLibrary.h @@ -34,7 +34,7 @@ class IAnimationLibrary : public virtual core::IReferenceCounted translation[2] = translation[1] = translation[0] = 0.f; quat = core::vectorSIMDu32(128u,128u,128u,255u); // should be (0,0,0,1) encoded } - Keyframe(const core::vectorSIMDf& _scale, const core::quaternion& _quat, const CQuantQuaternionCache* quantCache, const core::vectorSIMDf& _translation) + Keyframe(const core::vectorSIMDf& _scale, const hlsl::quaternion& _quat, const CQuantQuaternionCache* quantCache, const core::vectorSIMDf& _translation) { std::copy(_translation.pointer,_translation.pointer+3,translation); quat = quantCache->template quantize(_quat); @@ -42,13 +42,13 @@ class IAnimationLibrary : public virtual core::IReferenceCounted //scale = ; } - inline core::quaternion getRotation() const + inline hlsl::quaternion getRotation() const { const void* _pix[4] = {&quat,nullptr,nullptr,nullptr}; double out[4]; decodePixels(_pix,out,0u,0u); auto q = core::normalize(core::vectorSIMDf(out[0],out[1],out[2],out[3])); - return reinterpret_cast(&q)[0]; + return reinterpret_cast*>(&q)[0]; } inline core::vectorSIMDf getScale() const diff --git a/include/nbl/asset/ICPUMeshBuffer.h b/include/nbl/asset/ICPUMeshBuffer.h index 532b622090..c6fce408ab 100644 --- 
a/include/nbl/asset/ICPUMeshBuffer.h +++ b/include/nbl/asset/ICPUMeshBuffer.h @@ -582,18 +582,18 @@ class ICPUMeshBuffer final : public IMeshBuffer(m_inverseBindPoseBufferBinding.buffer->getPointer()); - return reinterpret_cast(ptr+m_inverseBindPoseBufferBinding.offset); + return reinterpret_cast(ptr+m_inverseBindPoseBufferBinding.offset); } - inline core::matrix3x4SIMD* getInverseBindPoses() + inline hlsl::float32_t3x4* getInverseBindPoses() { assert(isMutable()); - return const_cast(const_cast(this)->getInverseBindPoses()); + return const_cast(const_cast(this)->getInverseBindPoses()); } //! diff --git a/include/nbl/asset/ICPUSkeleton.h b/include/nbl/asset/ICPUSkeleton.h index 6f1c576ed8..53d7e66be0 100644 --- a/include/nbl/asset/ICPUSkeleton.h +++ b/include/nbl/asset/ICPUSkeleton.h @@ -42,15 +42,15 @@ class ICPUSkeleton final : public ISkeleton, public IAsset } //! - inline const core::matrix3x4SIMD& getDefaultTransformMatrix(base_t::joint_id_t jointID) const + inline const hlsl::float32_t3x4& getDefaultTransformMatrix(base_t::joint_id_t jointID) const { const uint8_t* ptr = reinterpret_cast(m_defaultTransforms.buffer->getPointer()); - return reinterpret_cast(ptr+m_defaultTransforms.offset)[jointID]; + return reinterpret_cast(ptr+m_defaultTransforms.offset)[jointID]; } - inline core::matrix3x4SIMD& getDefaultTransformMatrix(base_t::joint_id_t jointID) + inline hlsl::float32_t3x4& getDefaultTransformMatrix(base_t::joint_id_t jointID) { assert(isMutable()); - return const_cast(const_cast(this)->getDefaultTransformMatrix(jointID)); + return const_cast(const_cast(this)->getDefaultTransformMatrix(jointID)); } //! 
diff --git a/include/nbl/asset/IMeshBuffer.h b/include/nbl/asset/IMeshBuffer.h index a4f1b895dc..c68fe3408f 100644 --- a/include/nbl/asset/IMeshBuffer.h +++ b/include/nbl/asset/IMeshBuffer.h @@ -210,7 +210,7 @@ class IMeshBuffer : public virtual core::IReferenceCounted virtual inline bool isSkinned() const { return jointCount>0u && maxJointsPerVx>0u && m_inverseBindPoseBufferBinding.buffer && - m_inverseBindPoseBufferBinding.offset+jointCount*sizeof(core::matrix3x4SIMD)<=m_inverseBindPoseBufferBinding.buffer->getSize(); + m_inverseBindPoseBufferBinding.offset+jointCount*sizeof(hlsl::float32_t3x4)<=m_inverseBindPoseBufferBinding.buffer->getSize(); } //! @@ -227,7 +227,7 @@ class IMeshBuffer : public virtual core::IReferenceCounted if (_maxJointsPerVx==0u || _maxJointsPerVx>4u) return false; - if (_inverseBindPoseBufferBinding.offset+_jointCount*sizeof(core::matrix3x4SIMD)>_inverseBindPoseBufferBinding.buffer->getSize()) + if (_inverseBindPoseBufferBinding.offset+_jointCount*sizeof(hlsl::float32_t3x4)>_inverseBindPoseBufferBinding.buffer->getSize()) return false; m_inverseBindPoseBufferBinding = std::move(_inverseBindPoseBufferBinding); diff --git a/include/nbl/asset/ISkeleton.h b/include/nbl/asset/ISkeleton.h index 7960ca4eef..03ba3af4ea 100644 --- a/include/nbl/asset/ISkeleton.h +++ b/include/nbl/asset/ISkeleton.h @@ -62,7 +62,7 @@ class ISkeleton : public virtual core::IReferenceCounted return; assert(m_parentJointIDs.buffer->getSize()>=m_parentJointIDs.offset+sizeof(joint_id_t)*m_jointCount); - assert(m_defaultTransforms.buffer->getSize()>=m_defaultTransforms.offset+sizeof(core::matrix3x4SIMD)*m_jointCount); + assert(m_defaultTransforms.buffer->getSize()>=m_defaultTransforms.offset+sizeof(hlsl::float32_t3x4)*m_jointCount); } virtual ~ISkeleton() { diff --git a/include/nbl/asset/asset_utils.h b/include/nbl/asset/asset_utils.h index 8e4e35a733..84c8a8df45 100644 --- a/include/nbl/asset/asset_utils.h +++ b/include/nbl/asset/asset_utils.h @@ -31,7 +31,7 @@ inline 
void fillBufferWithDeadBeef(ICPUBuffer* _buf) #include "nbl/nblpack.h" //! Designed for use with interface blocks declared with `layout (row_major, std140)` -// TODO: change members to core::matrix3x4SIMD and core::matrix4SIMD +// TODO: change members to hlsl::float32_t3x4 and hlsl::float32_t4x4 struct SBasicViewParameters { float MVP[4*4]; diff --git a/include/nbl/asset/metadata/IMeshMetadata.h b/include/nbl/asset/metadata/IMeshMetadata.h index 5ce3c12980..25f72e05c0 100644 --- a/include/nbl/asset/metadata/IMeshMetadata.h +++ b/include/nbl/asset/metadata/IMeshMetadata.h @@ -18,7 +18,7 @@ class IMeshMetadata : public core::Interface public: struct SInstance { - core::matrix3x4SIMD worldTform; + hlsl::float32_t3x4 worldTform; }; core::SRange m_instances; diff --git a/include/nbl/asset/metadata/IRenderpassIndependentPipelineMetadata.h b/include/nbl/asset/metadata/IRenderpassIndependentPipelineMetadata.h index 416c04823b..7d0b63a141 100644 --- a/include/nbl/asset/metadata/IRenderpassIndependentPipelineMetadata.h +++ b/include/nbl/asset/metadata/IRenderpassIndependentPipelineMetadata.h @@ -140,35 +140,35 @@ class IRenderpassIndependentPipelineMetadata : public core::Interface //! A non exhaustive list of commonly used shader input semantics enum E_COMMON_SHADER_INPUT { - //! core::matrix4SIMD giving the total projection onto the screen from model-space coordinates + //! hlsl::float32_t4x4 giving the total projection onto the screen from model-space coordinates ECSI_WORLD_VIEW_PROJ, - //! core::matrix4SIMD giving the mapping from view-space into the pre-divide NDC space + //! hlsl::float32_t4x4 giving the mapping from view-space into the pre-divide NDC space ECSI_PROJ, - //! core::matrix3x4SIMD giving the view-space transformation from model-space coordinates + //! hlsl::float32_t3x4 giving the view-space transformation from model-space coordinates ECSI_WORLD_VIEW, - //! core::matrix3x4SIMD giving the view-space transformation from world-space + //! 
hlsl::float32_t3x4 giving the view-space transformation from world-space ECSI_VIEW, - //! core::matrix3x4SIMD giving the world-space transformation from model-space (last column is object world-space-position) + //! hlsl::float32_t3x4 giving the world-space transformation from model-space (last column is object world-space-position) ECSI_WORLD, - //! core::matrix4SIMD giving the total projection to model-space coordinates from screen-space + //! hlsl::float32_t4x4 giving the total projection to model-space coordinates from screen-space ECSI_WORLD_VIEW_PROJ_INVERSE, - //! core::matrix4SIMD giving the mapping from the pre-divide NDC space into view-space + //! hlsl::float32_t4x4 giving the mapping from the pre-divide NDC space into view-space ECSI_PROJ_INVERSE, - //! core::matrix3x4SIMD giving the model-space transformation from view-space coordinates + //! hlsl::float32_t3x4 giving the model-space transformation from view-space coordinates ECSI_WORLD_VIEW_INVERSE, - //! core::matrix3x4SIMD giving the world-space transformation from view-space (last column is camera world-space-position) + //! hlsl::float32_t3x4 giving the world-space transformation from view-space (last column is camera world-space-position) ECSI_VIEW_INVERSE, - //! core::matrix3x4SIMD giving the model-space transformation from world-space + //! hlsl::float32_t3x4 giving the model-space transformation from world-space ECSI_WORLD_INVERSE, - //! transpose of core::matrix4SIMD giving the total projection to model-space coordinates from screen-space + //! transpose of hlsl::float32_t4x4 giving the total projection to model-space coordinates from screen-space ECSI_WORLD_VIEW_PROJ_INVERSE_TRANSPOSE, - //! transpose of core::matrix4SIMD giving the mapping from the pre-divide NDC space into view-space + //! transpose of hlsl::float32_t4x4 giving the mapping from the pre-divide NDC space into view-space ECSI_PROJ_INVERSE_TRANSPOSE, - //! 
transpose of core::matrix3x4SIMD giving the model-space transformation from view-space coordinates (upper 3x3 matrix can be used instead of `gl_NormalMatrix`) + //! transpose of hlsl::float32_t3x4 giving the model-space transformation from view-space coordinates (upper 3x3 matrix can be used instead of `gl_NormalMatrix`) ECSI_WORLD_VIEW_INVERSE_TRANSPOSE, - //! transpose of core::matrix3x4SIMD giving the world-space transformation from view-space (last row is camera world-space-position) + //! transpose of hlsl::float32_t3x4 giving the world-space transformation from view-space (last row is camera world-space-position) ECSI_VIEW_INVERSE_TRANSPOSE, - //! transpose of core::matrix3x4SIMD giving the model-space transformation from world-space (upper 3x3 matrix can transform model space normals to world space) + //! transpose of hlsl::float32_t3x4 giving the model-space transformation from world-space (upper 3x3 matrix can transform model space normals to world space) ECSI_WORLD_INVERSE_TRANSPOSE, //! 
a simple non-filtered environment map as a cubemap diff --git a/include/nbl/asset/utils/CQuantQuaternionCache.h b/include/nbl/asset/utils/CQuantQuaternionCache.h index 8e46dffb0a..a51549d24d 100644 --- a/include/nbl/asset/utils/CQuantQuaternionCache.h +++ b/include/nbl/asset/utils/CQuantQuaternionCache.h @@ -60,7 +60,7 @@ class CQuantQuaternionCache : public CDirQuantCacheBase - value_type_t quantize(const core::quaternion& quat) + value_type_t quantize(const hlsl::quaternion& quat) { return Base::quantize<4u,CacheFormat>(reinterpret_cast(quat)); } diff --git a/include/nbl/asset/utils/IMeshManipulator.h b/include/nbl/asset/utils/IMeshManipulator.h index f84d85c75d..6aff59200c 100644 --- a/include/nbl/asset/utils/IMeshManipulator.h +++ b/include/nbl/asset/utils/IMeshManipulator.h @@ -18,6 +18,9 @@ #include "nbl/asset/utils/CQuantNormalCache.h" #include "nbl/asset/utils/CQuantQuaternionCache.h" +#include +#include + namespace nbl { namespace asset @@ -351,7 +354,7 @@ class NBL_API2 IMeshManipulator : public virtual core::IReferenceCounted static float DistanceToLine(core::vectorSIMDf P0, core::vectorSIMDf P1, core::vectorSIMDf InPoint); static float DistanceToPlane(core::vectorSIMDf InPoint, core::vectorSIMDf PlanePoint, core::vectorSIMDf PlaneNormal); - static core::matrix3x4SIMD calculateOBB(const nbl::asset::ICPUMeshBuffer* meshbuffer); + static hlsl::float32_t3x4 calculateOBB(const nbl::asset::ICPUMeshBuffer* meshbuffer); //! 
Calculates bounding box of the meshbuffer static inline core::aabbox3df calculateBoundingBox( @@ -408,8 +411,8 @@ class NBL_API2 IMeshManipulator : public virtual core::IReferenceCounted if (jointIDFLT_MIN) { - core::vectorSIMDf boneSpacePos; - inverseBindPoses[jointID].transformVect(boneSpacePos,pos); + const hlsl::float32_t4x4 transformationMatrix = hlsl::getMatrix3x4As4x4(inverseBindPoses[jointID]); + core::vectorSIMDf boneSpacePos = hlsl::transformVector(transformationMatrix, pos); jointAABBs[jointID].addInternalPoint(boneSpacePos.getAsVector3df()); noJointInfluence = false; } diff --git a/include/nbl/builtin/glsl/math/quaternions.glsl b/include/nbl/builtin/glsl/math/quaternions.glsl index 7dc6ca0279..d94d48ecc9 100644 --- a/include/nbl/builtin/glsl/math/quaternions.glsl +++ b/include/nbl/builtin/glsl/math/quaternions.glsl @@ -1,18 +1,13 @@ #ifndef _NBL_BUILTIN_GLSL_MATH_QUATERNIONS_INCLUDED_ #define _NBL_BUILTIN_GLSL_MATH_QUATERNIONS_INCLUDED_ - - #include - - struct nbl_glsl_quaternion_t { vec4 data; }; - nbl_glsl_quaternion_t nbl_glsl_quaternion_t_constructFromTruncated(in vec3 first3Components) { nbl_glsl_quaternion_t quat; diff --git a/include/nbl/builtin/hlsl/bitreverse.hlsl b/include/nbl/builtin/hlsl/bitreverse.hlsl deleted file mode 100644 index cea9268f45..0000000000 --- a/include/nbl/builtin/hlsl/bitreverse.hlsl +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef _NBL_BUILTIN_HLSL_BITREVERSE_INCLUDED_ -#define _NBL_BUILTIN_HLSL_BITREVERSE_INCLUDED_ - - -#include - -namespace nbl -{ -namespace hlsl -{ - -template&& Bits <= sizeof(T) * 8) -/** -* @brief Takes the binary representation of `value` as a string of `Bits` bits and returns a value of the same type resulting from reversing the string -* -* @tparam T Type of the value to operate on. -* @tparam Bits The length of the string of bits used to represent `value`. -* -* @param [in] value The value to bitreverse. 
-*/ -T bitReverseAs(T value) -{ - return bitReverse(value) >> promote >(scalar_type_t (sizeof(T) * 8 - Bits)); -} - -template) -/** -* @brief Takes the binary representation of `value` and returns a value of the same type resulting from reversing the string of bits as if it was `bits` long. -* Keep in mind `bits` cannot exceed `8 * sizeof(T)`. -* -* @tparam T type of the value to operate on. -* -* @param [in] value The value to bitreverse. -* @param [in] bits The length of the string of bits used to represent `value`. -*/ -T bitReverseAs(T value, uint16_t bits) -{ - return bitReverse(value) >> promote >(scalar_type_t (sizeof(T) * 8 - bits)); -} - - -} -} - - - -#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/camera/view_matrix.hlsl b/include/nbl/builtin/hlsl/camera/view_matrix.hlsl new file mode 100644 index 0000000000..27b2c63239 --- /dev/null +++ b/include/nbl/builtin/hlsl/camera/view_matrix.hlsl @@ -0,0 +1,51 @@ +#ifndef _NBL_BUILTIN_HLSL_PROJECTION_INCLUDED_ +#define _NBL_BUILTIN_HLSL_PROJECTION_INCLUDED_ + +#include + +namespace nbl +{ +namespace hlsl +{ + +// /Arek: glm:: for normalize till dot product is fixed (ambiguity with glm namespace + linker issues) +template +inline matrix buildCameraLookAtMatrixLH( + const vector& position, + const vector& target, + const vector& upVector) +{ + const vector zaxis = hlsl::normalize(target - position); + const vector xaxis = hlsl::normalize(hlsl::cross(upVector, zaxis)); + const vector yaxis = hlsl::cross(zaxis, xaxis); + + matrix r; + r[0] = vector(xaxis, -hlsl::dot(xaxis, position)); + r[1] = vector(yaxis, -hlsl::dot(yaxis, position)); + r[2] = vector(zaxis, -hlsl::dot(zaxis, position)); + + return r; +} + +template +inline matrix buildCameraLookAtMatrixRH( + const vector& position, + const vector& target, + const vector& upVector) +{ + const vector zaxis = hlsl::normalize(position - target); + const vector xaxis = hlsl::normalize(hlsl::cross(upVector, zaxis)); + const vector yaxis = 
hlsl::cross(zaxis, xaxis); + + matrix r; + r[0] = vector(xaxis, -hlsl::dot(xaxis, position)); + r[1] = vector(yaxis, -hlsl::dot(yaxis, position)); + r[2] = vector(zaxis, -hlsl::dot(zaxis, position)); + + return r; +} + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/concepts.hlsl b/include/nbl/builtin/hlsl/concepts.hlsl index 29660b8b45..34c4c2c542 100644 --- a/include/nbl/builtin/hlsl/concepts.hlsl +++ b/include/nbl/builtin/hlsl/concepts.hlsl @@ -4,7 +4,6 @@ #ifndef _NBL_BUILTIN_HLSL_CONCEPTS_INCLUDED_ #define _NBL_BUILTIN_HLSL_CONCEPTS_INCLUDED_ - #include #include #include @@ -68,7 +67,7 @@ concept NBL_CONCEPT_NAME = requires BOOST_PP_EXPR_IF(LOCAL_PARAM_COUNT,(BOOST_PP // #define NBL_IMPL_CONCEPT_REQ_TYPE(...) typename __VA_ARGS__; #define NBL_IMPL_CONCEPT_REQ_EXPR(...) __VA_ARGS__; -#define NBL_IMPL_CONCEPT_REQ_EXPR_RET_TYPE(E,C,...) {E}; C; +#define NBL_IMPL_CONCEPT_REQ_EXPR_RET_TYPE(E,C,...) {E}; C; // #define NBL_IMPL_CONCEPT (NBL_IMPL_CONCEPT_REQ_TYPE,NBL_IMPL_CONCEPT_REQ_EXPR,NBL_IMPL_CONCEPT_REQ_EXPR_RET_TYPE) // @@ -77,56 +76,8 @@ concept NBL_CONCEPT_NAME = requires BOOST_PP_EXPR_IF(LOCAL_PARAM_COUNT,(BOOST_PP #define NBL_CONCEPT_END(SEQ) BOOST_PP_SEQ_FOR_EACH_I(NBL_IMPL_CONCEPT_END_DEF, DUMMY, SEQ) \ } - -#include - -// Alias some of the std concepts in nbl. As this is C++20 only, we don't need to use -// the macros here. -template -concept same_as = std::same_as; - -template -concept derived_from = std::derived_from; - -template -concept convertible_to = std::convertible_to; - -template -concept assignable_from = std::assignable_from; - -template -concept common_with = std::common_with; - -template -concept integral = std::integral; - -template -concept signed_integral = std::signed_integral; - -template -concept unsigned_integral = std::unsigned_integral; - -template -concept floating_point = std::floating_point; - - -// Some other useful concepts. 
- -template -concept any_of = (same_as || ...); - -template -concept scalar = floating_point || integral; - -template -concept vectorial = is_vector::value; - -template -concept matricial = is_matrix::value; - #else - // to define a concept using `concept Name = SomeContexprBoolCondition;` #define NBL_BOOL_CONCEPT NBL_CONSTEXPR bool @@ -144,7 +95,6 @@ concept matricial = is_matrix::value; // condition, use instead of the closing `>` of a function template #define NBL_FUNC_REQUIRES(...) ,::nbl::hlsl::enable_if_t<(__VA_ARGS__),bool> = true> - // #define NBL_CONCEPT_BEGIN(LOCAL_PARAM_COUNT) namespace BOOST_PP_CAT(__concept__,NBL_CONCEPT_NAME) \ { @@ -153,7 +103,7 @@ concept matricial = is_matrix::value; // #define NBL_IMPL_CONCEPT_REQ_TYPE(...) ::nbl::hlsl::make_void_t #define NBL_IMPL_CONCEPT_REQ_EXPR(...) ::nbl::hlsl::make_void_t -#define NBL_IMPL_CONCEPT_REQ_EXPR_RET_TYPE(E,C,...) ::nbl::hlsl::enable_if_t > +#define NBL_IMPL_CONCEPT_REQ_EXPR_RET_TYPE(E,C,...) ::nbl::hlsl::enable_if_t > // #define NBL_IMPL_CONCEPT_SFINAE (NBL_IMPL_CONCEPT_REQ_TYPE,NBL_IMPL_CONCEPT_REQ_EXPR,NBL_IMPL_CONCEPT_REQ_EXPR_RET_TYPE) // diff --git a/include/nbl/builtin/hlsl/concepts/core.hlsl b/include/nbl/builtin/hlsl/concepts/core.hlsl new file mode 100644 index 0000000000..4e20c645c8 --- /dev/null +++ b/include/nbl/builtin/hlsl/concepts/core.hlsl @@ -0,0 +1,83 @@ +// Copyright (C) 2024-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_BUILTIN_HLSL_CONCEPTS_CORE_HLSL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_CONCEPTS_CORE_HLSL_INCLUDED_ + + +#include +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace concepts +{ + +template +NBL_BOOL_CONCEPT same_as = is_same_v; + +template +NBL_BOOL_CONCEPT Integral = nbl::hlsl::is_integral_v; + +template +NBL_BOOL_CONCEPT SignedIntegral = nbl::hlsl::is_signed_v && nbl::hlsl::is_integral_v; + +template +NBL_BOOL_CONCEPT UnsignedIntegral = !nbl::hlsl::is_signed_v && ::nbl::hlsl::is_integral_v; + +template +NBL_BOOL_CONCEPT FloatingPoint = nbl::hlsl::is_floating_point_v; + +template +NBL_BOOL_CONCEPT Boolean = nbl::hlsl::is_same_v || (nbl::hlsl::is_vector_v && nbl::hlsl::is_same_v::scalar_type, bool>); + +template +NBL_BOOL_CONCEPT Scalar = nbl::hlsl::is_scalar_v; + +template +NBL_BOOL_CONCEPT IntegralScalar = nbl::hlsl::is_integral_v && nbl::hlsl::is_scalar_v; + +template +NBL_BOOL_CONCEPT SignedIntegralScalar = nbl::hlsl::is_signed_v && nbl::hlsl::is_integral_v && nbl::hlsl::is_scalar_v; + +template +NBL_BOOL_CONCEPT UnsignedIntegralScalar = !nbl::hlsl::is_signed_v && ::nbl::hlsl::is_integral_v && nbl::hlsl::is_scalar_v; + +template +NBL_BOOL_CONCEPT FloatingPointScalar = nbl::hlsl::is_floating_point_v && nbl::hlsl::is_scalar_v; + +// TODO: implement when hlsl::is_base_of is done +//#define NBL_CONCEPT_NAME DerivedFrom +// ... + +// TODO: implement when hlsl::is_convertible is done +//#define NBL_CONCEPT_NAME ConvertibleTo +// ... + +// TODO? +//#define NBL_CONCEPT_NAME AssignableFrom + +// TODO? +//template +//concept common_with = std::common_with; + +namespace impl +{ +template +struct is_emulating_floating_point_scalar +{ + NBL_CONSTEXPR_STATIC_INLINE bool value = FloatingPointScalar; +}; +} + +//!
Floating point types are native floating point types or types that imitate native floating point types (for example emulated_float64_t) +template +NBL_BOOL_CONCEPT FloatingPointLikeScalar = impl::is_emulating_floating_point_scalar::value; + +} +} +} +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/concepts/matrix.hlsl b/include/nbl/builtin/hlsl/concepts/matrix.hlsl new file mode 100644 index 0000000000..94659c823b --- /dev/null +++ b/include/nbl/builtin/hlsl/concepts/matrix.hlsl @@ -0,0 +1,27 @@ +// Copyright (C) 2024-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_BUILTIN_HLSL_CONCEPTS_MATRIX_HLSL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_CONCEPTS_MATRIX_HLSL_INCLUDED_ + + +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace concepts +{ + +template +NBL_BOOL_CONCEPT Matrix = is_matrix::value; + +template +NBL_BOOL_CONCEPT Matricial = matrix_traits::IsMatrix; + +} +} +} +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/concepts/vector.hlsl b/include/nbl/builtin/hlsl/concepts/vector.hlsl new file mode 100644 index 0000000000..edea37a183 --- /dev/null +++ b/include/nbl/builtin/hlsl/concepts/vector.hlsl @@ -0,0 +1,48 @@ +// Copyright (C) 2024-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_BUILTIN_HLSL_CONCEPTS_VECTOR_HLSL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_CONCEPTS_VECTOR_HLSL_INCLUDED_ + + +#include +#include +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace concepts +{ + +//! Concept for native vectors. 
+template +NBL_BOOL_CONCEPT Vector = is_vector::value; +template +NBL_BOOL_CONCEPT FloatingPointVector = concepts::Vector && concepts::FloatingPointScalar::scalar_type>; +template +NBL_BOOL_CONCEPT IntVector = concepts::Vector && (is_integral_v::scalar_type>); +template +NBL_BOOL_CONCEPT SignedIntVector = concepts::Vector && concepts::SignedIntegralScalar::scalar_type>; + +//! Concept for native vectors and vector like structs. +template +NBL_BOOL_CONCEPT Vectorial = vector_traits::IsVector; + +#include + +template +NBL_BOOL_CONCEPT FloatingPointVectorial = concepts::Vectorial && concepts::FloatingPointScalar::scalar_type>; +template +NBL_BOOL_CONCEPT FloatingPointLikeVectorial = concepts::Vectorial && concepts::FloatingPointLikeScalar::scalar_type>; +template +NBL_BOOL_CONCEPT IntVectorial = concepts::Vectorial && (is_integral_v::scalar_type>); +template +NBL_BOOL_CONCEPT SignedIntVectorial = concepts::Vectorial && concepts::SignedIntegralScalar::scalar_type>; + +} +} +} +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 7520acbd19..c9ebf7fcf2 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -6,6 +6,15 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace nbl { @@ -13,418 +22,746 @@ namespace hlsl { namespace cpp_compat_intrinsics_impl { -template -struct dot_helper -{ - using scalar_type = typename vector_traits::scalar_type; - static inline scalar_type dot_product(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) - { - static array_get getter; - scalar_type retval = getter(lhs, 0) * getter(rhs, 0); +template +struct dot_helper; +template +struct cross_helper; +template +struct clamp_helper; +template +struct find_msb_helper; +template +struct find_lsb_helper; 
+template +struct bitReverse_helper; +template +struct transpose_helper; +template +struct length_helper; +template +struct normalize_helper; +template +struct max_helper; +template +struct min_helper; +template +struct bitCount_helper; +template +struct mul_helper; +template +struct determinant_helper; +template +struct inverse_helper; +template +struct rsqrt_helper; +template +struct all_helper; +template +struct any_helper; +template +struct bitReverseAs_helper; +template +struct frac_helper; +template +struct mix_helper; +template +struct sign_helper; +template +struct radians_helper; +template +struct degrees_helper; +template +struct step_helper; +template +struct smoothStep_helper; +template +struct faceForward_helper; +template +struct reflect_helper; +template +struct refract_helper; - static const uint32_t ArrayDim = vector_traits::Dimension; - for (uint32_t i = 1; i < ArrayDim; ++i) - retval = retval + getter(lhs, i) * getter(rhs, i); +#ifdef __HLSL_VERSION // HLSL only specializations - return retval; - } -}; +// it is crucial these partial specializations appear first because that's what makes the helpers match SPIR-V intrinsics first -#define DEFINE_BUILTIN_VECTOR_SPECIALIZATION(FLOAT_TYPE, RETURN_VALUE)\ -template\ -struct dot_helper >\ +#define DECLVAL(r,data,i,_T) BOOST_PP_COMMA_IF(BOOST_PP_NOT_EQUAL(i,0)) experimental::declval<_T>() +#define DECL_ARG(r,data,i,_T) BOOST_PP_COMMA_IF(BOOST_PP_NOT_EQUAL(i,0)) const _T arg##i +#define WRAP(r,data,i,_T) BOOST_PP_COMMA_IF(BOOST_PP_NOT_EQUAL(i,0)) _T +#define ARG(r,data,i,_T) BOOST_PP_COMMA_IF(BOOST_PP_NOT_EQUAL(i,0)) arg##i + +// the template<> needs to be written ourselves +// return type is __VA_ARGS__ to protect against `,` in templated return types +#define AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(HELPER_NAME, SPIRV_FUNCTION_NAME, ARG_TYPE_LIST, ARG_TYPE_SET, ...)\ +NBL_PARTIAL_REQ_TOP(is_same_v(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) \ +struct
HELPER_NAME(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) >\ {\ - using VectorType = vector;\ - using ScalarType = typename vector_traits::scalar_type;\ -\ - static inline ScalarType dot_product(NBL_CONST_REF_ARG(VectorType) lhs, NBL_CONST_REF_ARG(VectorType) rhs)\ + using return_t = __VA_ARGS__;\ + static inline return_t __call( BOOST_PP_SEQ_FOR_EACH_I(DECL_ARG, _, ARG_TYPE_SET) )\ {\ - return RETURN_VALUE;\ + return spirv::SPIRV_FUNCTION_NAME( BOOST_PP_SEQ_FOR_EACH_I(ARG, _, ARG_TYPE_SET) );\ }\ -};\ +}; -#ifdef __HLSL_VERSION -#define BUILTIN_VECTOR_SPECIALIZATION_RET_VAL dot(lhs, rhs) -#else -#define BUILTIN_VECTOR_SPECIALIZATION_RET_VAL glm::dot(lhs, rhs) -#endif +#define FIND_MSB_LSB_RETURN_TYPE conditional_t, vector::Dimension>, int32_t> +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(find_msb_helper, findUMsb, (T), (T), FIND_MSB_LSB_RETURN_TYPE) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(find_msb_helper, findSMsb, (T), (T), FIND_MSB_LSB_RETURN_TYPE) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(find_lsb_helper, findILsb, (T), (T), FIND_MSB_LSB_RETURN_TYPE) +#undef FIND_MSB_LSB_RETURN_TYPE + +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(bitReverse_helper, bitReverse, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(transpose_helper, transpose, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(length_helper, length, (T), (T), typename vector_traits::scalar_type) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(normalize_helper, normalize, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(rsqrt_helper, inverseSqrt, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(frac_helper, fract, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(all_helper, all, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(any_helper, any, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sign_helper, fSign, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sign_helper, sSign, (T), (T), T)
+template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(radians_helper, radians, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(degrees_helper, degrees, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(max_helper, fMax, (T), (T)(T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(max_helper, uMax, (T), (T)(T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(max_helper, sMax, (T), (T)(T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(min_helper, fMin, (T), (T)(T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(min_helper, uMin, (T), (T)(T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(min_helper, sMin, (T), (T)(T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(step_helper, step, (T), (T)(T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(reflect_helper, reflect, (T), (T)(T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(clamp_helper, fClamp, (T), (T)(T)(T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(clamp_helper, uClamp, (T), (T)(T)(T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(clamp_helper, sClamp, (T), (T)(T)(T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(smoothStep_helper, smoothStep, (T), (T)(T)(T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(faceForward_helper, faceForward, (T), (T)(T)(T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(refract_helper, refract, (T)(U), (T)(T)(U), T) + +#define BITCOUNT_HELPER_RETRUN_TYPE conditional_t, vector::Dimension>, int32_t> +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(bitCount_helper, bitCount, (T), (T), BITCOUNT_HELPER_RETRUN_TYPE) +#undef BITCOUNT_HELPER_RETRUN_TYPE + +#undef DECLVAL +#undef DECL_ARG +#undef WRAP +#undef ARG +#undef AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER + +template NBL_PARTIAL_REQ_TOP(is_same_v) +struct find_msb_helper) > +{ + using return_t = int32_t; + static return_t __call(NBL_CONST_REF_ARG(UInt64) val) + { + const uint32_t highBits = uint32_t(val >> 32); + const int32_t highMsb = find_msb_helper::__call(highBits); 
-DEFINE_BUILTIN_VECTOR_SPECIALIZATION(float16_t, BUILTIN_VECTOR_SPECIALIZATION_RET_VAL) -DEFINE_BUILTIN_VECTOR_SPECIALIZATION(float32_t, BUILTIN_VECTOR_SPECIALIZATION_RET_VAL) -DEFINE_BUILTIN_VECTOR_SPECIALIZATION(float64_t, BUILTIN_VECTOR_SPECIALIZATION_RET_VAL) + if (highMsb == -1) + { + const uint32_t lowBits = uint32_t(val); + const int32_t lowMsb = find_msb_helper::__call(lowBits); + if (lowMsb == -1) + return -1; -#undef BUILTIN_VECTOR_SPECIALIZATION_RET_VAL -#undef DEFINE_BUILTIN_VECTOR_SPECIALIZATION + return lowMsb; + } -template -struct find_msb_helper; + return highMsb + 32; + } +}; +template NBL_PARTIAL_REQ_TOP(is_same_v) +struct find_lsb_helper) > +{ + static int32_t __call(NBL_CONST_REF_ARG(uint64_t) val) + { + const uint32_t lowBits = uint32_t(val); + const int32_t lowLsb = find_lsb_helper::__call(lowBits); -template<> -struct find_msb_helper + if (lowLsb == -1) + { + const uint32_t highBits = uint32_t(val >> 32); + const int32_t highLsb = find_lsb_helper::__call(highBits); + if (highLsb == -1) + return -1; + else + return 32 + highLsb; + } + + return lowLsb; + } +}; + +template +NBL_PARTIAL_REQ_TOP(concepts::Matrix&& matrix_traits::Square) +struct inverse_helper&& matrix_traits::Square) > { - static int32_t findMSB(NBL_CONST_REF_ARG(uint32_t) val) + static SquareMatrix __call(NBL_CONST_REF_ARG(SquareMatrix) mat) { -#ifdef __HLSL_VERSION - return spirv::findUMsb(val); -#else - return glm::findMSB(val); -#endif + return spirv::matrixInverse(mat); } }; -template<> -struct find_msb_helper +template NBL_PARTIAL_REQ_TOP(always_true(experimental::declval(), experimental::declval(), experimental::declval()))>) +struct mix_helper(experimental::declval(), experimental::declval(), experimental::declval()))>) > { - static int32_t findMSB(NBL_CONST_REF_ARG(int32_t) val) + using return_t = conditional_t, vector::scalar_type, vector_traits::Dimension>, T>; + static inline return_t __call(const T x, const T y, const U a) { -#ifdef __HLSL_VERSION - return 
spirv::findSMsb(val); -#else - return glm::findMSB(val); -#endif + T aAsT = a; + return spirv::fMix(x, y, aAsT); + } +}; + +template NBL_PARTIAL_REQ_TOP(matrix_traits::Square) +struct determinant_helper::Square) > +{ + static typename matrix_traits::scalar_type __call(NBL_CONST_REF_ARG(SquareMatrix) mat) + { + return spirv::determinant(mat); } }; -#define DEFINE_FIND_MSB_COMMON_SPECIALIZATION(INPUT_INTEGER_TYPE, INTEGER_TYPE)\ -template<>\ -struct find_msb_helper\ +#else // C++ only specializations + +#define DECL_ARG(r,data,i,_T) BOOST_PP_COMMA_IF(BOOST_PP_NOT_EQUAL(i,0)) const _T arg##i +#define WRAP(r,data,i,_T) BOOST_PP_COMMA_IF(BOOST_PP_NOT_EQUAL(i,0)) _T +#define ARG(r,data,i,_T) BOOST_PP_COMMA_IF(BOOST_PP_NOT_EQUAL(i,0)) arg##i + +// the template<> needs to be written ourselves +// return type is __VA_ARGS__ to protect against `,` in templated return types +#define AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(HELPER_NAME, STD_FUNCTION_NAME, REQUIREMENT, ARG_TYPE_LIST, ARG_TYPE_SET, ...)\ +requires REQUIREMENT \ +struct HELPER_NAME\ {\ - static int32_t findMSB(NBL_CONST_REF_ARG(INPUT_INTEGER_TYPE) val)\ + using return_t = __VA_ARGS__;\ + static inline return_t __call( BOOST_PP_SEQ_FOR_EACH_I(DECL_ARG, _, ARG_TYPE_SET) )\ {\ - return find_msb_helper::findMSB(val);\ + return std::STD_FUNCTION_NAME( BOOST_PP_SEQ_FOR_EACH_I(ARG, _, ARG_TYPE_SET) );\ }\ -};\ +}; -DEFINE_FIND_MSB_COMMON_SPECIALIZATION(int16_t, int32_t) -DEFINE_FIND_MSB_COMMON_SPECIALIZATION(uint16_t, uint32_t) -#ifndef __HLSL_VERSION -DEFINE_FIND_MSB_COMMON_SPECIALIZATION(int8_t, int32_t) -DEFINE_FIND_MSB_COMMON_SPECIALIZATION(uint8_t, uint32_t) -#endif +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(clamp_helper, clamp, concepts::Scalar, (T), (T)(T)(T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(max_helper, max, concepts::Scalar, (T), (T)(T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(min_helper, min, concepts::Scalar, (T), (T)(T), T) + +#undef DECL_ARG +#undef WRAP +#undef ARG +#undef 
AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER -template<> -struct find_msb_helper +template +requires concepts::IntegralScalar +struct bitReverse_helper { - static int32_t findMSB(NBL_CONST_REF_ARG(uint64_t) val) + static inline T __call(NBL_CONST_REF_ARG(T) arg) { -#ifdef __HLSL_VERSION - const uint32_t highBits = uint32_t(val >> 32); - const int32_t highMsb = find_msb_helper::findMSB(highBits); - - if (highMsb == -1) - { - const uint32_t lowBits = uint32_t(val); - const int32_t lowMsb = find_msb_helper::findMSB(lowBits); - if (lowMsb == -1) - return -1; - - return lowMsb; - } - - return highMsb + 32; -#else - return glm::findMSB(val); -#endif + return glm::bitfieldReverse(arg); } }; +template +requires concepts::Matrix +struct transpose_helper +{ + using transposed_t = typename matrix_traits::transposed_type; -template -struct find_msb_helper > + static transposed_t __call(NBL_CONST_REF_ARG(Matrix) m) + { + return reinterpret_cast(glm::transpose(reinterpret_cast(m))); + } +}; +template +requires concepts::FloatingPointVector +struct length_helper { - static vector findMSB(NBL_CONST_REF_ARG(vector) val) + static inline typename vector_traits::scalar_type __call(NBL_CONST_REF_ARG(Vector) vec) { -#ifdef __HLSL_VERSION - return spirv::findUMsb(val); -#else - return glm::findMSB(val); -#endif + return std::sqrt(dot_helper::__call(vec, vec)); } }; - -template -struct find_msb_helper > +template +requires concepts::FloatingPointLikeVectorial +struct normalize_helper { - static vector findMSB(NBL_CONST_REF_ARG(vector) val) + static inline Vectorial __call(NBL_CONST_REF_ARG(Vectorial) vec) { -#ifdef __HLSL_VERSION - return spirv::findSMsb(val); -#else - return glm::findMSB(val); -#endif + return vec / length_helper::__call(vec); } }; -#ifndef __HLSL_VERSION - +template +requires concepts::IntegralScalar +struct find_lsb_helper +{ + using return_t = int32_t; + static inline T __call(const T arg) + { + return glm::findLSB(arg); + } +}; +template 
+NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar) +struct find_msb_helper) > +{ + using return_t = int32_t; + static return_t __call(NBL_CONST_REF_ARG(Integer) val) + { + return glm::findMSB(val); + } +}; +// TODO: implement to be compatible with both C++ and HLSL when it works with DXC +template +requires std::is_enum_v +struct find_lsb_helper +{ + using return_t = int32_t; + static int32_t __call(NBL_CONST_REF_ARG(EnumType) val) + { + using underlying_t = std::underlying_type_t; + return find_lsb_helper::__call(static_cast(val)); + } +}; template - requires std::is_enum_v +requires std::is_enum_v struct find_msb_helper { - static int32_t findMSB(NBL_CONST_REF_ARG(EnumType) val) + using return_t = int32_t; + static return_t __call(NBL_CONST_REF_ARG(EnumType) val) { using underlying_t = std::underlying_type_t; - return find_msb_helper::findMSB(static_cast(val)); + return find_msb_helper::__call(static_cast(val)); } }; -#endif +template +requires concepts::FloatingPointScalar +struct rsqrt_helper +{ + static FloatingPoint __call(NBL_CONST_REF_ARG(FloatingPoint) x) + { + // TODO: https://stackoverflow.com/a/62239778 + return 1.0f / std::sqrt(x); + } +}; -template -struct find_lsb_helper; +template +requires concepts::FloatingPointScalar +struct frac_helper +{ + using return_t = T; + static inline return_t __call(const T x) + { + return x - std::floor(x); + } +}; -template<> -struct find_lsb_helper +template +requires concepts::IntegralScalar +struct bitCount_helper { - static int32_t findLSB(NBL_CONST_REF_ARG(int32_t) val) + using return_t = int32_t; + static return_t __call(NBL_CONST_REF_ARG(Integer) val) { -#ifdef __HLSL_VERSION - return spirv::findILsb(val); -#else - return glm::findLSB(val); -#endif + using UnsignedInteger = typename hlsl::unsigned_integer_of_size_t; + return std::popcount(static_cast(val)); } }; -template<> -struct find_lsb_helper +template +requires concepts::Matrix && matrix_traits::Square +struct inverse_helper { - static int32_t
findLSB(NBL_CONST_REF_ARG(uint32_t) val) + static SquareMatrix __call(NBL_CONST_REF_ARG(SquareMatrix) mat) { -#ifdef __HLSL_VERSION - return spirv::findILsb(val); -#else - return glm::findLSB(val); -#endif + return reinterpret_cast(glm::inverse(reinterpret_cast(mat))); } }; -#define DEFINE_FIND_LSB_COMMON_SPECIALIZATION(INPUT_INTEGER_TYPE, INTEGER_TYPE)\ -template<>\ -struct find_lsb_helper\ -{\ - static int32_t findLSB(NBL_CONST_REF_ARG(INPUT_INTEGER_TYPE) val)\ - {\ - return find_lsb_helper::findLSB(val);\ - }\ -};\ +template +requires std::is_enum_v +struct bitCount_helper +{ + using return_t = int32_t; + using underlying_t = std::underlying_type_t; + static return_t __call(NBL_CONST_REF_ARG(EnumT) val) + { + return bitCount_helper::__call(reinterpret_cast(val)); + } +}; -DEFINE_FIND_LSB_COMMON_SPECIALIZATION(int16_t, int32_t) -DEFINE_FIND_LSB_COMMON_SPECIALIZATION(uint16_t, uint32_t) -#ifndef __HLSL_VERSION -DEFINE_FIND_LSB_COMMON_SPECIALIZATION(int8_t, int32_t) -DEFINE_FIND_LSB_COMMON_SPECIALIZATION(uint8_t, uint32_t) -#endif +template +requires concepts::FloatingPoint && (concepts::FloatingPoint || concepts::Boolean) +struct mix_helper +{ + using return_t = T; + static inline return_t __call(const T x, const T y, const U a) + { + return glm::mix(x, y, a); + } +}; -template<> -struct find_lsb_helper +template +requires concepts::FloatingPointScalar || concepts::IntegralScalar +struct sign_helper { - static int32_t findLSB(NBL_CONST_REF_ARG(uint64_t) val) + using return_t = T; + static inline return_t __call(const T val) { -#ifdef __HLSL_VERSION - const uint32_t lowBits = uint32_t(val); - const int32_t lowLsb = find_lsb_helper::findLSB(lowBits); + if (val < 0) + return -1; + if (val > 0) + return 1; - if (lowLsb == -1) - { - const uint32_t highBits = uint32_t(val >> 32); - const int32_t highLsb = find_lsb_helper::findLSB(highBits); - if (highLsb == -1) - return -1; - else - return 32 + highLsb; - } + return 0; + } +}; - return lowLsb; -#else - return 
glm::findLSB(val); -#endif +template +requires concepts::FloatingPointScalar +struct radians_helper +{ + using return_t = T; + static inline return_t __call(const T degrees) + { + return degrees * (numbers::pi / static_cast(180.0)); } }; -template -struct find_lsb_helper > +template +requires concepts::FloatingPointScalar +struct degrees_helper { - static vector findLSB(NBL_CONST_REF_ARG(vector) val) + using return_t = T; + static inline return_t __call(const T radians) { -#ifdef __HLSL_VERSION - return spirv::findILsb(val); -#else - return glm::findLSB(val); -#endif + return radians * (static_cast(180.0) / numbers::pi); } }; -template -struct find_lsb_helper > +template +requires concepts::FloatingPointScalar +struct step_helper { - static vector findLSB(NBL_CONST_REF_ARG(vector) val) + using return_t = T; + static inline return_t __call(const T edge, const T x) { -#ifdef __HLSL_VERSION - return spirv::findILsb(val); -#else - return glm::findLSB(val); -#endif + return x < edge ? 0.0 : 1.0; } }; -#ifndef __HLSL_VERSION +template +requires concepts::FloatingPointScalar +struct smoothStep_helper +{ + using return_t = T; + static inline return_t __call(const T edge0, const T edge1, const T x) + { + T t = clamp_helper::__call((x - edge0) / (edge1 - edge0), 0, 1); + return t * t * (3 - 2 * t); + } +}; -template -requires std::is_enum_v -struct find_lsb_helper +template +NBL_PARTIAL_REQ_TOP(matrix_traits::Square) +struct determinant_helper::Square) > { - static int32_t findLSB(NBL_CONST_REF_ARG(EnumType) val) + static typename matrix_traits::scalar_type __call(NBL_CONST_REF_ARG(SquareMatrix) mat) { - using underlying_t = std::underlying_type_t; - return find_lsb_helper::findLSB(static_cast(val)); + return glm::determinant(reinterpret_cast(mat)); } }; -#endif +template +requires concepts::FloatingPointVectorial +struct faceForward_helper +{ + using return_t = T; + static inline return_t __call(const T N, const T I, const T Nref) + { + if (dot_helper::__call(Nref, I) < 
0.0) + return N; + else + return -N; + } +}; -template -struct find_msb_return_type +template +requires concepts::FloatingPointVector +struct reflect_helper { - using type = int32_t; + using return_t = T; + static inline return_t __call(const T I, const T N) + { + return I - T(2.0 * dot_helper::__call(N, I)) * N; + } }; -template -struct find_msb_return_type > + +template +requires concepts::FloatingPointVector && concepts::FloatingPointScalar +struct refract_helper { - using type = vector; + using return_t = T; + static inline return_t __call(const T I, const T N, const U eta) + { + U k = 1.0 - eta * eta * (1.0 - dot_helper::__call(N, I) * dot_helper::__call(N, I)); + if (k < 0.0) + return T(0.0); + + return eta * I - (eta * dot_helper::__call(N, I) + std::sqrt(k)) * N; + } }; -template -using find_lsb_return_type = find_msb_return_type; -template -struct bitReverse_helper; +#endif // C++ only specializations -template -NBL_PARTIAL_REQ_TOP(hlsl::is_integral_v && hlsl::is_scalar_v) -struct bitReverse_helper&& hlsl::is_scalar_v) > +// C++ and HLSL specializations + +template +NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (Bits <= sizeof(T) * 8)) +struct bitReverseAs_helper && (Bits <= sizeof(T) * 8)) > { - static inline Integer __call(NBL_CONST_REF_ARG(Integer) val) + static T __call(NBL_CONST_REF_ARG(T) val) { -#ifdef __HLSL_VERSION - return spirv::bitReverse(val); -#else - return glm::bitfieldReverse(val); -#endif + return bitReverse_helper::__call(val) >> promote >(scalar_type_t (sizeof(T) * 8 - Bits)); + } + + static T __call(NBL_CONST_REF_ARG(T) val, uint16_t bits) + { + return bitReverse_helper::__call(val) >> promote >(scalar_type_t (sizeof(T) * 8 - bits)); } }; -template -NBL_PARTIAL_REQ_TOP(hlsl::is_vector_v) -struct bitReverse_helper && hlsl::is_vector_v) > +template +NBL_PARTIAL_REQ_TOP(concepts::Vectorial) +struct dot_helper) > +{ + using scalar_type = typename vector_traits::scalar_type; + + static inline scalar_type 
__call(NBL_CONST_REF_ARG(Vectorial) lhs, NBL_CONST_REF_ARG(Vectorial) rhs) + { + static const uint32_t ArrayDim = vector_traits::Dimension; + static array_get getter; + + scalar_type retval = getter(lhs, 0) * getter(rhs, 0); + for (uint32_t i = 1; i < ArrayDim; ++i) + retval = retval + getter(lhs, i) * getter(rhs, i); + + return retval; + } +}; +template +NBL_PARTIAL_REQ_TOP(concepts::FloatingPointLikeVectorial && (vector_traits::Dimension == 3)) +struct cross_helper && (vector_traits::Dimension == 3)) > { - static Vector __call(NBL_CONST_REF_ARG(Vector) vec) + static FloatingPointLikeVectorial __call(NBL_CONST_REF_ARG(FloatingPointLikeVectorial) lhs, NBL_CONST_REF_ARG(FloatingPointLikeVectorial) rhs) { #ifdef __HLSL_VERSION - return spirv::bitReverse(vec); + return spirv::cross(lhs, rhs); #else - Vector output; - using traits = hlsl::vector_traits; - for (uint32_t i = 0; i < traits::Dimension; ++i) - output[i] = bitReverse_helper >::__call(vec[i]); + using traits = hlsl::vector_traits; + array_get getter; + array_set setter; + + FloatingPointLikeVectorial output; + setter(output, 0, getter(lhs, 1) * getter(rhs, 2) - getter(rhs, 1) * getter(lhs, 2)); + setter(output, 1, getter(lhs, 2) * getter(rhs, 0) - getter(rhs, 2) * getter(lhs, 0)); + setter(output, 2, getter(lhs, 0) * getter(rhs, 1) - getter(rhs, 0) * getter(lhs, 1)); + return output; #endif } }; - -template -struct lerp_helper; - #ifdef __HLSL_VERSION -#define MIX_FUNCTION spirv::fMix +// SPIR-V already defines specializations for builtin vector types +#define VECTOR_SPECIALIZATION_CONCEPT concepts::Vectorial && !is_vector_v #else -#define MIX_FUNCTION glm::mix +#define VECTOR_SPECIALIZATION_CONCEPT concepts::Vectorial #endif -#define DEFINE_LERP_HELPER_COMMON_SPECIALIZATION(TYPE)\ -template<>\ -struct lerp_helper\ -{\ - static inline TYPE lerp(NBL_CONST_REF_ARG(TYPE) x, NBL_CONST_REF_ARG(TYPE) y, NBL_CONST_REF_ARG(TYPE) a)\ - {\ - return MIX_FUNCTION(x, y, a);\ - }\ -};\ -\ -template\ -struct lerp_helper, 
vector >\ -{\ - static inline vector lerp(NBL_CONST_REF_ARG(vector) x, NBL_CONST_REF_ARG(vector) y, NBL_CONST_REF_ARG(vector) a)\ - {\ - return MIX_FUNCTION(x, y, a);\ - }\ -};\ -\ -template\ -struct lerp_helper, TYPE>\ -{\ - static inline vector lerp(NBL_CONST_REF_ARG(vector) x, NBL_CONST_REF_ARG(vector) y, NBL_CONST_REF_ARG(TYPE) a)\ - {\ - return MIX_FUNCTION(x, y, a);\ - }\ -};\ +template +NBL_PARTIAL_REQ_TOP(VECTOR_SPECIALIZATION_CONCEPT) +struct clamp_helper +{ + using return_t = T; + static return_t __call(NBL_CONST_REF_ARG(T) val, NBL_CONST_REF_ARG(T) min, NBL_CONST_REF_ARG(T) max) + { + using traits = hlsl::vector_traits; + array_get getter; + array_set setter; -DEFINE_LERP_HELPER_COMMON_SPECIALIZATION(float32_t) -DEFINE_LERP_HELPER_COMMON_SPECIALIZATION(float64_t) + return_t output; + for (uint32_t i = 0; i < traits::Dimension; ++i) + setter(output, i, clamp_helper::__call(getter(val, i), getter(min, i), getter(max, i))); -#undef DEFINE_LERP_HELPER_COMMON_SPECIALIZATION -#undef MIX_FUNCTION + return output; + } +}; template -struct lerp_helper +NBL_PARTIAL_REQ_TOP(VECTOR_SPECIALIZATION_CONCEPT) +struct min_helper { - static inline T lerp(NBL_CONST_REF_ARG(T) x, NBL_CONST_REF_ARG(T) y, NBL_CONST_REF_ARG(bool) a) + static T __call(NBL_CONST_REF_ARG(T) a, NBL_CONST_REF_ARG(T) b) { - if (a) - return y; - else - return x; + using traits = hlsl::vector_traits; + array_get getter; + array_set setter; + + T output; + for (uint32_t i = 0; i < traits::Dimension; ++i) + setter(output, i, min_helper::__call(getter(a, i), getter(b, i))); + + return output; + } +}; +template +NBL_PARTIAL_REQ_TOP(VECTOR_SPECIALIZATION_CONCEPT) +struct max_helper +{ + static T __call(NBL_CONST_REF_ARG(T) a, NBL_CONST_REF_ARG(T) b) + { + using traits = hlsl::vector_traits; + array_get getter; + array_set setter; + + T output; + for (uint32_t i = 0; i < traits::Dimension; ++i) + setter(output, i, max_helper::__call(getter(a, i), getter(b, i))); + + return output; } }; -template -struct 
lerp_helper, vector > +template +NBL_PARTIAL_REQ_TOP(concepts::Matrix && concepts::Vector && (matrix_traits::ColumnCount == vector_traits::Dimension)) +struct mul_helper && concepts::Vector && (matrix_traits::ColumnCount == vector_traits::Dimension)) > { - using output_vec_t = vector; + using lhs_traits = matrix_traits; + using rhs_traits = vector_traits; + using return_t = vector; + static inline return_t __call(LhsT lhs, RhsT rhs) + { + return mul(lhs, rhs); + } +}; - static inline output_vec_t lerp(NBL_CONST_REF_ARG(output_vec_t) x, NBL_CONST_REF_ARG(output_vec_t) y, NBL_CONST_REF_ARG(vector) a) +template +NBL_PARTIAL_REQ_TOP(concepts::Matrix && concepts::Matrix && (matrix_traits::ColumnCount == matrix_traits::RowCount)) +struct mul_helper && concepts::Matrix && (matrix_traits::ColumnCount == matrix_traits::RowCount)) > +{ + using lhs_traits = matrix_traits; + using rhs_traits = matrix_traits; + using return_t = matrix; + static inline return_t __call(LhsT lhs, RhsT rhs) { - output_vec_t retval; - for (uint32_t i = 0; i < vector_traits::Dimension; i++) - retval[i] = a[i] ? 
y[i] : x[i]; - return retval; + return mul(lhs, rhs); } }; -template -struct transpose_helper; +#define AUTO_SPECIALIZE_HELPER_FOR_VECTOR(HELPER_NAME, REQUIREMENT, RETURN_TYPE)\ +template\ +NBL_PARTIAL_REQ_TOP(REQUIREMENT)\ +struct HELPER_NAME\ +{\ + using return_t = RETURN_TYPE;\ + static return_t __call(NBL_CONST_REF_ARG(T) vec)\ + {\ + using traits = hlsl::vector_traits;\ + using return_t_traits = hlsl::vector_traits;\ + array_get getter;\ + array_set setter;\ +\ + return_t output;\ + for (uint32_t i = 0; i < traits::Dimension; ++i)\ + setter(output, i, HELPER_NAME::__call(getter(vec, i)));\ +\ + return output;\ + }\ +}; -template -struct transpose_helper > +AUTO_SPECIALIZE_HELPER_FOR_VECTOR(rsqrt_helper, concepts::FloatingPointVectorial && VECTOR_SPECIALIZATION_CONCEPT, T) +AUTO_SPECIALIZE_HELPER_FOR_VECTOR(bitReverse_helper, VECTOR_SPECIALIZATION_CONCEPT, T) +AUTO_SPECIALIZE_HELPER_FOR_VECTOR(frac_helper, VECTOR_SPECIALIZATION_CONCEPT,T) +AUTO_SPECIALIZE_HELPER_FOR_VECTOR(sign_helper, VECTOR_SPECIALIZATION_CONCEPT, T) +AUTO_SPECIALIZE_HELPER_FOR_VECTOR(degrees_helper, VECTOR_SPECIALIZATION_CONCEPT, T) +AUTO_SPECIALIZE_HELPER_FOR_VECTOR(radians_helper, VECTOR_SPECIALIZATION_CONCEPT, T) +#define INT32_VECTOR_TYPE vector::Dimension> +AUTO_SPECIALIZE_HELPER_FOR_VECTOR(bitCount_helper, VECTOR_SPECIALIZATION_CONCEPT, INT32_VECTOR_TYPE) +AUTO_SPECIALIZE_HELPER_FOR_VECTOR(find_msb_helper, VECTOR_SPECIALIZATION_CONCEPT, INT32_VECTOR_TYPE) +AUTO_SPECIALIZE_HELPER_FOR_VECTOR(find_lsb_helper, VECTOR_SPECIALIZATION_CONCEPT, INT32_VECTOR_TYPE) +#undef INT32_VECTOR_TYPE +#undef AUTO_SPECIALIZE_HELPER_FOR_VECTOR + +template +NBL_PARTIAL_REQ_TOP(concepts::Vectorial && is_same_v::scalar_type, bool>) +struct all_helper && is_same_v::scalar_type, bool>) > { - using transposed_t = typename matrix_traits >::transposed_type; + static bool __call(NBL_CONST_REF_ARG(BooleanVector) x) + { + using traits = hlsl::vector_traits; + array_get getter; + array_set setter; + + bool output = 
true; + for (uint32_t i = 0; i < traits::Dimension; ++i) + output = output && getter(x, i); + + return output; + } +}; - static transposed_t transpose(NBL_CONST_REF_ARG(matrix) m) +template +NBL_PARTIAL_REQ_TOP(concepts::Vectorial && is_same_v::scalar_type, bool>) +struct any_helper && is_same_v::scalar_type, bool>) > +{ + static bool __call(NBL_CONST_REF_ARG(BooleanVector) x) { -#ifdef __HLSL_VERSION - return spirv::transpose(m); -#else - return reinterpret_cast(glm::transpose(reinterpret_cast::Base const&>(m))); -#endif + using traits = hlsl::vector_traits; + array_get getter; + array_set setter; + + bool output = false; + for (uint32_t i = 0; i < traits::Dimension; ++i) + output = output || getter(x, i); + + return output; } }; -template -struct mul_helper +template +NBL_PARTIAL_REQ_TOP(VECTOR_SPECIALIZATION_CONCEPT) +struct step_helper { - static inline RhsT multiply(LhsT lhs, RhsT rhs) + using return_t = T; + static return_t __call(NBL_CONST_REF_ARG(T) edge, NBL_CONST_REF_ARG(T) x) { - return mul(lhs, rhs); + using traits = hlsl::vector_traits; + array_get getter; + array_set setter; + + return_t output; + for (uint32_t i = 0; i < traits::Dimension; ++i) + setter(output, i, step_helper::__call(getter(edge, i), getter(x, i))); + + return output; + } +}; + +template +NBL_PARTIAL_REQ_TOP(VECTOR_SPECIALIZATION_CONCEPT) +struct smoothStep_helper +{ + using return_t = T; + static return_t __call(NBL_CONST_REF_ARG(T) edge0, NBL_CONST_REF_ARG(T) edge1, NBL_CONST_REF_ARG(T) x) + { + using traits = hlsl::vector_traits; + array_get getter; + array_set setter; + + return_t output; + for (uint32_t i = 0; i < traits::Dimension; ++i) + setter(output, i, smoothStep_helper::__call(getter(edge0, i), getter(edge1, i), getter(x, i))); + + return output; } }; diff --git a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.h b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.h new file mode 100644 index 0000000000..e7d98d42f2 --- /dev/null +++ 
b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.h @@ -0,0 +1,151 @@ +#ifndef _NBL_BUILTIN_HLSL_CPP_COMPAT_INTRINSICS_INCLUDED_ +#define _NBL_BUILTIN_HLSL_CPP_COMPAT_INTRINSICS_INCLUDED_ + +#include +#include + +// this is a C++ only header, hence the `.h` extension, it only implements HLSL's built-in functions +#ifndef __HLSL_VERSION +#include +#include +#include "nbl/core/util/bitflag.h" + +namespace nbl::hlsl +{ +// TODO: remove this macro and write stuff by hand, the aliasing stuff doesn't work +#define NBL_SIMPLE_GLM_PASSTHROUGH(HLSL_ID,GLSL_ID,...) template\ +inline auto HLSL_ID(Args&&... args) \ +{ \ + return glm::GLSL_ID(std::forward(args)...);\ +} +#define NBL_BIT_OP_GLM_PASSTHROUGH(HLSL_ID,GLSL_ID) template \ +inline auto HLSL_ID(const T bitpattern) \ +{ \ + if constexpr (std::is_integral_v) \ + return glm::GLSL_ID(bitpattern); \ + else \ + { \ + if constexpr (std::is_enum_v) \ + { \ + const auto as_underlying = static_cast>(bitpattern); \ + return glm::GLSL_ID(as_underlying); \ + } \ + else \ + { \ + if constexpr (std::is_same_v>) \ + return HLSL_ID(bitpattern.value); \ + } \ + } \ +} + +NBL_BIT_OP_GLM_PASSTHROUGH(bitCount,bitCount) + +NBL_SIMPLE_GLM_PASSTHROUGH(cross,cross) +NBL_SIMPLE_GLM_PASSTHROUGH(clamp,clamp) +NBL_SIMPLE_GLM_PASSTHROUGH(normalize, normalize) + +template +inline scalar_type_t length(const T& vec) +{ + return glm::length(vec); +} + +template +inline scalar_type_t dot(const T& lhs, const T& rhs) +{ + scalar_type_t retval = lhs[0]*rhs[0]; + // whatever has a `scalar_type` specialization should be a pure vector + for (auto i=1; i