Skip to content

Commit

Permalink
Optimized the performance of float object
Browse files Browse the repository at this point in the history
  • Loading branch information
daliziql committed Jun 24, 2024
1 parent e1a80f5 commit 8554eac
Show file tree
Hide file tree
Showing 7 changed files with 355 additions and 364 deletions.
55 changes: 30 additions & 25 deletions includes/rtm/impl/matrix_affine_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,18 +58,21 @@ namespace rtm
{
RTM_ASSERT(quat_is_normalized(quat_input), "Quaternion is not normalized");

const float_type x2 = quat_get_x(quat_input) + quat_get_x(quat_input);
const float_type y2 = quat_get_y(quat_input) + quat_get_y(quat_input);
const float_type z2 = quat_get_z(quat_input) + quat_get_z(quat_input);
const float_type xx = quat_get_x(quat_input) * x2;
const float_type xy = quat_get_x(quat_input) * y2;
const float_type xz = quat_get_x(quat_input) * z2;
const float_type yy = quat_get_y(quat_input) * y2;
const float_type yz = quat_get_y(quat_input) * z2;
const float_type zz = quat_get_z(quat_input) * z2;
const float_type wx = quat_get_w(quat_input) * x2;
const float_type wy = quat_get_w(quat_input) * y2;
const float_type wz = quat_get_w(quat_input) * z2;
float_type quatval[4];
quat_store(quat_input, quatval);

const float_type x2 = quatval[0] + quatval[0];
const float_type y2 = quatval[1] + quatval[1];
const float_type z2 = quatval[2] + quatval[2];
const float_type xx = quatval[0] * x2;
const float_type xy = quatval[0] * y2;
const float_type xz = quatval[0] * z2;
const float_type yy = quatval[1] * y2;
const float_type yz = quatval[1] * z2;
const float_type zz = quatval[2] * z2;
const float_type wx = quatval[3] * x2;
const float_type wy = quatval[3] * y2;
const float_type wz = quatval[3] * z2;

const vector4 x_axis = vector_set(float_type(1.0) - (yy + zz), xy + wz, xz - wy, float_type(0.0));
const vector4 y_axis = vector_set(xy - wz, float_type(1.0) - (xx + zz), yz + wx, float_type(0.0));
Expand All @@ -80,19 +83,21 @@ namespace rtm
RTM_DISABLE_SECURITY_COOKIE_CHECK inline RTM_SIMD_CALL operator matrix3x4() const RTM_NO_EXCEPT
{
RTM_ASSERT(quat_is_normalized(quat_input), "Quaternion is not normalized");

const float_type x2 = quat_get_x(quat_input) + quat_get_x(quat_input);
const float_type y2 = quat_get_y(quat_input) + quat_get_y(quat_input);
const float_type z2 = quat_get_z(quat_input) + quat_get_z(quat_input);
const float_type xx = quat_get_x(quat_input) * x2;
const float_type xy = quat_get_x(quat_input) * y2;
const float_type xz = quat_get_x(quat_input) * z2;
const float_type yy = quat_get_y(quat_input) * y2;
const float_type yz = quat_get_y(quat_input) * z2;
const float_type zz = quat_get_z(quat_input) * z2;
const float_type wx = quat_get_w(quat_input) * x2;
const float_type wy = quat_get_w(quat_input) * y2;
const float_type wz = quat_get_w(quat_input) * z2;
float_type quatval[4];
quat_store(quat_input, quatval);

const float_type x2 = quatval[0] + quatval[0];
const float_type y2 = quatval[1] + quatval[1];
const float_type z2 = quatval[2] + quatval[2];
const float_type xx = quatval[0] * x2;
const float_type xy = quatval[0] * y2;
const float_type xz = quatval[0] * z2;
const float_type yy = quatval[1] * y2;
const float_type yz = quatval[1] * z2;
const float_type zz = quatval[2] * z2;
const float_type wx = quatval[3] * x2;
const float_type wy = quatval[3] * y2;
const float_type wz = quatval[3] * z2;

const vector4 x_axis = vector_set(float_type(1.0) - (yy + zz), xy + wz, xz - wy, float_type(0.0));
const vector4 y_axis = vector_set(xy - wz, float_type(1.0) - (xx + zz), yz + wx, float_type(0.0));
Expand Down
121 changes: 121 additions & 0 deletions includes/rtm/impl/vector_swizzle.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
#pragma once

#include <cstdint>
#include <cstring>
#include <type_traits>

// #include <spatial/core/Platform.hpp>
#include "rtm/math.h"
#include "rtm/types.h"
#include "rtm/impl/compiler_utils.h"
#include "rtm/scalarf.h"
#include "rtm/scalard.h"
#include "rtm/version.h"


RTM_IMPL_FILE_PRAGMA_PUSH

namespace rtm
{
RTM_IMPL_VERSION_NAMESPACE_BEGIN

#if defined(RTM_SSE2_INTRINSICS)

namespace sse2_permute
{
#define SHUFFLEMASK(a0,a1,b2,b3) ( (a0) | ((a1)<<2) | ((b2)<<4) | ((b3)<<6) )

/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Float swizzle
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

template<int index0, int index1, int index2, int index3>
RTM_FORCE_INLINE vector4f vector_swizzle_template(const vector4f& vec)
{
return _mm_shuffle_ps(vec, vec, SHUFFLEMASK(index0, index1, index2, index3));
}

template<> RTM_FORCE_INLINE vector4f vector_swizzle_template<0, 1, 2, 3>(const vector4f& vec) { return vec; }
template<> RTM_FORCE_INLINE vector4f vector_swizzle_template<0, 1, 0, 1>(const vector4f& vec) { return _mm_movelh_ps(vec, vec); }
template<> RTM_FORCE_INLINE vector4f vector_swizzle_template<2, 3, 2, 3>(const vector4f& vec) { return _mm_movehl_ps(vec, vec); }
template<> RTM_FORCE_INLINE vector4f vector_swizzle_template<0, 0, 1, 1>(const vector4f& vec) { return _mm_unpacklo_ps(vec, vec); }
template<> RTM_FORCE_INLINE vector4f vector_swizzle_template<2, 2, 3, 3>(const vector4f& vec) { return _mm_unpackhi_ps(vec, vec); }

#if defined(RTM_SSE4_INTRINSICS)
template<> RTM_FORCE_INLINE vector4f vector_swizzle_template<0, 0, 2, 2>(const vector4f& vec) { return _mm_moveldup_ps(vec); }
template<> RTM_FORCE_INLINE vector4f vector_swizzle_template<1, 1, 3, 3>(const vector4f& vec) { return _mm_movehdup_ps(vec); }
#endif

#if defined(RTM_AVX2_INTRINSICS)
template<> RTM_FORCE_INLINE vector4f vector_swizzle_template<0, 0, 0, 0>(const vector4f& vec) { return _mm_broadcastss_ps(vec); }
#endif

/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Float replicate
template<int Index>
RTM_FORCE_INLINE vector4f vector_replicate_template(const vector4f& vec)
{
static_assert(Index >= 0 && Index <= 3, "Invalid Index");
return vector_swizzle_template<Index, Index, Index, Index>(vec);
}

/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Float shuffle
template<int index0, int index1, int index2, int index3>
RTM_FORCE_INLINE vector4f vector_shuffle_template(const vector4f& vec1, const vector4f& vec2)
{
static_assert(index0 >= 0 && index0 <= 3 && index1 >= 0 && index1 <= 3 && index2 >= 0 && index2 <= 3 && index3 >= 0 && index3 <= 3, "Invalid Index");
return _mm_shuffle_ps(vec1, vec2, SHUFFLEMASK(index0, index1, index2, index3));
}

// Float Shuffle specializations
template<> RTM_FORCE_INLINE vector4f vector_shuffle_template<0, 1, 0, 1>(const vector4f& vec1, const vector4f& vec2) { return _mm_movelh_ps(vec1, vec2); }
template<> RTM_FORCE_INLINE vector4f vector_shuffle_template<2, 3, 2, 3>(const vector4f& vec1, const vector4f& vec2) { return _mm_movehl_ps(vec2, vec1); } // Note: movehl copies first from the 2nd argument

}; // namespace sse2_permute


#define VECTOR_REPLICATE( vec, element_index ) sse2_permute::vector_replicate_template<element_index>(vec)
#define VECTOR_SWIZZLE( vec, x, y, z, w ) sse2_permute::vector_swizzle_template<x,y,z,w>( vec )
#define VECTOR_SHUFFLE( vec1, vec2, x, y, z, w ) sse2_permute::vector_shuffle_template<x,y,z,w>( vec1, vec2 )

#elif defined(RTM_NEON_INTRINSICS) && defined(__clang__)
//now we only support __clang__ neon here

template <int x, int y, int z, int w>
RTM_FORCE_INLINE vector4f vector_swizzle_impl(vector4f vec)
{
return __builtin_shufflevector(vec, vec, x, y, z, w);
}

template <int x, int y, int z, int w>
RTM_FORCE_INLINE vector4f vector_shuffle_impl(vector4f vec1, vector4f vec2)
{
return __builtin_shufflevector(vec1, vec2, x, y, z + 4, w + 4);
}

template <int element_index>
RTM_FORCE_INLINE vector4f vector_replicate_impl(const vector4f& vec)
{
return vdupq_n_f32(vgetq_lane_f32(vec, element_index));
}

template <int element_index>
RTM_FORCE_INLINE float64x2_t vector_replicate_impl(const float64x2_t& vec)
{
return vdupq_n_f64(vgetq_lane_f64(vec, element_index));
}

#define VECTOR_REPLICATE( vec, element_index ) vector_replicate_impl<element_index>(vec)
#define VECTOR_SWIZZLE( vec, x, y, z, w ) vector_swizzle_impl<x, y, z, w>(vec)
#define VECTOR_SHUFFLE( vec1, vec2, x, y, z, w ) vector_shuffle_impl<x, y, z, w>(vec1, vec2)

#else
#pragma error("vector swizzle not implement here!");
#endif

RTM_IMPL_VERSION_NAMESPACE_END
}

RTM_IMPL_FILE_PRAGMA_POP

2 changes: 1 addition & 1 deletion includes/rtm/matrix3x3f.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ namespace rtm
// is to multiply the normal with the cofactor matrix.
// See: https://github.com/graphitemaster/normals_revisited
//////////////////////////////////////////////////////////////////////////
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE vector4f RTM_SIMD_CALL matrix_mul_vector3(vector4f_arg0 vec3, matrix3x3f_arg0 mtx) RTM_NO_EXCEPT
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE vector4f RTM_SIMD_CALL matrix_mul_vector3(vector4f_arg0 vec3, matrix3x3f_argn mtx) RTM_NO_EXCEPT
{
vector4f tmp;

Expand Down
96 changes: 52 additions & 44 deletions includes/rtm/matrix3x4f.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,22 +61,25 @@ namespace rtm
//////////////////////////////////////////////////////////////////////////
// Sets a 3x4 affine matrix from a rotation quaternion and translation.
//////////////////////////////////////////////////////////////////////////
RTM_DISABLE_SECURITY_COOKIE_CHECK inline matrix3x4f RTM_SIMD_CALL matrix_from_qv(quatf_arg0 quat, vector4f_arg1 translation) RTM_NO_EXCEPT
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE matrix3x4f RTM_SIMD_CALL matrix_from_qv(quatf_arg0 quat, vector4f_arg1 translation) RTM_NO_EXCEPT
{
RTM_ASSERT(quat_is_normalized(quat), "Quaternion is not normalized");

const float x2 = quat_get_x(quat) + quat_get_x(quat);
const float y2 = quat_get_y(quat) + quat_get_y(quat);
const float z2 = quat_get_z(quat) + quat_get_z(quat);
const float xx = quat_get_x(quat) * x2;
const float xy = quat_get_x(quat) * y2;
const float xz = quat_get_x(quat) * z2;
const float yy = quat_get_y(quat) * y2;
const float yz = quat_get_y(quat) * z2;
const float zz = quat_get_z(quat) * z2;
const float wx = quat_get_w(quat) * x2;
const float wy = quat_get_w(quat) * y2;
const float wz = quat_get_w(quat) * z2;
float quatval[4];
quat_store(quat, quatval);

const float x2 = quatval[0] + quatval[0];
const float y2 = quatval[1] + quatval[1];
const float z2 = quatval[2] + quatval[2];
const float xx = quatval[0] * x2;
const float xy = quatval[0] * y2;
const float xz = quatval[0] * z2;
const float yy = quatval[1] * y2;
const float yz = quatval[1] * z2;
const float zz = quatval[2] * z2;
const float wx = quatval[3] * x2;
const float wy = quatval[3] * y2;
const float wz = quatval[3] * z2;

const vector4f x_axis = vector_set(1.0F - (yy + zz), xy + wz, xz - wy, 0.0F);
const vector4f y_axis = vector_set(xy - wz, 1.0F - (xx + zz), yz + wx, 0.0F);
Expand All @@ -87,31 +90,33 @@ namespace rtm
//////////////////////////////////////////////////////////////////////////
// Converts a QV transform into a 3x4 affine matrix.
//////////////////////////////////////////////////////////////////////////
RTM_DISABLE_SECURITY_COOKIE_CHECK inline matrix3x4f RTM_SIMD_CALL matrix_from_qv(qvf_arg0 transform) RTM_NO_EXCEPT
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE matrix3x4f RTM_SIMD_CALL matrix_from_qv(qvf_argn transform) RTM_NO_EXCEPT
{
return matrix_from_qv(transform.rotation, transform.translation);
}

//////////////////////////////////////////////////////////////////////////
// Sets a 3x4 affine matrix from a rotation quaternion, translation, and scalar scale.
//////////////////////////////////////////////////////////////////////////
RTM_DISABLE_SECURITY_COOKIE_CHECK inline matrix3x4f RTM_SIMD_CALL matrix_from_qvs(quatf_arg0 quat, vector4f_arg1 translation, float scale) RTM_NO_EXCEPT
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE matrix3x4f RTM_SIMD_CALL matrix_from_qvs(quatf_arg0 quat, vector4f_arg1 translation, float scale) RTM_NO_EXCEPT
{
RTM_ASSERT(quat_is_normalized(quat), "Quaternion is not normalized");

const float x2 = quat_get_x(quat) + quat_get_x(quat);
const float y2 = quat_get_y(quat) + quat_get_y(quat);
const float z2 = quat_get_z(quat) + quat_get_z(quat);
const float xx = quat_get_x(quat) * x2;
const float xy = quat_get_x(quat) * y2;
const float xz = quat_get_x(quat) * z2;
const float yy = quat_get_y(quat) * y2;
const float yz = quat_get_y(quat) * z2;
const float zz = quat_get_z(quat) * z2;
const float wx = quat_get_w(quat) * x2;
const float wy = quat_get_w(quat) * y2;
const float wz = quat_get_w(quat) * z2;

float quatval[4];
quat_store(quat, quatval);

const float x2 = quatval[0] + quatval[0];
const float y2 = quatval[1] + quatval[1];
const float z2 = quatval[2] + quatval[2];
const float xx = quatval[0] * x2;
const float xy = quatval[0] * y2;
const float xz = quatval[0] * z2;
const float yy = quatval[1] * y2;
const float yz = quatval[1] * z2;
const float zz = quatval[2] * z2;
const float wx = quatval[3] * x2;
const float wy = quatval[3] * y2;
const float wz = quatval[3] * z2;
const scalarf scale_s = scalar_set(scale);

const vector4f x_axis = vector_mul(vector_set(1.0F - (yy + zz), xy + wz, xz - wy, 0.0F), scale_s);
Expand All @@ -123,30 +128,33 @@ namespace rtm
//////////////////////////////////////////////////////////////////////////
// Converts a QVS transform into a 3x4 affine matrix.
//////////////////////////////////////////////////////////////////////////
RTM_DISABLE_SECURITY_COOKIE_CHECK inline matrix3x4f RTM_SIMD_CALL matrix_from_qvs(qvsf_arg0 transform) RTM_NO_EXCEPT
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE matrix3x4f RTM_SIMD_CALL matrix_from_qvs(qvsf_argn transform) RTM_NO_EXCEPT
{
return matrix_from_qvs(transform.rotation, transform.translation_scale, qvs_get_scale(transform));
}

//////////////////////////////////////////////////////////////////////////
// Sets a 3x4 affine matrix from a rotation quaternion, translation, and 3D scale.
//////////////////////////////////////////////////////////////////////////
RTM_DISABLE_SECURITY_COOKIE_CHECK inline matrix3x4f RTM_SIMD_CALL matrix_from_qvv(quatf_arg0 quat, vector4f_arg1 translation, vector4f_arg2 scale) RTM_NO_EXCEPT
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE matrix3x4f RTM_SIMD_CALL matrix_from_qvv(quatf_arg0 quat, vector4f_arg1 translation, vector4f_arg2 scale) RTM_NO_EXCEPT
{
RTM_ASSERT(quat_is_normalized(quat), "Quaternion is not normalized");

const float x2 = quat_get_x(quat) + quat_get_x(quat);
const float y2 = quat_get_y(quat) + quat_get_y(quat);
const float z2 = quat_get_z(quat) + quat_get_z(quat);
const float xx = quat_get_x(quat) * x2;
const float xy = quat_get_x(quat) * y2;
const float xz = quat_get_x(quat) * z2;
const float yy = quat_get_y(quat) * y2;
const float yz = quat_get_y(quat) * z2;
const float zz = quat_get_z(quat) * z2;
const float wx = quat_get_w(quat) * x2;
const float wy = quat_get_w(quat) * y2;
const float wz = quat_get_w(quat) * z2;
float quatval[4];
quat_store(quat, quatval);

const float x2 = quatval[0] + quatval[0];
const float y2 = quatval[1] + quatval[1];
const float z2 = quatval[2] + quatval[2];
const float xx = quatval[0] * x2;
const float xy = quatval[0] * y2;
const float xz = quatval[0] * z2;
const float yy = quatval[1] * y2;
const float yz = quatval[1] * z2;
const float zz = quatval[2] * z2;
const float wx = quatval[3] * x2;
const float wy = quatval[3] * y2;
const float wz = quatval[3] * z2;

const scalarf scale_x = vector_get_x_as_scalar(scale);
const scalarf scale_y = vector_get_y_as_scalar(scale);
Expand All @@ -161,7 +169,7 @@ namespace rtm
//////////////////////////////////////////////////////////////////////////
// Converts a QVV transform into a 3x4 affine matrix.
//////////////////////////////////////////////////////////////////////////
RTM_DISABLE_SECURITY_COOKIE_CHECK inline matrix3x4f RTM_SIMD_CALL matrix_from_qvv(qvvf_arg0 transform) RTM_NO_EXCEPT
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE matrix3x4f RTM_SIMD_CALL matrix_from_qvv(qvvf_argn transform) RTM_NO_EXCEPT
{
return matrix_from_qvv(transform.rotation, transform.translation, transform.scale);
}
Expand Down Expand Up @@ -209,7 +217,7 @@ namespace rtm
//////////////////////////////////////////////////////////////////////////
// Returns a new 3x4 matrix where the specified axis has been replaced on the input matrix.
//////////////////////////////////////////////////////////////////////////
RTM_DISABLE_SECURITY_COOKIE_CHECK inline matrix3x4f RTM_SIMD_CALL matrix_set_axis(matrix3x4f_arg0 input, vector4f_arg5 axis_value, axis4 axis) RTM_NO_EXCEPT
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE matrix3x4f RTM_SIMD_CALL matrix_set_axis(matrix3x4f_arg0 input, vector4f_arg5 axis_value, axis4 axis) RTM_NO_EXCEPT
{
switch (axis)
{
Expand Down
Loading

0 comments on commit 8554eac

Please # to comment.