diff --git a/includes/rtm/mask4d.h b/includes/rtm/mask4d.h
index 1cb09c4..08ebc23 100644
--- a/includes/rtm/mask4d.h
+++ b/includes/rtm/mask4d.h
@@ -400,6 +400,34 @@ namespace rtm
 #endif
 	}
 
+	//////////////////////////////////////////////////////////////////////////
+	// Per component logical NOT of the input: ~input
+	//////////////////////////////////////////////////////////////////////////
+	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE mask4d RTM_SIMD_CALL mask_not(mask4d_arg0 input) RTM_NO_EXCEPT
+	{
+#if defined(RTM_SSE2_INTRINSICS)
+		const __m128i true_mask = _mm_set1_epi32(-1);	// Every bit set, without narrowing an unsigned literal
+		const __m128d xy = _mm_andnot_pd(input.xy, _mm_castsi128_pd(true_mask));	// andnot(a, b) == ~a & b
+		const __m128d zw = _mm_andnot_pd(input.zw, _mm_castsi128_pd(true_mask));
+		return mask4d{ xy, zw };
+#else
+		const uint64_t* input_ = rtm_impl::bit_cast<const uint64_t*>(&input);	// Read the input as four uint64_t lanes
+
+		union	// Filled in lane by lane, then returned as a mask4d
+		{
+			mask4d vector;
+			uint64_t scalar[4];
+		} result;
+
+		result.scalar[0] = ~input_[0];
+		result.scalar[1] = ~input_[1];
+		result.scalar[2] = ~input_[2];
+		result.scalar[3] = ~input_[3];
+
+		return result.vector;
+#endif
+	}
+
 	RTM_IMPL_VERSION_NAMESPACE_END
 }
 
diff --git a/includes/rtm/mask4f.h b/includes/rtm/mask4f.h
index 89d9dc0..7280048 100644
--- a/includes/rtm/mask4f.h
+++ b/includes/rtm/mask4f.h
@@ -359,6 +359,34 @@ namespace rtm
 #endif
 	}
 
+	//////////////////////////////////////////////////////////////////////////
+	// Per component logical NOT of the input: ~input
+	//////////////////////////////////////////////////////////////////////////
+	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE mask4f RTM_SIMD_CALL mask_not(mask4f_arg0 input) RTM_NO_EXCEPT
+	{
+#if defined(RTM_SSE2_INTRINSICS)
+		const __m128i true_mask = _mm_set1_epi32(-1);	// Every bit set, without narrowing an unsigned literal
+		return _mm_andnot_ps(input, _mm_castsi128_ps(true_mask));	// andnot(a, b) == ~a & b
+#elif defined(RTM_NEON_INTRINSICS)
+		return vmvnq_u32(input);	// Bitwise NOT of each 32-bit lane
+#else
+		const uint32_t* input_ = rtm_impl::bit_cast<const uint32_t*>(&input);	// Read the input as four uint32_t lanes
+
+		union	// Filled in lane by lane, then returned as a mask4f
+		{
+			mask4f vector;
+			uint32_t scalar[4];
+		} result;
+
+		result.scalar[0] = ~input_[0];
+		result.scalar[1] = ~input_[1];
+		result.scalar[2] = ~input_[2];
+		result.scalar[3] = ~input_[3];
+
+		return result.vector;
+#endif
+	}
+
 	RTM_IMPL_VERSION_NAMESPACE_END
 }
 
diff --git a/includes/rtm/mask4i.h b/includes/rtm/mask4i.h
index a8d5e3f..e633604 100644
--- a/includes/rtm/mask4i.h
+++ b/includes/rtm/mask4i.h
@@ -327,6 +327,21 @@ namespace rtm
 #endif
 	}
 
+	//////////////////////////////////////////////////////////////////////////
+	// Per component logical NOT of the input: ~input
+	//////////////////////////////////////////////////////////////////////////
+	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE mask4i RTM_SIMD_CALL mask_not(mask4i_arg0 input) RTM_NO_EXCEPT
+	{
+#if defined(RTM_SSE2_INTRINSICS)
+		const __m128i true_mask = _mm_set1_epi32(-1);	// Every bit set, without narrowing an unsigned literal
+		return _mm_andnot_si128(input, true_mask);	// andnot(a, b) == ~a & b
+#elif defined(RTM_NEON_INTRINSICS)
+		return RTM_IMPL_MASK4i_SET(vmvnq_u32(RTM_IMPL_MASK4i_GET(input)));	// Bitwise NOT of each 32-bit lane
+#else
+		return mask4i{ ~input.x, ~input.y, ~input.z, ~input.w };
+#endif
+	}
+
 	RTM_IMPL_VERSION_NAMESPACE_END
 }
 
diff --git a/includes/rtm/mask4q.h b/includes/rtm/mask4q.h
index 3127f6d..fcbf102 100644
--- a/includes/rtm/mask4q.h
+++ b/includes/rtm/mask4q.h
@@ -364,6 +364,34 @@ namespace rtm
 #endif
 	}
 
+	//////////////////////////////////////////////////////////////////////////
+	// Per component logical NOT of the input: ~input
+	//////////////////////////////////////////////////////////////////////////
+	RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE mask4q RTM_SIMD_CALL mask_not(mask4q_arg0 input) RTM_NO_EXCEPT
+	{
+#if defined(RTM_SSE2_INTRINSICS)
+		const __m128i true_mask = _mm_set1_epi32(-1);	// Every bit set, without narrowing an unsigned literal
+		const __m128i xy = _mm_andnot_si128(input.xy, true_mask);	// andnot(a, b) == ~a & b
+		const __m128i zw = _mm_andnot_si128(input.zw, true_mask);
+		return mask4q{ xy, zw };
+#else
+		const uint64_t* input_ = rtm_impl::bit_cast<const uint64_t*>(&input);	// Read the input as four uint64_t lanes
+
+		union	// Filled in lane by lane, then returned as a mask4q
+		{
+			mask4q vector;
+			uint64_t scalar[4];
+		} result;
+
+		result.scalar[0] = ~input_[0];
+		result.scalar[1] = ~input_[1];
+		result.scalar[2] = ~input_[2];
+		result.scalar[3] = ~input_[3];
+
+		return result.vector;
+#endif
+	}
+
 	RTM_IMPL_VERSION_NAMESPACE_END
 }
 
diff --git a/tests/sources/test_mask4.cpp b/tests/sources/test_mask4.cpp
index 81d9caf..3dc4e91 100644
--- a/tests/sources/test_mask4.cpp
+++ b/tests/sources/test_mask4.cpp
@@ -100,6 +100,26 @@ inline Mask4Type reference_mask_xor(const Mask4Type& input0, const Mask4Type& in
 	return result;
 }
 
+template<typename IntType, typename Mask4Type>	// IntType is given explicitly, Mask4Type is deduced from the argument
+inline Mask4Type reference_mask_not(const Mask4Type& input)	// Reference result: flips every bit of the four IntType lanes
+{
+	IntType input_[4];
+
+	static_assert(sizeof(Mask4Type) == sizeof(input_), "Unexpected size");
+	std::memcpy(&input_[0], &input, sizeof(Mask4Type));	// Copy the mask bytes out into plain integers
+
+	IntType result_[4];
+	result_[0] = ~input_[0];
+	result_[1] = ~input_[1];
+	result_[2] = ~input_[2];
+	result_[3] = ~input_[3];
+
+	Mask4Type result;
+	std::memcpy(&result, &result_[0], sizeof(Mask4Type));	// Copy the flipped integers back into a mask
+
+	return result;
+}
+
 template
 static void test_mask_impl()
 {
@@ -298,6 +318,10 @@ static void test_mask_impl()
 		CHECK(mask_all_equal(mask_xor(mask0, mask1), reference_mask_xor(mask0, mask1)));
 		CHECK(mask_all_equal(mask_xor(mask0, mask2), reference_mask_xor(mask0, mask2)));
 		CHECK(mask_all_equal(mask_xor(mask1, mask2), reference_mask_xor(mask1, mask2)));
+
+		CHECK(mask_all_equal(mask_not(mask0), reference_mask_not<IntType>(mask0)));
+		CHECK(mask_all_equal(mask_not(mask1), reference_mask_not<IntType>(mask1)));
+		CHECK(mask_all_equal(mask_not(mask2), reference_mask_not<IntType>(mask2)));
 	}
 }
 