@@ -1007,52 +1007,20 @@ pub const fn _MM_SHUFFLE(z: u32, y: u32, x: u32, w: u32) -> i32 {
1007
1007
#[ inline]
1008
1008
#[ target_feature( enable = "sse" ) ]
1009
1009
#[ cfg_attr( test, assert_instr( shufps, mask = 3 ) ) ]
1010
- #[ rustc_args_required_const( 2 ) ]
1011
- #[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
1012
- pub unsafe fn _mm_shuffle_ps ( a : __m128 , b : __m128 , mask : i32 ) -> __m128 {
1013
- let mask = ( mask & 0xFF ) as u8 ;
1014
-
1015
- macro_rules! shuffle_done {
1016
- ( $x01: expr, $x23: expr, $x45: expr, $x67: expr) => {
1017
- simd_shuffle4( a, b, [ $x01, $x23, $x45, $x67] )
1018
- } ;
1019
- }
1020
- macro_rules! shuffle_x67 {
1021
- ( $x01: expr, $x23: expr, $x45: expr) => {
1022
- match ( mask >> 6 ) & 0b11 {
1023
- 0b00 => shuffle_done!( $x01, $x23, $x45, 4 ) ,
1024
- 0b01 => shuffle_done!( $x01, $x23, $x45, 5 ) ,
1025
- 0b10 => shuffle_done!( $x01, $x23, $x45, 6 ) ,
1026
- _ => shuffle_done!( $x01, $x23, $x45, 7 ) ,
1027
- }
1028
- } ;
1029
- }
1030
- macro_rules! shuffle_x45 {
1031
- ( $x01: expr, $x23: expr) => {
1032
- match ( mask >> 4 ) & 0b11 {
1033
- 0b00 => shuffle_x67!( $x01, $x23, 4 ) ,
1034
- 0b01 => shuffle_x67!( $x01, $x23, 5 ) ,
1035
- 0b10 => shuffle_x67!( $x01, $x23, 6 ) ,
1036
- _ => shuffle_x67!( $x01, $x23, 7 ) ,
1037
- }
1038
- } ;
1039
- }
1040
- macro_rules! shuffle_x23 {
1041
- ( $x01: expr) => {
1042
- match ( mask >> 2 ) & 0b11 {
1043
- 0b00 => shuffle_x45!( $x01, 0 ) ,
1044
- 0b01 => shuffle_x45!( $x01, 1 ) ,
1045
- 0b10 => shuffle_x45!( $x01, 2 ) ,
1046
- _ => shuffle_x45!( $x01, 3 ) ,
1047
- }
1048
- } ;
1049
- }
1050
- match mask & 0b11 {
1051
- 0b00 => shuffle_x23 ! ( 0 ) ,
1052
- 0b01 => shuffle_x23 ! ( 1 ) ,
1053
- 0b10 => shuffle_x23 ! ( 2 ) ,
1054
- _ => shuffle_x23 ! ( 3 ) ,
1055
- }
1010
+ #[ rustc_legacy_const_generics( 2 ) ]
1011
+ #[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
1012
+ pub unsafe fn _mm_shuffle_ps < const mask: i32 > ( a : __m128 , b : __m128 ) -> __m128 {
1013
+ assert ! ( mask >= 0 && mask <= 255 ) ;
1014
+ simd_shuffle4 (
1015
+ a,
1016
+ b,
1017
+ [
1018
+ mask as u32 & 0b11 ,
1019
+ ( mask as u32 >> 2 ) & 0b11 ,
1020
+ ( ( mask as u32 >> 4 ) & 0b11 ) + 4 ,
1021
+ ( ( mask as u32 >> 6 ) & 0b11 ) + 4 ,
1022
+ ] ,
1023
+ )
1056
1024
}
1057
1025
1058
1026
/// Unpacks and interleaves single-precision (32-bit) floating-point elements
@@ -1725,6 +1693,14 @@ pub const _MM_HINT_T2: i32 = 1;
1725
1693
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
1726
1694
pub const _MM_HINT_NTA: i32 = 0 ;
1727
1695
1696
+ /// See [`_mm_prefetch`](fn._mm_prefetch.html).
1697
+ #[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
1698
+ pub const _MM_HINT_ET0: i32 = 7 ;
1699
+
1700
+ /// See [`_mm_prefetch`](fn._mm_prefetch.html).
1701
+ #[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
1702
+ pub const _MM_HINT_ET1: i32 = 6 ;
1703
+
1728
1704
/// Fetch the cache line that contains address `p` using the given `strategy`.
1729
1705
///
1730
1706
/// The `strategy` must be one of:
@@ -1742,6 +1718,10 @@ pub const _MM_HINT_NTA: i32 = 0;
1742
1718
/// but outside of the cache hierarchy. This is used to reduce access latency
1743
1719
/// without polluting the cache.
1744
1720
///
1721
+ /// * [`_MM_HINT_ET0`](constant._MM_HINT_ET0.html) and
1722
+ /// [`_MM_HINT_ET1`](constant._MM_HINT_ET1.html) are similar to `_MM_HINT_T0`
1723
+ /// and `_MM_HINT_T1` but indicate an anticipation to write to the address.
1724
+ ///
1745
1725
/// The actual implementation depends on the particular CPU. This instruction
1746
1726
/// is considered a hint, so the CPU is also free to simply ignore the request.
1747
1727
///
@@ -1769,24 +1749,12 @@ pub const _MM_HINT_NTA: i32 = 0;
1769
1749
#[ cfg_attr( test, assert_instr( prefetcht1, strategy = _MM_HINT_T1) ) ]
1770
1750
#[ cfg_attr( test, assert_instr( prefetcht2, strategy = _MM_HINT_T2) ) ]
1771
1751
#[ cfg_attr( test, assert_instr( prefetchnta, strategy = _MM_HINT_NTA) ) ]
1772
- #[ rustc_args_required_const( 1 ) ]
1773
- #[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
1774
- pub unsafe fn _mm_prefetch ( p : * const i8 , strategy : i32 ) {
1775
- // The `strategy` must be a compile-time constant, so we use a short form
1776
- // of `constify_imm8!` for now.
1777
- // We use the `llvm.prefetch` instrinsic with `rw` = 0 (read), and
1778
- // `cache type` = 1 (data cache). `locality` is based on our `strategy`.
1779
- macro_rules! pref {
1780
- ( $imm8: expr) => {
1781
- match $imm8 {
1782
- 0 => prefetch( p, 0 , 0 , 1 ) ,
1783
- 1 => prefetch( p, 0 , 1 , 1 ) ,
1784
- 2 => prefetch( p, 0 , 2 , 1 ) ,
1785
- _ => prefetch( p, 0 , 3 , 1 ) ,
1786
- }
1787
- } ;
1788
- }
1789
- pref ! ( strategy)
1752
+ #[ rustc_legacy_const_generics( 1 ) ]
1753
+ #[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
1754
+ pub unsafe fn _mm_prefetch < const strategy: i32 > ( p : * const i8 ) {
1755
+ // We use the `llvm.prefetch` intrinsic with `cache type` = 1 (data cache).
1756
+ // `rw` is bit 2 of `strategy` and `locality` is its low two bits.
1757
+ prefetch ( p, ( strategy >> 2 ) & 1 , strategy & 3 , 1 ) ;
1790
1758
}
1791
1759
1792
1760
/// Returns vector of type __m128 with undefined elements.
@@ -2976,7 +2944,7 @@ mod tests {
2976
2944
unsafe fn test_mm_shuffle_ps ( ) {
2977
2945
let a = _mm_setr_ps ( 1.0 , 2.0 , 3.0 , 4.0 ) ;
2978
2946
let b = _mm_setr_ps ( 5.0 , 6.0 , 7.0 , 8.0 ) ;
2979
- let r = _mm_shuffle_ps ( a, b, 0b00_01_01_11 ) ;
2947
+ let r = _mm_shuffle_ps :: < 0b00_01_01_11 > ( a, b) ;
2980
2948
assert_eq_m128 ( r, _mm_setr_ps ( 4.0 , 2.0 , 6.0 , 5.0 ) ) ;
2981
2949
}
2982
2950
0 commit comments