@@ -7866,16 +7866,16 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre
7866
7866
case NI_AVX512F_BroadcastVector128ToVector512:
7867
7867
case NI_AVX512F_BroadcastVector256ToVector512:
7868
7868
{
7869
+ assert (!supportsSIMDScalarLoads);
7870
+
7869
7871
if (parentNode->OperIsMemoryLoad ())
7870
7872
{
7871
7873
supportsGeneralLoads = !childNode->OperIsHWIntrinsic ();
7872
7874
break ;
7873
7875
}
7874
- else
7875
- {
7876
- supportsGeneralLoads = true ;
7877
- break ;
7878
- }
7876
+
7877
+ supportsGeneralLoads = true ;
7878
+ break ;
7879
7879
}
7880
7880
7881
7881
case NI_SSE41_ConvertToVector128Int16:
@@ -7941,26 +7941,47 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre
7941
7941
}
7942
7942
7943
7943
case NI_SSE2_ConvertToVector128Double:
7944
- case NI_SSE3_MoveAndDuplicate:
7945
7944
case NI_AVX_ConvertToVector256Double:
7945
+ case NI_AVX512F_ConvertToVector512Double:
7946
+ case NI_AVX512F_VL_ConvertToVector128Double:
7947
+ case NI_AVX512F_VL_ConvertToVector256Double:
7946
7948
{
7947
7949
assert (!supportsSIMDScalarLoads);
7948
7950
7949
7951
// Most instructions under the non-VEX encoding require aligned operands.
7950
7952
// Those used for Sse2.ConvertToVector128Double (CVTDQ2PD and CVTPS2PD)
7951
- // and Sse3.MoveAndDuplicate (MOVDDUP) are exceptions and don't fail for
7952
- // unaligned inputs as they read mem64 (half the vector width) instead
7953
+ // are exceptions and don't fail for unaligned inputs as they read half
7954
+ // the vector width instead
7953
7955
7954
7956
supportsAlignedSIMDLoads = !comp->opts .MinOpts ();
7955
7957
supportsUnalignedSIMDLoads = true ;
7956
7958
7957
7959
const unsigned expectedSize = genTypeSize (parentNode->TypeGet ()) / 2 ;
7958
7960
const unsigned operandSize = genTypeSize (childNode->TypeGet ());
7959
7961
7960
- // For broadcasts we can only optimize constants and memory operands
7961
- const bool broadcastIsContainable = childNode->OperIsConst () || childNode->isMemoryOp ();
7962
- supportsGeneralLoads =
7963
- broadcastIsContainable && supportsUnalignedSIMDLoads && (operandSize >= expectedSize);
7962
+ if (childNode->OperIsConst () || childNode->isMemoryOp ())
7963
+ {
7964
+ // For broadcasts we can only optimize constants and memory operands
7965
+ // since we're going from a smaller base type to a larger base type
7966
+ supportsGeneralLoads = supportsUnalignedSIMDLoads && (operandSize >= expectedSize);
7967
+ }
7968
+ break ;
7969
+ }
7970
+
7971
+ case NI_SSE3_MoveAndDuplicate:
7972
+ {
7973
+ // Most instructions under the non-VEX encoding require aligned operands.
7974
+ // Those used for Sse3.MoveAndDuplicate (MOVDDUP) are exceptions and don't
7975
+ // fail for unaligned inputs as they read half the vector width instead
7976
+
7977
+ supportsAlignedSIMDLoads = !comp->opts .MinOpts ();
7978
+ supportsUnalignedSIMDLoads = true ;
7979
+
7980
+ const unsigned expectedSize = genTypeSize (parentNode->TypeGet ()) / 2 ;
7981
+ const unsigned operandSize = genTypeSize (childNode->TypeGet ());
7982
+
7983
+ supportsGeneralLoads = supportsUnalignedSIMDLoads && (operandSize >= expectedSize);
7984
+ supportsSIMDScalarLoads = true ;
7964
7985
break ;
7965
7986
}
7966
7987
@@ -7986,8 +8007,6 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre
7986
8007
break ;
7987
8008
}
7988
8009
}
7989
-
7990
- assert (supportsSIMDScalarLoads == false );
7991
8010
break ;
7992
8011
}
7993
8012
0 commit comments