Skip to content

Commit e3ff66b

Browse files
Ensure that Sse3.MoveAndDuplicate correctly tracks supporting SIMD scalar loads (#97783)
1 parent a97ddd2 commit e3ff66b

File tree

1 file changed

+33
-14
lines changed

1 file changed

+33
-14
lines changed

src/coreclr/jit/lowerxarch.cpp

Lines changed: 33 additions & 14 deletions
Original file line number · Diff line number · Diff line change
@@ -7866,16 +7866,16 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre
78667866
case NI_AVX512F_BroadcastVector128ToVector512:
78677867
case NI_AVX512F_BroadcastVector256ToVector512:
78687868
{
7869+
assert(!supportsSIMDScalarLoads);
7870+
78697871
if (parentNode->OperIsMemoryLoad())
78707872
{
78717873
supportsGeneralLoads = !childNode->OperIsHWIntrinsic();
78727874
break;
78737875
}
7874-
else
7875-
{
7876-
supportsGeneralLoads = true;
7877-
break;
7878-
}
7876+
7877+
supportsGeneralLoads = true;
7878+
break;
78797879
}
78807880

78817881
case NI_SSE41_ConvertToVector128Int16:
@@ -7941,26 +7941,47 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre
79417941
}
79427942

79437943
case NI_SSE2_ConvertToVector128Double:
7944-
case NI_SSE3_MoveAndDuplicate:
79457944
case NI_AVX_ConvertToVector256Double:
7945+
case NI_AVX512F_ConvertToVector512Double:
7946+
case NI_AVX512F_VL_ConvertToVector128Double:
7947+
case NI_AVX512F_VL_ConvertToVector256Double:
79467948
{
79477949
assert(!supportsSIMDScalarLoads);
79487950

79497951
// Most instructions under the non-VEX encoding require aligned operands.
79507952
// Those used for Sse2.ConvertToVector128Double (CVTDQ2PD and CVTPS2PD)
7951-
// and Sse3.MoveAndDuplicate (MOVDDUP) are exceptions and don't fail for
7952-
// unaligned inputs as they read mem64 (half the vector width) instead
7953+
// are exceptions and don't fail for unaligned inputs as they read half
7954+
// the vector width instead
79537955

79547956
supportsAlignedSIMDLoads = !comp->opts.MinOpts();
79557957
supportsUnalignedSIMDLoads = true;
79567958

79577959
const unsigned expectedSize = genTypeSize(parentNode->TypeGet()) / 2;
79587960
const unsigned operandSize = genTypeSize(childNode->TypeGet());
79597961

7960-
// For broadcasts we can only optimize constants and memory operands
7961-
const bool broadcastIsContainable = childNode->OperIsConst() || childNode->isMemoryOp();
7962-
supportsGeneralLoads =
7963-
broadcastIsContainable && supportsUnalignedSIMDLoads && (operandSize >= expectedSize);
7962+
if (childNode->OperIsConst() || childNode->isMemoryOp())
7963+
{
7964+
// For broadcasts we can only optimize constants and memory operands
7965+
// since we're going from a smaller base type to a larger base type
7966+
supportsGeneralLoads = supportsUnalignedSIMDLoads && (operandSize >= expectedSize);
7967+
}
7968+
break;
7969+
}
7970+
7971+
case NI_SSE3_MoveAndDuplicate:
7972+
{
7973+
// Most instructions under the non-VEX encoding require aligned operands.
7974+
// Those used for Sse3.MoveAndDuplicate (MOVDDUP) are exceptions and don't
7975+
// fail for unaligned inputs as they read half the vector width instead
7976+
7977+
supportsAlignedSIMDLoads = !comp->opts.MinOpts();
7978+
supportsUnalignedSIMDLoads = true;
7979+
7980+
const unsigned expectedSize = genTypeSize(parentNode->TypeGet()) / 2;
7981+
const unsigned operandSize = genTypeSize(childNode->TypeGet());
7982+
7983+
supportsGeneralLoads = supportsUnalignedSIMDLoads && (operandSize >= expectedSize);
7984+
supportsSIMDScalarLoads = true;
79647985
break;
79657986
}
79667987

@@ -7986,8 +8007,6 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre
79868007
break;
79878008
}
79888009
}
7989-
7990-
assert(supportsSIMDScalarLoads == false);
79918010
break;
79928011
}
79938012

0 commit comments

Comments (0)