Skip to content

Commit 1df4af6

Browse files
authored
Reapply "[AArch64][SVE] Pair SVE fill/spill into LDP/STP with -msve-vector-bits=128." (#135177)
Reapplies #134068. The first version of the patch was missing a check that prevents an SVE fill/spill from being paired with a Neon load/store instruction; this could happen specifically when the Neon instruction was an unscaled one.
1 parent 53cd5cf commit 1df4af6

File tree

4 files changed

+401
-1
lines changed

4 files changed

+401
-1
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

+15
Original file line numberDiff line numberDiff line change
@@ -2760,6 +2760,9 @@ bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
27602760
case AArch64::LDRXpre:
27612761
case AArch64::LDURSWi:
27622762
case AArch64::LDRSWpre:
2763+
// SVE instructions.
2764+
case AArch64::LDR_ZXI:
2765+
case AArch64::STR_ZXI:
27632766
return true;
27642767
}
27652768
}
@@ -2912,6 +2915,18 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
29122915
return false;
29132916
}
29142917

2918+
// Pairing SVE fills/spills is only valid for little-endian targets that
2919+
// implement VLS 128.
2920+
switch (MI.getOpcode()) {
2921+
default:
2922+
break;
2923+
case AArch64::LDR_ZXI:
2924+
case AArch64::STR_ZXI:
2925+
if (!Subtarget.isLittleEndian() ||
2926+
Subtarget.getSVEVectorSizeInBits() != 128)
2927+
return false;
2928+
}
2929+
29152930
// Check if this load/store has a hint to avoid pair formation.
29162931
// MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
29172932
if (isLdStPairSuppressed(MI))

llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp

+29-1
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,7 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc,
298298
case AArch64::STRXui:
299299
case AArch64::STRXpre:
300300
case AArch64::STURXi:
301+
case AArch64::STR_ZXI:
301302
case AArch64::LDRDui:
302303
case AArch64::LDURDi:
303304
case AArch64::LDRDpre:
@@ -316,6 +317,7 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc,
316317
case AArch64::LDRSui:
317318
case AArch64::LDURSi:
318319
case AArch64::LDRSpre:
320+
case AArch64::LDR_ZXI:
319321
return Opc;
320322
case AArch64::LDRSWui:
321323
return AArch64::LDRWui;
@@ -361,6 +363,7 @@ static unsigned getMatchingPairOpcode(unsigned Opc) {
361363
return AArch64::STPDpre;
362364
case AArch64::STRQui:
363365
case AArch64::STURQi:
366+
case AArch64::STR_ZXI:
364367
return AArch64::STPQi;
365368
case AArch64::STRQpre:
366369
return AArch64::STPQpre;
@@ -386,6 +389,7 @@ static unsigned getMatchingPairOpcode(unsigned Opc) {
386389
return AArch64::LDPDpre;
387390
case AArch64::LDRQui:
388391
case AArch64::LDURQi:
392+
case AArch64::LDR_ZXI:
389393
return AArch64::LDPQi;
390394
case AArch64::LDRQpre:
391395
return AArch64::LDPQpre;
@@ -1225,6 +1229,16 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
12251229
(void)MIBSXTW;
12261230
LLVM_DEBUG(dbgs() << " Extend operand:\n ");
12271231
LLVM_DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs()));
1232+
} else if (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI) {
1233+
// We are combining SVE fill/spill to LDP/STP, so we need to use the Q
1234+
// variant of the registers.
1235+
MachineOperand &MOp0 = MIB->getOperand(0);
1236+
MachineOperand &MOp1 = MIB->getOperand(1);
1237+
assert(AArch64::ZPRRegClass.contains(MOp0.getReg()) &&
1238+
AArch64::ZPRRegClass.contains(MOp1.getReg()) && "Invalid register.");
1239+
MOp0.setReg(AArch64::Q0 + (MOp0.getReg() - AArch64::Z0));
1240+
MOp1.setReg(AArch64::Q0 + (MOp1.getReg() - AArch64::Z0));
1241+
LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
12281242
} else {
12291243
LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
12301244
}
@@ -1499,6 +1513,12 @@ static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
14991513
if (OpcA == OpcB)
15001514
return !AArch64InstrInfo::isPreLdSt(FirstMI);
15011515

1516+
// Bail out if one of the opcodes is SVE fill/spill, as we currently don't
1517+
// allow pairing them with other instructions.
1518+
if (OpcA == AArch64::LDR_ZXI || OpcA == AArch64::STR_ZXI ||
1519+
OpcB == AArch64::LDR_ZXI || OpcB == AArch64::STR_ZXI)
1520+
return false;
1521+
15021522
// Two pre ld/st of different opcodes cannot be merged either
15031523
if (AArch64InstrInfo::isPreLdSt(FirstMI) && AArch64InstrInfo::isPreLdSt(MI))
15041524
return false;
@@ -2659,7 +2679,8 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
26592679
// Get the needed alignments to check them if
26602680
// ldp-aligned-only/stp-aligned-only features are opted.
26612681
uint64_t MemAlignment = MemOp->getAlign().value();
2662-
uint64_t TypeAlignment = Align(MemOp->getSize().getValue()).value();
2682+
uint64_t TypeAlignment =
2683+
Align(MemOp->getSize().getValue().getKnownMinValue()).value();
26632684

26642685
if (MemAlignment < 2 * TypeAlignment) {
26652686
NumFailedAlignmentCheck++;
@@ -2820,11 +2841,18 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
28202841
}
28212842
// 3) Find loads and stores that can be merged into a single load or store
28222843
// pair instruction.
2844+
// When compiling for SVE 128, also try to combine SVE fill/spill
2845+
// instructions into LDP/STP.
28232846
// e.g.,
28242847
// ldr x0, [x2]
28252848
// ldr x1, [x2, #8]
28262849
// ; becomes
28272850
// ldp x0, x1, [x2]
2851+
// e.g.,
2852+
// ldr z0, [x2]
2853+
// ldr z1, [x2, #1, mul vl]
2854+
// ; becomes
2855+
// ldp q0, q1, [x2]
28282856

28292857
if (MBB.getParent()->getRegInfo().tracksLiveness()) {
28302858
DefinedInBB.clear();

0 commit comments

Comments
 (0)