@@ -298,6 +298,7 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc,
  case AArch64::STRXui:
  case AArch64::STRXpre:
  case AArch64::STURXi:
+ case AArch64::STR_ZXI:
  case AArch64::LDRDui:
  case AArch64::LDURDi:
  case AArch64::LDRDpre:
@@ -316,6 +317,7 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc,
  case AArch64::LDRSui:
  case AArch64::LDURSi:
  case AArch64::LDRSpre:
+ case AArch64::LDR_ZXI:
    return Opc;
  case AArch64::LDRSWui:
    return AArch64::LDRWui;
@@ -361,6 +363,7 @@ static unsigned getMatchingPairOpcode(unsigned Opc) {
    return AArch64::STPDpre;
  case AArch64::STRQui:
  case AArch64::STURQi:
+ case AArch64::STR_ZXI:
    return AArch64::STPQi;
  case AArch64::STRQpre:
    return AArch64::STPQpre;
@@ -386,6 +389,7 @@ static unsigned getMatchingPairOpcode(unsigned Opc) {
    return AArch64::LDPDpre;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
+ case AArch64::LDR_ZXI:
    return AArch64::LDPQi;
  case AArch64::LDRQpre:
    return AArch64::LDPQpre;
@@ -1225,6 +1229,16 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
      (void)MIBSXTW;
      LLVM_DEBUG(dbgs() << "  Extend operand:\n");
      LLVM_DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs()));
+   } else if (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI) {
+     // We are combining SVE fill/spill to LDP/STP, so we need to use the Q
+     // variant of the registers.
+     MachineOperand &MOp0 = MIB->getOperand(0);
+     MachineOperand &MOp1 = MIB->getOperand(1);
+     assert(AArch64::ZPRRegClass.contains(MOp0.getReg()) &&
+            AArch64::ZPRRegClass.contains(MOp1.getReg()) && "Invalid register.");
+     MOp0.setReg(AArch64::Q0 + (MOp0.getReg() - AArch64::Z0));
+     MOp1.setReg(AArch64::Q0 + (MOp1.getReg() - AArch64::Z0));
+     LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
    } else {
      LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
    }
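
The Z-to-Q rewrite above relies on the generated AArch64 register enums keeping Q0..Q31 and Z0..Z31 each contiguous and in the same order, so a Z register converts to its overlapping Q sub-register by plain index arithmetic. A minimal standalone sketch of that arithmetic (the enum values below are illustrative stand-ins, not the real TableGen-generated constants):

#include <cassert>
#include <cstdio>

// Illustrative values only; the real enums come from TableGen, but each
// register file (Q0..Q31, Z0..Z31) is enumerated contiguously and in order.
enum Reg : unsigned { Q0 = 100, Z0 = 200 };

unsigned zToQ(unsigned ZReg) {
  assert(ZReg >= Z0 && ZReg < Z0 + 32 && "expected a Z register");
  return Q0 + (ZReg - Z0); // same arithmetic as the MOp0/MOp1 rewrites above
}

int main() {
  std::printf("Z3 maps to Q%u\n", zToQ(Z0 + 3) - Q0); // prints "Z3 maps to Q3"
  return 0;
}
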
@@ -1499,6 +1513,12 @@ static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
  if (OpcA == OpcB)
    return !AArch64InstrInfo::isPreLdSt(FirstMI);

+ // Bail out if one of the opcodes is SVE fill/spill, as we currently don't
+ // allow pairing them with other instructions.
+ if (OpcA == AArch64::LDR_ZXI || OpcA == AArch64::STR_ZXI ||
+     OpcB == AArch64::LDR_ZXI || OpcB == AArch64::STR_ZXI)
+   return false;
+
  // Two pre ld/st of different opcodes cannot be merged either
  if (AArch64InstrInfo::isPreLdSt(FirstMI) && AArch64InstrInfo::isPreLdSt(MI))
    return false;
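
Since identical opcodes were already accepted by the `OpcA == OpcB` check above, this bail-out only ever rejects candidates that mix an SVE fill/spill with a different opcode. A self-contained sketch of the same predicate, with stand-in opcode values and a hypothetical helper name that is not part of the patch:

// Stand-in opcode values; the real ones are the generated AArch64::* enums.
enum : unsigned { LDR_ZXI = 1, STR_ZXI = 2 };

static bool isSVEFillSpill(unsigned Opc) { // hypothetical helper
  return Opc == LDR_ZXI || Opc == STR_ZXI;
}

// Any pair reaching this point with a fill/spill opcode mixes it with a
// different opcode (equal opcodes were accepted earlier), so reject it.
static bool rejectsMixedSVEPair(unsigned OpcA, unsigned OpcB) {
  return isSVEFillSpill(OpcA) || isSVEFillSpill(OpcB);
}
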
@@ -2659,7 +2679,8 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
  // Get the needed alignments to check them if
  // ldp-aligned-only/stp-aligned-only features are opted.
  uint64_t MemAlignment = MemOp->getAlign().value();
- uint64_t TypeAlignment = Align(MemOp->getSize().getValue()).value();
+ uint64_t TypeAlignment =
+     Align(MemOp->getSize().getValue().getKnownMinValue()).value();

  if (MemAlignment < 2 * TypeAlignment) {
    NumFailedAlignmentCheck++;
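
The `getKnownMinValue()` change matters because an SVE fill/spill's memory operand has a scalable size: narrowing it straight to a fixed integer is invalid, while the known minimum (16 bytes for one vector) is well defined and, at VL == 128, also exact. A small sketch against llvm::TypeSize (assumes an LLVM tree to build against):

#include "llvm/Support/TypeSize.h"
#include <cstdint>

// The old expression implicitly narrowed a possibly scalable TypeSize to
// uint64_t; getKnownMinValue() is valid for fixed and scalable sizes alike.
uint64_t typeAlignmentBytes(llvm::TypeSize Size) {
  return Size.getKnownMinValue();
}

// typeAlignmentBytes(llvm::TypeSize::getScalable(16)) == 16  (vscale x 16 B)
// typeAlignmentBytes(llvm::TypeSize::getFixed(16))    == 16
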
@@ -2820,11 +2841,18 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
  }
  // 3) Find loads and stores that can be merged into a single load or store
  //    pair instruction.
+ //    When compiling for SVE 128, also try to combine SVE fill/spill
+ //    instructions into LDP/STP.
  //      e.g.,
  //        ldr x0, [x2]
  //        ldr x1, [x2, #8]
  //        ; becomes
  //        ldp x0, x1, [x2]
+ //      e.g.,
+ //        ldr z0, [x2]
+ //        ldr z1, [x2, #1, mul vl]
+ //        ; becomes
+ //        ldp q0, q1, [x2]

  if (MBB.getParent()->getRegInfo().tracksLiveness()) {
    DefinedInBB.clear();
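
As a usage illustration, with the vector length fixed to 128 bits (e.g. clang's -msve-vector-bits=128), adjacent full-register SVE loads and stores of the kind shown in the comment can be paired. Whether a given front end actually emits the unpredicated fill/spill forms for this source is a codegen decision, so this sketches only the intended pattern:

// Sketch only: assumes -msve-vector-bits=128 and that codegen chooses the
// unpredicated fill/spill forms (ldr/str z) for these accesses.
//   clang++ -O2 --target=aarch64-linux-gnu -march=armv8-a+sve \
//           -msve-vector-bits=128 -c copy32.cpp
#include <arm_sve.h>

void copy32(int8_t *dst, const int8_t *src) {
  svbool_t pg = svptrue_b8();
  svint8_t lo = svld1_s8(pg, src);       // intended: ldr z0, [x1]
  svint8_t hi = svld1_s8(pg, src + 16);  // intended: ldr z1, [x1, #1, mul vl]
  svst1_s8(pg, dst, lo);                 // intended: str z0, [x0]
  svst1_s8(pg, dst + 16, hi);            // intended: str z1, [x0, #1, mul vl]
}
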