@@ -442,6 +442,8 @@ static std::optional<unsigned> getSmallBestKnownTC(ScalarEvolution &SE,
442
442
namespace {
443
443
// Forward declare GeneratedRTChecks.
444
444
class GeneratedRTChecks;
445
+
446
+ using SCEV2ValueTy = DenseMap<const SCEV *, Value *>; // Type of the ExpandedSCEVs maps used in this file: SCEV expression -> Value looked up as its expansion.
445
447
} // namespace
446
448
447
449
namespace llvm {
@@ -497,8 +499,10 @@ class InnerLoopVectorizer {
497
499
/// loop and the start value for the canonical induction, if it is != 0. The
498
500
/// latter is the case when vectorizing the epilogue loop. In the case of
499
501
/// epilogue vectorization, this function is overridden to handle the more
500
- /// complex control flow around the loops.
501
- virtual std::pair<BasicBlock *, Value *> createVectorizedLoopSkeleton();
502
+ /// complex control flow around the loops. \p ExpandedSCEVs is used to
503
+ /// look up SCEV expansions for expressions needed during skeleton creation.
504
+ virtual std::pair<BasicBlock *, Value *>
505
+ createVectorizedLoopSkeleton(const SCEV2ValueTy &ExpandedSCEVs);
502
506
503
507
/// Fix the vectorized code, taking care of header phi's, live-outs, and more.
504
508
void fixVectorizedLoop(VPTransformState &State, VPlan &Plan);
@@ -555,12 +559,13 @@ class InnerLoopVectorizer {
555
559
556
560
/// Create a new phi node for the induction variable \p OrigPhi to resume
557
561
/// iteration count in the scalar epilogue, from where the vectorized loop
558
- /// left off. In cases where the loop skeleton is more complicated (eg.
559
- /// epilogue vectorization) and the resume values can come from an additional
560
- /// bypass block, the \p AdditionalBypass pair provides information about the
561
- /// bypass block and the end value on the edge from bypass to this loop.
562
+ /// left off. \p Step is the SCEV-expanded induction step to use. In cases
563
+ /// where the loop skeleton is more complicated (i.e., epilogue vectorization)
564
+ /// and the resume values can come from an additional bypass block, the \p
565
+ /// AdditionalBypass pair provides information about the bypass block and the
566
+ /// end value on the edge from bypass to this loop.
562
567
PHINode *createInductionResumeValue(
563
- PHINode *OrigPhi, const InductionDescriptor &ID,
568
+ PHINode *OrigPhi, const InductionDescriptor &ID, Value *Step,
564
569
ArrayRef<BasicBlock *> BypassBlocks,
565
570
std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr, nullptr});
566
571
@@ -646,6 +651,7 @@ class InnerLoopVectorizer {
646
651
/// block, the \p AdditionalBypass pair provides information about the bypass
647
652
/// block and the end value on the edge from bypass to this loop.
648
653
void createInductionResumeValues(
654
+ const SCEV2ValueTy &ExpandedSCEVs,
649
655
std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr, nullptr});
650
656
651
657
/// Complete the loop skeleton by adding debug MDs, creating appropriate
@@ -835,15 +841,18 @@ class InnerLoopAndEpilogueVectorizer : public InnerLoopVectorizer {
835
841
836
842
// Override this function to handle the more complex control flow around the
837
843
// three loops.
838
- std::pair<BasicBlock *, Value *> createVectorizedLoopSkeleton() final {
839
- return createEpilogueVectorizedLoopSkeleton();
844
+ std::pair<BasicBlock *, Value *> createVectorizedLoopSkeleton(
845
+
846
+ const SCEV2ValueTy &ExpandedSCEVs) final { // ExpandedSCEVs is passed through unchanged.
847
+
848
+ return createEpilogueVectorizedLoopSkeleton(ExpandedSCEVs); // Delegate to the pure-virtual, strategy-specific hook declared below.
840
849
}
841
850
842
851
/// The interface for creating a vectorized skeleton using one of two
843
852
/// different strategies, each corresponding to one execution of the vplan
844
853
/// as described above.
845
854
virtual std::pair<BasicBlock *, Value *>
846
- createEpilogueVectorizedLoopSkeleton() = 0;
855
+ createEpilogueVectorizedLoopSkeleton(const SCEV2ValueTy &ExpandedSCEVs ) = 0;
847
856
848
857
/// Holds and updates state information required to vectorize the main loop
849
858
/// and its epilogue in two separate passes. This setup helps us avoid
@@ -871,7 +880,8 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
871
880
EPI, LVL, CM, BFI, PSI, Check) {}
872
881
/// Implements the interface for creating a vectorized skeleton using the
873
882
/// *main loop* strategy (ie the first pass of vplan execution).
874
- std::pair<BasicBlock *, Value *> createEpilogueVectorizedLoopSkeleton() final;
883
+ std::pair<BasicBlock *, Value *>
884
+ createEpilogueVectorizedLoopSkeleton(const SCEV2ValueTy &ExpandedSCEVs) final;
875
885
876
886
protected:
877
887
/// Emits an iteration count bypass check once for the main loop (when \p
@@ -901,7 +911,8 @@ class EpilogueVectorizerEpilogueLoop : public InnerLoopAndEpilogueVectorizer {
901
911
}
902
912
/// Implements the interface for creating a vectorized skeleton using the
903
913
/// *epilogue loop* strategy (ie the second pass of vplan execution).
904
- std::pair<BasicBlock *, Value *> createEpilogueVectorizedLoopSkeleton() final;
914
+ std::pair<BasicBlock *, Value *>
915
+ createEpilogueVectorizedLoopSkeleton(const SCEV2ValueTy &ExpandedSCEVs) final;
905
916
906
917
protected:
907
918
/// Emits an iteration count bypass check after the main vector loop has
@@ -2424,21 +2435,6 @@ static void buildScalarSteps(Value *ScalarIV, Value *Step,
2424
2435
}
2425
2436
}
2426
2437
2427
- // Generate code for the induction step. Note that induction steps are
2428
- // required to be loop-invariant
2429
- static Value *CreateStepValue(const SCEV *Step, ScalarEvolution &SE,
2430
- Instruction *InsertBefore,
2431
- Loop *OrigLoop = nullptr) {
2432
- const DataLayout &DL = SE.getDataLayout();
2433
- assert((!OrigLoop || SE.isLoopInvariant(Step, OrigLoop)) &&
2434
- "Induction step should be loop invariant");
2435
- if (auto *E = dyn_cast<SCEVUnknown>(Step))
2436
- return E->getValue();
2437
-
2438
- SCEVExpander Exp(SE, DL, "induction");
2439
- return Exp.expandCodeFor(Step, Step->getType(), InsertBefore);
2440
- }
2441
-
2442
2438
/// Compute the transformed value of Index at offset StartValue using step
2443
2439
/// StepValue.
2444
2440
/// For integer induction, returns StartValue + Index * StepValue.
@@ -3142,7 +3138,7 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
3142
3138
}
3143
3139
3144
3140
PHINode *InnerLoopVectorizer::createInductionResumeValue(
3145
- PHINode *OrigPhi, const InductionDescriptor &II,
3141
+ PHINode *OrigPhi, const InductionDescriptor &II, Value *Step,
3146
3142
ArrayRef<BasicBlock *> BypassBlocks,
3147
3143
std::pair<BasicBlock *, Value *> AdditionalBypass) {
3148
3144
Value *VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader);
@@ -3161,17 +3157,13 @@ PHINode *InnerLoopVectorizer::createInductionResumeValue(
3161
3157
if (II.getInductionBinOp() && isa<FPMathOperator>(II.getInductionBinOp()))
3162
3158
B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
3163
3159
3164
- Value *Step =
3165
- CreateStepValue(II.getStep(), *PSE.getSE(), &*B.GetInsertPoint());
3166
3160
EndValue =
3167
3161
emitTransformedIndex(B, VectorTripCount, II.getStartValue(), Step, II);
3168
3162
EndValue->setName("ind.end");
3169
3163
3170
3164
// Compute the end value for the additional bypass (if applicable).
3171
3165
if (AdditionalBypass.first) {
3172
3166
B.SetInsertPoint(&(*AdditionalBypass.first->getFirstInsertionPt()));
3173
- Value *Step =
3174
- CreateStepValue(II.getStep(), *PSE.getSE(), &*B.GetInsertPoint());
3175
3167
EndValueFromAdditionalBypass = emitTransformedIndex(
3176
3168
B, AdditionalBypass.second, II.getStartValue(), Step, II);
3177
3169
EndValueFromAdditionalBypass->setName("ind.end");
@@ -3200,7 +3192,22 @@ PHINode *InnerLoopVectorizer::createInductionResumeValue(
3200
3192
return BCResumeVal;
3201
3193
}
3202
3194
3195
+ /// Return the expanded step for \p ID using \p ExpandedSCEVs to look up SCEV
3196
+ /// expansion results. Steps that are SCEVConstant or SCEVUnknown are returned directly from the SCEV node without consulting \p ExpandedSCEVs; any other step must already have an entry in the map.
3197
+ static Value *getExpandedStep(const InductionDescriptor &ID,
3198
+ const SCEV2ValueTy &ExpandedSCEVs) {
3199
+ const SCEV *Step = ID.getStep();
3200
+ if (auto *C = dyn_cast<SCEVConstant>(Step)) // Constant step: return the constant it holds.
3201
+ return C->getValue();
3202
+ if (auto *U = dyn_cast<SCEVUnknown>(Step)) // SCEVUnknown step: return the value it holds; no lookup needed.
3203
+ return U->getValue();
3204
+ auto I = ExpandedSCEVs.find(Step); // Every other kind of step is looked up by its SCEV pointer.
3205
+ assert(I != ExpandedSCEVs.end() && "SCEV must be expanded at this point");
3206
+ return I->second; // NOTE(review): the only guard against a missing entry is the assert above; if asserts are compiled out this would dereference end() -- confirm callers always populate the map first.
3207
+ }
3208
+
3203
3209
void InnerLoopVectorizer::createInductionResumeValues(
3210
+ const SCEV2ValueTy &ExpandedSCEVs,
3204
3211
std::pair<BasicBlock *, Value *> AdditionalBypass) {
3205
3212
assert(((AdditionalBypass.first && AdditionalBypass.second) ||
3206
3213
(!AdditionalBypass.first && !AdditionalBypass.second)) &&
@@ -3216,7 +3223,8 @@ void InnerLoopVectorizer::createInductionResumeValues(
3216
3223
PHINode *OrigPhi = InductionEntry.first;
3217
3224
const InductionDescriptor &II = InductionEntry.second;
3218
3225
PHINode *BCResumeVal = createInductionResumeValue(
3219
- OrigPhi, II, LoopBypassBlocks, AdditionalBypass);
3226
+ OrigPhi, II, getExpandedStep(II, ExpandedSCEVs), LoopBypassBlocks,
3227
+ AdditionalBypass);
3220
3228
OrigPhi->setIncomingValueForBlock(LoopScalarPreHeader, BCResumeVal);
3221
3229
}
3222
3230
}
@@ -3257,7 +3265,8 @@ BasicBlock *InnerLoopVectorizer::completeLoopSkeleton() {
3257
3265
}
3258
3266
3259
3267
std::pair<BasicBlock *, Value *>
3260
- InnerLoopVectorizer::createVectorizedLoopSkeleton() {
3268
+ InnerLoopVectorizer::createVectorizedLoopSkeleton(
3269
+ const SCEV2ValueTy &ExpandedSCEVs) {
3261
3270
/*
3262
3271
In this function we generate a new loop. The new loop will contain
3263
3272
the vectorized instructions while the old loop will continue to run the
@@ -3312,7 +3321,7 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton() {
3312
3321
emitMemRuntimeChecks(LoopScalarPreHeader);
3313
3322
3314
3323
// Emit phis for the new starting index of the scalar loop.
3315
- createInductionResumeValues();
3324
+ createInductionResumeValues(ExpandedSCEVs );
3316
3325
3317
3326
return {completeLoopSkeleton(), nullptr};
3318
3327
}
@@ -7674,11 +7683,9 @@ static void AddRuntimeUnrollDisableMetaData(Loop *L) {
7674
7683
}
7675
7684
}
7676
7685
7677
- void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF,
7678
- VPlan &BestVPlan,
7679
- InnerLoopVectorizer &ILV,
7680
- DominatorTree *DT,
7681
- bool IsEpilogueVectorization) {
7686
+ SCEV2ValueTy LoopVectorizationPlanner::executePlan(
7687
+ ElementCount BestVF, unsigned BestUF, VPlan &BestVPlan,
7688
+ InnerLoopVectorizer &ILV, DominatorTree *DT, bool IsEpilogueVectorization) {
7682
7689
assert(BestVPlan.hasVF(BestVF) &&
7683
7690
"Trying to execute plan with unsupported VF");
7684
7691
assert(BestVPlan.hasUF(BestUF) &&
@@ -7710,7 +7717,7 @@ void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF,
7710
7717
// middle block. The vector loop is created during VPlan execution.
7711
7718
Value *CanonicalIVStartValue;
7712
7719
std::tie(State.CFG.PrevBB, CanonicalIVStartValue) =
7713
- ILV.createVectorizedLoopSkeleton();
7720
+ ILV.createVectorizedLoopSkeleton(State.ExpandedSCEVs );
7714
7721
7715
7722
// Only use noalias metadata when using memory checks guaranteeing no overlap
7716
7723
// across all iterations.
@@ -7778,6 +7785,8 @@ void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF,
7778
7785
ILV.fixVectorizedLoop(State, BestVPlan);
7779
7786
7780
7787
ILV.printDebugTracesAtEnd();
7788
+
7789
+ return State.ExpandedSCEVs;
7781
7790
}
7782
7791
7783
7792
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -7799,7 +7808,8 @@ Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) { return V; }
7799
7808
/// This function is partially responsible for generating the control flow
7800
7809
/// depicted in https://llvm.org/docs/Vectorizers.html#epilogue-vectorization.
7801
7810
std::pair<BasicBlock *, Value *>
7802
- EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton() {
7811
+ EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton(
7812
+ const SCEV2ValueTy &ExpandedSCEVs) {
7803
7813
createVectorLoopSkeleton("");
7804
7814
7805
7815
// Generate the code to check the minimum iteration count of the vector
@@ -7917,7 +7927,8 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
7917
7927
/// This function is partially responsible for generating the control flow
7918
7928
/// depicted in https://llvm.org/docs/Vectorizers.html#epilogue-vectorization.
7919
7929
std::pair<BasicBlock *, Value *>
7920
- EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() {
7930
+ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
7931
+ const SCEV2ValueTy &ExpandedSCEVs) {
7921
7932
createVectorLoopSkeleton("vec.epilog.");
7922
7933
7923
7934
// Now, compare the remaining count and if there aren't enough iterations to
@@ -8015,7 +8026,8 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() {
8015
8026
// check, then the resume value for the induction variable comes from
8016
8027
// the trip count of the main vector loop, hence passing the AdditionalBypass
8017
8028
// argument.
8018
- createInductionResumeValues({VecEpilogueIterationCountCheck,
8029
+ createInductionResumeValues(ExpandedSCEVs,
8030
+ {VecEpilogueIterationCountCheck,
8019
8031
EPI.VectorTripCount} /* AdditionalBypass */);
8020
8032
8021
8033
return {completeLoopSkeleton(), EPResumeVal};
@@ -10387,8 +10399,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10387
10399
EPI, &LVL, &CM, BFI, PSI, Checks);
10388
10400
10389
10401
VPlan &BestMainPlan = LVP.getBestPlanFor(EPI.MainLoopVF);
10390
- LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF, BestMainPlan, MainILV ,
10391
- DT, true);
10402
+ auto ExpandedSCEVs = LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF,
10403
+ BestMainPlan, MainILV, DT, true);
10392
10404
++LoopsVectorized;
10393
10405
10394
10406
// Second pass vectorizes the epilogue and adjusts the control flow
@@ -10442,7 +10454,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10442
10454
}
10443
10455
10444
10456
ResumeV = MainILV.createInductionResumeValue(
10445
- IndPhi, *ID, {EPI.MainLoopIterationCountCheck});
10457
+ IndPhi, *ID, getExpandedStep(*ID, ExpandedSCEVs),
10458
+ {EPI.MainLoopIterationCountCheck});
10446
10459
}
10447
10460
assert(ResumeV && "Must have a resume value");
10448
10461
VPValue *StartVal = BestEpiPlan.getVPValueOrAddLiveIn(ResumeV);
0 commit comments