From ca0aa2b075b2b3ff4520a4851b8aa545c5b13a46 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 26 Jul 2021 15:21:16 +0100 Subject: [PATCH] Recommit "[VPlan] Add recipe for first-order rec phis, make splicing explicit." This reverts the revert commit b1777b04dc4b1a9fee0e7effa7e177892ab32ef0. The patch originally got reverted due to a crash: https://bugs.chromium.org/p/chromium/issues/detail?id=1232798#c2 The underlying issue was that we were not using the stored values from the modified memory recipes, but the out-of-date values directly from the IR (accessed via the VPlan). This should be fixed in d995d6376. A reduced version of the reproducer has been added in 93664503be6b. --- lib/Transforms/Vectorize/LoopVectorize.cpp | 150 ++++++------------ lib/Transforms/Vectorize/VPlan.cpp | 65 +++++++- lib/Transforms/Vectorize/VPlan.h | 52 +++++- lib/Transforms/Vectorize/VPlanTransforms.cpp | 21 --- lib/Transforms/Vectorize/VPlanValue.h | 2 + .../first-order-recurrence-complex.ll | 14 +- ...-order-recurrence-sink-replicate-region.ll | 51 +++--- .../LoopVectorize/first-order-recurrence.ll | 43 ++--- test/Transforms/LoopVectorize/induction.ll | 8 +- .../LoopVectorize/interleaved-accesses.ll | 8 +- .../scalable-first-order-recurrence.ll | 15 +- .../vplan-sink-scalars-and-merge.ll | 7 +- 12 files changed, 238 insertions(+), 198 deletions(-) diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index a841be1879a..27d6161adfe 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4164,14 +4164,10 @@ void InnerLoopVectorizer::fixCrossIterationPHIs(VPTransformState &State) { // the incoming edges. VPBasicBlock *Header = State.Plan->getEntry()->getEntryBasicBlock(); for (VPRecipeBase &R : Header->phis()) { - auto *PhiR = dyn_cast(&R); - if (!PhiR) - continue; - auto *OrigPhi = cast(PhiR->getUnderlyingValue()); - if (auto *ReductionPhi = dyn_cast(PhiR)) { + if (auto *ReductionPhi = dyn_cast(&R)) fixReduction(ReductionPhi, State); - } else if (Legal->isFirstOrderRecurrence(OrigPhi)) - fixFirstOrderRecurrence(PhiR, State); + else if (auto *FOR = dyn_cast(&R)) + fixFirstOrderRecurrence(FOR, State); } } @@ -4200,7 +4196,7 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(VPWidenPHIRecipe *PhiR, // // In this example, s1 is a recurrence because it's value depends on the // previous iteration. In the first phase of vectorization, we created a - // temporary value for s1. We now complete the vectorization and produce the + // vector phi v1 for s1. We now complete the vectorization and produce the // shorthand vector IR shown below (for VF = 4, UF = 1). // // vector.ph: @@ -4226,82 +4222,19 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(VPWidenPHIRecipe *PhiR, // After execution completes the vector loop, we extract the next value of // the recurrence (x) to use as the initial value in the scalar loop. - auto *ScalarInit = PhiR->getStartValue()->getLiveInIRValue(); - auto *IdxTy = Builder.getInt32Ty(); - auto *One = ConstantInt::get(IdxTy, 1); - - // Create a vector from the initial value. - auto *VectorInit = ScalarInit; - if (VF.isVector()) { - Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator()); - auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, VF); - auto *LastIdx = Builder.CreateSub(RuntimeVF, One); - VectorInit = Builder.CreateInsertElement( - PoisonValue::get(VectorType::get(VectorInit->getType(), VF)), - VectorInit, LastIdx, "vector.recur.init"); - } - - VPValue *PreviousDef = PhiR->getBackedgeValue(); - // We constructed a temporary phi node in the first phase of vectorization. - // This phi node will eventually be deleted. - Builder.SetInsertPoint(cast(State.get(PhiR, 0))); - - // Create a phi node for the new recurrence. The current value will either be - // the initial value inserted into a vector or loop-varying vector value. - auto *VecPhi = Builder.CreatePHI(VectorInit->getType(), 2, "vector.recur"); - VecPhi->addIncoming(VectorInit, LoopVectorPreHeader); - - // Get the vectorized previous value of the last part UF - 1. It appears last - // among all unrolled iterations, due to the order of their construction. - Value *PreviousLastPart = State.get(PreviousDef, UF - 1); - - // Find and set the insertion point after the previous value if it is an - // instruction. - BasicBlock::iterator InsertPt; - // Note that the previous value may have been constant-folded so it is not - // guaranteed to be an instruction in the vector loop. - // FIXME: Loop invariant values do not form recurrences. We should deal with - // them earlier. - if (LI->getLoopFor(LoopVectorBody)->isLoopInvariant(PreviousLastPart)) - InsertPt = LoopVectorBody->getFirstInsertionPt(); - else { - Instruction *PreviousInst = cast(PreviousLastPart); - if (isa(PreviousLastPart)) - // If the previous value is a phi node, we should insert after all the phi - // nodes in the block containing the PHI to avoid breaking basic block - // verification. Note that the basic block may be different to - // LoopVectorBody, in case we predicate the loop. - InsertPt = PreviousInst->getParent()->getFirstInsertionPt(); - else - InsertPt = ++PreviousInst->getIterator(); - } - Builder.SetInsertPoint(&*InsertPt); - - // The vector from which to take the initial value for the current iteration - // (actual or unrolled). Initially, this is the vector phi node. - Value *Incoming = VecPhi; - - // Shuffle the current and previous vector and update the vector parts. - for (unsigned Part = 0; Part < UF; ++Part) { - Value *PreviousPart = State.get(PreviousDef, Part); - Value *PhiPart = State.get(PhiR, Part); - auto *Shuffle = VF.isVector() - ? Builder.CreateVectorSplice(Incoming, PreviousPart, -1) - : Incoming; - PhiPart->replaceAllUsesWith(Shuffle); - cast(PhiPart)->eraseFromParent(); - State.reset(PhiR, Shuffle, Part); - Incoming = PreviousPart; - } + auto *VecPhi = cast(State.get(PhiR, 0)); // Fix the latch value of the new recurrence in the vector loop. + VPValue *PreviousDef = PhiR->getBackedgeValue(); + Value *Incoming = State.get(PreviousDef, UF - 1); VecPhi->addIncoming(Incoming, LI->getLoopFor(LoopVectorBody)->getLoopLatch()); // Extract the last vector element in the middle block. This will be the // initial value for the recurrence when jumping to the scalar loop. auto *ExtractForScalar = Incoming; if (VF.isVector()) { + auto *One = ConstantInt::get(IdxTy, 1); Builder.SetInsertPoint(LoopMiddleBlock->getTerminator()); auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, VF); auto *LastIdx = Builder.CreateSub(RuntimeVF, One); @@ -4330,6 +4263,7 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(VPWidenPHIRecipe *PhiR, Builder.SetInsertPoint(&*LoopScalarPreHeader->begin()); PHINode *Phi = cast(PhiR->getUnderlyingValue()); auto *Start = Builder.CreatePHI(Phi->getType(), 2, "scalar.recur.init"); + auto *ScalarInit = PhiR->getStartValue()->getLiveInIRValue(); for (auto *BB : predecessors(LoopScalarPreHeader)) { auto *Incoming = BB == LoopMiddleBlock ? ExtractForScalar : ScalarInit; Start->addIncoming(Incoming, BB); @@ -4785,18 +4719,6 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, // Phi nodes have cycles, so we need to vectorize them in two stages. This is // stage #1: We create a new vector PHI node with no incoming edges. We'll use // this value when we vectorize all of the instructions that use the PHI. - if (Legal->isFirstOrderRecurrence(P)) { - Type *VecTy = State.VF.isScalar() - ? PN->getType() - : VectorType::get(PN->getType(), State.VF); - - for (unsigned Part = 0; Part < State.UF; ++Part) { - Value *EntryPart = PHINode::Create( - VecTy, 2, "vec.phi", &*LoopVectorBody->getFirstInsertionPt()); - State.set(PhiR, EntryPart, Part); - } - return; - } assert(!Legal->isReductionVariable(P) && "reductions should be handled elsewhere"); @@ -9076,7 +8998,7 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr, CM.isInLoopReduction(Phi), CM.useOrderedReductions(RdxDesc)); } else { - PhiRecipe = new VPWidenPHIRecipe(Phi, *StartV); + PhiRecipe = new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV); } // Record the incoming value from the backedge, so we can add the incoming @@ -9317,23 +9239,22 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( // --------------------------------------------------------------------------- // Apply Sink-After legal constraints. + auto GetReplicateRegion = [](VPRecipeBase *R) -> VPRegionBlock * { + auto *Region = dyn_cast_or_null(R->getParent()->getParent()); + if (Region && Region->isReplicator()) { + assert(Region->getNumSuccessors() == 1 && + Region->getNumPredecessors() == 1 && "Expected SESE region!"); + assert(R->getParent()->size() == 1 && + "A recipe in an original replicator region must be the only " + "recipe in its block"); + return Region; + } + return nullptr; + }; for (auto &Entry : SinkAfter) { VPRecipeBase *Sink = RecipeBuilder.getRecipe(Entry.first); VPRecipeBase *Target = RecipeBuilder.getRecipe(Entry.second); - auto GetReplicateRegion = [](VPRecipeBase *R) -> VPRegionBlock * { - auto *Region = - dyn_cast_or_null(R->getParent()->getParent()); - if (Region && Region->isReplicator()) { - assert(Region->getNumSuccessors() == 1 && - Region->getNumPredecessors() == 1 && "Expected SESE region!"); - assert(R->getParent()->size() == 1 && - "A recipe in an original replicator region must be the only " - "recipe in its block"); - return Region; - } - return nullptr; - }; auto *TargetRegion = GetReplicateRegion(Target); auto *SinkRegion = GetReplicateRegion(Sink); if (!SinkRegion) { @@ -9365,8 +9286,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( VPBlockUtils::connectBlocks(SinkRegion, TargetSucc); } else { // The sink source is in a replicate region, we need to move the whole - // replicate region, which should only contain a single recipe in the main - // block. + // replicate region, which should only contain a single recipe in the + // main block. auto *SplitBlock = Target->getParent()->splitAt(std::next(Target->getIterator())); @@ -9380,6 +9301,29 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( } } + // Introduce a recipe to combine the incoming and previous values of a + // first-order recurrence. + for (VPRecipeBase &R : Plan->getEntry()->getEntryBasicBlock()->phis()) { + auto *RecurPhi = dyn_cast(&R); + if (!RecurPhi) + continue; + + auto *RecurSplice = cast( + Builder.createNaryOp(VPInstruction::FirstOrderRecurrenceSplice, + {RecurPhi, RecurPhi->getBackedgeValue()})); + + VPRecipeBase *PrevRecipe = RecurPhi->getBackedgeRecipe(); + if (auto *Region = GetReplicateRegion(PrevRecipe)) { + VPBasicBlock *Succ = cast(Region->getSingleSuccessor()); + RecurSplice->moveBefore(*Succ, Succ->getFirstNonPhi()); + } else + RecurSplice->moveAfter(PrevRecipe); + RecurPhi->replaceAllUsesWith(RecurSplice); + // Set the first operand of RecurSplice to RecurPhi again, after replacing + // all users. + RecurSplice->setOperand(0, RecurPhi); + } + // Interleave memory: for each Interleave Group we marked earlier as relevant // for this VPlan, replace the Recipes widening its memory instructions with a // single VPInterleaveRecipe at its insertion point. diff --git a/lib/Transforms/Vectorize/VPlan.cpp b/lib/Transforms/Vectorize/VPlan.cpp index 344c9974f71..5f39fe1c17a 100644 --- a/lib/Transforms/Vectorize/VPlan.cpp +++ b/lib/Transforms/Vectorize/VPlan.cpp @@ -687,6 +687,30 @@ void VPInstruction::generateInstruction(VPTransformState &State, State.set(this, Call, Part); break; } + case VPInstruction::FirstOrderRecurrenceSplice: { + // Generate code to combine the previous and current values in vector v3. + // + // vector.ph: + // v_init = vector(..., ..., ..., a[-1]) + // br vector.body + // + // vector.body + // i = phi [0, vector.ph], [i+4, vector.body] + // v1 = phi [v_init, vector.ph], [v2, vector.body] + // v2 = a[i, i+1, i+2, i+3]; + // v3 = vector(v1(3), v2(0, 1, 2)) + + // For the first part, use the recurrence phi (v1), otherwise v2. + auto *V1 = State.get(getOperand(0), 0); + Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1); + if (!PartMinus1->getType()->isVectorTy()) { + State.set(this, PartMinus1, Part); + } else { + Value *V2 = State.get(getOperand(1), Part); + State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1), Part); + } + break; + } default: llvm_unreachable("Unsupported opcode for instruction"); } @@ -729,7 +753,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, case VPInstruction::ActiveLaneMask: O << "active lane mask"; break; - + case VPInstruction::FirstOrderRecurrenceSplice: + O << "first-order splice"; + break; default: O << Instruction::getOpcodeName(getOpcode()); } @@ -1222,6 +1248,43 @@ void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent, } #endif +void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) { + auto &Builder = State.Builder; + // Create a vector from the initial value. + auto *VectorInit = getStartValue()->getLiveInIRValue(); + + Type *VecTy = State.VF.isScalar() + ? VectorInit->getType() + : VectorType::get(VectorInit->getType(), State.VF); + + if (State.VF.isVector()) { + auto *IdxTy = Builder.getInt32Ty(); + auto *One = ConstantInt::get(IdxTy, 1); + IRBuilder<>::InsertPointGuard Guard(Builder); + Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator()); + auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF); + auto *LastIdx = Builder.CreateSub(RuntimeVF, One); + VectorInit = Builder.CreateInsertElement( + PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init"); + } + + // Create a phi node for the new recurrence. + PHINode *EntryPart = PHINode::Create( + VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt()); + EntryPart->addIncoming(VectorInit, State.CFG.VectorPreHeader); + State.set(this, EntryPart, 0); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "FIRST-ORDER-RECURRENCE-PHI "; + printAsOperand(O, SlotTracker); + O << " = phi "; + printOperands(O, SlotTracker); +} +#endif + void VPReductionPHIRecipe::execute(VPTransformState &State) { PHINode *PN = cast(getUnderlyingValue()); auto &Builder = State.Builder; diff --git a/lib/Transforms/Vectorize/VPlan.h b/lib/Transforms/Vectorize/VPlan.h index d92ea16ad05..bdf09d15c27 100644 --- a/lib/Transforms/Vectorize/VPlan.h +++ b/lib/Transforms/Vectorize/VPlan.h @@ -776,7 +776,10 @@ class VPInstruction : public VPRecipeBase, public VPValue { public: /// VPlan opcodes, extending LLVM IR with idiomatics instructions. enum { - Not = Instruction::OtherOpsEnd + 1, + FirstOrderRecurrenceSplice = + Instruction::OtherOpsEnd + 1, // Combines the incoming and previous + // values of a first-order recurrence. + Not, ICmpULE, SLPLoad, SLPStore, @@ -1060,8 +1063,12 @@ class VPWidenPHIRecipe : public VPRecipeBase, public VPValue { SmallVector IncomingBlocks; protected: - VPWidenPHIRecipe(unsigned char VPVID, unsigned char VPDefID, PHINode *Phi) - : VPRecipeBase(VPDefID, {}), VPValue(VPVID, Phi, this) {} + VPWidenPHIRecipe(unsigned char VPVID, unsigned char VPDefID, PHINode *Phi, + VPValue *Start = nullptr) + : VPRecipeBase(VPDefID, {}), VPValue(VPVID, Phi, this) { + if (Start) + addOperand(Start); + } public: /// Create a VPWidenPHIRecipe for \p Phi @@ -1078,10 +1085,12 @@ public: /// Method to support type inquiry through isa, cast, and dyn_cast. static inline bool classof(const VPRecipeBase *B) { return B->getVPDefID() == VPRecipeBase::VPWidenPHISC || + B->getVPDefID() == VPRecipeBase::VPFirstOrderRecurrencePHISC || B->getVPDefID() == VPRecipeBase::VPReductionPHISC; } static inline bool classof(const VPValue *V) { return V->getVPValueID() == VPValue::VPVWidenPHISC || + V->getVPValueID() == VPValue::VPVFirstOrderRecurrencePHISC || V->getVPValueID() == VPValue::VPVReductionPHISC; } @@ -1106,6 +1115,12 @@ public: return getOperand(1); } + /// Returns the backedge value as a recipe. The backedge value is guaranteed + /// to be a recipe. + VPRecipeBase *getBackedgeRecipe() { + return cast(getBackedgeValue()->getDef()); + } + /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi. void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) { addOperand(IncomingV); @@ -1119,6 +1134,34 @@ public: VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; } }; +/// A recipe for handling first-order recurrence phis. The start value is the +/// first operand of the recipe and the incoming value from the backedge is the +/// second operand. +struct VPFirstOrderRecurrencePHIRecipe : public VPWidenPHIRecipe { + VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start) + : VPWidenPHIRecipe(VPVFirstOrderRecurrencePHISC, + VPFirstOrderRecurrencePHISC, Phi, &Start) {} + + /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPRecipeBase *R) { + return R->getVPDefID() == VPRecipeBase::VPFirstOrderRecurrencePHISC; + } + static inline bool classof(const VPWidenPHIRecipe *D) { + return D->getVPDefID() == VPRecipeBase::VPFirstOrderRecurrencePHISC; + } + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPValue::VPVFirstOrderRecurrencePHISC; + } + + void execute(VPTransformState &State) override; + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif +}; + /// A recipe for handling reduction phis. The start value is the first operand /// of the recipe and the incoming value from the backedge is the second /// operand. @@ -1138,10 +1181,9 @@ public: VPReductionPHIRecipe(PHINode *Phi, RecurrenceDescriptor &RdxDesc, VPValue &Start, bool IsInLoop = false, bool IsOrdered = false) - : VPWidenPHIRecipe(VPVReductionPHISC, VPReductionPHISC, Phi), + : VPWidenPHIRecipe(VPVReductionPHISC, VPReductionPHISC, Phi, &Start), RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered) { assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop"); - addOperand(&Start); } ~VPReductionPHIRecipe() override = default; diff --git a/lib/Transforms/Vectorize/VPlanTransforms.cpp b/lib/Transforms/Vectorize/VPlanTransforms.cpp index 500a679f618..52b5ae083d0 100644 --- a/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -221,27 +221,6 @@ bool VPlanTransforms::mergeReplicateRegions(VPlan &Plan) { // region. Such dependencies should be rejected during earlier dependence // checks, which guarantee accesses can be re-ordered for vectorization. // - // If a recipe is used by a first-order recurrence phi, we cannot move it at - // the moment: a recipe R feeding a first order recurrence phi must allow - // for a *vector* shuffle to be inserted immediately after it, and therefore - // if R is *scalarized and predicated* it must appear last in its basic - // block. In addition, other recipes may need to "sink after" R, so best if - // R not be moved at all. - auto IsImmovableRecipe = [](VPRecipeBase &R) { - assert(R.getNumDefinedValues() <= 1 && - "no multi-defs are expected in predicated blocks"); - for (VPUser *U : R.getVPSingleValue()->users()) { - auto *UI = dyn_cast(U); - if (!UI) - continue; - if (isa(UI) && !isa(UI)) - return true; - } - return false; - }; - if (any_of(*Then1, IsImmovableRecipe)) - continue; - // Move recipes to the successor region. for (VPRecipeBase &ToMove : make_early_inc_range(reverse(*Then1))) ToMove.moveBefore(*Then2, Then2->getFirstNonPhi()); diff --git a/lib/Transforms/Vectorize/VPlanValue.h b/lib/Transforms/Vectorize/VPlanValue.h index 866b0602e80..fd92201614d 100644 --- a/lib/Transforms/Vectorize/VPlanValue.h +++ b/lib/Transforms/Vectorize/VPlanValue.h @@ -101,6 +101,7 @@ public: // Phi-like VPValues. Need to be kept together. VPVBlendSC, + VPVFirstOrderRecurrencePHISC, VPVWidenPHISC, VPVWidenCanonicalIVSC, VPVWidenIntOrFpInductionSC, @@ -331,6 +332,7 @@ public: // Phi-like recipes. Need to be kept together. VPBlendSC, + VPFirstOrderRecurrencePHISC, VPWidenPHISC, VPWidenCanonicalIVSC, VPWidenIntOrFpInductionSC, diff --git a/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll b/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll index 8b8ca971cf7..cad57883f41 100644 --- a/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll +++ b/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll @@ -16,9 +16,9 @@ define void @can_sink_after_store(i32 %x, i32* %ptr, i64 %tc) local_unnamed_addr ; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, i32* [[IDX_PHI_TRANS]], align 4 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -100,9 +100,9 @@ define void @sink_sdiv(i32 %x, i32* %ptr, i64 %tc) local_unnamed_addr #0 { ; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, i32* [[IDX_PHI_TRANS]], align 4 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -183,9 +183,9 @@ define void @can_sink_with_additional_user(i32 %x, i32* %ptr, i64 %tc) { ; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, i32* [[IDX_PHI_TRANS]], align 4 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -502,7 +502,7 @@ define i16 @multiple_exit(i16* %p, i32 %n) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 @@ -523,7 +523,7 @@ define i16 @multiple_exit(i16* %p, i32 %n) { ; CHECK-NEXT: store i16 [[SCALAR_RECUR]], i16* [[B]], align 4 ; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 -; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: if.end: ; CHECK-NEXT: [[REC_LCSSA:%.*]] = phi i16 [ [[SCALAR_RECUR]], [[FOR_BODY]] ], [ [[SCALAR_RECUR]], [[FOR_COND]] ] ; CHECK-NEXT: ret i16 [[REC_LCSSA]] @@ -586,7 +586,7 @@ define i16 @multiple_exit2(i16* %p, i32 %n) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 @@ -607,7 +607,7 @@ define i16 @multiple_exit2(i16* %p, i32 %n) { ; CHECK-NEXT: store i16 [[SCALAR_RECUR]], i16* [[B]], align 4 ; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 -; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: if.end: ; CHECK-NEXT: [[REC_LCSSA:%.*]] = phi i16 [ [[SCALAR_RECUR]], [[FOR_COND]] ], [ 10, [[FOR_BODY]] ] ; CHECK-NEXT: ret i16 [[REC_LCSSA]] diff --git a/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index 8803a609da5..bbc35761745 100644 --- a/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -10,7 +10,7 @@ define void @sink_replicate_region_1(i32 %x, i8* %ptr) optsize { ; CHECK-LABEL: sink_replicate_region_1 ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: loop: -; CHECK-NEXT: WIDEN-PHI ir<%0> = phi ir<0>, ir<%conv> +; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%0> = phi ir<0>, ir<%conv> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: EMIT vp<%3> = icmp ule ir<%iv> vp<%0> ; CHECK-NEXT: Successor(s): loop.0 @@ -37,6 +37,7 @@ define void @sink_replicate_region_1(i32 %x, i8* %ptr) optsize { ; CHECK-EMPTY: ; CHECK-NEXT: loop.1: ; CHECK-NEXT: WIDEN ir<%conv> = sext vp<%6> +; CHECK-NEXT: EMIT vp<%8> = first-order splice ir<%0> ir<%conv> ; CHECK-NEXT: Successor(s): pred.srem ; CHECK-EMPTY: ; CHECK-NEXT: pred.srem: { @@ -46,17 +47,17 @@ define void @sink_replicate_region_1(i32 %x, i8* %ptr) optsize { ; CHECK-NEXT: CondBit: vp<%3> (loop) ; CHECK-EMPTY: ; CHECK-NEXT: pred.srem.if: -; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%0>, ir<%x> (S->V) +; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<%8>, ir<%x> (S->V) ; CHECK-NEXT: Successor(s): pred.srem.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.srem.continue: -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%9> = ir<%rem> +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%10> = ir<%rem> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): loop.1.split ; CHECK-EMPTY: ; CHECK-NEXT: loop.1.split: -; CHECK-NEXT: WIDEN ir<%add> = add ir<%conv>, vp<%9> +; CHECK-NEXT: WIDEN ir<%add> = add ir<%conv>, vp<%10> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -83,13 +84,14 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, i32* %ptr) optsize { ; CHECK-LABEL: sink_replicate_region_2 ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: loop: -; CHECK-NEXT: WIDEN-PHI ir<%recur> = phi ir<0>, ir<%recur.next> +; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: EMIT vp<%3> = icmp ule ir<%iv> vp<%0> ; CHECK-NEXT: Successor(s): loop.0 ; CHECK-EMPTY: ; CHECK-NEXT: loop.0: ; CHECK-NEXT: WIDEN ir<%recur.next> = sext ir<%y> +; CHECK-NEXT: EMIT vp<%5> = first-order splice ir<%recur> ir<%recur.next> ; CHECK-NEXT: Successor(s): loop.0.split ; CHECK-EMPTY: ; CHECK-NEXT: loop.0.split: @@ -102,14 +104,14 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, i32* %ptr) optsize { ; CHECK-NEXT: CondBit: vp<%3> (loop) ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: -; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%recur>, ir<%x> +; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<%5>, ir<%x> ; CHECK-NEXT: REPLICATE ir<%add> = add ir<%rem>, ir<%recur.next> ; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, ir<%iv> ; CHECK-NEXT: REPLICATE store ir<%add>, ir<%gep> ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.continue: -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%9> = ir<%rem> +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%10> = ir<%rem> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): loop.1 @@ -141,7 +143,7 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, i32* %ptr) optsize ; CHECK-LABEL: sink_replicate_region_3_reduction ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: loop: -; CHECK-NEXT: WIDEN-PHI ir<%recur> = phi ir<0>, ir<%recur.next> +; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%and.red> = phi ir<1234>, ir<%and.red.next> ; CHECK-NEXT: EMIT vp<%4> = icmp ule ir<%iv> vp<%0> @@ -149,6 +151,7 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, i32* %ptr) optsize ; CHECK-EMPTY: ; CHECK-NEXT: loop.0: ; CHECK-NEXT: WIDEN ir<%recur.next> = sext ir<%y> +; CHECK-NEXT: EMIT vp<%6> = first-order splice ir<%recur> ir<%recur.next> ; CHECK-NEXT: Successor(s): pred.srem ; CHECK-EMPTY: ; CHECK-NEXT: pred.srem: { @@ -158,19 +161,19 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, i32* %ptr) optsize ; CHECK-NEXT: CondBit: vp<%4> (loop) ; CHECK-EMPTY: ; CHECK-NEXT: pred.srem.if: -; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%recur>, ir<%x> (S->V) +; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<%6>, ir<%x> (S->V) ; CHECK-NEXT: Successor(s): pred.srem.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.srem.continue: -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%7> = ir<%rem> +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%8> = ir<%rem> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): loop.0.split ; CHECK-EMPTY: ; CHECK-NEXT: loop.0.split: -; CHECK-NEXT: WIDEN ir<%add> = add vp<%7>, ir<%recur.next> +; CHECK-NEXT: WIDEN ir<%add> = add vp<%8>, ir<%recur.next> ; CHECK-NEXT: WIDEN ir<%and.red.next> = and ir<%and.red>, ir<%add> -; CHECK-NEXT: EMIT vp<%10> = select vp<%4> ir<%and.red.next> ir<%and.red> +; CHECK-NEXT: EMIT vp<%11> = select vp<%4> ir<%and.red.next> ir<%and.red> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -200,7 +203,7 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, i8* ; CHECK-LABEL: sink_replicate_region_4_requires_split_at_end_of_block ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: loop: -; CHECK-NEXT: WIDEN-PHI ir<%0> = phi ir<0>, ir<%conv> +; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%0> = phi ir<0>, ir<%conv> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: EMIT vp<%3> = icmp ule ir<%iv> vp<%0> ; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, ir<%iv> @@ -227,6 +230,7 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, i8* ; CHECK-EMPTY: ; CHECK-NEXT: loop.1: ; CHECK-NEXT: WIDEN ir<%conv> = sext vp<%6> +; CHECK-NEXT: EMIT vp<%8> = first-order splice ir<%0> ir<%conv> ; CHECK-NEXT: Successor(s): loop.1.split ; CHECK: loop.1.split: @@ -239,19 +243,19 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, i8* ; CHECK-NEXT: CondBit: vp<%3> (loop) ; CHECK: pred.load.if: -; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%0>, ir<%x> (S->V) +; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<%8>, ir<%x> (S->V) ; CHECK-NEXT: REPLICATE ir<%lv.2> = load ir<%gep> (S->V) ; CHECK-NEXT: Successor(s): pred.load.continue ; CHECK: pred.load.continue: -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%10> = ir<%rem> -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%11> = ir<%lv.2> +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%11> = ir<%rem> +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%12> = ir<%lv.2> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK: loop.2: -; CHECK-NEXT: WIDEN ir<%add.1> = add ir<%conv>, vp<%10> -; CHECK-NEXT: WIDEN ir<%conv.lv.2> = sext vp<%11> +; CHECK-NEXT: WIDEN ir<%add.1> = add ir<%conv>, vp<%11> +; CHECK-NEXT: WIDEN ir<%conv.lv.2> = sext vp<%12> ; CHECK-NEXT: WIDEN ir<%add> = add ir<%add.1>, ir<%conv.lv.2> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -283,7 +287,7 @@ define void @sink_replicate_region_after_replicate_region(i32* %ptr, i32 %x, i8 ; CHECK-LABEL: sink_replicate_region_after_replicate_region ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: loop: -; CHECK-NEXT: WIDEN-PHI ir<%recur> = phi ir<0>, ir<%recur.next> +; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: EMIT vp<%3> = icmp ule ir<%iv> vp<%0> ; CHECK-NEXT: Successor(s): loop.0 @@ -293,6 +297,7 @@ define void @sink_replicate_region_after_replicate_region(i32* %ptr, i32 %x, i8 ; CHECK-EMPTY: ; CHECK-NEXT: loop.1: ; CHECK-NEXT: WIDEN ir<%recur.next> = sext ir<%y> +; CHECK-NEXT: EMIT vp<%5> = first-order splice ir<%recur> ir<%recur.next> ; CHECK-NEXT: Successor(s): pred.srem ; CHECK-EMPTY: ; CHECK-NEXT: pred.srem: { @@ -302,11 +307,11 @@ define void @sink_replicate_region_after_replicate_region(i32* %ptr, i32 %x, i8 ; CHECK-NEXT: CondBit: vp<%3> (loop) ; CHECK-EMPTY: ; CHECK-NEXT: pred.srem.if: -; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%recur>, ir<%x> +; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<%5>, ir<%x> ; CHECK-NEXT: Successor(s): pred.srem.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.srem.continue: -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%6> = ir<%rem> +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%7> = ir<%rem> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): loop.1.split @@ -321,13 +326,13 @@ define void @sink_replicate_region_after_replicate_region(i32* %ptr, i32 %x, i8 ; CHECK-NEXT: CondBit: vp<%3> (loop) ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: -; CHECK-NEXT: REPLICATE ir<%rem.div> = sdiv ir<20>, vp<%6> +; CHECK-NEXT: REPLICATE ir<%rem.div> = sdiv ir<20>, vp<%7> ; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, ir<%iv> ; CHECK-NEXT: REPLICATE store ir<%rem.div>, ir<%gep> ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.continue: -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%10> = ir<%rem.div> +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%11> = ir<%rem.div> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): loop.2 diff --git a/test/Transforms/LoopVectorize/first-order-recurrence.ll b/test/Transforms/LoopVectorize/first-order-recurrence.ll index dac44944006..c65f62cef65 100644 --- a/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -337,7 +337,8 @@ for.end: ; UNROLL-NO-IC-LABEL: @constant_folded_previous_value( ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ , %vector.ph ], [ , %vector.body ] -; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> , <4 x i32> +; UNROLL-NO-IC: [[TMP0:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> , <4 x i32> +; CHECK-NO-IC-NEXT: add nuw i64 ; UNROLL-NO-IC: br i1 {{.*}}, label %middle.block, label %vector.body ; define void @constant_folded_previous_value() { @@ -641,10 +642,10 @@ define void @sink_dead_inst() { ; SINK-AFTER-NEXT: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; SINK-AFTER-NEXT: %vec.ind = phi <4 x i16> [ , %vector.ph ], [ %vec.ind.next, %vector.body ] ; SINK-AFTER-NEXT: %vector.recur = phi <4 x i16> [ , %vector.ph ], [ %3, %vector.body ] -; SINK-AFTER-NEXT: %vector.recur2 = phi <4 x i32> [ , %vector.ph ], [ %1, %vector.body ] +; SINK-AFTER-NEXT: %vector.recur1 = phi <4 x i32> [ , %vector.ph ], [ %1, %vector.body ] ; SINK-AFTER-NEXT: %0 = add <4 x i16> %vec.ind, ; SINK-AFTER-NEXT: %1 = zext <4 x i16> %0 to <4 x i32> -; SINK-AFTER-NEXT: %2 = shufflevector <4 x i32> %vector.recur2, <4 x i32> %1, <4 x i32> +; SINK-AFTER-NEXT: %2 = shufflevector <4 x i32> %vector.recur1, <4 x i32> %1, <4 x i32> ; SINK-AFTER-NEXT: %3 = add <4 x i16> %0, ; SINK-AFTER-NEXT: %4 = shufflevector <4 x i16> %vector.recur, <4 x i16> %3, <4 x i32> ; SINK-AFTER-NEXT: %5 = sub <4 x i16> %4, @@ -704,30 +705,30 @@ define i32 @sink_into_replication_region(i32 %y) { ; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_UDIV_IF]] ] ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1 ; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_UDIV_IF4:%.*]], label [[PRED_UDIV_CONTINUE5:%.*]] -; CHECK: pred.udiv.if4: +; CHECK: pred.udiv.if3: ; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP9:%.*]] = udiv i32 219220132, [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP9]], i32 1 ; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE5]] -; CHECK: pred.udiv.continue5: +; CHECK: pred.udiv.continue4: ; CHECK-NEXT: [[TMP11:%.*]] = phi <4 x i32> [ [[TMP6]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP10]], [[PRED_UDIV_IF4]] ] ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2 ; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_UDIV_IF6:%.*]], label [[PRED_UDIV_CONTINUE7:%.*]] -; CHECK: pred.udiv.if6: +; CHECK: pred.udiv.if5: ; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[OFFSET_IDX]], -2 ; CHECK-NEXT: [[TMP14:%.*]] = udiv i32 219220132, [[TMP13]] ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP14]], i32 2 ; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE7]] -; CHECK: pred.udiv.continue7: +; CHECK: pred.udiv.continue6: ; CHECK-NEXT: [[TMP16:%.*]] = phi <4 x i32> [ [[TMP11]], [[PRED_UDIV_CONTINUE5]] ], [ [[TMP15]], [[PRED_UDIV_IF6]] ] ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3 ; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9]] -; CHECK: pred.udiv.if8: +; CHECK: pred.udiv.if7: ; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[OFFSET_IDX]], -3 ; CHECK-NEXT: [[TMP19:%.*]] = udiv i32 219220132, [[TMP18]] ; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP19]], i32 3 ; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE9]] -; CHECK: pred.udiv.continue9: +; CHECK: pred.udiv.continue8: ; CHECK-NEXT: [[TMP21]] = phi <4 x i32> [ [[TMP16]], [[PRED_UDIV_CONTINUE7]] ], [ [[TMP20]], [[PRED_UDIV_IF8]] ] ; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP21]], <4 x i32> ; CHECK-NEXT: [[TMP23]] = add <4 x i32> [[VEC_PHI1]], [[TMP22]] @@ -798,27 +799,27 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) { ; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_UDIV_IF]] ] ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1 ; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_UDIV_IF5:%.*]], label [[PRED_UDIV_CONTINUE6:%.*]] -; CHECK: pred.udiv.if5: +; CHECK: pred.udiv.if4: ; CHECK-NEXT: [[TMP11:%.*]] = udiv i32 219220132, [[TMP2]] ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP11]], i32 1 ; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE6]] -; CHECK: pred.udiv.continue6: +; CHECK: pred.udiv.continue5: ; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP12]], [[PRED_UDIV_IF5]] ] ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2 ; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8:%.*]] -; CHECK: pred.udiv.if7: +; CHECK: pred.udiv.if6: ; CHECK-NEXT: [[TMP15:%.*]] = udiv i32 219220132, [[TMP3]] ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP15]], i32 2 ; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE8]] -; CHECK: pred.udiv.continue8: +; CHECK: pred.udiv.continue7: ; CHECK-NEXT: [[TMP17:%.*]] = phi <4 x i32> [ [[TMP13]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP16]], [[PRED_UDIV_IF7]] ] ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3 ; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_UDIV_IF9:%.*]], label [[PRED_UDIV_CONTINUE10:%.*]] -; CHECK: pred.udiv.if9: +; CHECK: pred.udiv.if8: ; CHECK-NEXT: [[TMP19:%.*]] = udiv i32 219220132, [[TMP4]] ; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP19]], i32 3 ; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE10]] -; CHECK: pred.udiv.continue10: +; CHECK: pred.udiv.continue9: ; CHECK-NEXT: [[TMP21]] = phi <4 x i32> [ [[TMP17]], [[PRED_UDIV_CONTINUE8]] ], [ [[TMP20]], [[PRED_UDIV_IF9]] ] ; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP21]], <4 x i32> ; CHECK-NEXT: [[TMP23]] = add <4 x i32> [[VEC_PHI4]], [[TMP22]] @@ -832,31 +833,31 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) { ; CHECK: pred.store.continue: ; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1 ; CHECK-NEXT: br i1 [[TMP27]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]] -; CHECK: pred.store.if11: +; CHECK: pred.store.if10: ; CHECK-NEXT: [[TMP28:%.*]] = or i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP29]] ; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP30]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]] -; CHECK: pred.store.continue12: +; CHECK: pred.store.continue11: ; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2 ; CHECK-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]] -; CHECK: pred.store.if13: +; CHECK: pred.store.if12: ; CHECK-NEXT: [[TMP32:%.*]] = or i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64 ; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP33]] ; CHECK-NEXT: store i32 [[TMP3]], i32* [[TMP34]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]] -; CHECK: pred.store.continue14: +; CHECK: pred.store.continue13: ; CHECK-NEXT: [[TMP35:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3 ; CHECK-NEXT: br i1 [[TMP35]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16]] -; CHECK: pred.store.if15: +; CHECK: pred.store.if14: ; CHECK-NEXT: [[TMP36:%.*]] = or i32 [[INDEX]], 3 ; CHECK-NEXT: [[TMP37:%.*]] = sext i32 [[TMP36]] to i64 ; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP37]] ; CHECK-NEXT: store i32 [[TMP4]], i32* [[TMP38]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]] -; CHECK: pred.store.continue16: +; CHECK: pred.store.continue15: ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], ; CHECK-NEXT: [[TMP39:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] diff --git a/test/Transforms/LoopVectorize/induction.ll b/test/Transforms/LoopVectorize/induction.ll index 3788aaa72ac..6de7b073734 100644 --- a/test/Transforms/LoopVectorize/induction.ll +++ b/test/Transforms/LoopVectorize/induction.ll @@ -860,10 +860,10 @@ define i64 @trunc_with_first_order_recurrence() { ; CHECK-NEXT: %vec.phi = phi <2 x i64> ; CHECK-NEXT: %vec.ind = phi <2 x i64> [ , %vector.ph ], [ %vec.ind.next, %vector.body ] ; CHECK-NEXT: %vec.ind2 = phi <2 x i32> [ , %vector.ph ], [ %vec.ind.next3, %vector.body ] -; CHECK-NEXT: %vector.recur = phi <2 x i32> [ , %vector.ph ], [ %vec.ind5, %vector.body ] -; CHECK-NEXT: %vec.ind5 = phi <2 x i32> [ , %vector.ph ], [ %vec.ind.next6, %vector.body ] -; CHECK-NEXT: %vec.ind7 = phi <2 x i32> [ , %vector.ph ], [ %vec.ind.next8, %vector.body ] -; CHECK-NEXT: shufflevector <2 x i32> %vector.recur, <2 x i32> %vec.ind5, <2 x i32> +; CHECK-NEXT: %vector.recur = phi <2 x i32> [ , %vector.ph ], [ %vec.ind4, %vector.body ] +; CHECK-NEXT: %vec.ind4 = phi <2 x i32> [ , %vector.ph ], [ %vec.ind.next5, %vector.body ] +; CHECK-NEXT: %vec.ind6 = phi <2 x i32> [ , %vector.ph ], [ %vec.ind.next7, %vector.body ] +; CHECK-NEXT: shufflevector <2 x i32> %vector.recur, <2 x i32> %vec.ind4, <2 x i32> entry: br label %loop diff --git a/test/Transforms/LoopVectorize/interleaved-accesses.ll b/test/Transforms/LoopVectorize/interleaved-accesses.ll index 3ded17f813b..e56b607342e 100644 --- a/test/Transforms/LoopVectorize/interleaved-accesses.ll +++ b/test/Transforms/LoopVectorize/interleaved-accesses.ll @@ -1530,11 +1530,11 @@ define void @PR34743(i16* %a, i32* %b, i64 %n) { ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i16>, <8 x i16>* [[TMP10]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i16> [[WIDE_VEC]], <8 x i16> poison, <4 x i32> ; CHECK-NEXT: [[STRIDED_VEC8]] = shufflevector <8 x i16> [[WIDE_VEC]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[STRIDED_VEC8]], <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = sext <4 x i16> [[STRIDED_VEC]] to <4 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = sext <4 x i16> [[TMP11]] to <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i16> [[STRIDED_VEC]] to <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[STRIDED_VEC8]], <4 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = sext <4 x i16> [[TMP12]] to <4 x i32> ; CHECK-NEXT: [[TMP14:%.*]] = sext <4 x i16> [[STRIDED_VEC8]] to <4 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = mul nsw <4 x i32> [[TMP13]], [[TMP12]] +; CHECK-NEXT: [[TMP15:%.*]] = mul nsw <4 x i32> [[TMP13]], [[TMP11]] ; CHECK-NEXT: [[TMP16:%.*]] = mul nsw <4 x i32> [[TMP15]], [[TMP14]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP17]] to <4 x i32>* diff --git a/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll b/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll index 0e660a9ea53..ad07c1853d9 100644 --- a/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll +++ b/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll @@ -172,8 +172,9 @@ define void @constant_folded_previous_value() { ; CHECK-VF4UF2-LABEL: @constant_folded_previous_value ; CHECK-VF4UF2: vector.body ; CHECK-VF4UF2: %[[VECTOR_RECUR:.*]] = phi [ %vector.recur.init, %vector.ph ], [ shufflevector ( insertelement ( undef, i64 1, i32 0), undef, zeroinitializer), %vector.body ] -; CHECK-VF4UF2-NEXT: %[[SPLICE1:.*]] = call @llvm.experimental.vector.splice.nxv4i64( %vector.recur, shufflevector ( insertelement ( undef, i64 1, i32 0), undef, zeroinitializer), i32 -1) +; CHECK-VF4UF2: %[[SPLICE1:.*]] = call @llvm.experimental.vector.splice.nxv4i64( %vector.recur, shufflevector ( insertelement ( undef, i64 1, i32 0), undef, zeroinitializer), i32 -1) ; CHECK-VF4UF2: %[[SPLICE2:.*]] = call @llvm.experimental.vector.splice.nxv4i64( shufflevector ( insertelement ( undef, i64 1, i32 0), undef, zeroinitializer), shufflevector ( insertelement ( undef, i64 1, i32 0), undef, zeroinitializer), i32 -1) +; CHECK-VF4UF2: br i1 {{.*}}, label %middle.block, label %vector.body entry: br label %scalar.body @@ -197,15 +198,17 @@ for.end: define i32 @extract_second_last_iteration(i32* %cval, i32 %x) { ; CHECK-VF4UF2-LABEL: @extract_second_last_iteration ; CHECK-VF4UF2: vector.ph -; CHECK-VF4UF2: %[[SPLAT_INS1:.*]] = insertelement poison, i32 %x, i32 0 -; CHECK-VF4UF2: %[[SPLAT1:.*]] = shufflevector %[[SPLAT_INS1]], poison, zeroinitializer -; CHECK-VF4UF2: %[[SPLAT_INS2:.*]] = insertelement poison, i32 %x, i32 0 -; CHECK-VF4UF2: %[[SPLAT2:.*]] = shufflevector %[[SPLAT_INS2]], poison, zeroinitializer +; CHECK-VF4UF2: call i32 @llvm.vscale.i32() +; CHECK-VF4UF2: call i32 @llvm.vscale.i32() ; CHECK-VF4UF2: %[[VSCALE1:.*]] = call i32 @llvm.vscale.i32() ; CHECK-VF4UF2: %[[MUL1:.*]] = mul i32 %[[VSCALE1]], 4 ; CHECK-VF4UF2: %[[SUB1:.*]] = sub i32 %[[MUL1]], 1 ; CHECK-VF4UF2: %[[VEC_RECUR_INIT:.*]] = insertelement poison, i32 0, i32 %[[SUB1]] -; CHECK-VF4UF2: vector.body +; CHECK-VF4UF2: %[[SPLAT_INS1:.*]] = insertelement poison, i32 %x, i32 0 +; CHECK-VF4UF2: %[[SPLAT1:.*]] = shufflevector %[[SPLAT_INS1]], poison, zeroinitializer +; CHECK-VF4UF2: %[[SPLAT_INS2:.*]] = insertelement poison, i32 %x, i32 0 +; CHECK-VF4UF2: %[[SPLAT2:.*]] = shufflevector %[[SPLAT_INS2]], poison, zeroinitializer +; ; CHECK-VF4UF2: vector.body ; CHECK-VF4UF2: %[[VEC_RECUR:.*]] = phi [ %[[VEC_RECUR_INIT]], %vector.ph ], [ %[[ADD2:.*]], %vector.body ] ; CHECK-VF4UF2: %[[ADD1:.*]] = add %{{.*}}, %[[SPLAT1]] ; CHECK-VF4UF2: %[[ADD2]] = add %{{.*}}, %[[SPLAT2]] diff --git a/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll index 934d55d92ae..13b45d054fe 100644 --- a/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -766,7 +766,7 @@ define void @recipe_in_merge_candidate_used_by_first_order_recurrence(i32 %k) { ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: loop: ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next -; CHECK-NEXT: WIDEN-PHI ir<%for> = phi ir<0>, ir<%lv.a> +; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for> = phi ir<0>, ir<%lv.a> ; CHECK-NEXT: EMIT vp<%3> = icmp ule ir<%iv> vp<%0> ; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, ir<%iv> ; CHECK-NEXT: Successor(s): pred.load @@ -788,6 +788,7 @@ define void @recipe_in_merge_candidate_used_by_first_order_recurrence(i32 %k) { ; CHECK-NEXT: Successor(s): loop.0 ; CHECK-EMPTY: ; CHECK-NEXT: loop.0: +; CHECK-NEXT: EMIT vp<%7> = first-order splice ir<%for> ir<%lv.a> ; CHECK-NEXT: Successor(s): loop.1 ; CHECK-EMPTY: ; CHECK-NEXT: loop.1: @@ -800,12 +801,12 @@ define void @recipe_in_merge_candidate_used_by_first_order_recurrence(i32 %k) { ; CHECK-NEXT: CondBit: vp<%3> (loop) ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: -; CHECK-NEXT: REPLICATE ir<%div> = sdiv ir<%for>, vp<%6> +; CHECK-NEXT: REPLICATE ir<%div> = sdiv vp<%7>, vp<%6> ; CHECK-NEXT: REPLICATE store ir<%div>, ir<%gep.a> ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.continue: -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%9> = ir<%div> +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%10> = ir<%div> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): loop.2