From baa1dd5bfcb9bc9caa29c56dcf972a76722dea22 Mon Sep 17 00:00:00 2001 From: Gil Rapaport Date: Mon, 7 Oct 2019 17:24:33 +0300 Subject: [PATCH] [LV] Apply sink-after & interleave-groups as VPlan transformations (NFCI) This recommits 11ed1c0239fd51fd2f064311dc7725277ed0a994 (reverted in 9f08ce0d2197d4f163dfa4633eae2347ce8fc881 for failing an assert) with a fix: tryToWidenMemory() now first checks if the widening decision is to interleave, thus maintaining previous behavior where tryToInterleaveMemory() was called first, giving priority to interleave decisions over widening/scalarization. This commit adds the test case that exposed this bug as a LIT. --- include/llvm/Analysis/VectorUtils.h | 9 +- .../Vectorize/LoopVectorizationPlanner.h | 9 +- lib/Transforms/Vectorize/LoopVectorize.cpp | 213 ++++++++---------- lib/Transforms/Vectorize/VPRecipeBuilder.h | 44 +++- lib/Transforms/Vectorize/VPlan.cpp | 23 +- lib/Transforms/Vectorize/VPlan.h | 16 ++ .../LoopVectorize/first-order-recurrence.ll | 35 +++ .../interleaved-accesses-uniform-load.ll | 49 ++++ unittests/Transforms/Vectorize/VPlanTest.cpp | 1 + 9 files changed, 264 insertions(+), 135 deletions(-) create mode 100644 test/Transforms/LoopVectorize/interleaved-accesses-uniform-load.ll diff --git a/include/llvm/Analysis/VectorUtils.h b/include/llvm/Analysis/VectorUtils.h index 4a61c2bc35c..5dc14dbe657 100644 --- a/include/llvm/Analysis/VectorUtils.h +++ b/include/llvm/Analysis/VectorUtils.h @@ -542,13 +542,10 @@ public: /// formation for predicated accesses, we may be able to relax this limitation /// in the future once we handle more complicated blocks. void reset() { - SmallPtrSet *, 4> DelSet; - // Avoid releasing a pointer twice. - for (auto &I : InterleaveGroupMap) - DelSet.insert(I.second); - for (auto *Ptr : DelSet) - delete Ptr; InterleaveGroupMap.clear(); + for (auto *Ptr : InterleaveGroups) + delete Ptr; + InterleaveGroups.clear(); RequiresScalarEpilogue = false; } diff --git a/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index a5e85f27fab..614f931cbc6 100644 --- a/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -201,6 +201,9 @@ class LoopVectorizationPlanner { /// The profitability analysis. LoopVectorizationCostModel &CM; + /// The interleaved access analysis. + InterleavedAccessInfo &IAI; + SmallVector VPlans; /// This class is used to enable the VPlan to invoke a method of ILV. This is @@ -223,8 +226,10 @@ public: LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, LoopVectorizationLegality *Legal, - LoopVectorizationCostModel &CM) - : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM) {} + LoopVectorizationCostModel &CM, + InterleavedAccessInfo &IAI) + : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM), + IAI(IAI) {} /// Plan how to best vectorize, return the best VF and its cost, or None if /// vectorization and interleaving should be avoided up front. diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 9368dd7c8b1..5c7ff8d76b4 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -6710,37 +6710,6 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) { return BlockMaskCache[BB] = BlockMask; } -VPInterleaveRecipe *VPRecipeBuilder::tryToInterleaveMemory(Instruction *I, - VFRange &Range, - VPlanPtr &Plan) { - const InterleaveGroup *IG = CM.getInterleavedAccessGroup(I); - if (!IG) - return nullptr; - - // Now check if IG is relevant for VF's in the given range. - auto isIGMember = [&](Instruction *I) -> std::function { - return [=](unsigned VF) -> bool { - return (VF >= 2 && // Query is illegal for VF == 1 - CM.getWideningDecision(I, VF) == - LoopVectorizationCostModel::CM_Interleave); - }; - }; - if (!LoopVectorizationPlanner::getDecisionAndClampRange(isIGMember(I), Range)) - return nullptr; - - // I is a member of an InterleaveGroup for VF's in the (possibly trimmed) - // range. If it's the primary member of the IG construct a VPInterleaveRecipe. - // Otherwise, it's an adjunct member of the IG, do not construct any Recipe. - assert(I == IG->getInsertPos() && - "Generating a recipe for an adjunct member of an interleave group"); - - VPValue *Mask = nullptr; - if (Legal->isMaskRequired(I)) - Mask = createBlockInMask(I->getParent(), Plan); - - return new VPInterleaveRecipe(IG, Mask); -} - VPWidenMemoryInstructionRecipe * VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range, VPlanPtr &Plan) { @@ -6750,15 +6719,15 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range, auto willWiden = [&](unsigned VF) -> bool { if (VF == 1) return false; - if (CM.isScalarAfterVectorization(I, VF) || - CM.isProfitableToScalarize(I, VF)) - return false; LoopVectorizationCostModel::InstWidening Decision = CM.getWideningDecision(I, VF); assert(Decision != LoopVectorizationCostModel::CM_Unknown && "CM decision should be taken at this point."); - assert(Decision != LoopVectorizationCostModel::CM_Interleave && - "Interleave memory opportunity should be caught earlier."); + if (Decision == LoopVectorizationCostModel::CM_Interleave) + return true; + if (CM.isScalarAfterVectorization(I, VF) || + CM.isProfitableToScalarize(I, VF)) + return false; return Decision != LoopVectorizationCostModel::CM_Scalarize; }; @@ -6923,15 +6892,21 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB, if (!LoopVectorizationPlanner::getDecisionAndClampRange(willWiden, Range)) return false; + // If this ingredient's recipe is to be recorded, keep its recipe a singleton + // to avoid having to split recipes later. + bool IsSingleton = Ingredient2Recipe.count(I); + // Success: widen this instruction. We optimize the common case where // consecutive instructions can be represented by a single recipe. - if (!VPBB->empty()) { - VPWidenRecipe *LastWidenRecipe = dyn_cast(&VPBB->back()); - if (LastWidenRecipe && LastWidenRecipe->appendInstruction(I)) - return true; - } + if (!IsSingleton && !VPBB->empty() && LastExtensibleRecipe == &VPBB->back() && + LastExtensibleRecipe->appendInstruction(I)) + return true; - VPBB->appendRecipe(new VPWidenRecipe(I)); + VPWidenRecipe *WidenRecipe = new VPWidenRecipe(I); + if (!IsSingleton) + LastExtensibleRecipe = WidenRecipe; + setRecipe(I, WidenRecipe); + VPBB->appendRecipe(WidenRecipe); return true; } @@ -6947,6 +6922,7 @@ VPBasicBlock *VPRecipeBuilder::handleReplication( [&](unsigned VF) { return CM.isScalarWithPredication(I, VF); }, Range); auto *Recipe = new VPReplicateRecipe(I, IsUniform, IsPredicated); + setRecipe(I, Recipe); // Find if I uses a predicated instruction. If so, it will use its scalar // value. Avoid hoisting the insert-element which packs the scalar value into @@ -7005,36 +6981,20 @@ VPRegionBlock *VPRecipeBuilder::createReplicateRegion(Instruction *Instr, bool VPRecipeBuilder::tryToCreateRecipe(Instruction *Instr, VFRange &Range, VPlanPtr &Plan, VPBasicBlock *VPBB) { VPRecipeBase *Recipe = nullptr; - // Check if Instr should belong to an interleave memory recipe, or already - // does. In the latter case Instr is irrelevant. - if ((Recipe = tryToInterleaveMemory(Instr, Range, Plan))) { + + // First, check for specific widening recipes that deal with memory + // operations, inductions and Phi nodes. + if ((Recipe = tryToWidenMemory(Instr, Range, Plan)) || + (Recipe = tryToOptimizeInduction(Instr, Range)) || + (Recipe = tryToBlend(Instr, Plan)) || + (isa(Instr) && + (Recipe = new VPWidenPHIRecipe(cast(Instr))))) { + setRecipe(Instr, Recipe); VPBB->appendRecipe(Recipe); return true; } - // Check if Instr is a memory operation that should be widened. - if ((Recipe = tryToWidenMemory(Instr, Range, Plan))) { - VPBB->appendRecipe(Recipe); - return true; - } - - // Check if Instr should form some PHI recipe. - if ((Recipe = tryToOptimizeInduction(Instr, Range))) { - VPBB->appendRecipe(Recipe); - return true; - } - if ((Recipe = tryToBlend(Instr, Plan))) { - VPBB->appendRecipe(Recipe); - return true; - } - if (PHINode *Phi = dyn_cast(Instr)) { - VPBB->appendRecipe(new VPWidenPHIRecipe(Phi)); - return true; - } - - // Check if Instr is to be widened by a general VPWidenRecipe, after - // having first checked for specific widening recipes that deal with - // Interleave Groups, Inductions and Phi nodes. + // Check if Instr is to be widened by a general VPWidenRecipe. if (tryToWiden(Instr, VPBB, Range)) return true; @@ -7090,19 +7050,57 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF, VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( VFRange &Range, SmallPtrSetImpl &NeedDef, SmallPtrSetImpl &DeadInstructions) { + // Hold a mapping from predicated instructions to their recipes, in order to // fix their AlsoPack behavior if a user is determined to replicate and use a // scalar instead of vector value. DenseMap PredInst2Recipe; DenseMap &SinkAfter = Legal->getSinkAfter(); - DenseMap SinkAfterInverse; + + SmallPtrSet *, 1> InterleaveGroups; + + VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder); + + // --------------------------------------------------------------------------- + // Pre-construction: record ingredients whose recipes we'll need to further + // process after constructing the initial VPlan. + // --------------------------------------------------------------------------- + + // Mark instructions we'll need to sink later and their targets as + // ingredients whose recipe we'll need to record. + for (auto &Entry : SinkAfter) { + RecipeBuilder.recordRecipeOf(Entry.first); + RecipeBuilder.recordRecipeOf(Entry.second); + } + + // For each interleave group which is relevant for this (possibly trimmed) + // Range, add it to the set of groups to be later applied to the VPlan and add + // placeholders for its members' Recipes which we'll be replacing with a + // single VPInterleaveRecipe. + for (InterleaveGroup *IG : IAI.getInterleaveGroups()) { + auto applyIG = [IG, this](unsigned VF) -> bool { + return (VF >= 2 && // Query is illegal for VF == 1 + CM.getWideningDecision(IG->getInsertPos(), VF) == + LoopVectorizationCostModel::CM_Interleave); + }; + if (!getDecisionAndClampRange(applyIG, Range)) + continue; + InterleaveGroups.insert(IG); + for (unsigned i = 0; i < IG->getFactor(); i++) + if (Instruction *Member = IG->getMember(i)) + RecipeBuilder.recordRecipeOf(Member); + }; + + // --------------------------------------------------------------------------- + // Build initial VPlan: Scan the body of the loop in a topological order to + // visit each basic block after having visited its predecessor basic blocks. + // --------------------------------------------------------------------------- // Create a dummy pre-entry VPBasicBlock to start building the VPlan. VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry"); auto Plan = std::make_unique(VPBB); - VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder); // Represent values that will have defs inside VPlan. for (Value *V : NeedDef) Plan->addVPValue(V); @@ -7123,8 +7121,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( std::vector Ingredients; - // Organize the ingredients to vectorize from current basic block in the - // right order. + // Introduce each ingredient into VPlan. for (Instruction &I : BB->instructionsWithoutDebug()) { Instruction *Instr = &I; @@ -7134,43 +7131,6 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( DeadInstructions.find(Instr) != DeadInstructions.end()) continue; - // I is a member of an InterleaveGroup for Range.Start. If it's an adjunct - // member of the IG, do not construct any Recipe for it. - const InterleaveGroup *IG = - CM.getInterleavedAccessGroup(Instr); - if (IG && Instr != IG->getInsertPos() && - Range.Start >= 2 && // Query is illegal for VF == 1 - CM.getWideningDecision(Instr, Range.Start) == - LoopVectorizationCostModel::CM_Interleave) { - auto SinkCandidate = SinkAfterInverse.find(Instr); - if (SinkCandidate != SinkAfterInverse.end()) - Ingredients.push_back(SinkCandidate->second); - continue; - } - - // Move instructions to handle first-order recurrences, step 1: avoid - // handling this instruction until after we've handled the instruction it - // should follow. - auto SAIt = SinkAfter.find(Instr); - if (SAIt != SinkAfter.end()) { - LLVM_DEBUG(dbgs() << "Sinking" << *SAIt->first << " after" - << *SAIt->second - << " to vectorize a 1st order recurrence.\n"); - SinkAfterInverse[SAIt->second] = Instr; - continue; - } - - Ingredients.push_back(Instr); - - // Move instructions to handle first-order recurrences, step 2: push the - // instruction to be sunk at its insertion point. - auto SAInvIt = SinkAfterInverse.find(Instr); - if (SAInvIt != SinkAfterInverse.end()) - Ingredients.push_back(SAInvIt->second); - } - - // Introduce each ingredient into VPlan. - for (Instruction *Instr : Ingredients) { if (RecipeBuilder.tryToCreateRecipe(Instr, Range, Plan, VPBB)) continue; @@ -7195,6 +7155,32 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( VPBlockUtils::disconnectBlocks(PreEntry, Entry); delete PreEntry; + // --------------------------------------------------------------------------- + // Transform initial VPlan: Apply previously taken decisions, in order, to + // bring the VPlan to its final state. + // --------------------------------------------------------------------------- + + // Apply Sink-After legal constraints. + for (auto &Entry : SinkAfter) { + VPRecipeBase *Sink = RecipeBuilder.getRecipe(Entry.first); + VPRecipeBase *Target = RecipeBuilder.getRecipe(Entry.second); + Sink->moveAfter(Target); + } + + // Interleave memory: for each Interleave Group we marked earlier as relevant + // for this VPlan, replace the Recipes widening its memory instructions with a + // single VPInterleaveRecipe at its insertion point. + for (auto IG : InterleaveGroups) { + auto *Recipe = cast( + RecipeBuilder.getRecipe(IG->getInsertPos())); + (new VPInterleaveRecipe(IG, Recipe->getMask()))->insertBefore(Recipe); + + for (unsigned i = 0; i < IG->getFactor(); ++i) + if (Instruction *Member = IG->getMember(i)) { + RecipeBuilder.getRecipe(Member)->eraseFromParent(); + } + } + // Finally, if tail is folded by masking, introduce selects between the phi // and the live-out instruction of each reduction, at the end of the latch. if (CM.foldTailByMasking()) { @@ -7427,12 +7413,11 @@ void VPPredInstPHIRecipe::execute(VPTransformState &State) { } void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { - if (!User) + VPValue *Mask = getMask(); + if (!Mask) return State.ILV->vectorizeMemoryInstruction(&Instr); - // Last (and currently only) operand is a mask. InnerLoopVectorizer::VectorParts MaskValues(State.UF); - VPValue *Mask = User->getOperand(User->getNumOperands() - 1); for (unsigned Part = 0; Part < State.UF; ++Part) MaskValues[Part] = State.get(Mask, Part); State.ILV->vectorizeMemoryInstruction(&Instr, &MaskValues); @@ -7481,7 +7466,7 @@ static bool processLoopInVPlanNativePath( // Use the planner for outer loop vectorization. // TODO: CM is not used at this point inside the planner. Turn CM into an // optional argument if we don't need it in the future. - LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM); + LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI); // Get user vectorization factor. const unsigned UserVF = Hints.getWidth(); @@ -7641,7 +7626,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { CM.collectValuesToIgnore(); // Use the planner for vectorization. - LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM); + LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI); // Get user vectorization factor. unsigned UserVF = Hints.getWidth(); diff --git a/lib/Transforms/Vectorize/VPRecipeBuilder.h b/lib/Transforms/Vectorize/VPRecipeBuilder.h index 0ca6a6b93cf..598fb00e956 100644 --- a/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -47,6 +47,24 @@ class VPRecipeBuilder { EdgeMaskCacheTy EdgeMaskCache; BlockMaskCacheTy BlockMaskCache; + // VPlan-VPlan transformations support: Hold a mapping from ingredients to + // their recipe. To save on memory, only do so for selected ingredients, + // marked by having a nullptr entry in this map. If those ingredients get a + // VPWidenRecipe, also avoid compressing other ingredients into it to avoid + // having to split such recipes later. + DenseMap Ingredient2Recipe; + VPWidenRecipe *LastExtensibleRecipe = nullptr; + + /// Set the recipe created for given ingredient. This operation is a no-op for + /// ingredients that were not marked using a nullptr entry in the map. + void setRecipe(Instruction *I, VPRecipeBase *R) { + if (!Ingredient2Recipe.count(I)) + return; + assert(Ingredient2Recipe[I] == nullptr && + "Recipe already set for ingredient"); + Ingredient2Recipe[I] = R; + } + public: /// A helper function that computes the predicate of the block BB, assuming /// that the header block of the loop is set to True. It returns the *entry* @@ -57,16 +75,22 @@ public: /// and DST. VPValue *createEdgeMask(BasicBlock *Src, BasicBlock *Dst, VPlanPtr &Plan); - /// Check if \I belongs to an Interleave Group within the given VF \p Range, - /// \return true in the first returned value if so and false otherwise. - /// Build a new VPInterleaveGroup Recipe if \I is the primary member of an IG - /// for \p Range.Start, and provide it as the second returned value. - /// Note that if \I is an adjunct member of an IG for \p Range.Start, the - /// \return value is , as it is handled by another recipe. - /// \p Range.End may be decreased to ensure same decision from \p Range.Start - /// to \p Range.End. - VPInterleaveRecipe *tryToInterleaveMemory(Instruction *I, VFRange &Range, - VPlanPtr &Plan); + /// Mark given ingredient for recording its recipe once one is created for + /// it. + void recordRecipeOf(Instruction *I) { + assert((!Ingredient2Recipe.count(I) || Ingredient2Recipe[I] == nullptr) && + "Recipe already set for ingredient"); + Ingredient2Recipe[I] = nullptr; + } + + /// Return the recipe created for given ingredient. + VPRecipeBase *getRecipe(Instruction *I) { + assert(Ingredient2Recipe.count(I) && + "Recording this ingredients recipe was not requested"); + assert(Ingredient2Recipe[I] != nullptr && + "Ingredient doesn't have a recipe"); + return Ingredient2Recipe[I]; + } /// Check if \I is a memory instruction to be widened for \p Range.Start and /// potentially masked. Such instructions are handled by a recipe that takes diff --git a/lib/Transforms/Vectorize/VPlan.cpp b/lib/Transforms/Vectorize/VPlan.cpp index 4b80d1fb20a..bc32e54be72 100644 --- a/lib/Transforms/Vectorize/VPlan.cpp +++ b/lib/Transforms/Vectorize/VPlan.cpp @@ -275,18 +275,35 @@ void VPRegionBlock::execute(VPTransformState *State) { } void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) { + assert(!Parent && "Recipe already in some VPBasicBlock"); + assert(InsertPos->getParent() && + "Insertion position not in any VPBasicBlock"); Parent = InsertPos->getParent(); Parent->getRecipeList().insert(InsertPos->getIterator(), this); } +void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) { + assert(!Parent && "Recipe already in some VPBasicBlock"); + assert(InsertPos->getParent() && + "Insertion position not in any VPBasicBlock"); + Parent = InsertPos->getParent(); + Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this); +} + +void VPRecipeBase::removeFromParent() { + assert(getParent() && "Recipe not in any VPBasicBlock"); + getParent()->getRecipeList().remove(getIterator()); + Parent = nullptr; +} + iplist::iterator VPRecipeBase::eraseFromParent() { + assert(getParent() && "Recipe not in any VPBasicBlock"); return getParent()->getRecipeList().erase(getIterator()); } void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) { - InsertPos->getParent()->getRecipeList().splice( - std::next(InsertPos->getIterator()), getParent()->getRecipeList(), - getIterator()); + removeFromParent(); + insertAfter(InsertPos); } void VPInstruction::generateInstruction(VPTransformState &State, diff --git a/lib/Transforms/Vectorize/VPlan.h b/lib/Transforms/Vectorize/VPlan.h index 6eeec0f21fd..226c6c0279d 100644 --- a/lib/Transforms/Vectorize/VPlan.h +++ b/lib/Transforms/Vectorize/VPlan.h @@ -567,6 +567,7 @@ public: /// instructions. class VPRecipeBase : public ilist_node_with_parent { friend VPBasicBlock; + friend class VPBlockUtils; private: const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast). @@ -615,10 +616,18 @@ public: /// the specified recipe. void insertBefore(VPRecipeBase *InsertPos); + /// Insert an unlinked Recipe into a basic block immediately after + /// the specified Recipe. + void insertAfter(VPRecipeBase *InsertPos); + /// Unlink this recipe from its current VPBasicBlock and insert it into /// the VPBasicBlock that MovePos lives in, right after MovePos. void moveAfter(VPRecipeBase *MovePos); + /// This method unlinks 'this' from the containing basic block, but does not + /// delete it. + void removeFromParent(); + /// This method unlinks 'this' from the containing basic block and deletes it. /// /// \returns an iterator pointing to the element after the erased one @@ -973,6 +982,13 @@ public: return V->getVPRecipeID() == VPRecipeBase::VPWidenMemoryInstructionSC; } + /// Return the mask used by this recipe. Note that a full mask is represented + /// by a nullptr. + VPValue *getMask() { + // Mask is the last operand. + return User ? User->getOperand(User->getNumOperands() - 1) : nullptr; + } + /// Generate the wide load/store. void execute(VPTransformState &State) override; diff --git a/test/Transforms/LoopVectorize/first-order-recurrence.ll b/test/Transforms/LoopVectorize/first-order-recurrence.ll index 998f412674b..f86dcd7e2e7 100644 --- a/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -572,3 +572,38 @@ for.body: for.end: ret void } + +; Do not sink branches: While branches are if-converted and do not require +; sinking, instructions with side effects (e.g. loads) conditioned by those +; branches will become users of the condition bit after vectorization and would +; need to be sunk if the loop is vectorized. +define void @do_not_sink_branch(i32 %x, i32* %in, i32* %out, i32 %tc) local_unnamed_addr #0 { +; NO-SINK-AFTER-LABEL: do_not_sink_branch +; NO-SINK-AFTER-NOT: vector.ph: +; NO-SINK-AFTER: } +entry: + %cmp530 = icmp slt i32 0, %tc + br label %for.body4 + +for.body4: ; preds = %cond.end, %entry + %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %cond.end ] + %cmp534 = phi i1 [ %cmp530, %entry ], [ %cmp5, %cond.end ] + br i1 %cmp534, label %cond.true, label %cond.end + +cond.true: ; preds = %for.body4 + %arrayidx7 = getelementptr inbounds i32, i32* %in, i32 %indvars.iv + %in.val = load i32, i32* %arrayidx7, align 4 + br label %cond.end + +cond.end: ; preds = %for.body4, %cond.true + %cond = phi i32 [ %in.val, %cond.true ], [ 0, %for.body4 ] + %arrayidx8 = getelementptr inbounds i32, i32* %out, i32 %indvars.iv + store i32 %cond, i32* %arrayidx8, align 4 + %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 + %cmp5 = icmp slt i32 %indvars.iv.next, %tc + %exitcond = icmp eq i32 %indvars.iv.next, %x + br i1 %exitcond, label %for.end12.loopexit, label %for.body4 + +for.end12.loopexit: ; preds = %cond.end + ret void +} diff --git a/test/Transforms/LoopVectorize/interleaved-accesses-uniform-load.ll b/test/Transforms/LoopVectorize/interleaved-accesses-uniform-load.ll new file mode 100644 index 00000000000..b56470cec0a --- /dev/null +++ b/test/Transforms/LoopVectorize/interleaved-accesses-uniform-load.ll @@ -0,0 +1,49 @@ +; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses=true < %s + +; Make sure the vectorizer can handle this loop: The strided load is only used +; by the loop's exit condition, which is not vectorized, and is therefore +; considered uniform while also forming an interleave group. + +%0 = type { i32 ()*, i32 } + +@0 = internal unnamed_addr constant [59 x %0] [%0 zeroinitializer, +%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer, +%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer, +%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer, +%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer, +%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer, +%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer, +%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer, +%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer, +%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer, +%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer, +%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer, +%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer, +%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer, +%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer, +%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer, +%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer, +%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer, +%0 {i32 ()* null, i32 258}, %0 zeroinitializer, %0 zeroinitializer, +%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer, +%0 zeroinitializer], align 8 + +define dso_local void @test_dead_load(i32 %arg) { +; CHECK-LABEL: @test_dead_load( +; CHECK: vector.body: +; CHECK: %wide.vec = load <16 x i32>, <16 x i32>* %3, align 8 +; CHECK: %strided.vec = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> +bb1: + br label %bb2 + +bb2: + %tmp = phi %0* [ %tmp6, %bb2 ], [ getelementptr inbounds ([59 x %0], [59 x %0]* @0, i64 0, i64 0), %bb1 ] + %tmp3 = getelementptr inbounds %0, %0* %tmp, i64 0, i32 1 + %tmp4 = load i32, i32* %tmp3, align 8 + %tmp5 = icmp eq i32 %tmp4, 258 + %tmp6 = getelementptr inbounds %0, %0* %tmp, i64 1 + br i1 %tmp5, label %bb65, label %bb2 + +bb65: + unreachable +} diff --git a/unittests/Transforms/Vectorize/VPlanTest.cpp b/unittests/Transforms/Vectorize/VPlanTest.cpp index 57567e7d843..67936a83efa 100644 --- a/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -83,6 +83,7 @@ TEST(VPInstructionTest, moveAfter) { CHECK_ITERATOR(VPBB1, I2, I1); CHECK_ITERATOR(VPBB2, I4, I3, I5); + EXPECT_EQ(I3->getParent(), I4->getParent()); } } // namespace