mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
[LV] Apply sink-after & interleave-groups as VPlan transformations (NFCI)
This recommits 11ed1c0239fd51fd2f064311dc7725277ed0a994 (reverted in 9f08ce0d2197d4f163dfa4633eae2347ce8fc881 for failing an assert) with a fix: tryToWidenMemory() now first checks if the widening decision is to interleave, thus maintaining previous behavior where tryToInterleaveMemory() was called first, giving priority to interleave decisions over widening/scalarization. This commit adds the test case that exposed this bug as a LIT.
This commit is contained in:
parent
b62361ac54
commit
baa1dd5bfc
@ -542,13 +542,10 @@ public:
|
||||
/// formation for predicated accesses, we may be able to relax this limitation
|
||||
/// in the future once we handle more complicated blocks.
|
||||
void reset() {
|
||||
SmallPtrSet<InterleaveGroup<Instruction> *, 4> DelSet;
|
||||
// Avoid releasing a pointer twice.
|
||||
for (auto &I : InterleaveGroupMap)
|
||||
DelSet.insert(I.second);
|
||||
for (auto *Ptr : DelSet)
|
||||
delete Ptr;
|
||||
InterleaveGroupMap.clear();
|
||||
for (auto *Ptr : InterleaveGroups)
|
||||
delete Ptr;
|
||||
InterleaveGroups.clear();
|
||||
RequiresScalarEpilogue = false;
|
||||
}
|
||||
|
||||
|
@ -201,6 +201,9 @@ class LoopVectorizationPlanner {
|
||||
/// The profitability analysis.
|
||||
LoopVectorizationCostModel &CM;
|
||||
|
||||
/// The interleaved access analysis.
|
||||
InterleavedAccessInfo &IAI;
|
||||
|
||||
SmallVector<VPlanPtr, 4> VPlans;
|
||||
|
||||
/// This class is used to enable the VPlan to invoke a method of ILV. This is
|
||||
@ -223,8 +226,10 @@ public:
|
||||
LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI,
|
||||
const TargetTransformInfo *TTI,
|
||||
LoopVectorizationLegality *Legal,
|
||||
LoopVectorizationCostModel &CM)
|
||||
: OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM) {}
|
||||
LoopVectorizationCostModel &CM,
|
||||
InterleavedAccessInfo &IAI)
|
||||
: OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
|
||||
IAI(IAI) {}
|
||||
|
||||
/// Plan how to best vectorize, return the best VF and its cost, or None if
|
||||
/// vectorization and interleaving should be avoided up front.
|
||||
|
@ -6710,37 +6710,6 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) {
|
||||
return BlockMaskCache[BB] = BlockMask;
|
||||
}
|
||||
|
||||
VPInterleaveRecipe *VPRecipeBuilder::tryToInterleaveMemory(Instruction *I,
|
||||
VFRange &Range,
|
||||
VPlanPtr &Plan) {
|
||||
const InterleaveGroup<Instruction> *IG = CM.getInterleavedAccessGroup(I);
|
||||
if (!IG)
|
||||
return nullptr;
|
||||
|
||||
// Now check if IG is relevant for VF's in the given range.
|
||||
auto isIGMember = [&](Instruction *I) -> std::function<bool(unsigned)> {
|
||||
return [=](unsigned VF) -> bool {
|
||||
return (VF >= 2 && // Query is illegal for VF == 1
|
||||
CM.getWideningDecision(I, VF) ==
|
||||
LoopVectorizationCostModel::CM_Interleave);
|
||||
};
|
||||
};
|
||||
if (!LoopVectorizationPlanner::getDecisionAndClampRange(isIGMember(I), Range))
|
||||
return nullptr;
|
||||
|
||||
// I is a member of an InterleaveGroup for VF's in the (possibly trimmed)
|
||||
// range. If it's the primary member of the IG construct a VPInterleaveRecipe.
|
||||
// Otherwise, it's an adjunct member of the IG, do not construct any Recipe.
|
||||
assert(I == IG->getInsertPos() &&
|
||||
"Generating a recipe for an adjunct member of an interleave group");
|
||||
|
||||
VPValue *Mask = nullptr;
|
||||
if (Legal->isMaskRequired(I))
|
||||
Mask = createBlockInMask(I->getParent(), Plan);
|
||||
|
||||
return new VPInterleaveRecipe(IG, Mask);
|
||||
}
|
||||
|
||||
VPWidenMemoryInstructionRecipe *
|
||||
VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range,
|
||||
VPlanPtr &Plan) {
|
||||
@ -6750,15 +6719,15 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range,
|
||||
auto willWiden = [&](unsigned VF) -> bool {
|
||||
if (VF == 1)
|
||||
return false;
|
||||
if (CM.isScalarAfterVectorization(I, VF) ||
|
||||
CM.isProfitableToScalarize(I, VF))
|
||||
return false;
|
||||
LoopVectorizationCostModel::InstWidening Decision =
|
||||
CM.getWideningDecision(I, VF);
|
||||
assert(Decision != LoopVectorizationCostModel::CM_Unknown &&
|
||||
"CM decision should be taken at this point.");
|
||||
assert(Decision != LoopVectorizationCostModel::CM_Interleave &&
|
||||
"Interleave memory opportunity should be caught earlier.");
|
||||
if (Decision == LoopVectorizationCostModel::CM_Interleave)
|
||||
return true;
|
||||
if (CM.isScalarAfterVectorization(I, VF) ||
|
||||
CM.isProfitableToScalarize(I, VF))
|
||||
return false;
|
||||
return Decision != LoopVectorizationCostModel::CM_Scalarize;
|
||||
};
|
||||
|
||||
@ -6923,15 +6892,21 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
|
||||
if (!LoopVectorizationPlanner::getDecisionAndClampRange(willWiden, Range))
|
||||
return false;
|
||||
|
||||
// If this ingredient's recipe is to be recorded, keep its recipe a singleton
|
||||
// to avoid having to split recipes later.
|
||||
bool IsSingleton = Ingredient2Recipe.count(I);
|
||||
|
||||
// Success: widen this instruction. We optimize the common case where
|
||||
// consecutive instructions can be represented by a single recipe.
|
||||
if (!VPBB->empty()) {
|
||||
VPWidenRecipe *LastWidenRecipe = dyn_cast<VPWidenRecipe>(&VPBB->back());
|
||||
if (LastWidenRecipe && LastWidenRecipe->appendInstruction(I))
|
||||
return true;
|
||||
}
|
||||
if (!IsSingleton && !VPBB->empty() && LastExtensibleRecipe == &VPBB->back() &&
|
||||
LastExtensibleRecipe->appendInstruction(I))
|
||||
return true;
|
||||
|
||||
VPBB->appendRecipe(new VPWidenRecipe(I));
|
||||
VPWidenRecipe *WidenRecipe = new VPWidenRecipe(I);
|
||||
if (!IsSingleton)
|
||||
LastExtensibleRecipe = WidenRecipe;
|
||||
setRecipe(I, WidenRecipe);
|
||||
VPBB->appendRecipe(WidenRecipe);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -6947,6 +6922,7 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
|
||||
[&](unsigned VF) { return CM.isScalarWithPredication(I, VF); }, Range);
|
||||
|
||||
auto *Recipe = new VPReplicateRecipe(I, IsUniform, IsPredicated);
|
||||
setRecipe(I, Recipe);
|
||||
|
||||
// Find if I uses a predicated instruction. If so, it will use its scalar
|
||||
// value. Avoid hoisting the insert-element which packs the scalar value into
|
||||
@ -7005,36 +6981,20 @@ VPRegionBlock *VPRecipeBuilder::createReplicateRegion(Instruction *Instr,
|
||||
bool VPRecipeBuilder::tryToCreateRecipe(Instruction *Instr, VFRange &Range,
|
||||
VPlanPtr &Plan, VPBasicBlock *VPBB) {
|
||||
VPRecipeBase *Recipe = nullptr;
|
||||
// Check if Instr should belong to an interleave memory recipe, or already
|
||||
// does. In the latter case Instr is irrelevant.
|
||||
if ((Recipe = tryToInterleaveMemory(Instr, Range, Plan))) {
|
||||
|
||||
// First, check for specific widening recipes that deal with memory
|
||||
// operations, inductions and Phi nodes.
|
||||
if ((Recipe = tryToWidenMemory(Instr, Range, Plan)) ||
|
||||
(Recipe = tryToOptimizeInduction(Instr, Range)) ||
|
||||
(Recipe = tryToBlend(Instr, Plan)) ||
|
||||
(isa<PHINode>(Instr) &&
|
||||
(Recipe = new VPWidenPHIRecipe(cast<PHINode>(Instr))))) {
|
||||
setRecipe(Instr, Recipe);
|
||||
VPBB->appendRecipe(Recipe);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check if Instr is a memory operation that should be widened.
|
||||
if ((Recipe = tryToWidenMemory(Instr, Range, Plan))) {
|
||||
VPBB->appendRecipe(Recipe);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check if Instr should form some PHI recipe.
|
||||
if ((Recipe = tryToOptimizeInduction(Instr, Range))) {
|
||||
VPBB->appendRecipe(Recipe);
|
||||
return true;
|
||||
}
|
||||
if ((Recipe = tryToBlend(Instr, Plan))) {
|
||||
VPBB->appendRecipe(Recipe);
|
||||
return true;
|
||||
}
|
||||
if (PHINode *Phi = dyn_cast<PHINode>(Instr)) {
|
||||
VPBB->appendRecipe(new VPWidenPHIRecipe(Phi));
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check if Instr is to be widened by a general VPWidenRecipe, after
|
||||
// having first checked for specific widening recipes that deal with
|
||||
// Interleave Groups, Inductions and Phi nodes.
|
||||
// Check if Instr is to be widened by a general VPWidenRecipe.
|
||||
if (tryToWiden(Instr, VPBB, Range))
|
||||
return true;
|
||||
|
||||
@ -7090,19 +7050,57 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF,
|
||||
VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
|
||||
VFRange &Range, SmallPtrSetImpl<Value *> &NeedDef,
|
||||
SmallPtrSetImpl<Instruction *> &DeadInstructions) {
|
||||
|
||||
// Hold a mapping from predicated instructions to their recipes, in order to
|
||||
// fix their AlsoPack behavior if a user is determined to replicate and use a
|
||||
// scalar instead of vector value.
|
||||
DenseMap<Instruction *, VPReplicateRecipe *> PredInst2Recipe;
|
||||
|
||||
DenseMap<Instruction *, Instruction *> &SinkAfter = Legal->getSinkAfter();
|
||||
DenseMap<Instruction *, Instruction *> SinkAfterInverse;
|
||||
|
||||
SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups;
|
||||
|
||||
VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder);
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Pre-construction: record ingredients whose recipes we'll need to further
|
||||
// process after constructing the initial VPlan.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Mark instructions we'll need to sink later and their targets as
|
||||
// ingredients whose recipe we'll need to record.
|
||||
for (auto &Entry : SinkAfter) {
|
||||
RecipeBuilder.recordRecipeOf(Entry.first);
|
||||
RecipeBuilder.recordRecipeOf(Entry.second);
|
||||
}
|
||||
|
||||
// For each interleave group which is relevant for this (possibly trimmed)
|
||||
// Range, add it to the set of groups to be later applied to the VPlan and add
|
||||
// placeholders for its members' Recipes which we'll be replacing with a
|
||||
// single VPInterleaveRecipe.
|
||||
for (InterleaveGroup<Instruction> *IG : IAI.getInterleaveGroups()) {
|
||||
auto applyIG = [IG, this](unsigned VF) -> bool {
|
||||
return (VF >= 2 && // Query is illegal for VF == 1
|
||||
CM.getWideningDecision(IG->getInsertPos(), VF) ==
|
||||
LoopVectorizationCostModel::CM_Interleave);
|
||||
};
|
||||
if (!getDecisionAndClampRange(applyIG, Range))
|
||||
continue;
|
||||
InterleaveGroups.insert(IG);
|
||||
for (unsigned i = 0; i < IG->getFactor(); i++)
|
||||
if (Instruction *Member = IG->getMember(i))
|
||||
RecipeBuilder.recordRecipeOf(Member);
|
||||
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Build initial VPlan: Scan the body of the loop in a topological order to
|
||||
// visit each basic block after having visited its predecessor basic blocks.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Create a dummy pre-entry VPBasicBlock to start building the VPlan.
|
||||
VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry");
|
||||
auto Plan = std::make_unique<VPlan>(VPBB);
|
||||
|
||||
VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder);
|
||||
// Represent values that will have defs inside VPlan.
|
||||
for (Value *V : NeedDef)
|
||||
Plan->addVPValue(V);
|
||||
@ -7123,8 +7121,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
|
||||
|
||||
std::vector<Instruction *> Ingredients;
|
||||
|
||||
// Organize the ingredients to vectorize from current basic block in the
|
||||
// right order.
|
||||
// Introduce each ingredient into VPlan.
|
||||
for (Instruction &I : BB->instructionsWithoutDebug()) {
|
||||
Instruction *Instr = &I;
|
||||
|
||||
@ -7134,43 +7131,6 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
|
||||
DeadInstructions.find(Instr) != DeadInstructions.end())
|
||||
continue;
|
||||
|
||||
// I is a member of an InterleaveGroup for Range.Start. If it's an adjunct
|
||||
// member of the IG, do not construct any Recipe for it.
|
||||
const InterleaveGroup<Instruction> *IG =
|
||||
CM.getInterleavedAccessGroup(Instr);
|
||||
if (IG && Instr != IG->getInsertPos() &&
|
||||
Range.Start >= 2 && // Query is illegal for VF == 1
|
||||
CM.getWideningDecision(Instr, Range.Start) ==
|
||||
LoopVectorizationCostModel::CM_Interleave) {
|
||||
auto SinkCandidate = SinkAfterInverse.find(Instr);
|
||||
if (SinkCandidate != SinkAfterInverse.end())
|
||||
Ingredients.push_back(SinkCandidate->second);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Move instructions to handle first-order recurrences, step 1: avoid
|
||||
// handling this instruction until after we've handled the instruction it
|
||||
// should follow.
|
||||
auto SAIt = SinkAfter.find(Instr);
|
||||
if (SAIt != SinkAfter.end()) {
|
||||
LLVM_DEBUG(dbgs() << "Sinking" << *SAIt->first << " after"
|
||||
<< *SAIt->second
|
||||
<< " to vectorize a 1st order recurrence.\n");
|
||||
SinkAfterInverse[SAIt->second] = Instr;
|
||||
continue;
|
||||
}
|
||||
|
||||
Ingredients.push_back(Instr);
|
||||
|
||||
// Move instructions to handle first-order recurrences, step 2: push the
|
||||
// instruction to be sunk at its insertion point.
|
||||
auto SAInvIt = SinkAfterInverse.find(Instr);
|
||||
if (SAInvIt != SinkAfterInverse.end())
|
||||
Ingredients.push_back(SAInvIt->second);
|
||||
}
|
||||
|
||||
// Introduce each ingredient into VPlan.
|
||||
for (Instruction *Instr : Ingredients) {
|
||||
if (RecipeBuilder.tryToCreateRecipe(Instr, Range, Plan, VPBB))
|
||||
continue;
|
||||
|
||||
@ -7195,6 +7155,32 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
|
||||
VPBlockUtils::disconnectBlocks(PreEntry, Entry);
|
||||
delete PreEntry;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Transform initial VPlan: Apply previously taken decisions, in order, to
|
||||
// bring the VPlan to its final state.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Apply Sink-After legal constraints.
|
||||
for (auto &Entry : SinkAfter) {
|
||||
VPRecipeBase *Sink = RecipeBuilder.getRecipe(Entry.first);
|
||||
VPRecipeBase *Target = RecipeBuilder.getRecipe(Entry.second);
|
||||
Sink->moveAfter(Target);
|
||||
}
|
||||
|
||||
// Interleave memory: for each Interleave Group we marked earlier as relevant
|
||||
// for this VPlan, replace the Recipes widening its memory instructions with a
|
||||
// single VPInterleaveRecipe at its insertion point.
|
||||
for (auto IG : InterleaveGroups) {
|
||||
auto *Recipe = cast<VPWidenMemoryInstructionRecipe>(
|
||||
RecipeBuilder.getRecipe(IG->getInsertPos()));
|
||||
(new VPInterleaveRecipe(IG, Recipe->getMask()))->insertBefore(Recipe);
|
||||
|
||||
for (unsigned i = 0; i < IG->getFactor(); ++i)
|
||||
if (Instruction *Member = IG->getMember(i)) {
|
||||
RecipeBuilder.getRecipe(Member)->eraseFromParent();
|
||||
}
|
||||
}
|
||||
|
||||
// Finally, if tail is folded by masking, introduce selects between the phi
|
||||
// and the live-out instruction of each reduction, at the end of the latch.
|
||||
if (CM.foldTailByMasking()) {
|
||||
@ -7427,12 +7413,11 @@ void VPPredInstPHIRecipe::execute(VPTransformState &State) {
|
||||
}
|
||||
|
||||
void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
|
||||
if (!User)
|
||||
VPValue *Mask = getMask();
|
||||
if (!Mask)
|
||||
return State.ILV->vectorizeMemoryInstruction(&Instr);
|
||||
|
||||
// Last (and currently only) operand is a mask.
|
||||
InnerLoopVectorizer::VectorParts MaskValues(State.UF);
|
||||
VPValue *Mask = User->getOperand(User->getNumOperands() - 1);
|
||||
for (unsigned Part = 0; Part < State.UF; ++Part)
|
||||
MaskValues[Part] = State.get(Mask, Part);
|
||||
State.ILV->vectorizeMemoryInstruction(&Instr, &MaskValues);
|
||||
@ -7481,7 +7466,7 @@ static bool processLoopInVPlanNativePath(
|
||||
// Use the planner for outer loop vectorization.
|
||||
// TODO: CM is not used at this point inside the planner. Turn CM into an
|
||||
// optional argument if we don't need it in the future.
|
||||
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM);
|
||||
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI);
|
||||
|
||||
// Get user vectorization factor.
|
||||
const unsigned UserVF = Hints.getWidth();
|
||||
@ -7641,7 +7626,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
|
||||
CM.collectValuesToIgnore();
|
||||
|
||||
// Use the planner for vectorization.
|
||||
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM);
|
||||
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI);
|
||||
|
||||
// Get user vectorization factor.
|
||||
unsigned UserVF = Hints.getWidth();
|
||||
|
@ -47,6 +47,24 @@ class VPRecipeBuilder {
|
||||
EdgeMaskCacheTy EdgeMaskCache;
|
||||
BlockMaskCacheTy BlockMaskCache;
|
||||
|
||||
// VPlan-VPlan transformations support: Hold a mapping from ingredients to
|
||||
// their recipe. To save on memory, only do so for selected ingredients,
|
||||
// marked by having a nullptr entry in this map. If those ingredients get a
|
||||
// VPWidenRecipe, also avoid compressing other ingredients into it to avoid
|
||||
// having to split such recipes later.
|
||||
DenseMap<Instruction *, VPRecipeBase *> Ingredient2Recipe;
|
||||
VPWidenRecipe *LastExtensibleRecipe = nullptr;
|
||||
|
||||
/// Set the recipe created for given ingredient. This operation is a no-op for
|
||||
/// ingredients that were not marked using a nullptr entry in the map.
|
||||
void setRecipe(Instruction *I, VPRecipeBase *R) {
|
||||
if (!Ingredient2Recipe.count(I))
|
||||
return;
|
||||
assert(Ingredient2Recipe[I] == nullptr &&
|
||||
"Recipe already set for ingredient");
|
||||
Ingredient2Recipe[I] = R;
|
||||
}
|
||||
|
||||
public:
|
||||
/// A helper function that computes the predicate of the block BB, assuming
|
||||
/// that the header block of the loop is set to True. It returns the *entry*
|
||||
@ -57,16 +75,22 @@ public:
|
||||
/// and DST.
|
||||
VPValue *createEdgeMask(BasicBlock *Src, BasicBlock *Dst, VPlanPtr &Plan);
|
||||
|
||||
/// Check if \I belongs to an Interleave Group within the given VF \p Range,
|
||||
/// \return true in the first returned value if so and false otherwise.
|
||||
/// Build a new VPInterleaveGroup Recipe if \I is the primary member of an IG
|
||||
/// for \p Range.Start, and provide it as the second returned value.
|
||||
/// Note that if \I is an adjunct member of an IG for \p Range.Start, the
|
||||
/// \return value is <true, nullptr>, as it is handled by another recipe.
|
||||
/// \p Range.End may be decreased to ensure same decision from \p Range.Start
|
||||
/// to \p Range.End.
|
||||
VPInterleaveRecipe *tryToInterleaveMemory(Instruction *I, VFRange &Range,
|
||||
VPlanPtr &Plan);
|
||||
/// Mark given ingredient for recording its recipe once one is created for
|
||||
/// it.
|
||||
void recordRecipeOf(Instruction *I) {
|
||||
assert((!Ingredient2Recipe.count(I) || Ingredient2Recipe[I] == nullptr) &&
|
||||
"Recipe already set for ingredient");
|
||||
Ingredient2Recipe[I] = nullptr;
|
||||
}
|
||||
|
||||
/// Return the recipe created for given ingredient.
|
||||
VPRecipeBase *getRecipe(Instruction *I) {
|
||||
assert(Ingredient2Recipe.count(I) &&
|
||||
"Recording this ingredients recipe was not requested");
|
||||
assert(Ingredient2Recipe[I] != nullptr &&
|
||||
"Ingredient doesn't have a recipe");
|
||||
return Ingredient2Recipe[I];
|
||||
}
|
||||
|
||||
/// Check if \I is a memory instruction to be widened for \p Range.Start and
|
||||
/// potentially masked. Such instructions are handled by a recipe that takes
|
||||
|
@ -275,18 +275,35 @@ void VPRegionBlock::execute(VPTransformState *State) {
|
||||
}
|
||||
|
||||
void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
|
||||
assert(!Parent && "Recipe already in some VPBasicBlock");
|
||||
assert(InsertPos->getParent() &&
|
||||
"Insertion position not in any VPBasicBlock");
|
||||
Parent = InsertPos->getParent();
|
||||
Parent->getRecipeList().insert(InsertPos->getIterator(), this);
|
||||
}
|
||||
|
||||
void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
|
||||
assert(!Parent && "Recipe already in some VPBasicBlock");
|
||||
assert(InsertPos->getParent() &&
|
||||
"Insertion position not in any VPBasicBlock");
|
||||
Parent = InsertPos->getParent();
|
||||
Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this);
|
||||
}
|
||||
|
||||
void VPRecipeBase::removeFromParent() {
|
||||
assert(getParent() && "Recipe not in any VPBasicBlock");
|
||||
getParent()->getRecipeList().remove(getIterator());
|
||||
Parent = nullptr;
|
||||
}
|
||||
|
||||
iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
|
||||
assert(getParent() && "Recipe not in any VPBasicBlock");
|
||||
return getParent()->getRecipeList().erase(getIterator());
|
||||
}
|
||||
|
||||
void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
|
||||
InsertPos->getParent()->getRecipeList().splice(
|
||||
std::next(InsertPos->getIterator()), getParent()->getRecipeList(),
|
||||
getIterator());
|
||||
removeFromParent();
|
||||
insertAfter(InsertPos);
|
||||
}
|
||||
|
||||
void VPInstruction::generateInstruction(VPTransformState &State,
|
||||
|
@ -567,6 +567,7 @@ public:
|
||||
/// instructions.
|
||||
class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock> {
|
||||
friend VPBasicBlock;
|
||||
friend class VPBlockUtils;
|
||||
|
||||
private:
|
||||
const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
|
||||
@ -615,10 +616,18 @@ public:
|
||||
/// the specified recipe.
|
||||
void insertBefore(VPRecipeBase *InsertPos);
|
||||
|
||||
/// Insert an unlinked Recipe into a basic block immediately after
|
||||
/// the specified Recipe.
|
||||
void insertAfter(VPRecipeBase *InsertPos);
|
||||
|
||||
/// Unlink this recipe from its current VPBasicBlock and insert it into
|
||||
/// the VPBasicBlock that MovePos lives in, right after MovePos.
|
||||
void moveAfter(VPRecipeBase *MovePos);
|
||||
|
||||
/// This method unlinks 'this' from the containing basic block, but does not
|
||||
/// delete it.
|
||||
void removeFromParent();
|
||||
|
||||
/// This method unlinks 'this' from the containing basic block and deletes it.
|
||||
///
|
||||
/// \returns an iterator pointing to the element after the erased one
|
||||
@ -973,6 +982,13 @@ public:
|
||||
return V->getVPRecipeID() == VPRecipeBase::VPWidenMemoryInstructionSC;
|
||||
}
|
||||
|
||||
/// Return the mask used by this recipe. Note that a full mask is represented
|
||||
/// by a nullptr.
|
||||
VPValue *getMask() {
|
||||
// Mask is the last operand.
|
||||
return User ? User->getOperand(User->getNumOperands() - 1) : nullptr;
|
||||
}
|
||||
|
||||
/// Generate the wide load/store.
|
||||
void execute(VPTransformState &State) override;
|
||||
|
||||
|
@ -572,3 +572,38 @@ for.body:
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Do not sink branches: While branches are if-converted and do not require
|
||||
; sinking, instructions with side effects (e.g. loads) conditioned by those
|
||||
; branches will become users of the condition bit after vectorization and would
|
||||
; need to be sunk if the loop is vectorized.
|
||||
define void @do_not_sink_branch(i32 %x, i32* %in, i32* %out, i32 %tc) local_unnamed_addr #0 {
|
||||
; NO-SINK-AFTER-LABEL: do_not_sink_branch
|
||||
; NO-SINK-AFTER-NOT: vector.ph:
|
||||
; NO-SINK-AFTER: }
|
||||
entry:
|
||||
%cmp530 = icmp slt i32 0, %tc
|
||||
br label %for.body4
|
||||
|
||||
for.body4: ; preds = %cond.end, %entry
|
||||
%indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %cond.end ]
|
||||
%cmp534 = phi i1 [ %cmp530, %entry ], [ %cmp5, %cond.end ]
|
||||
br i1 %cmp534, label %cond.true, label %cond.end
|
||||
|
||||
cond.true: ; preds = %for.body4
|
||||
%arrayidx7 = getelementptr inbounds i32, i32* %in, i32 %indvars.iv
|
||||
%in.val = load i32, i32* %arrayidx7, align 4
|
||||
br label %cond.end
|
||||
|
||||
cond.end: ; preds = %for.body4, %cond.true
|
||||
%cond = phi i32 [ %in.val, %cond.true ], [ 0, %for.body4 ]
|
||||
%arrayidx8 = getelementptr inbounds i32, i32* %out, i32 %indvars.iv
|
||||
store i32 %cond, i32* %arrayidx8, align 4
|
||||
%indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
|
||||
%cmp5 = icmp slt i32 %indvars.iv.next, %tc
|
||||
%exitcond = icmp eq i32 %indvars.iv.next, %x
|
||||
br i1 %exitcond, label %for.end12.loopexit, label %for.body4
|
||||
|
||||
for.end12.loopexit: ; preds = %cond.end
|
||||
ret void
|
||||
}
|
||||
|
@ -0,0 +1,49 @@
|
||||
; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses=true < %s
|
||||
|
||||
; Make sure the vectorizer can handle this loop: The strided load is only used
|
||||
; by the loop's exit condition, which is not vectorized, and is therefore
|
||||
; considered uniform while also forming an interleave group.
|
||||
|
||||
%0 = type { i32 ()*, i32 }
|
||||
|
||||
@0 = internal unnamed_addr constant [59 x %0] [%0 zeroinitializer,
|
||||
%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
|
||||
%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
|
||||
%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
|
||||
%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
|
||||
%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
|
||||
%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
|
||||
%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
|
||||
%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
|
||||
%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
|
||||
%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
|
||||
%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
|
||||
%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
|
||||
%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
|
||||
%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
|
||||
%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
|
||||
%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
|
||||
%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
|
||||
%0 {i32 ()* null, i32 258}, %0 zeroinitializer, %0 zeroinitializer,
|
||||
%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
|
||||
%0 zeroinitializer], align 8
|
||||
|
||||
define dso_local void @test_dead_load(i32 %arg) {
|
||||
; CHECK-LABEL: @test_dead_load(
|
||||
; CHECK: vector.body:
|
||||
; CHECK: %wide.vec = load <16 x i32>, <16 x i32>* %3, align 8
|
||||
; CHECK: %strided.vec = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
|
||||
bb1:
|
||||
br label %bb2
|
||||
|
||||
bb2:
|
||||
%tmp = phi %0* [ %tmp6, %bb2 ], [ getelementptr inbounds ([59 x %0], [59 x %0]* @0, i64 0, i64 0), %bb1 ]
|
||||
%tmp3 = getelementptr inbounds %0, %0* %tmp, i64 0, i32 1
|
||||
%tmp4 = load i32, i32* %tmp3, align 8
|
||||
%tmp5 = icmp eq i32 %tmp4, 258
|
||||
%tmp6 = getelementptr inbounds %0, %0* %tmp, i64 1
|
||||
br i1 %tmp5, label %bb65, label %bb2
|
||||
|
||||
bb65:
|
||||
unreachable
|
||||
}
|
@ -83,6 +83,7 @@ TEST(VPInstructionTest, moveAfter) {
|
||||
|
||||
CHECK_ITERATOR(VPBB1, I2, I1);
|
||||
CHECK_ITERATOR(VPBB2, I4, I3, I5);
|
||||
EXPECT_EQ(I3->getParent(), I4->getParent());
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
Loading…
Reference in New Issue
Block a user