mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 20:23:11 +01:00
[VPlan] Introduce VPWidenSelectRecipe (NFC).
Widening a select depends on whether the condition is loop invariant or not. Rather than checking at codegen time, the information can be recorded at VPlan construction time. This was suggested as part of D76992, to reduce the reliance on accessing the original underlying IR values. Reviewers: gilr, rengolin, Ayal, hsaito Reviewed By: gilr Differential Revision: https://reviews.llvm.org/D77869
This commit is contained in:
parent
a2c586c698
commit
7d6477a593
@ -31,6 +31,8 @@
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class PredicatedScalarEvolution;
|
||||
|
||||
/// VPlan-based builder utility analogous to IRBuilder.
|
||||
class VPBuilder {
|
||||
VPBasicBlock *BB = nullptr;
|
||||
@ -203,6 +205,8 @@ class LoopVectorizationPlanner {
|
||||
/// The interleaved access analysis.
|
||||
InterleavedAccessInfo &IAI;
|
||||
|
||||
PredicatedScalarEvolution &PSE;
|
||||
|
||||
SmallVector<VPlanPtr, 4> VPlans;
|
||||
|
||||
/// This class is used to enable the VPlan to invoke a method of ILV. This is
|
||||
@ -228,9 +232,10 @@ public:
|
||||
const TargetTransformInfo *TTI,
|
||||
LoopVectorizationLegality *Legal,
|
||||
LoopVectorizationCostModel &CM,
|
||||
InterleavedAccessInfo &IAI)
|
||||
: OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
|
||||
IAI(IAI) {}
|
||||
InterleavedAccessInfo &IAI,
|
||||
PredicatedScalarEvolution &PSE)
|
||||
: OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM), IAI(IAI),
|
||||
PSE(PSE) {}
|
||||
|
||||
/// Plan how to best vectorize, return the best VF and its cost, or None if
|
||||
/// vectorization and interleaving should be avoided up front.
|
||||
|
@ -413,6 +413,9 @@ public:
|
||||
void widenCallInstruction(CallInst &I, VPUser &ArgOperands,
|
||||
VPTransformState &State);
|
||||
|
||||
/// Widen a single select instruction within the innermost loop.
|
||||
void widenSelectInstruction(SelectInst &I, bool InvariantCond);
|
||||
|
||||
/// Fix the vectorized code, taking care of header phi's, live-outs, and more.
|
||||
void fixVectorizedLoop();
|
||||
|
||||
@ -4232,6 +4235,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) {
|
||||
case Instruction::Br:
|
||||
case Instruction::PHI:
|
||||
case Instruction::GetElementPtr:
|
||||
case Instruction::Select:
|
||||
llvm_unreachable("This instruction is handled by a different recipe.");
|
||||
case Instruction::UDiv:
|
||||
case Instruction::SDiv:
|
||||
@ -4272,35 +4276,6 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) {
|
||||
|
||||
break;
|
||||
}
|
||||
case Instruction::Select: {
|
||||
// Widen selects.
|
||||
// If the selector is loop invariant we can create a select
|
||||
// instruction with a scalar condition. Otherwise, use vector-select.
|
||||
auto *SE = PSE.getSE();
|
||||
bool InvariantCond =
|
||||
SE->isLoopInvariant(PSE.getSCEV(I.getOperand(0)), OrigLoop);
|
||||
setDebugLocFromInst(Builder, &I);
|
||||
|
||||
// The condition can be loop invariant but still defined inside the
|
||||
// loop. This means that we can't just use the original 'cond' value.
|
||||
// We have to take the 'vectorized' value and pick the first lane.
|
||||
// Instcombine will make this a no-op.
|
||||
|
||||
auto *ScalarCond = getOrCreateScalarValue(I.getOperand(0), {0, 0});
|
||||
|
||||
for (unsigned Part = 0; Part < UF; ++Part) {
|
||||
Value *Cond = getOrCreateVectorValue(I.getOperand(0), Part);
|
||||
Value *Op0 = getOrCreateVectorValue(I.getOperand(1), Part);
|
||||
Value *Op1 = getOrCreateVectorValue(I.getOperand(2), Part);
|
||||
Value *Sel =
|
||||
Builder.CreateSelect(InvariantCond ? ScalarCond : Cond, Op0, Op1);
|
||||
VectorLoopValueMap.setVectorValue(&I, Part, Sel);
|
||||
addMetadata(Sel, &I);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case Instruction::ICmp:
|
||||
case Instruction::FCmp: {
|
||||
// Widen compares. Generate vector compares.
|
||||
@ -4433,6 +4408,28 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPUser &ArgOperands,
|
||||
}
|
||||
}
|
||||
|
||||
/// Emit one widened select per unroll part for \p I and record each result
/// in VectorLoopValueMap. When \p InvariantCond is true every part is keyed
/// on the scalar condition taken from lane 0 of part 0; otherwise each part
/// uses its own vector condition.
void InnerLoopVectorizer::widenSelectInstruction(SelectInst &I,
                                                 bool InvariantCond) {
  setDebugLocFromInst(Builder, &I);

  // A loop-invariant condition may still be defined inside the loop, so the
  // original 'cond' value cannot be used as-is. Take the first lane of the
  // 'vectorized' value instead; Instcombine will make this a no-op.
  auto *FirstLaneCond = getOrCreateScalarValue(I.getOperand(0), {0, 0});

  for (unsigned Part = 0; Part != UF; ++Part) {
    // Operands are materialized in the same order for every part: condition,
    // then the true value, then the false value.
    Value *VecCond = getOrCreateVectorValue(I.getOperand(0), Part);
    Value *TrueVal = getOrCreateVectorValue(I.getOperand(1), Part);
    Value *FalseVal = getOrCreateVectorValue(I.getOperand(2), Part);

    Value *SelCond = VecCond;
    if (InvariantCond)
      SelCond = FirstLaneCond;

    Value *Widened = Builder.CreateSelect(SelCond, TrueVal, FalseVal);
    VectorLoopValueMap.setVectorValue(&I, Part, Widened);
    addMetadata(Widened, &I);
  }
}
|
||||
|
||||
void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) {
|
||||
// We should not collect Scalars more than once per VF. Right now, this
|
||||
// function is called from collectUniformsAndScalars(), which already does
|
||||
@ -6937,6 +6934,29 @@ VPRecipeBuilder::tryToWidenCall(Instruction *I, VFRange &Range, VPlan &Plan) {
|
||||
return new VPWidenCallRecipe(*CI, VPValues);
|
||||
}
|
||||
|
||||
/// Try to build a VPWidenSelectRecipe for \p I.
///
/// \returns nullptr when \p I is not a SelectInst or when the widening
/// decision evaluated over \p Range is negative; otherwise a newly allocated
/// recipe that records whether the select's condition is loop invariant.
/// \p Range is handed to getDecisionAndClampRange, which (as its name
/// indicates) may clamp it.
VPWidenSelectRecipe *VPRecipeBuilder::tryToWidenSelect(Instruction *I,
                                                       VFRange &Range) {
  auto *Select = dyn_cast<SelectInst>(I);
  if (Select == nullptr)
    return nullptr;

  // The select is widened for a given VF only if it is neither scalar after
  // vectorization, nor profitable to scalarize, nor scalar with predication.
  auto ShouldWiden = [this, Select](unsigned VF) -> bool {
    if (CM.isScalarAfterVectorization(Select, VF))
      return false;
    if (CM.isProfitableToScalarize(Select, VF))
      return false;
    return !CM.isScalarWithPredication(Select, VF);
  };
  if (!LoopVectorizationPlanner::getDecisionAndClampRange(ShouldWiden, Range))
    return nullptr;

  // Decide now, at VPlan construction time, whether the condition (operand 0)
  // is invariant in the original loop, so codegen does not have to.
  auto *SE = PSE.getSE();
  auto *CondExpr = PSE.getSCEV(Select->getOperand(0));
  const bool CondIsInvariant = SE->isLoopInvariant(CondExpr, OrigLoop);

  // Success: widen this instruction.
  return new VPWidenSelectRecipe(*Select, CondIsInvariant);
}
|
||||
|
||||
VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I, VFRange &Range) {
|
||||
bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange(
|
||||
[&](unsigned VF) { return CM.isScalarWithPredication(I, VF); }, Range);
|
||||
@ -7088,6 +7108,7 @@ bool VPRecipeBuilder::tryToCreateRecipe(Instruction *Instr, VFRange &Range,
|
||||
// operations, inductions and Phi nodes.
|
||||
if ((Recipe = tryToWidenCall(Instr, Range, *Plan)) ||
|
||||
(Recipe = tryToWidenMemory(Instr, Range, Plan)) ||
|
||||
(Recipe = tryToWidenSelect(Instr, Range)) ||
|
||||
(Recipe = tryToOptimizeInduction(Instr, Range)) ||
|
||||
(Recipe = tryToBlend(Instr, Plan)) ||
|
||||
(isa<PHINode>(Instr) &&
|
||||
@ -7194,7 +7215,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
|
||||
|
||||
SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups;
|
||||
|
||||
VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder);
|
||||
VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, PSE, Builder);
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Pre-construction: record ingredients whose recipes we'll need to further
|
||||
@ -7410,6 +7431,10 @@ void VPWidenCallRecipe::execute(VPTransformState &State) {
|
||||
State.ILV->widenCallInstruction(Ingredient, User, State);
|
||||
}
|
||||
|
||||
/// Generate the widened select by delegating to the vectorizer reachable
/// through State.ILV, forwarding the stored select and the loop-invariance
/// flag recorded when this recipe was constructed.
void VPWidenSelectRecipe::execute(VPTransformState &State) {
  auto &Vectorizer = *State.ILV;
  Vectorizer.widenSelectInstruction(Ingredient, InvariantCond);
}
|
||||
|
||||
void VPWidenRecipe::execute(VPTransformState &State) {
|
||||
State.ILV->widenInstruction(Ingredient);
|
||||
}
|
||||
@ -7620,7 +7645,7 @@ static bool processLoopInVPlanNativePath(
|
||||
// Use the planner for outer loop vectorization.
|
||||
// TODO: CM is not used at this point inside the planner. Turn CM into an
|
||||
// optional argument if we don't need it in the future.
|
||||
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI);
|
||||
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI, PSE);
|
||||
|
||||
// Get user vectorization factor.
|
||||
const unsigned UserVF = Hints.getWidth();
|
||||
@ -7779,7 +7804,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
|
||||
CM.collectValuesToIgnore();
|
||||
|
||||
// Use the planner for vectorization.
|
||||
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI);
|
||||
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI, PSE);
|
||||
|
||||
// Get user vectorization factor.
|
||||
unsigned UserVF = Hints.getWidth();
|
||||
|
@ -35,6 +35,8 @@ class VPRecipeBuilder {
|
||||
/// The profitability analysis.
|
||||
LoopVectorizationCostModel &CM;
|
||||
|
||||
PredicatedScalarEvolution &PSE;
|
||||
|
||||
VPBuilder &Builder;
|
||||
|
||||
/// When we if-convert we need to create edge masks. We have to cache values
|
||||
@ -113,6 +115,8 @@ public:
|
||||
VPWidenCallRecipe *tryToWidenCall(Instruction *I, VFRange &Range,
|
||||
VPlan &Plan);
|
||||
|
||||
VPWidenSelectRecipe *tryToWidenSelect(Instruction *I, VFRange &Range);
|
||||
|
||||
/// Check if \p I can be widened within the given VF \p Range. If \p I can be
|
||||
/// widened for \p Range.Start, build a new VPWidenRecipe and return it.
|
||||
/// Range.End may be decreased to ensure same decision from \p Range.Start to
|
||||
@ -127,8 +131,10 @@ public:
|
||||
public:
|
||||
VPRecipeBuilder(Loop *OrigLoop, const TargetLibraryInfo *TLI,
|
||||
LoopVectorizationLegality *Legal,
|
||||
LoopVectorizationCostModel &CM, VPBuilder &Builder)
|
||||
: OrigLoop(OrigLoop), TLI(TLI), Legal(Legal), CM(CM), Builder(Builder) {}
|
||||
LoopVectorizationCostModel &CM,
|
||||
PredicatedScalarEvolution &PSE, VPBuilder &Builder)
|
||||
: OrigLoop(OrigLoop), TLI(TLI), Legal(Legal), CM(CM), PSE(PSE),
|
||||
Builder(Builder) {}
|
||||
|
||||
/// Check if a recipe can be created for \p I within the given VF \p Range.
|
||||
/// If a recipe can be created, it adds it to \p VPBB.
|
||||
|
@ -718,6 +718,13 @@ void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
|
||||
<< Indent << "\"WIDEN-CALL " << VPlanIngredient(&Ingredient) << "\\l\"";
|
||||
}
|
||||
|
||||
/// Print this recipe to \p O as a single quoted line prefixed by \p Indent:
/// the "WIDEN-SELECT" tag, the select ingredient, and, when the condition was
/// recorded as loop invariant, a trailing note saying so.
/// \p SlotTracker is not used by this printer.
void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
  // Keep a space between the tag and the ingredient, matching the
  // "WIDEN-CALL " printer, so the tag does not run into the instruction text.
  O << " +\n"
    << Indent << "\"WIDEN-SELECT " << VPlanIngredient(&Ingredient)
    << (InvariantCond ? " (condition is loop invariant)" : "") << "\\l\"";
}
|
||||
|
||||
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
|
||||
VPSlotTracker &SlotTracker) const {
|
||||
O << " +\n" << Indent << "\"WIDEN\\l\"";
|
||||
|
@ -617,6 +617,7 @@ public:
|
||||
VPWidenMemoryInstructionSC,
|
||||
VPWidenPHISC,
|
||||
VPWidenSC,
|
||||
VPWidenSelectSC
|
||||
};
|
||||
|
||||
VPRecipeBase(const unsigned char SC) : SubclassID(SC) {}
|
||||
@ -813,6 +814,38 @@ public:
|
||||
VPSlotTracker &SlotTracker) const override;
|
||||
};
|
||||
|
||||
/// A recipe for widening select instructions.
|
||||
class VPWidenSelectRecipe : public VPRecipeBase {
|
||||
private:
|
||||
/// Hold the select to be widened.
|
||||
SelectInst &Ingredient;
|
||||
|
||||
/// Is the condition of the select loop invariant?
|
||||
bool InvariantCond;
|
||||
|
||||
/// Hold VPValues for the arguments of the call.
|
||||
VPUser User;
|
||||
|
||||
public:
|
||||
VPWidenSelectRecipe(SelectInst &I, bool InvariantCond)
|
||||
: VPRecipeBase(VPWidenSelectSC), Ingredient(I),
|
||||
InvariantCond(InvariantCond) {}
|
||||
|
||||
~VPWidenSelectRecipe() override = default;
|
||||
|
||||
/// Method to support type inquiry through isa, cast, and dyn_cast.
|
||||
static inline bool classof(const VPRecipeBase *V) {
|
||||
return V->getVPRecipeID() == VPRecipeBase::VPWidenSelectSC;
|
||||
}
|
||||
|
||||
/// Produce a widened version of the select instruction.
|
||||
void execute(VPTransformState &State) override;
|
||||
|
||||
/// Print the recipe.
|
||||
void print(raw_ostream &O, const Twine &Indent,
|
||||
VPSlotTracker &SlotTracker) const override;
|
||||
};
|
||||
|
||||
/// A recipe for handling GEP instructions.
|
||||
class VPWidenGEPRecipe : public VPRecipeBase {
|
||||
GetElementPtrInst *GEP;
|
||||
|
Loading…
Reference in New Issue
Block a user