1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 20:23:11 +01:00

[VPlan] Introduce VPWidenSelectRecipe (NFC).

Widening a select depends on whether the condition is loop invariant or
not. Rather than checking during codegen-time, the information can be
recorded at the VPlan construction time.

This was suggested as part of D76992, to reduce the reliance on
accessing the original underlying IR values.

Reviewers: gilr, rengolin, Ayal, hsaito

Reviewed By: gilr

Differential Revision: https://reviews.llvm.org/D77869
This commit is contained in:
Florian Hahn 2020-04-13 08:28:26 +01:00
parent a2c586c698
commit 7d6477a593
5 changed files with 113 additions and 37 deletions

View File

@ -31,6 +31,8 @@
namespace llvm {
class PredicatedScalarEvolution;
/// VPlan-based builder utility analogous to IRBuilder.
class VPBuilder {
VPBasicBlock *BB = nullptr;
@ -203,6 +205,8 @@ class LoopVectorizationPlanner {
/// The interleaved access analysis.
InterleavedAccessInfo &IAI;
PredicatedScalarEvolution &PSE;
SmallVector<VPlanPtr, 4> VPlans;
/// This class is used to enable the VPlan to invoke a method of ILV. This is
@ -228,9 +232,10 @@ public:
const TargetTransformInfo *TTI,
LoopVectorizationLegality *Legal,
LoopVectorizationCostModel &CM,
InterleavedAccessInfo &IAI)
: OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
IAI(IAI) {}
InterleavedAccessInfo &IAI,
PredicatedScalarEvolution &PSE)
: OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM), IAI(IAI),
PSE(PSE) {}
/// Plan how to best vectorize, return the best VF and its cost, or None if
/// vectorization and interleaving should be avoided up front.

View File

@ -413,6 +413,9 @@ public:
void widenCallInstruction(CallInst &I, VPUser &ArgOperands,
VPTransformState &State);
/// Widen a single select instruction within the innermost loop.
/// \p InvariantCond states whether the select's condition is loop invariant;
/// when true, the widened selects use the first-lane scalar condition instead
/// of the per-part vector condition.
void widenSelectInstruction(SelectInst &I, bool InvariantCond);
/// Fix the vectorized code, taking care of header phi's, live-outs, and more.
void fixVectorizedLoop();
@ -4232,6 +4235,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) {
case Instruction::Br:
case Instruction::PHI:
case Instruction::GetElementPtr:
case Instruction::Select:
llvm_unreachable("This instruction is handled by a different recipe.");
case Instruction::UDiv:
case Instruction::SDiv:
@ -4272,35 +4276,6 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) {
break;
}
case Instruction::Select: {
// Widen selects.
// If the selector is loop invariant we can create a select
// instruction with a scalar condition. Otherwise, use vector-select.
auto *SE = PSE.getSE();
bool InvariantCond =
SE->isLoopInvariant(PSE.getSCEV(I.getOperand(0)), OrigLoop);
setDebugLocFromInst(Builder, &I);
// The condition can be loop invariant but still defined inside the
// loop. This means that we can't just use the original 'cond' value.
// We have to take the 'vectorized' value and pick the first lane.
// Instcombine will make this a no-op.
auto *ScalarCond = getOrCreateScalarValue(I.getOperand(0), {0, 0});
for (unsigned Part = 0; Part < UF; ++Part) {
Value *Cond = getOrCreateVectorValue(I.getOperand(0), Part);
Value *Op0 = getOrCreateVectorValue(I.getOperand(1), Part);
Value *Op1 = getOrCreateVectorValue(I.getOperand(2), Part);
Value *Sel =
Builder.CreateSelect(InvariantCond ? ScalarCond : Cond, Op0, Op1);
VectorLoopValueMap.setVectorValue(&I, Part, Sel);
addMetadata(Sel, &I);
}
break;
}
case Instruction::ICmp:
case Instruction::FCmp: {
// Widen compares. Generate vector compares.
@ -4433,6 +4408,28 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPUser &ArgOperands,
}
}
/// Emit one widened select per unroll part for the scalar select \p I.
///
/// \p InvariantCond tells whether the condition of \p I is loop invariant. If
/// it is, every generated select is keyed on a single scalar condition;
/// otherwise each part uses its own vector condition.
void InnerLoopVectorizer::widenSelectInstruction(SelectInst &I,
                                                 bool InvariantCond) {
  setDebugLocFromInst(Builder, &I);

  Value *CondOperand = I.getOperand(0);
  Value *TrueOperand = I.getOperand(1);
  Value *FalseOperand = I.getOperand(2);

  // A loop-invariant condition may nevertheless be defined inside the loop, so
  // the original scalar value cannot be reused directly. Instead, take the
  // first lane of its vectorized form; instcombine is expected to fold this
  // away.
  Value *FirstLaneCond = getOrCreateScalarValue(CondOperand, {0, 0});

  for (unsigned Part = 0; Part != UF; ++Part) {
    Value *WideCond = getOrCreateVectorValue(CondOperand, Part);
    Value *WideTrue = getOrCreateVectorValue(TrueOperand, Part);
    Value *WideFalse = getOrCreateVectorValue(FalseOperand, Part);

    // Scalar condition when invariant, vector condition otherwise.
    Value *Selector = InvariantCond ? FirstLaneCond : WideCond;
    Value *Widened = Builder.CreateSelect(Selector, WideTrue, WideFalse);

    VectorLoopValueMap.setVectorValue(&I, Part, Widened);
    addMetadata(Widened, &I);
  }
}
void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) {
// We should not collect Scalars more than once per VF. Right now, this
// function is called from collectUniformsAndScalars(), which already does
@ -6937,6 +6934,29 @@ VPRecipeBuilder::tryToWidenCall(Instruction *I, VFRange &Range, VPlan &Plan) {
return new VPWidenCallRecipe(*CI, VPValues);
}
/// Try to create a widening recipe for \p I, provided it is a select.
///
/// Returns nullptr if \p I is not a SelectInst, or if the cost model decides
/// the select should not be widened for the start of \p Range (the range may
/// be clamped so the decision holds across it). Otherwise returns a newly
/// allocated VPWidenSelectRecipe that records whether the select's condition
/// is loop invariant with respect to OrigLoop.
VPWidenSelectRecipe *VPRecipeBuilder::tryToWidenSelect(Instruction *I,
                                                       VFRange &Range) {
  auto *Select = dyn_cast<SelectInst>(I);
  if (!Select)
    return nullptr;

  // The select is widened only if none of the scalar strategies applies: it
  // is not scalar after vectorization, scalarizing it is not profitable, and
  // it is not scalar with predication.
  auto ShouldWiden = [this, Select](unsigned VF) -> bool {
    if (CM.isScalarAfterVectorization(Select, VF))
      return false;
    if (CM.isProfitableToScalarize(Select, VF))
      return false;
    return !CM.isScalarWithPredication(Select, VF);
  };

  const bool Widen =
      LoopVectorizationPlanner::getDecisionAndClampRange(ShouldWiden, Range);
  if (!Widen)
    return nullptr;

  // Determine the invariance of the condition now, at plan construction, so
  // that code generation can simply consume the recorded flag.
  auto *SE = PSE.getSE();
  auto *CondSCEV = PSE.getSCEV(Select->getOperand(0));
  const bool InvariantCond = SE->isLoopInvariant(CondSCEV, OrigLoop);

  // Success: widen this instruction.
  return new VPWidenSelectRecipe(*Select, InvariantCond);
}
VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I, VFRange &Range) {
bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange(
[&](unsigned VF) { return CM.isScalarWithPredication(I, VF); }, Range);
@ -7088,6 +7108,7 @@ bool VPRecipeBuilder::tryToCreateRecipe(Instruction *Instr, VFRange &Range,
// operations, inductions and Phi nodes.
if ((Recipe = tryToWidenCall(Instr, Range, *Plan)) ||
(Recipe = tryToWidenMemory(Instr, Range, Plan)) ||
(Recipe = tryToWidenSelect(Instr, Range)) ||
(Recipe = tryToOptimizeInduction(Instr, Range)) ||
(Recipe = tryToBlend(Instr, Plan)) ||
(isa<PHINode>(Instr) &&
@ -7194,7 +7215,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups;
VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder);
VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, PSE, Builder);
// ---------------------------------------------------------------------------
// Pre-construction: record ingredients whose recipes we'll need to further
@ -7410,6 +7431,10 @@ void VPWidenCallRecipe::execute(VPTransformState &State) {
State.ILV->widenCallInstruction(Ingredient, User, State);
}
/// Generate the widened select by delegating to the vectorizer held in
/// \p State, forwarding the recorded select and its condition-invariance flag.
void VPWidenSelectRecipe::execute(VPTransformState &State) {
State.ILV->widenSelectInstruction(Ingredient, InvariantCond);
}
/// Widen this recipe's ingredient by delegating to the vectorizer held in
/// \p State.
void VPWidenRecipe::execute(VPTransformState &State) {
State.ILV->widenInstruction(Ingredient);
}
@ -7620,7 +7645,7 @@ static bool processLoopInVPlanNativePath(
// Use the planner for outer loop vectorization.
// TODO: CM is not used at this point inside the planner. Turn CM into an
// optional argument if we don't need it in the future.
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI);
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI, PSE);
// Get user vectorization factor.
const unsigned UserVF = Hints.getWidth();
@ -7779,7 +7804,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
CM.collectValuesToIgnore();
// Use the planner for vectorization.
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI);
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI, PSE);
// Get user vectorization factor.
unsigned UserVF = Hints.getWidth();

View File

@ -35,6 +35,8 @@ class VPRecipeBuilder {
/// The profitability analysis.
LoopVectorizationCostModel &CM;
PredicatedScalarEvolution &PSE;
VPBuilder &Builder;
/// When we if-convert we need to create edge masks. We have to cache values
@ -113,6 +115,8 @@ public:
VPWidenCallRecipe *tryToWidenCall(Instruction *I, VFRange &Range,
VPlan &Plan);
/// Check if \p I is a select that should be widened for the given VF
/// \p Range. If so, build and return a new VPWidenSelectRecipe recording
/// whether the select's condition is loop invariant; otherwise return nullptr.
/// Range.End may be decreased so the decision holds across the range.
VPWidenSelectRecipe *tryToWidenSelect(Instruction *I, VFRange &Range);
/// Check if \p I can be widened within the given VF \p Range. If \p I can be
/// widened for \p Range.Start, build a new VPWidenRecipe and return it.
/// Range.End may be decreased to ensure same decision from \p Range.Start to
@ -127,8 +131,10 @@ public:
public:
VPRecipeBuilder(Loop *OrigLoop, const TargetLibraryInfo *TLI,
LoopVectorizationLegality *Legal,
LoopVectorizationCostModel &CM, VPBuilder &Builder)
: OrigLoop(OrigLoop), TLI(TLI), Legal(Legal), CM(CM), Builder(Builder) {}
LoopVectorizationCostModel &CM,
PredicatedScalarEvolution &PSE, VPBuilder &Builder)
: OrigLoop(OrigLoop), TLI(TLI), Legal(Legal), CM(CM), PSE(PSE),
Builder(Builder) {}
/// Check if a recipe can be created for \p I within the given VF \p Range.
/// If a recipe can be created, it adds it to \p VPBB.

View File

@ -718,6 +718,13 @@ void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
<< Indent << "\"WIDEN-CALL " << VPlanIngredient(&Ingredient) << "\\l\"";
}
/// Print this recipe as one quoted label line on \p O, prefixed by \p Indent.
///
/// The line consists of the "WIDEN-SELECT " tag, the underlying select
/// ingredient and, when the condition was recorded as loop invariant, the
/// " (condition is loop invariant)" suffix. \p SlotTracker is not used here.
void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
  // The trailing space after the tag separates it from the ingredient text,
  // matching the "WIDEN-CALL " label printed by VPWidenCallRecipe.
  O << " +\n"
    << Indent << "\"WIDEN-SELECT " << VPlanIngredient(&Ingredient)
    << (InvariantCond ? " (condition is loop invariant)" : "") << "\\l\"";
}
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << " +\n" << Indent << "\"WIDEN\\l\"";

View File

@ -617,6 +617,7 @@ public:
VPWidenMemoryInstructionSC,
VPWidenPHISC,
VPWidenSC,
VPWidenSelectSC
};
/// Construct a recipe, recording \p SC as its subclass ID.
VPRecipeBase(const unsigned char SC) : SubclassID(SC) {}
@ -813,6 +814,38 @@ public:
VPSlotTracker &SlotTracker) const override;
};
/// A recipe for widening select instructions.
/// Whether the select's condition is loop invariant is supplied at
/// construction time and stored alongside the select.
class VPWidenSelectRecipe : public VPRecipeBase {
private:
/// Hold the select to be widened.
SelectInst &Ingredient;
/// Is the condition of the select loop invariant?
bool InvariantCond;
/// VPUser member; presumably meant to hold VPValues for the operands of the
/// select. NOTE(review): it is default-constructed, since the constructor
/// below passes it no operands — confirm whether it is used elsewhere.
VPUser User;
public:
/// Create a recipe for select \p I, recording in \p InvariantCond whether its
/// condition is loop invariant.
VPWidenSelectRecipe(SelectInst &I, bool InvariantCond)
: VPRecipeBase(VPWidenSelectSC), Ingredient(I),
InvariantCond(InvariantCond) {}
~VPWidenSelectRecipe() override = default;
/// Method to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const VPRecipeBase *V) {
return V->getVPRecipeID() == VPRecipeBase::VPWidenSelectSC;
}
/// Produce a widened version of the select instruction.
void execute(VPTransformState &State) override;
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
};
/// A recipe for handling GEP instructions.
class VPWidenGEPRecipe : public VPRecipeBase {
GetElementPtrInst *GEP;