mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 02:33:06 +01:00
Recommit "[VPlan] Add recipe for first-order rec phis, make splicing explicit."
This reverts the revert commit b1777b04dc4b1a9fee0e7effa7e177892ab32ef0. The patch was originally reverted due to a crash (see https://bugs.chromium.org/p/chromium/issues/detail?id=1232798#c2 ). The underlying issue was that we were not using the stored values from the modified memory recipes, but the out-of-date values taken directly from the IR (accessed via the VPlan). This should be fixed in d995d6376. A reduced version of the reproducer has been added in 93664503be6b.
This commit is contained in:
parent
bab200ac44
commit
ca0aa2b075
@ -4164,14 +4164,10 @@ void InnerLoopVectorizer::fixCrossIterationPHIs(VPTransformState &State) {
|
||||
// the incoming edges.
|
||||
VPBasicBlock *Header = State.Plan->getEntry()->getEntryBasicBlock();
|
||||
for (VPRecipeBase &R : Header->phis()) {
|
||||
auto *PhiR = dyn_cast<VPWidenPHIRecipe>(&R);
|
||||
if (!PhiR)
|
||||
continue;
|
||||
auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
|
||||
if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(PhiR)) {
|
||||
if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R))
|
||||
fixReduction(ReductionPhi, State);
|
||||
} else if (Legal->isFirstOrderRecurrence(OrigPhi))
|
||||
fixFirstOrderRecurrence(PhiR, State);
|
||||
else if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R))
|
||||
fixFirstOrderRecurrence(FOR, State);
|
||||
}
|
||||
}
|
||||
|
||||
@ -4200,7 +4196,7 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(VPWidenPHIRecipe *PhiR,
|
||||
//
|
||||
// In this example, s1 is a recurrence because its value depends on the
|
||||
// previous iteration. In the first phase of vectorization, we created a
|
||||
// temporary value for s1. We now complete the vectorization and produce the
|
||||
// vector phi v1 for s1. We now complete the vectorization and produce the
|
||||
// shorthand vector IR shown below (for VF = 4, UF = 1).
|
||||
//
|
||||
// vector.ph:
|
||||
@ -4226,82 +4222,19 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(VPWidenPHIRecipe *PhiR,
|
||||
// After execution completes the vector loop, we extract the next value of
|
||||
// the recurrence (x) to use as the initial value in the scalar loop.
|
||||
|
||||
auto *ScalarInit = PhiR->getStartValue()->getLiveInIRValue();
|
||||
|
||||
auto *IdxTy = Builder.getInt32Ty();
|
||||
auto *One = ConstantInt::get(IdxTy, 1);
|
||||
|
||||
// Create a vector from the initial value.
|
||||
auto *VectorInit = ScalarInit;
|
||||
if (VF.isVector()) {
|
||||
Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
|
||||
auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, VF);
|
||||
auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
|
||||
VectorInit = Builder.CreateInsertElement(
|
||||
PoisonValue::get(VectorType::get(VectorInit->getType(), VF)),
|
||||
VectorInit, LastIdx, "vector.recur.init");
|
||||
}
|
||||
|
||||
VPValue *PreviousDef = PhiR->getBackedgeValue();
|
||||
// We constructed a temporary phi node in the first phase of vectorization.
|
||||
// This phi node will eventually be deleted.
|
||||
Builder.SetInsertPoint(cast<Instruction>(State.get(PhiR, 0)));
|
||||
|
||||
// Create a phi node for the new recurrence. The current value will either be
|
||||
// the initial value inserted into a vector or loop-varying vector value.
|
||||
auto *VecPhi = Builder.CreatePHI(VectorInit->getType(), 2, "vector.recur");
|
||||
VecPhi->addIncoming(VectorInit, LoopVectorPreHeader);
|
||||
|
||||
// Get the vectorized previous value of the last part UF - 1. It appears last
|
||||
// among all unrolled iterations, due to the order of their construction.
|
||||
Value *PreviousLastPart = State.get(PreviousDef, UF - 1);
|
||||
|
||||
// Find and set the insertion point after the previous value if it is an
|
||||
// instruction.
|
||||
BasicBlock::iterator InsertPt;
|
||||
// Note that the previous value may have been constant-folded so it is not
|
||||
// guaranteed to be an instruction in the vector loop.
|
||||
// FIXME: Loop invariant values do not form recurrences. We should deal with
|
||||
// them earlier.
|
||||
if (LI->getLoopFor(LoopVectorBody)->isLoopInvariant(PreviousLastPart))
|
||||
InsertPt = LoopVectorBody->getFirstInsertionPt();
|
||||
else {
|
||||
Instruction *PreviousInst = cast<Instruction>(PreviousLastPart);
|
||||
if (isa<PHINode>(PreviousLastPart))
|
||||
// If the previous value is a phi node, we should insert after all the phi
|
||||
// nodes in the block containing the PHI to avoid breaking basic block
|
||||
// verification. Note that the basic block may be different to
|
||||
// LoopVectorBody, in case we predicate the loop.
|
||||
InsertPt = PreviousInst->getParent()->getFirstInsertionPt();
|
||||
else
|
||||
InsertPt = ++PreviousInst->getIterator();
|
||||
}
|
||||
Builder.SetInsertPoint(&*InsertPt);
|
||||
|
||||
// The vector from which to take the initial value for the current iteration
|
||||
// (actual or unrolled). Initially, this is the vector phi node.
|
||||
Value *Incoming = VecPhi;
|
||||
|
||||
// Shuffle the current and previous vector and update the vector parts.
|
||||
for (unsigned Part = 0; Part < UF; ++Part) {
|
||||
Value *PreviousPart = State.get(PreviousDef, Part);
|
||||
Value *PhiPart = State.get(PhiR, Part);
|
||||
auto *Shuffle = VF.isVector()
|
||||
? Builder.CreateVectorSplice(Incoming, PreviousPart, -1)
|
||||
: Incoming;
|
||||
PhiPart->replaceAllUsesWith(Shuffle);
|
||||
cast<Instruction>(PhiPart)->eraseFromParent();
|
||||
State.reset(PhiR, Shuffle, Part);
|
||||
Incoming = PreviousPart;
|
||||
}
|
||||
auto *VecPhi = cast<PHINode>(State.get(PhiR, 0));
|
||||
|
||||
// Fix the latch value of the new recurrence in the vector loop.
|
||||
VPValue *PreviousDef = PhiR->getBackedgeValue();
|
||||
Value *Incoming = State.get(PreviousDef, UF - 1);
|
||||
VecPhi->addIncoming(Incoming, LI->getLoopFor(LoopVectorBody)->getLoopLatch());
|
||||
|
||||
// Extract the last vector element in the middle block. This will be the
|
||||
// initial value for the recurrence when jumping to the scalar loop.
|
||||
auto *ExtractForScalar = Incoming;
|
||||
if (VF.isVector()) {
|
||||
auto *One = ConstantInt::get(IdxTy, 1);
|
||||
Builder.SetInsertPoint(LoopMiddleBlock->getTerminator());
|
||||
auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, VF);
|
||||
auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
|
||||
@ -4330,6 +4263,7 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(VPWidenPHIRecipe *PhiR,
|
||||
Builder.SetInsertPoint(&*LoopScalarPreHeader->begin());
|
||||
PHINode *Phi = cast<PHINode>(PhiR->getUnderlyingValue());
|
||||
auto *Start = Builder.CreatePHI(Phi->getType(), 2, "scalar.recur.init");
|
||||
auto *ScalarInit = PhiR->getStartValue()->getLiveInIRValue();
|
||||
for (auto *BB : predecessors(LoopScalarPreHeader)) {
|
||||
auto *Incoming = BB == LoopMiddleBlock ? ExtractForScalar : ScalarInit;
|
||||
Start->addIncoming(Incoming, BB);
|
||||
@ -4785,18 +4719,6 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
|
||||
// Phi nodes have cycles, so we need to vectorize them in two stages. This is
|
||||
// stage #1: We create a new vector PHI node with no incoming edges. We'll use
|
||||
// this value when we vectorize all of the instructions that use the PHI.
|
||||
if (Legal->isFirstOrderRecurrence(P)) {
|
||||
Type *VecTy = State.VF.isScalar()
|
||||
? PN->getType()
|
||||
: VectorType::get(PN->getType(), State.VF);
|
||||
|
||||
for (unsigned Part = 0; Part < State.UF; ++Part) {
|
||||
Value *EntryPart = PHINode::Create(
|
||||
VecTy, 2, "vec.phi", &*LoopVectorBody->getFirstInsertionPt());
|
||||
State.set(PhiR, EntryPart, Part);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
assert(!Legal->isReductionVariable(P) &&
|
||||
"reductions should be handled elsewhere");
|
||||
@ -9076,7 +8998,7 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
|
||||
CM.isInLoopReduction(Phi),
|
||||
CM.useOrderedReductions(RdxDesc));
|
||||
} else {
|
||||
PhiRecipe = new VPWidenPHIRecipe(Phi, *StartV);
|
||||
PhiRecipe = new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV);
|
||||
}
|
||||
|
||||
// Record the incoming value from the backedge, so we can add the incoming
|
||||
@ -9317,23 +9239,22 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Apply Sink-After legal constraints.
|
||||
auto GetReplicateRegion = [](VPRecipeBase *R) -> VPRegionBlock * {
|
||||
auto *Region = dyn_cast_or_null<VPRegionBlock>(R->getParent()->getParent());
|
||||
if (Region && Region->isReplicator()) {
|
||||
assert(Region->getNumSuccessors() == 1 &&
|
||||
Region->getNumPredecessors() == 1 && "Expected SESE region!");
|
||||
assert(R->getParent()->size() == 1 &&
|
||||
"A recipe in an original replicator region must be the only "
|
||||
"recipe in its block");
|
||||
return Region;
|
||||
}
|
||||
return nullptr;
|
||||
};
|
||||
for (auto &Entry : SinkAfter) {
|
||||
VPRecipeBase *Sink = RecipeBuilder.getRecipe(Entry.first);
|
||||
VPRecipeBase *Target = RecipeBuilder.getRecipe(Entry.second);
|
||||
|
||||
auto GetReplicateRegion = [](VPRecipeBase *R) -> VPRegionBlock * {
|
||||
auto *Region =
|
||||
dyn_cast_or_null<VPRegionBlock>(R->getParent()->getParent());
|
||||
if (Region && Region->isReplicator()) {
|
||||
assert(Region->getNumSuccessors() == 1 &&
|
||||
Region->getNumPredecessors() == 1 && "Expected SESE region!");
|
||||
assert(R->getParent()->size() == 1 &&
|
||||
"A recipe in an original replicator region must be the only "
|
||||
"recipe in its block");
|
||||
return Region;
|
||||
}
|
||||
return nullptr;
|
||||
};
|
||||
auto *TargetRegion = GetReplicateRegion(Target);
|
||||
auto *SinkRegion = GetReplicateRegion(Sink);
|
||||
if (!SinkRegion) {
|
||||
@ -9365,8 +9286,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
|
||||
VPBlockUtils::connectBlocks(SinkRegion, TargetSucc);
|
||||
} else {
|
||||
// The sink source is in a replicate region, we need to move the whole
|
||||
// replicate region, which should only contain a single recipe in the main
|
||||
// block.
|
||||
// replicate region, which should only contain a single recipe in the
|
||||
// main block.
|
||||
auto *SplitBlock =
|
||||
Target->getParent()->splitAt(std::next(Target->getIterator()));
|
||||
|
||||
@ -9380,6 +9301,29 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
|
||||
}
|
||||
}
|
||||
|
||||
// Introduce a recipe to combine the incoming and previous values of a
|
||||
// first-order recurrence.
|
||||
for (VPRecipeBase &R : Plan->getEntry()->getEntryBasicBlock()->phis()) {
|
||||
auto *RecurPhi = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R);
|
||||
if (!RecurPhi)
|
||||
continue;
|
||||
|
||||
auto *RecurSplice = cast<VPInstruction>(
|
||||
Builder.createNaryOp(VPInstruction::FirstOrderRecurrenceSplice,
|
||||
{RecurPhi, RecurPhi->getBackedgeValue()}));
|
||||
|
||||
VPRecipeBase *PrevRecipe = RecurPhi->getBackedgeRecipe();
|
||||
if (auto *Region = GetReplicateRegion(PrevRecipe)) {
|
||||
VPBasicBlock *Succ = cast<VPBasicBlock>(Region->getSingleSuccessor());
|
||||
RecurSplice->moveBefore(*Succ, Succ->getFirstNonPhi());
|
||||
} else
|
||||
RecurSplice->moveAfter(PrevRecipe);
|
||||
RecurPhi->replaceAllUsesWith(RecurSplice);
|
||||
// Set the first operand of RecurSplice to RecurPhi again, after replacing
|
||||
// all users.
|
||||
RecurSplice->setOperand(0, RecurPhi);
|
||||
}
|
||||
|
||||
// Interleave memory: for each Interleave Group we marked earlier as relevant
|
||||
// for this VPlan, replace the Recipes widening its memory instructions with a
|
||||
// single VPInterleaveRecipe at its insertion point.
|
||||
|
@ -687,6 +687,30 @@ void VPInstruction::generateInstruction(VPTransformState &State,
|
||||
State.set(this, Call, Part);
|
||||
break;
|
||||
}
|
||||
case VPInstruction::FirstOrderRecurrenceSplice: {
|
||||
// Generate code to combine the previous and current values in vector v3.
|
||||
//
|
||||
// vector.ph:
|
||||
// v_init = vector(..., ..., ..., a[-1])
|
||||
// br vector.body
|
||||
//
|
||||
// vector.body
|
||||
// i = phi [0, vector.ph], [i+4, vector.body]
|
||||
// v1 = phi [v_init, vector.ph], [v2, vector.body]
|
||||
// v2 = a[i, i+1, i+2, i+3];
|
||||
// v3 = vector(v1(3), v2(0, 1, 2))
|
||||
|
||||
// For the first part, use the recurrence phi (v1), otherwise v2.
|
||||
auto *V1 = State.get(getOperand(0), 0);
|
||||
Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
|
||||
if (!PartMinus1->getType()->isVectorTy()) {
|
||||
State.set(this, PartMinus1, Part);
|
||||
} else {
|
||||
Value *V2 = State.get(getOperand(1), Part);
|
||||
State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1), Part);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
llvm_unreachable("Unsupported opcode for instruction");
|
||||
}
|
||||
@ -729,7 +753,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
|
||||
case VPInstruction::ActiveLaneMask:
|
||||
O << "active lane mask";
|
||||
break;
|
||||
|
||||
case VPInstruction::FirstOrderRecurrenceSplice:
|
||||
O << "first-order splice";
|
||||
break;
|
||||
default:
|
||||
O << Instruction::getOpcodeName(getOpcode());
|
||||
}
|
||||
@ -1222,6 +1248,43 @@ void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
|
||||
}
|
||||
#endif
|
||||
|
||||
/// Emit the header phi for a first-order recurrence. Only part 0 is created
/// here, and only its incoming value from the vector pre-header is attached.
void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
  // Scalar start value of the recurrence, taken from the live-in IR value.
  auto *InitV = getStartValue()->getLiveInIRValue();
  Type *const ElemTy = InitV->getType();

  // The phi keeps the scalar type for a scalar VF; otherwise it is a vector of
  // VF elements of that type.
  Type *PhiTy = State.VF.isScalar() ? ElemTy : VectorType::get(ElemTy, State.VF);

  auto *PreHeader = State.CFG.VectorPreHeader;

  if (State.VF.isVector()) {
    auto &IRB = State.Builder;
    // Emit the initial vector in the pre-header. The guard restores the
    // builder's previous insertion point when this scope is left.
    IRBuilder<>::InsertPointGuard RestoreIP(IRB);
    IRB.SetInsertPoint(PreHeader->getTerminator());

    // Insert the scalar start value at index (RuntimeVF - 1) of a poison
    // vector, i.e. into its last lane.
    auto *IndexTy = IRB.getInt32Ty();
    auto *VFVal = getRuntimeVF(IRB, IndexTy, State.VF);
    auto *LastLane = IRB.CreateSub(VFVal, ConstantInt::get(IndexTy, 1));
    InitV = IRB.CreateInsertElement(PoisonValue::get(PhiTy), InitV, LastLane,
                                    "vector.recur.init");
  }

  // Create the recurrence phi at the first insertion point of PrevBB and hook
  // up the value flowing in from the pre-header.
  PHINode *RecurPhi = PHINode::Create(
      PhiTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt());
  RecurPhi->addIncoming(InitV, PreHeader);
  State.set(this, RecurPhi, 0);
}
|
||||
|
||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||||
/// Print the recipe as
///   <Indent>FIRST-ORDER-RECURRENCE-PHI <result> = phi <operands>
void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                            VPSlotTracker &SlotTracker) const {
  // Indentation followed by the recipe tag.
  O << Indent;
  O << "FIRST-ORDER-RECURRENCE-PHI ";
  // The value defined by this recipe.
  printAsOperand(O, SlotTracker);
  // The operand list of the phi.
  O << " = phi ";
  printOperands(O, SlotTracker);
}
|
||||
#endif
|
||||
|
||||
void VPReductionPHIRecipe::execute(VPTransformState &State) {
|
||||
PHINode *PN = cast<PHINode>(getUnderlyingValue());
|
||||
auto &Builder = State.Builder;
|
||||
|
@ -776,7 +776,10 @@ class VPInstruction : public VPRecipeBase, public VPValue {
|
||||
public:
|
||||
/// VPlan opcodes, extending LLVM IR with idiomatic instructions.
|
||||
enum {
|
||||
Not = Instruction::OtherOpsEnd + 1,
|
||||
FirstOrderRecurrenceSplice =
|
||||
Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
|
||||
// values of a first-order recurrence.
|
||||
Not,
|
||||
ICmpULE,
|
||||
SLPLoad,
|
||||
SLPStore,
|
||||
@ -1060,8 +1063,12 @@ class VPWidenPHIRecipe : public VPRecipeBase, public VPValue {
|
||||
SmallVector<VPBasicBlock *, 2> IncomingBlocks;
|
||||
|
||||
protected:
|
||||
VPWidenPHIRecipe(unsigned char VPVID, unsigned char VPDefID, PHINode *Phi)
|
||||
: VPRecipeBase(VPDefID, {}), VPValue(VPVID, Phi, this) {}
|
||||
VPWidenPHIRecipe(unsigned char VPVID, unsigned char VPDefID, PHINode *Phi,
|
||||
VPValue *Start = nullptr)
|
||||
: VPRecipeBase(VPDefID, {}), VPValue(VPVID, Phi, this) {
|
||||
if (Start)
|
||||
addOperand(Start);
|
||||
}
|
||||
|
||||
public:
|
||||
/// Create a VPWidenPHIRecipe for \p Phi
|
||||
@ -1078,10 +1085,12 @@ public:
|
||||
/// Method to support type inquiry through isa, cast, and dyn_cast.
|
||||
static inline bool classof(const VPRecipeBase *B) {
|
||||
return B->getVPDefID() == VPRecipeBase::VPWidenPHISC ||
|
||||
B->getVPDefID() == VPRecipeBase::VPFirstOrderRecurrencePHISC ||
|
||||
B->getVPDefID() == VPRecipeBase::VPReductionPHISC;
|
||||
}
|
||||
static inline bool classof(const VPValue *V) {
|
||||
return V->getVPValueID() == VPValue::VPVWidenPHISC ||
|
||||
V->getVPValueID() == VPValue::VPVFirstOrderRecurrencePHISC ||
|
||||
V->getVPValueID() == VPValue::VPVReductionPHISC;
|
||||
}
|
||||
|
||||
@ -1106,6 +1115,12 @@ public:
|
||||
return getOperand(1);
|
||||
}
|
||||
|
||||
/// Returns the backedge value as a recipe. The backedge value is guaranteed
|
||||
/// to be a recipe.
|
||||
VPRecipeBase *getBackedgeRecipe() {
|
||||
return cast<VPRecipeBase>(getBackedgeValue()->getDef());
|
||||
}
|
||||
|
||||
/// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi.
|
||||
void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) {
|
||||
addOperand(IncomingV);
|
||||
@ -1119,6 +1134,34 @@ public:
|
||||
VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; }
|
||||
};
|
||||
|
||||
/// A recipe for handling first-order recurrence phis. The start value is the
|
||||
/// first operand of the recipe and the incoming value from the backedge is the
|
||||
/// second operand.
|
||||
struct VPFirstOrderRecurrencePHIRecipe : public VPWidenPHIRecipe {
|
||||
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
|
||||
: VPWidenPHIRecipe(VPVFirstOrderRecurrencePHISC,
|
||||
VPFirstOrderRecurrencePHISC, Phi, &Start) {}
|
||||
|
||||
/// Method to support type inquiry through isa, cast, and dyn_cast.
|
||||
static inline bool classof(const VPRecipeBase *R) {
|
||||
return R->getVPDefID() == VPRecipeBase::VPFirstOrderRecurrencePHISC;
|
||||
}
|
||||
static inline bool classof(const VPWidenPHIRecipe *D) {
|
||||
return D->getVPDefID() == VPRecipeBase::VPFirstOrderRecurrencePHISC;
|
||||
}
|
||||
static inline bool classof(const VPValue *V) {
|
||||
return V->getVPValueID() == VPValue::VPVFirstOrderRecurrencePHISC;
|
||||
}
|
||||
|
||||
void execute(VPTransformState &State) override;
|
||||
|
||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||||
/// Print the recipe.
|
||||
void print(raw_ostream &O, const Twine &Indent,
|
||||
VPSlotTracker &SlotTracker) const override;
|
||||
#endif
|
||||
};
|
||||
|
||||
/// A recipe for handling reduction phis. The start value is the first operand
|
||||
/// of the recipe and the incoming value from the backedge is the second
|
||||
/// operand.
|
||||
@ -1138,10 +1181,9 @@ public:
|
||||
VPReductionPHIRecipe(PHINode *Phi, RecurrenceDescriptor &RdxDesc,
|
||||
VPValue &Start, bool IsInLoop = false,
|
||||
bool IsOrdered = false)
|
||||
: VPWidenPHIRecipe(VPVReductionPHISC, VPReductionPHISC, Phi),
|
||||
: VPWidenPHIRecipe(VPVReductionPHISC, VPReductionPHISC, Phi, &Start),
|
||||
RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered) {
|
||||
assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
|
||||
addOperand(&Start);
|
||||
}
|
||||
|
||||
~VPReductionPHIRecipe() override = default;
|
||||
|
@ -221,27 +221,6 @@ bool VPlanTransforms::mergeReplicateRegions(VPlan &Plan) {
|
||||
// region. Such dependencies should be rejected during earlier dependence
|
||||
// checks, which guarantee accesses can be re-ordered for vectorization.
|
||||
//
|
||||
// If a recipe is used by a first-order recurrence phi, we cannot move it at
|
||||
// the moment: a recipe R feeding a first order recurrence phi must allow
|
||||
// for a *vector* shuffle to be inserted immediately after it, and therefore
|
||||
// if R is *scalarized and predicated* it must appear last in its basic
|
||||
// block. In addition, other recipes may need to "sink after" R, so best if
|
||||
// R not be moved at all.
|
||||
auto IsImmovableRecipe = [](VPRecipeBase &R) {
|
||||
assert(R.getNumDefinedValues() <= 1 &&
|
||||
"no multi-defs are expected in predicated blocks");
|
||||
for (VPUser *U : R.getVPSingleValue()->users()) {
|
||||
auto *UI = dyn_cast<VPRecipeBase>(U);
|
||||
if (!UI)
|
||||
continue;
|
||||
if (isa<VPWidenPHIRecipe>(UI) && !isa<VPReductionPHIRecipe>(UI))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
if (any_of(*Then1, IsImmovableRecipe))
|
||||
continue;
|
||||
|
||||
// Move recipes to the successor region.
|
||||
for (VPRecipeBase &ToMove : make_early_inc_range(reverse(*Then1)))
|
||||
ToMove.moveBefore(*Then2, Then2->getFirstNonPhi());
|
||||
|
@ -101,6 +101,7 @@ public:
|
||||
|
||||
// Phi-like VPValues. Need to be kept together.
|
||||
VPVBlendSC,
|
||||
VPVFirstOrderRecurrencePHISC,
|
||||
VPVWidenPHISC,
|
||||
VPVWidenCanonicalIVSC,
|
||||
VPVWidenIntOrFpInductionSC,
|
||||
@ -331,6 +332,7 @@ public:
|
||||
|
||||
// Phi-like recipes. Need to be kept together.
|
||||
VPBlendSC,
|
||||
VPFirstOrderRecurrencePHISC,
|
||||
VPWidenPHISC,
|
||||
VPWidenCanonicalIVSC,
|
||||
VPWidenIntOrFpInductionSC,
|
||||
|
@ -16,9 +16,9 @@ define void @can_sink_after_store(i32 %x, i32* %ptr, i64 %tc) local_unnamed_addr
|
||||
; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, i32* [[IDX_PHI_TRANS]], align 4
|
||||
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
@ -100,9 +100,9 @@ define void @sink_sdiv(i32 %x, i32* %ptr, i64 %tc) local_unnamed_addr #0 {
|
||||
; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, i32* [[IDX_PHI_TRANS]], align 4
|
||||
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
@ -183,9 +183,9 @@ define void @can_sink_with_additional_user(i32 %x, i32* %ptr, i64 %tc) {
|
||||
; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, i32* [[IDX_PHI_TRANS]], align 4
|
||||
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
@ -502,7 +502,7 @@ define i16 @multiple_exit(i16* %p, i32 %n) {
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
|
||||
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
|
||||
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
|
||||
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
|
||||
@ -523,7 +523,7 @@ define i16 @multiple_exit(i16* %p, i32 %n) {
|
||||
; CHECK-NEXT: store i16 [[SCALAR_RECUR]], i16* [[B]], align 4
|
||||
; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1
|
||||
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
|
||||
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP7:![0-9]+]]
|
||||
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP9:![0-9]+]]
|
||||
; CHECK: if.end:
|
||||
; CHECK-NEXT: [[REC_LCSSA:%.*]] = phi i16 [ [[SCALAR_RECUR]], [[FOR_BODY]] ], [ [[SCALAR_RECUR]], [[FOR_COND]] ]
|
||||
; CHECK-NEXT: ret i16 [[REC_LCSSA]]
|
||||
@ -586,7 +586,7 @@ define i16 @multiple_exit2(i16* %p, i32 %n) {
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
|
||||
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
|
||||
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
|
||||
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
|
||||
@ -607,7 +607,7 @@ define i16 @multiple_exit2(i16* %p, i32 %n) {
|
||||
; CHECK-NEXT: store i16 [[SCALAR_RECUR]], i16* [[B]], align 4
|
||||
; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1
|
||||
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
|
||||
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP9:![0-9]+]]
|
||||
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP11:![0-9]+]]
|
||||
; CHECK: if.end:
|
||||
; CHECK-NEXT: [[REC_LCSSA:%.*]] = phi i16 [ [[SCALAR_RECUR]], [[FOR_COND]] ], [ 10, [[FOR_BODY]] ]
|
||||
; CHECK-NEXT: ret i16 [[REC_LCSSA]]
|
||||
|
@ -10,7 +10,7 @@ define void @sink_replicate_region_1(i32 %x, i8* %ptr) optsize {
|
||||
; CHECK-LABEL: sink_replicate_region_1
|
||||
; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
|
||||
; CHECK-NEXT: loop:
|
||||
; CHECK-NEXT: WIDEN-PHI ir<%0> = phi ir<0>, ir<%conv>
|
||||
; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%0> = phi ir<0>, ir<%conv>
|
||||
; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next
|
||||
; CHECK-NEXT: EMIT vp<%3> = icmp ule ir<%iv> vp<%0>
|
||||
; CHECK-NEXT: Successor(s): loop.0
|
||||
@ -37,6 +37,7 @@ define void @sink_replicate_region_1(i32 %x, i8* %ptr) optsize {
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: loop.1:
|
||||
; CHECK-NEXT: WIDEN ir<%conv> = sext vp<%6>
|
||||
; CHECK-NEXT: EMIT vp<%8> = first-order splice ir<%0> ir<%conv>
|
||||
; CHECK-NEXT: Successor(s): pred.srem
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: <xVFxUF> pred.srem: {
|
||||
@ -46,17 +47,17 @@ define void @sink_replicate_region_1(i32 %x, i8* %ptr) optsize {
|
||||
; CHECK-NEXT: CondBit: vp<%3> (loop)
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.srem.if:
|
||||
; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%0>, ir<%x> (S->V)
|
||||
; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<%8>, ir<%x> (S->V)
|
||||
; CHECK-NEXT: Successor(s): pred.srem.continue
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.srem.continue:
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%9> = ir<%rem>
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%10> = ir<%rem>
|
||||
; CHECK-NEXT: No successors
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: Successor(s): loop.1.split
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: loop.1.split:
|
||||
; CHECK-NEXT: WIDEN ir<%add> = add ir<%conv>, vp<%9>
|
||||
; CHECK-NEXT: WIDEN ir<%add> = add ir<%conv>, vp<%10>
|
||||
; CHECK-NEXT: No successors
|
||||
; CHECK-NEXT: }
|
||||
;
|
||||
@ -83,13 +84,14 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, i32* %ptr) optsize {
|
||||
; CHECK-LABEL: sink_replicate_region_2
|
||||
; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
|
||||
; CHECK-NEXT: loop:
|
||||
; CHECK-NEXT: WIDEN-PHI ir<%recur> = phi ir<0>, ir<%recur.next>
|
||||
; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next>
|
||||
; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next
|
||||
; CHECK-NEXT: EMIT vp<%3> = icmp ule ir<%iv> vp<%0>
|
||||
; CHECK-NEXT: Successor(s): loop.0
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: loop.0:
|
||||
; CHECK-NEXT: WIDEN ir<%recur.next> = sext ir<%y>
|
||||
; CHECK-NEXT: EMIT vp<%5> = first-order splice ir<%recur> ir<%recur.next>
|
||||
; CHECK-NEXT: Successor(s): loop.0.split
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: loop.0.split:
|
||||
@ -102,14 +104,14 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, i32* %ptr) optsize {
|
||||
; CHECK-NEXT: CondBit: vp<%3> (loop)
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.store.if:
|
||||
; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%recur>, ir<%x>
|
||||
; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<%5>, ir<%x>
|
||||
; CHECK-NEXT: REPLICATE ir<%add> = add ir<%rem>, ir<%recur.next>
|
||||
; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, ir<%iv>
|
||||
; CHECK-NEXT: REPLICATE store ir<%add>, ir<%gep>
|
||||
; CHECK-NEXT: Successor(s): pred.store.continue
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.store.continue:
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%9> = ir<%rem>
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%10> = ir<%rem>
|
||||
; CHECK-NEXT: No successors
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: Successor(s): loop.1
|
||||
@ -141,7 +143,7 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, i32* %ptr) optsize
|
||||
; CHECK-LABEL: sink_replicate_region_3_reduction
|
||||
; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
|
||||
; CHECK-NEXT: loop:
|
||||
; CHECK-NEXT: WIDEN-PHI ir<%recur> = phi ir<0>, ir<%recur.next>
|
||||
; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next>
|
||||
; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next
|
||||
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%and.red> = phi ir<1234>, ir<%and.red.next>
|
||||
; CHECK-NEXT: EMIT vp<%4> = icmp ule ir<%iv> vp<%0>
|
||||
@ -149,6 +151,7 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, i32* %ptr) optsize
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: loop.0:
|
||||
; CHECK-NEXT: WIDEN ir<%recur.next> = sext ir<%y>
|
||||
; CHECK-NEXT: EMIT vp<%6> = first-order splice ir<%recur> ir<%recur.next>
|
||||
; CHECK-NEXT: Successor(s): pred.srem
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: <xVFxUF> pred.srem: {
|
||||
@ -158,19 +161,19 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, i32* %ptr) optsize
|
||||
; CHECK-NEXT: CondBit: vp<%4> (loop)
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.srem.if:
|
||||
; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%recur>, ir<%x> (S->V)
|
||||
; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<%6>, ir<%x> (S->V)
|
||||
; CHECK-NEXT: Successor(s): pred.srem.continue
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.srem.continue:
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%7> = ir<%rem>
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%8> = ir<%rem>
|
||||
; CHECK-NEXT: No successors
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: Successor(s): loop.0.split
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: loop.0.split:
|
||||
; CHECK-NEXT: WIDEN ir<%add> = add vp<%7>, ir<%recur.next>
|
||||
; CHECK-NEXT: WIDEN ir<%add> = add vp<%8>, ir<%recur.next>
|
||||
; CHECK-NEXT: WIDEN ir<%and.red.next> = and ir<%and.red>, ir<%add>
|
||||
; CHECK-NEXT: EMIT vp<%10> = select vp<%4> ir<%and.red.next> ir<%and.red>
|
||||
; CHECK-NEXT: EMIT vp<%11> = select vp<%4> ir<%and.red.next> ir<%and.red>
|
||||
; CHECK-NEXT: No successors
|
||||
; CHECK-NEXT: }
|
||||
;
|
||||
@ -200,7 +203,7 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, i8*
|
||||
; CHECK-LABEL: sink_replicate_region_4_requires_split_at_end_of_block
|
||||
; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
|
||||
; CHECK-NEXT: loop:
|
||||
; CHECK-NEXT: WIDEN-PHI ir<%0> = phi ir<0>, ir<%conv>
|
||||
; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%0> = phi ir<0>, ir<%conv>
|
||||
; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next
|
||||
; CHECK-NEXT: EMIT vp<%3> = icmp ule ir<%iv> vp<%0>
|
||||
; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, ir<%iv>
|
||||
@ -227,6 +230,7 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, i8*
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: loop.1:
|
||||
; CHECK-NEXT: WIDEN ir<%conv> = sext vp<%6>
|
||||
; CHECK-NEXT: EMIT vp<%8> = first-order splice ir<%0> ir<%conv>
|
||||
; CHECK-NEXT: Successor(s): loop.1.split
|
||||
|
||||
; CHECK: loop.1.split:
|
||||
@ -239,19 +243,19 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, i8*
|
||||
; CHECK-NEXT: CondBit: vp<%3> (loop)
|
||||
|
||||
; CHECK: pred.load.if:
|
||||
; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%0>, ir<%x> (S->V)
|
||||
; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<%8>, ir<%x> (S->V)
|
||||
; CHECK-NEXT: REPLICATE ir<%lv.2> = load ir<%gep> (S->V)
|
||||
; CHECK-NEXT: Successor(s): pred.load.continue
|
||||
|
||||
; CHECK: pred.load.continue:
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%10> = ir<%rem>
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%11> = ir<%lv.2>
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%11> = ir<%rem>
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%12> = ir<%lv.2>
|
||||
; CHECK-NEXT: No successors
|
||||
; CHECK-NEXT: }
|
||||
|
||||
; CHECK: loop.2:
|
||||
; CHECK-NEXT: WIDEN ir<%add.1> = add ir<%conv>, vp<%10>
|
||||
; CHECK-NEXT: WIDEN ir<%conv.lv.2> = sext vp<%11>
|
||||
; CHECK-NEXT: WIDEN ir<%add.1> = add ir<%conv>, vp<%11>
|
||||
; CHECK-NEXT: WIDEN ir<%conv.lv.2> = sext vp<%12>
|
||||
; CHECK-NEXT: WIDEN ir<%add> = add ir<%add.1>, ir<%conv.lv.2>
|
||||
; CHECK-NEXT: No successors
|
||||
; CHECK-NEXT: }
|
||||
@ -283,7 +287,7 @@ define void @sink_replicate_region_after_replicate_region(i32* %ptr, i32 %x, i8
|
||||
; CHECK-LABEL: sink_replicate_region_after_replicate_region
|
||||
; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
|
||||
; CHECK-NEXT: loop:
|
||||
; CHECK-NEXT: WIDEN-PHI ir<%recur> = phi ir<0>, ir<%recur.next>
|
||||
; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next>
|
||||
; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next
|
||||
; CHECK-NEXT: EMIT vp<%3> = icmp ule ir<%iv> vp<%0>
|
||||
; CHECK-NEXT: Successor(s): loop.0
|
||||
@ -293,6 +297,7 @@ define void @sink_replicate_region_after_replicate_region(i32* %ptr, i32 %x, i8
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: loop.1:
|
||||
; CHECK-NEXT: WIDEN ir<%recur.next> = sext ir<%y>
|
||||
; CHECK-NEXT: EMIT vp<%5> = first-order splice ir<%recur> ir<%recur.next>
|
||||
; CHECK-NEXT: Successor(s): pred.srem
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: <xVFxUF> pred.srem: {
|
||||
@ -302,11 +307,11 @@ define void @sink_replicate_region_after_replicate_region(i32* %ptr, i32 %x, i8
|
||||
; CHECK-NEXT: CondBit: vp<%3> (loop)
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.srem.if:
|
||||
; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%recur>, ir<%x>
|
||||
; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<%5>, ir<%x>
|
||||
; CHECK-NEXT: Successor(s): pred.srem.continue
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.srem.continue:
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%6> = ir<%rem>
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%7> = ir<%rem>
|
||||
; CHECK-NEXT: No successors
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: Successor(s): loop.1.split
|
||||
@ -321,13 +326,13 @@ define void @sink_replicate_region_after_replicate_region(i32* %ptr, i32 %x, i8
|
||||
; CHECK-NEXT: CondBit: vp<%3> (loop)
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.store.if:
|
||||
; CHECK-NEXT: REPLICATE ir<%rem.div> = sdiv ir<20>, vp<%6>
|
||||
; CHECK-NEXT: REPLICATE ir<%rem.div> = sdiv ir<20>, vp<%7>
|
||||
; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, ir<%iv>
|
||||
; CHECK-NEXT: REPLICATE store ir<%rem.div>, ir<%gep>
|
||||
; CHECK-NEXT: Successor(s): pred.store.continue
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.store.continue:
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%10> = ir<%rem.div>
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%11> = ir<%rem.div>
|
||||
; CHECK-NEXT: No successors
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: Successor(s): loop.2
|
||||
|
@ -337,7 +337,8 @@ for.end:
|
||||
; UNROLL-NO-IC-LABEL: @constant_folded_previous_value(
|
||||
; UNROLL-NO-IC: vector.body:
|
||||
; UNROLL-NO-IC: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 0>, %vector.ph ], [ <i64 1, i64 1, i64 1, i64 1>, %vector.body ]
|
||||
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> <i64 1, i64 1, i64 1, i64 1>, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
|
||||
; UNROLL-NO-IC: [[TMP0:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> <i64 1, i64 1, i64 1, i64 1>, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
|
||||
; CHECK-NO-IC-NEXT: add nuw i64
|
||||
; UNROLL-NO-IC: br i1 {{.*}}, label %middle.block, label %vector.body
|
||||
;
|
||||
define void @constant_folded_previous_value() {
|
||||
@ -641,10 +642,10 @@ define void @sink_dead_inst() {
|
||||
; SINK-AFTER-NEXT: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
|
||||
; SINK-AFTER-NEXT: %vec.ind = phi <4 x i16> [ <i16 -27, i16 -26, i16 -25, i16 -24>, %vector.ph ], [ %vec.ind.next, %vector.body ]
|
||||
; SINK-AFTER-NEXT: %vector.recur = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 0>, %vector.ph ], [ %3, %vector.body ]
|
||||
; SINK-AFTER-NEXT: %vector.recur2 = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 -27>, %vector.ph ], [ %1, %vector.body ]
|
||||
; SINK-AFTER-NEXT: %vector.recur1 = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 -27>, %vector.ph ], [ %1, %vector.body ]
|
||||
; SINK-AFTER-NEXT: %0 = add <4 x i16> %vec.ind, <i16 1, i16 1, i16 1, i16 1>
|
||||
; SINK-AFTER-NEXT: %1 = zext <4 x i16> %0 to <4 x i32>
|
||||
; SINK-AFTER-NEXT: %2 = shufflevector <4 x i32> %vector.recur2, <4 x i32> %1, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
|
||||
; SINK-AFTER-NEXT: %2 = shufflevector <4 x i32> %vector.recur1, <4 x i32> %1, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
|
||||
; SINK-AFTER-NEXT: %3 = add <4 x i16> %0, <i16 5, i16 5, i16 5, i16 5>
|
||||
; SINK-AFTER-NEXT: %4 = shufflevector <4 x i16> %vector.recur, <4 x i16> %3, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
|
||||
; SINK-AFTER-NEXT: %5 = sub <4 x i16> %4, <i16 10, i16 10, i16 10, i16 10>
|
||||
@ -704,30 +705,30 @@ define i32 @sink_into_replication_region(i32 %y) {
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_UDIV_IF]] ]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
|
||||
; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_UDIV_IF4:%.*]], label [[PRED_UDIV_CONTINUE5:%.*]]
|
||||
; CHECK: pred.udiv.if4:
|
||||
; CHECK: pred.udiv.if3:
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], -1
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = udiv i32 219220132, [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP9]], i32 1
|
||||
; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE5]]
|
||||
; CHECK: pred.udiv.continue5:
|
||||
; CHECK: pred.udiv.continue4:
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = phi <4 x i32> [ [[TMP6]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP10]], [[PRED_UDIV_IF4]] ]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
|
||||
; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_UDIV_IF6:%.*]], label [[PRED_UDIV_CONTINUE7:%.*]]
|
||||
; CHECK: pred.udiv.if6:
|
||||
; CHECK: pred.udiv.if5:
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[OFFSET_IDX]], -2
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = udiv i32 219220132, [[TMP13]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP14]], i32 2
|
||||
; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE7]]
|
||||
; CHECK: pred.udiv.continue7:
|
||||
; CHECK: pred.udiv.continue6:
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = phi <4 x i32> [ [[TMP11]], [[PRED_UDIV_CONTINUE5]] ], [ [[TMP15]], [[PRED_UDIV_IF6]] ]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
|
||||
; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9]]
|
||||
; CHECK: pred.udiv.if8:
|
||||
; CHECK: pred.udiv.if7:
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[OFFSET_IDX]], -3
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = udiv i32 219220132, [[TMP18]]
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP19]], i32 3
|
||||
; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE9]]
|
||||
; CHECK: pred.udiv.continue9:
|
||||
; CHECK: pred.udiv.continue8:
|
||||
; CHECK-NEXT: [[TMP21]] = phi <4 x i32> [ [[TMP16]], [[PRED_UDIV_CONTINUE7]] ], [ [[TMP20]], [[PRED_UDIV_IF8]] ]
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP21]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
|
||||
; CHECK-NEXT: [[TMP23]] = add <4 x i32> [[VEC_PHI1]], [[TMP22]]
|
||||
@ -798,27 +799,27 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) {
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_UDIV_IF]] ]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1
|
||||
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_UDIV_IF5:%.*]], label [[PRED_UDIV_CONTINUE6:%.*]]
|
||||
; CHECK: pred.udiv.if5:
|
||||
; CHECK: pred.udiv.if4:
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = udiv i32 219220132, [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP11]], i32 1
|
||||
; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE6]]
|
||||
; CHECK: pred.udiv.continue6:
|
||||
; CHECK: pred.udiv.continue5:
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP12]], [[PRED_UDIV_IF5]] ]
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2
|
||||
; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8:%.*]]
|
||||
; CHECK: pred.udiv.if7:
|
||||
; CHECK: pred.udiv.if6:
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = udiv i32 219220132, [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP15]], i32 2
|
||||
; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE8]]
|
||||
; CHECK: pred.udiv.continue8:
|
||||
; CHECK: pred.udiv.continue7:
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = phi <4 x i32> [ [[TMP13]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP16]], [[PRED_UDIV_IF7]] ]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3
|
||||
; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_UDIV_IF9:%.*]], label [[PRED_UDIV_CONTINUE10:%.*]]
|
||||
; CHECK: pred.udiv.if9:
|
||||
; CHECK: pred.udiv.if8:
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = udiv i32 219220132, [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP19]], i32 3
|
||||
; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE10]]
|
||||
; CHECK: pred.udiv.continue10:
|
||||
; CHECK: pred.udiv.continue9:
|
||||
; CHECK-NEXT: [[TMP21]] = phi <4 x i32> [ [[TMP17]], [[PRED_UDIV_CONTINUE8]] ], [ [[TMP20]], [[PRED_UDIV_IF9]] ]
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP21]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
|
||||
; CHECK-NEXT: [[TMP23]] = add <4 x i32> [[VEC_PHI4]], [[TMP22]]
|
||||
@ -832,31 +833,31 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) {
|
||||
; CHECK: pred.store.continue:
|
||||
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1
|
||||
; CHECK-NEXT: br i1 [[TMP27]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]]
|
||||
; CHECK: pred.store.if11:
|
||||
; CHECK: pred.store.if10:
|
||||
; CHECK-NEXT: [[TMP28:%.*]] = or i32 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64
|
||||
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP29]]
|
||||
; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP30]], align 4
|
||||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]]
|
||||
; CHECK: pred.store.continue12:
|
||||
; CHECK: pred.store.continue11:
|
||||
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2
|
||||
; CHECK-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]]
|
||||
; CHECK: pred.store.if13:
|
||||
; CHECK: pred.store.if12:
|
||||
; CHECK-NEXT: [[TMP32:%.*]] = or i32 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64
|
||||
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP33]]
|
||||
; CHECK-NEXT: store i32 [[TMP3]], i32* [[TMP34]], align 4
|
||||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]]
|
||||
; CHECK: pred.store.continue14:
|
||||
; CHECK: pred.store.continue13:
|
||||
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3
|
||||
; CHECK-NEXT: br i1 [[TMP35]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16]]
|
||||
; CHECK: pred.store.if15:
|
||||
; CHECK: pred.store.if14:
|
||||
; CHECK-NEXT: [[TMP36:%.*]] = or i32 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[TMP37:%.*]] = sext i32 [[TMP36]] to i64
|
||||
; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP37]]
|
||||
; CHECK-NEXT: store i32 [[TMP4]], i32* [[TMP38]], align 4
|
||||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]]
|
||||
; CHECK: pred.store.continue16:
|
||||
; CHECK: pred.store.continue15:
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
|
||||
; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], <i32 4, i32 4, i32 4, i32 4>
|
||||
; CHECK-NEXT: [[TMP39:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
|
||||
|
@ -860,10 +860,10 @@ define i64 @trunc_with_first_order_recurrence() {
|
||||
; CHECK-NEXT: %vec.phi = phi <2 x i64>
|
||||
; CHECK-NEXT: %vec.ind = phi <2 x i64> [ <i64 1, i64 2>, %vector.ph ], [ %vec.ind.next, %vector.body ]
|
||||
; CHECK-NEXT: %vec.ind2 = phi <2 x i32> [ <i32 1, i32 2>, %vector.ph ], [ %vec.ind.next3, %vector.body ]
|
||||
; CHECK-NEXT: %vector.recur = phi <2 x i32> [ <i32 poison, i32 42>, %vector.ph ], [ %vec.ind5, %vector.body ]
|
||||
; CHECK-NEXT: %vec.ind5 = phi <2 x i32> [ <i32 1, i32 2>, %vector.ph ], [ %vec.ind.next6, %vector.body ]
|
||||
; CHECK-NEXT: %vec.ind7 = phi <2 x i32> [ <i32 1, i32 2>, %vector.ph ], [ %vec.ind.next8, %vector.body ]
|
||||
; CHECK-NEXT: shufflevector <2 x i32> %vector.recur, <2 x i32> %vec.ind5, <2 x i32> <i32 1, i32 2>
|
||||
; CHECK-NEXT: %vector.recur = phi <2 x i32> [ <i32 poison, i32 42>, %vector.ph ], [ %vec.ind4, %vector.body ]
|
||||
; CHECK-NEXT: %vec.ind4 = phi <2 x i32> [ <i32 1, i32 2>, %vector.ph ], [ %vec.ind.next5, %vector.body ]
|
||||
; CHECK-NEXT: %vec.ind6 = phi <2 x i32> [ <i32 1, i32 2>, %vector.ph ], [ %vec.ind.next7, %vector.body ]
|
||||
; CHECK-NEXT: shufflevector <2 x i32> %vector.recur, <2 x i32> %vec.ind4, <2 x i32> <i32 1, i32 2>
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
|
@ -1530,11 +1530,11 @@ define void @PR34743(i16* %a, i32* %b, i64 %n) {
|
||||
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i16>, <8 x i16>* [[TMP10]], align 4
|
||||
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i16> [[WIDE_VEC]], <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
||||
; CHECK-NEXT: [[STRIDED_VEC8]] = shufflevector <8 x i16> [[WIDE_VEC]], <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[STRIDED_VEC8]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = sext <4 x i16> [[STRIDED_VEC]] to <4 x i32>
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = sext <4 x i16> [[TMP11]] to <4 x i32>
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i16> [[STRIDED_VEC]] to <4 x i32>
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[STRIDED_VEC8]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = sext <4 x i16> [[TMP12]] to <4 x i32>
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = sext <4 x i16> [[STRIDED_VEC8]] to <4 x i32>
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = mul nsw <4 x i32> [[TMP13]], [[TMP12]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = mul nsw <4 x i32> [[TMP13]], [[TMP11]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = mul nsw <4 x i32> [[TMP15]], [[TMP14]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP17]] to <4 x i32>*
|
||||
|
@ -172,8 +172,9 @@ define void @constant_folded_previous_value() {
|
||||
; CHECK-VF4UF2-LABEL: @constant_folded_previous_value
|
||||
; CHECK-VF4UF2: vector.body
|
||||
; CHECK-VF4UF2: %[[VECTOR_RECUR:.*]] = phi <vscale x 4 x i64> [ %vector.recur.init, %vector.ph ], [ shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> undef, i64 1, i32 0), <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer), %vector.body ]
|
||||
; CHECK-VF4UF2-NEXT: %[[SPLICE1:.*]] = call <vscale x 4 x i64> @llvm.experimental.vector.splice.nxv4i64(<vscale x 4 x i64> %vector.recur, <vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> undef, i64 1, i32 0), <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer), i32 -1)
|
||||
; CHECK-VF4UF2: %[[SPLICE1:.*]] = call <vscale x 4 x i64> @llvm.experimental.vector.splice.nxv4i64(<vscale x 4 x i64> %vector.recur, <vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> undef, i64 1, i32 0), <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer), i32 -1)
|
||||
; CHECK-VF4UF2: %[[SPLICE2:.*]] = call <vscale x 4 x i64> @llvm.experimental.vector.splice.nxv4i64(<vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> undef, i64 1, i32 0), <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> undef, i64 1, i32 0), <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer), i32 -1)
|
||||
; CHECK-VF4UF2: br i1 {{.*}}, label %middle.block, label %vector.body
|
||||
entry:
|
||||
br label %scalar.body
|
||||
|
||||
@ -197,15 +198,17 @@ for.end:
|
||||
define i32 @extract_second_last_iteration(i32* %cval, i32 %x) {
|
||||
; CHECK-VF4UF2-LABEL: @extract_second_last_iteration
|
||||
; CHECK-VF4UF2: vector.ph
|
||||
; CHECK-VF4UF2: %[[SPLAT_INS1:.*]] = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
|
||||
; CHECK-VF4UF2: %[[SPLAT1:.*]] = shufflevector <vscale x 4 x i32> %[[SPLAT_INS1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
||||
; CHECK-VF4UF2: %[[SPLAT_INS2:.*]] = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
|
||||
; CHECK-VF4UF2: %[[SPLAT2:.*]] = shufflevector <vscale x 4 x i32> %[[SPLAT_INS2]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
||||
; CHECK-VF4UF2: call i32 @llvm.vscale.i32()
|
||||
; CHECK-VF4UF2: call i32 @llvm.vscale.i32()
|
||||
; CHECK-VF4UF2: %[[VSCALE1:.*]] = call i32 @llvm.vscale.i32()
|
||||
; CHECK-VF4UF2: %[[MUL1:.*]] = mul i32 %[[VSCALE1]], 4
|
||||
; CHECK-VF4UF2: %[[SUB1:.*]] = sub i32 %[[MUL1]], 1
|
||||
; CHECK-VF4UF2: %[[VEC_RECUR_INIT:.*]] = insertelement <vscale x 4 x i32> poison, i32 0, i32 %[[SUB1]]
|
||||
; CHECK-VF4UF2: vector.body
|
||||
; CHECK-VF4UF2: %[[SPLAT_INS1:.*]] = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
|
||||
; CHECK-VF4UF2: %[[SPLAT1:.*]] = shufflevector <vscale x 4 x i32> %[[SPLAT_INS1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
||||
; CHECK-VF4UF2: %[[SPLAT_INS2:.*]] = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
|
||||
; CHECK-VF4UF2: %[[SPLAT2:.*]] = shufflevector <vscale x 4 x i32> %[[SPLAT_INS2]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
||||
; ; CHECK-VF4UF2: vector.body
|
||||
; CHECK-VF4UF2: %[[VEC_RECUR:.*]] = phi <vscale x 4 x i32> [ %[[VEC_RECUR_INIT]], %vector.ph ], [ %[[ADD2:.*]], %vector.body ]
|
||||
; CHECK-VF4UF2: %[[ADD1:.*]] = add <vscale x 4 x i32> %{{.*}}, %[[SPLAT1]]
|
||||
; CHECK-VF4UF2: %[[ADD2]] = add <vscale x 4 x i32> %{{.*}}, %[[SPLAT2]]
|
||||
|
@ -766,7 +766,7 @@ define void @recipe_in_merge_candidate_used_by_first_order_recurrence(i32 %k) {
|
||||
; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
|
||||
; CHECK-NEXT: loop:
|
||||
; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next
|
||||
; CHECK-NEXT: WIDEN-PHI ir<%for> = phi ir<0>, ir<%lv.a>
|
||||
; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for> = phi ir<0>, ir<%lv.a>
|
||||
; CHECK-NEXT: EMIT vp<%3> = icmp ule ir<%iv> vp<%0>
|
||||
; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, ir<%iv>
|
||||
; CHECK-NEXT: Successor(s): pred.load
|
||||
@ -788,6 +788,7 @@ define void @recipe_in_merge_candidate_used_by_first_order_recurrence(i32 %k) {
|
||||
; CHECK-NEXT: Successor(s): loop.0
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: loop.0:
|
||||
; CHECK-NEXT: EMIT vp<%7> = first-order splice ir<%for> ir<%lv.a>
|
||||
; CHECK-NEXT: Successor(s): loop.1
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: loop.1:
|
||||
@ -800,12 +801,12 @@ define void @recipe_in_merge_candidate_used_by_first_order_recurrence(i32 %k) {
|
||||
; CHECK-NEXT: CondBit: vp<%3> (loop)
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.store.if:
|
||||
; CHECK-NEXT: REPLICATE ir<%div> = sdiv ir<%for>, vp<%6>
|
||||
; CHECK-NEXT: REPLICATE ir<%div> = sdiv vp<%7>, vp<%6>
|
||||
; CHECK-NEXT: REPLICATE store ir<%div>, ir<%gep.a>
|
||||
; CHECK-NEXT: Successor(s): pred.store.continue
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.store.continue:
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%9> = ir<%div>
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%10> = ir<%div>
|
||||
; CHECK-NEXT: No successors
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: Successor(s): loop.2
|
||||
|
Loading…
Reference in New Issue
Block a user