1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-18 10:32:48 +02:00

[VPlan] Merge predicated-triangle regions, after sinking.

Sinking scalar operands into predicated-triangle regions may make it
possible to merge adjacent regions. This patch adds a VPlan-to-VPlan
transform that tries to merge predicated-triangle regions after sinking.

Reviewed By: Ayal

Differential Revision: https://reviews.llvm.org/D100260
This commit is contained in:
Florian Hahn 2021-06-28 11:01:27 +01:00
parent 39ed7fe00e
commit e51ded8676
12 changed files with 899 additions and 1179 deletions

View File

@ -9298,6 +9298,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
}
VPlanTransforms::sinkScalarOperands(*Plan);
VPlanTransforms::mergeReplicateRegions(*Plan);
std::string PlanName;
raw_string_ostream RSO(PlanName);

View File

@ -148,3 +148,138 @@ bool VPlanTransforms::sinkScalarOperands(VPlan &Plan) {
}
return Changed;
}
/// Return the mask operand of the VPBranchOnMaskRecipe that forms the sole
/// recipe of the entry block of region \p R. Returns nullptr if the entry is
/// not a VPBasicBlock, does not contain exactly one recipe, or that recipe is
/// not a VPBranchOnMaskRecipe.
VPValue *getPredicatedMask(VPRegionBlock *R) {
  auto *Entry = dyn_cast<VPBasicBlock>(R->getEntry());
  if (!Entry)
    return nullptr;
  // The entry block must hold the branch-on-mask and nothing else.
  if (Entry->size() != 1)
    return nullptr;
  auto *BranchOnMask = dyn_cast<VPBranchOnMaskRecipe>(&*Entry->begin());
  if (!BranchOnMask)
    return nullptr;
  return BranchOnMask->getOperand(0);
}
/// Return the 'then' block of region \p R if the region has a triangle shape,
/// i.e. its entry block has exactly two VPBasicBlock successors, the two
/// successors have a single outgoing edge between them in total, and that
/// edge leads from one successor (the 'then' block) to the other. Returns
/// nullptr for any other shape.
static VPBasicBlock *getPredicatedThenBlock(VPRegionBlock *R) {
  auto *Entry = cast<VPBasicBlock>(R->getEntry());
  if (Entry->getNumSuccessors() != 2)
    return nullptr;

  auto &Succs = Entry->getSuccessors();
  auto *First = dyn_cast<VPBasicBlock>(Succs[0]);
  auto *Second = dyn_cast<VPBasicBlock>(Succs[1]);
  if (!(First && Second))
    return nullptr;

  // Exactly one edge may leave the two successors combined.
  const auto NumOutgoingEdges =
      First->getNumSuccessors() + Second->getNumSuccessors();
  if (NumOutgoingEdges != 1)
    return nullptr;

  // Whichever successor branches to the other one is the 'then' block.
  if (First->getSingleSuccessor() == Second)
    return First;
  return Second->getSingleSuccessor() == First ? Second : nullptr;
}
/// Try to merge pairs of predicated-triangle regions of \p Plan that are
/// guarded by the same mask and separated only by an empty VPBasicBlock. The
/// recipes of the first region are moved into the second region, the first
/// region is unlinked from the CFG and deleted.
///
/// \returns true if at least one region was merged (and therefore removed).
bool VPlanTransforms::mergeReplicateRegions(VPlan &Plan) {
  SetVector<VPRegionBlock *> DeletedRegions;

  // Collect region blocks to process up-front, to avoid iterator invalidation
  // issues while merging regions.
  SmallVector<VPRegionBlock *, 8> CandidateRegions(
      VPBlockUtils::blocksOnly<VPRegionBlock>(depth_first(
          VPBlockRecursiveTraversalWrapper<VPBlockBase *>(Plan.getEntry()))));

  // Check if Region1 is a predicated triangle, followed by an empty block,
  // followed by another predicated triangle. If that's the case, move the
  // recipes from the first to the second triangle.
  for (VPRegionBlock *Region1 : CandidateRegions) {
    // Regions merged away earlier are only deleted at the end; skip them.
    if (DeletedRegions.contains(Region1))
      continue;

    auto *MiddleBasicBlock =
        dyn_cast_or_null<VPBasicBlock>(Region1->getSingleSuccessor());
    if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
      continue;

    auto *Region2 =
        dyn_cast_or_null<VPRegionBlock>(MiddleBasicBlock->getSingleSuccessor());
    if (!Region2)
      continue;

    // Both regions must be guarded by the very same mask value.
    VPValue *Mask1 = getPredicatedMask(Region1);
    VPValue *Mask2 = getPredicatedMask(Region2);
    if (!Mask1 || Mask1 != Mask2)
      continue;

    VPBasicBlock *Then1 = getPredicatedThenBlock(Region1);
    VPBasicBlock *Then2 = getPredicatedThenBlock(Region2);
    if (!Then1 || !Then2)
      continue;

    assert(Mask1 && Mask2 && "both region must have conditions");

    // Note: No fusion-preventing memory dependencies are expected in either
    // region. Such dependencies should be rejected during earlier dependence
    // checks, which guarantee accesses can be re-ordered for vectorization.
    //
    // If a recipe is used by a first-order recurrence phi, we cannot move it at
    // the moment: a recipe R feeding a first order recurrence phi must allow
    // for a *vector* shuffle to be inserted immediately after it, and therefore
    // if R is *scalarized and predicated* it must appear last in its basic
    // block. In addition, other recipes may need to "sink after" R, so best if
    // R not be moved at all.
    auto IsImmovableRecipe = [](VPRecipeBase &R) {
      assert(R.getNumDefinedValues() <= 1 &&
             "no multi-defs are expected in predicated blocks");
      for (VPUser *U : R.getVPValue()->users()) {
        auto *UI = dyn_cast<VPRecipeBase>(U);
        if (!UI)
          continue;
        // A widened phi without a recurrence descriptor is treated as the
        // first-order recurrence phi described above.
        auto *PhiR = dyn_cast<VPWidenPHIRecipe>(UI);
        if (PhiR && !PhiR->getRecurrenceDescriptor())
          return true;
      }
      return false;
    };
    if (any_of(*Then1, IsImmovableRecipe))
      continue;

    // Move recipes to the successor region. Walking Then1 in reverse and
    // always inserting at the first non-phi position of Then2 keeps the
    // original relative order of the moved recipes.
    for (VPRecipeBase &ToMove : make_early_inc_range(reverse(*Then1)))
      ToMove.moveBefore(*Then2, Then2->getFirstNonPhi());

    auto *Merge1 = cast<VPBasicBlock>(Then1->getSingleSuccessor());
    auto *Merge2 = cast<VPBasicBlock>(Then2->getSingleSuccessor());

    // Move VPPredInstPHIRecipes from the merge block to the successor region's
    // merge block. Update all users inside the successor region to use the
    // original values.
    for (VPRecipeBase &Phi1ToMove : make_early_inc_range(reverse(*Merge1))) {
      VPValue *PredInst1 =
          cast<VPPredInstPHIRecipe>(&Phi1ToMove)->getOperand(0);
      VPValue *Phi1ToMoveV = Phi1ToMove.getVPValue();

      // Iterate over a snapshot of the users: setOperand() below re-links the
      // user from the phi to PredInst1, which is expected to modify the phi's
      // user list and would otherwise invalidate the iteration.
      SmallVector<VPUser *, 4> Users(Phi1ToMoveV->users());
      for (VPUser *U : Users) {
        auto *UI = dyn_cast<VPRecipeBase>(U);
        if (!UI || UI->getParent() != Then2)
          continue;
        for (unsigned I = 0, E = U->getNumOperands(); I != E; ++I) {
          if (Phi1ToMoveV != U->getOperand(I))
            continue;
          U->setOperand(I, PredInst1);
        }
      }

      Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
    }

    // Finally, remove the first region: route its predecessors directly to
    // the (empty) middle block and detach the region from it.
    for (VPBlockBase *Pred : make_early_inc_range(Region1->getPredecessors())) {
      VPBlockUtils::disconnectBlocks(Pred, Region1);
      VPBlockUtils::connectBlocks(Pred, MiddleBasicBlock);
    }
    VPBlockUtils::disconnectBlocks(Region1, MiddleBasicBlock);
    DeletedRegions.insert(Region1);
  }

  for (VPRegionBlock *ToDelete : DeletedRegions)
    delete ToDelete;

  // The plan changed if and only if at least one region was merged away.
  return !DeletedRegions.empty();
}

View File

@ -30,6 +30,8 @@ struct VPlanTransforms {
SmallPtrSetImpl<Instruction *> &DeadInstructions, ScalarEvolution &SE);
static bool sinkScalarOperands(VPlan &Plan);
static bool mergeReplicateRegions(VPlan &Plan);
};
} // namespace llvm

View File

@ -89,38 +89,28 @@ attributes #0 = { "target-cpu"="knl" }
; FORCE: vector.body:
; FORCE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE4:%.*]] ]
; FORCE-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE4]] ]
; FORCE-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; FORCE-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
; FORCE-NEXT: [[TMP2:%.*]] = icmp ule <2 x i32> [[VEC_IND]], <i32 2, i32 2>
; FORCE-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
; FORCE-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; FORCE: pred.store.if:
; FORCE-NEXT: store i32 [[TMP0]], i32* @b, align 1
; FORCE-NEXT: br label [[PRED_STORE_CONTINUE]]
; FORCE: pred.store.continue:
; FORCE-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
; FORCE-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
; FORCE: pred.store.if1:
; FORCE-NEXT: store i32 [[TMP1]], i32* @b, align 1
; FORCE-NEXT: br label [[PRED_STORE_CONTINUE2]]
; FORCE: pred.store.continue2:
; FORCE-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
; FORCE-NEXT: br i1 [[TMP5]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; FORCE-NEXT: br i1 [[TMP3]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; FORCE: pred.load.if:
; FORCE-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; FORCE-NEXT: store i32 [[TMP0]], i32* @b, align 1
; FORCE-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* @a, i32 0, i32 [[TMP0]]
; FORCE-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 1
; FORCE-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
; FORCE-NEXT: br label [[PRED_LOAD_CONTINUE]]
; FORCE: pred.load.continue:
; FORCE-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, [[PRED_STORE_CONTINUE2]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ]
; FORCE-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ]
; FORCE-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
; FORCE-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4]]
; FORCE: pred.load.if3:
; FORCE: pred.load.if1:
; FORCE-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
; FORCE-NEXT: store i32 [[TMP1]], i32* @b, align 1
; FORCE-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* @a, i32 0, i32 [[TMP1]]
; FORCE-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 1
; FORCE-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP12]], i32 1
; FORCE-NEXT: br label [[PRED_LOAD_CONTINUE4]]
; FORCE: pred.load.continue4:
; FORCE: pred.load.continue2:
; FORCE-NEXT: [[TMP14:%.*]] = phi <2 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], [[PRED_LOAD_IF3]] ]
; FORCE-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2
; FORCE-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>

View File

@ -40,13 +40,13 @@ define void @example1() optsize {
; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 16
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[TMP10:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[TMP9:%.*]]
; CHECK: 9:
; CHECK-NEXT: br i1 undef, label [[TMP10]], label [[TMP9]], [[LOOP2:!llvm.loop !.*]]
; CHECK-NEXT: br i1 undef, label [[TMP10]], label [[TMP9]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK: 10:
; CHECK-NEXT: ret void
;
@ -123,7 +123,7 @@ define void @example2(i32 %n, i32 %x) optsize {
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]]
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[DOT_PREHEADER_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
@ -146,122 +146,74 @@ define void @example2(i32 %n, i32 %x) optsize {
; CHECK-NEXT: [[BROADCAST_SPLAT21:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT20]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY9:%.*]]
; CHECK: vector.body9:
; CHECK-NEXT: [[INDEX14:%.*]] = phi i64 [ 0, [[VECTOR_PH10]] ], [ [[INDEX_NEXT15:%.*]], [[PRED_STORE_CONTINUE51:%.*]] ]
; CHECK-NEXT: [[INDEX14:%.*]] = phi i64 [ 0, [[VECTOR_PH10]] ], [ [[INDEX_NEXT15:%.*]], [[PRED_STORE_CONTINUE37:%.*]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[I_0_LCSSA]], [[INDEX14]]
; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[OFFSET_IDX]], 1
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[OFFSET_IDX]], 2
; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[OFFSET_IDX]], 3
; CHECK-NEXT: [[BROADCAST_SPLATINSERT28:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX14]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT29:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT28]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[VEC_IV:%.*]] = or <4 x i64> [[BROADCAST_SPLAT29]], <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT: [[TMP23:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT21]]
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP23]], i32 0
; CHECK-NEXT: br i1 [[TMP24]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP27:%.*]] = phi i32 [ poison, [[VECTOR_BODY9]] ], [ [[TMP26]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP23]], i32 1
; CHECK-NEXT: br i1 [[TMP28]], label [[PRED_LOAD_IF30:%.*]], label [[PRED_LOAD_CONTINUE31:%.*]]
; CHECK: pred.load.if30:
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP20]]
; CHECK-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE31]]
; CHECK: pred.load.continue31:
; CHECK-NEXT: [[TMP31:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE]] ], [ [[TMP30]], [[PRED_LOAD_IF30]] ]
; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x i1> [[TMP23]], i32 2
; CHECK-NEXT: br i1 [[TMP32]], label [[PRED_LOAD_IF32:%.*]], label [[PRED_LOAD_CONTINUE33:%.*]]
; CHECK: pred.load.if32:
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP21]]
; CHECK-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE33]]
; CHECK: pred.load.continue33:
; CHECK-NEXT: [[TMP35:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE31]] ], [ [[TMP34]], [[PRED_LOAD_IF32]] ]
; CHECK-NEXT: [[TMP36:%.*]] = extractelement <4 x i1> [[TMP23]], i32 3
; CHECK-NEXT: br i1 [[TMP36]], label [[PRED_LOAD_IF34:%.*]], label [[PRED_LOAD_CONTINUE35:%.*]]
; CHECK: pred.load.if34:
; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP22]]
; CHECK-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE35]]
; CHECK: pred.load.continue35:
; CHECK-NEXT: [[TMP39:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE33]] ], [ [[TMP38]], [[PRED_LOAD_IF34]] ]
; CHECK-NEXT: [[TMP40:%.*]] = extractelement <4 x i1> [[TMP23]], i32 0
; CHECK-NEXT: br i1 [[TMP40]], label [[PRED_LOAD_IF36:%.*]], label [[PRED_LOAD_CONTINUE37:%.*]]
; CHECK: pred.load.if36:
; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP42:%.*]] = load i32, i32* [[TMP41]], align 4
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE37]]
; CHECK: pred.load.continue37:
; CHECK-NEXT: [[TMP43:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE35]] ], [ [[TMP42]], [[PRED_LOAD_IF36]] ]
; CHECK-NEXT: [[TMP44:%.*]] = extractelement <4 x i1> [[TMP23]], i32 1
; CHECK-NEXT: br i1 [[TMP44]], label [[PRED_LOAD_IF38:%.*]], label [[PRED_LOAD_CONTINUE39:%.*]]
; CHECK: pred.load.if38:
; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[TMP20]]
; CHECK-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE39]]
; CHECK: pred.load.continue39:
; CHECK-NEXT: [[TMP47:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE37]] ], [ [[TMP46]], [[PRED_LOAD_IF38]] ]
; CHECK-NEXT: [[TMP48:%.*]] = extractelement <4 x i1> [[TMP23]], i32 2
; CHECK-NEXT: br i1 [[TMP48]], label [[PRED_LOAD_IF40:%.*]], label [[PRED_LOAD_CONTINUE41:%.*]]
; CHECK: pred.load.if40:
; CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[TMP21]]
; CHECK-NEXT: [[TMP50:%.*]] = load i32, i32* [[TMP49]], align 4
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE41]]
; CHECK: pred.load.continue41:
; CHECK-NEXT: [[TMP51:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE39]] ], [ [[TMP50]], [[PRED_LOAD_IF40]] ]
; CHECK-NEXT: [[TMP52:%.*]] = extractelement <4 x i1> [[TMP23]], i32 3
; CHECK-NEXT: br i1 [[TMP52]], label [[PRED_LOAD_IF42:%.*]], label [[PRED_LOAD_CONTINUE43:%.*]]
; CHECK: pred.load.if42:
; CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[TMP22]]
; CHECK-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE43]]
; CHECK: pred.load.continue43:
; CHECK-NEXT: [[TMP55:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE41]] ], [ [[TMP54]], [[PRED_LOAD_IF42]] ]
; CHECK-NEXT: [[TMP56:%.*]] = extractelement <4 x i1> [[TMP23]], i32 0
; CHECK-NEXT: br i1 [[TMP56]], label [[PRED_STORE_IF44:%.*]], label [[PRED_STORE_CONTINUE45:%.*]]
; CHECK: pred.store.if44:
; CHECK-NEXT: [[TMP57:%.*]] = and i32 [[TMP43]], [[TMP27]]
; CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[OFFSET_IDX]]
; CHECK-NEXT: store i32 [[TMP57]], i32* [[TMP58]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE45]]
; CHECK: pred.store.continue45:
; CHECK-NEXT: [[TMP59:%.*]] = extractelement <4 x i1> [[TMP23]], i32 1
; CHECK-NEXT: br i1 [[TMP59]], label [[PRED_STORE_IF46:%.*]], label [[PRED_STORE_CONTINUE47:%.*]]
; CHECK: pred.store.if46:
; CHECK-NEXT: [[TMP60:%.*]] = and i32 [[TMP47]], [[TMP31]]
; CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP20]]
; CHECK-NEXT: store i32 [[TMP60]], i32* [[TMP61]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE47]]
; CHECK: pred.store.continue47:
; CHECK-NEXT: [[TMP62:%.*]] = extractelement <4 x i1> [[TMP23]], i32 2
; CHECK-NEXT: br i1 [[TMP62]], label [[PRED_STORE_IF48:%.*]], label [[PRED_STORE_CONTINUE49:%.*]]
; CHECK: pred.store.if48:
; CHECK-NEXT: [[TMP63:%.*]] = and i32 [[TMP51]], [[TMP35]]
; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP21]]
; CHECK-NEXT: store i32 [[TMP63]], i32* [[TMP64]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE49]]
; CHECK: pred.store.continue49:
; CHECK-NEXT: [[TMP65:%.*]] = extractelement <4 x i1> [[TMP23]], i32 3
; CHECK-NEXT: br i1 [[TMP65]], label [[PRED_STORE_IF50:%.*]], label [[PRED_STORE_CONTINUE51]]
; CHECK: pred.store.if50:
; CHECK-NEXT: [[TMP66:%.*]] = and i32 [[TMP55]], [[TMP39]]
; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP22]]
; CHECK-NEXT: store i32 [[TMP66]], i32* [[TMP67]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE51]]
; CHECK: pred.store.continue51:
; CHECK-NEXT: [[TMP20:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT21]]
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP20]], i32 0
; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF30:%.*]], label [[PRED_STORE_CONTINUE31:%.*]]
; CHECK: pred.store.if30:
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4
; CHECK-NEXT: [[TMP26:%.*]] = and i32 [[TMP25]], [[TMP23]]
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[OFFSET_IDX]]
; CHECK-NEXT: store i32 [[TMP26]], i32* [[TMP27]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE31]]
; CHECK: pred.store.continue31:
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP20]], i32 1
; CHECK-NEXT: br i1 [[TMP28]], label [[PRED_STORE_IF32:%.*]], label [[PRED_STORE_CONTINUE33:%.*]]
; CHECK: pred.store.if32:
; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[OFFSET_IDX]], 1
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP29]]
; CHECK-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[TMP29]]
; CHECK-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4
; CHECK-NEXT: [[TMP34:%.*]] = and i32 [[TMP33]], [[TMP31]]
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP29]]
; CHECK-NEXT: store i32 [[TMP34]], i32* [[TMP35]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE33]]
; CHECK: pred.store.continue33:
; CHECK-NEXT: [[TMP36:%.*]] = extractelement <4 x i1> [[TMP20]], i32 2
; CHECK-NEXT: br i1 [[TMP36]], label [[PRED_STORE_IF34:%.*]], label [[PRED_STORE_CONTINUE35:%.*]]
; CHECK: pred.store.if34:
; CHECK-NEXT: [[TMP37:%.*]] = add i64 [[OFFSET_IDX]], 2
; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP37]]
; CHECK-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4
; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[TMP37]]
; CHECK-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4
; CHECK-NEXT: [[TMP42:%.*]] = and i32 [[TMP41]], [[TMP39]]
; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP37]]
; CHECK-NEXT: store i32 [[TMP42]], i32* [[TMP43]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE35]]
; CHECK: pred.store.continue35:
; CHECK-NEXT: [[TMP44:%.*]] = extractelement <4 x i1> [[TMP20]], i32 3
; CHECK-NEXT: br i1 [[TMP44]], label [[PRED_STORE_IF36:%.*]], label [[PRED_STORE_CONTINUE37]]
; CHECK: pred.store.if36:
; CHECK-NEXT: [[TMP45:%.*]] = add i64 [[OFFSET_IDX]], 3
; CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP45]]
; CHECK-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4
; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[TMP45]]
; CHECK-NEXT: [[TMP49:%.*]] = load i32, i32* [[TMP48]], align 4
; CHECK-NEXT: [[TMP50:%.*]] = and i32 [[TMP49]], [[TMP47]]
; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP45]]
; CHECK-NEXT: store i32 [[TMP50]], i32* [[TMP51]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE37]]
; CHECK: pred.store.continue37:
; CHECK-NEXT: [[INDEX_NEXT15]] = add i64 [[INDEX14]], 4
; CHECK-NEXT: [[TMP68:%.*]] = icmp eq i64 [[INDEX_NEXT15]], [[N_VEC13]]
; CHECK-NEXT: br i1 [[TMP68]], label [[MIDDLE_BLOCK7:%.*]], label [[VECTOR_BODY9]], [[LOOP5:!llvm.loop !.*]]
; CHECK-NEXT: [[TMP52:%.*]] = icmp eq i64 [[INDEX_NEXT15]], [[N_VEC13]]
; CHECK-NEXT: br i1 [[TMP52]], label [[MIDDLE_BLOCK7:%.*]], label [[VECTOR_BODY9]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: middle.block7:
; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH8]]
; CHECK: scalar.ph8:
; CHECK-NEXT: br label [[DOTLR_PH:%.*]]
; CHECK: .lr.ph5:
; CHECK-NEXT: br i1 undef, label [[DOT_PREHEADER_CRIT_EDGE]], label [[DOTLR_PH5]], [[LOOP6:!llvm.loop !.*]]
; CHECK-NEXT: br i1 undef, label [[DOT_PREHEADER_CRIT_EDGE]], label [[DOTLR_PH5]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: .lr.ph:
; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], [[LOOP7:!llvm.loop !.*]]
; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: ._crit_edge.loopexit:
; CHECK-NEXT: br label [[DOT_CRIT_EDGE]]
; CHECK: ._crit_edge:
@ -324,86 +276,62 @@ define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE27:%.*]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE21:%.*]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT15:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT14]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[VEC_IV:%.*]] = or <4 x i64> [[BROADCAST_SPLAT15]], <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK: pred.load.if:
; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[NEXT_GEP10]], align 16
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP7:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1
; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF16:%.*]], label [[PRED_LOAD_CONTINUE17:%.*]]
; CHECK: pred.load.if16:
; CHECK-NEXT: store i32 [[TMP6]], i32* [[NEXT_GEP]], align 16
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1
; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17:%.*]]
; CHECK: pred.store.if16:
; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[INDEX]], 1
; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP8]]
; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[INDEX]], 1
; CHECK-NEXT: [[NEXT_GEP11:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[NEXT_GEP11]], align 16
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE17]]
; CHECK: pred.load.continue17:
; CHECK-NEXT: [[TMP11:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE]] ], [ [[TMP10]], [[PRED_LOAD_IF16]] ]
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2
; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_LOAD_IF18:%.*]], label [[PRED_LOAD_CONTINUE19:%.*]]
; CHECK: pred.load.if18:
; CHECK-NEXT: store i32 [[TMP10]], i32* [[NEXT_GEP7]], align 16
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE17]]
; CHECK: pred.store.continue17:
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2
; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF18:%.*]], label [[PRED_STORE_CONTINUE19:%.*]]
; CHECK: pred.store.if18:
; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[INDEX]], 2
; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP12]]
; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[INDEX]], 2
; CHECK-NEXT: [[NEXT_GEP12:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP13]]
; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[NEXT_GEP12]], align 16
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE19]]
; CHECK: pred.load.continue19:
; CHECK-NEXT: [[TMP15:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE17]] ], [ [[TMP14]], [[PRED_LOAD_IF18]] ]
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3
; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_LOAD_IF20:%.*]], label [[PRED_LOAD_CONTINUE21:%.*]]
; CHECK: pred.load.if20:
; CHECK-NEXT: store i32 [[TMP14]], i32* [[NEXT_GEP8]], align 16
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE19]]
; CHECK: pred.store.continue19:
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3
; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF20:%.*]], label [[PRED_STORE_CONTINUE21]]
; CHECK: pred.store.if20:
; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[INDEX]], 3
; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP16]]
; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[INDEX]], 3
; CHECK-NEXT: [[NEXT_GEP13:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP17]]
; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[NEXT_GEP13]], align 16
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE21]]
; CHECK: pred.load.continue21:
; CHECK-NEXT: [[TMP19:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE19]] ], [ [[TMP18]], [[PRED_LOAD_IF20]] ]
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[INDEX]]
; CHECK-NEXT: store i32 [[TMP7]], i32* [[NEXT_GEP]], align 16
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1
; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF22:%.*]], label [[PRED_STORE_CONTINUE23:%.*]]
; CHECK: pred.store.if22:
; CHECK-NEXT: [[TMP22:%.*]] = or i64 [[INDEX]], 1
; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP22]]
; CHECK-NEXT: store i32 [[TMP11]], i32* [[NEXT_GEP7]], align 16
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE23]]
; CHECK: pred.store.continue23:
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2
; CHECK-NEXT: br i1 [[TMP23]], label [[PRED_STORE_IF24:%.*]], label [[PRED_STORE_CONTINUE25:%.*]]
; CHECK: pred.store.if24:
; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[INDEX]], 2
; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP24]]
; CHECK-NEXT: store i32 [[TMP15]], i32* [[NEXT_GEP8]], align 16
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE25]]
; CHECK: pred.store.continue25:
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3
; CHECK-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF26:%.*]], label [[PRED_STORE_CONTINUE27]]
; CHECK: pred.store.if26:
; CHECK-NEXT: [[TMP26:%.*]] = or i64 [[INDEX]], 3
; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP26]]
; CHECK-NEXT: store i32 [[TMP19]], i32* [[NEXT_GEP9]], align 16
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE27]]
; CHECK: pred.store.continue27:
; CHECK-NEXT: store i32 [[TMP18]], i32* [[NEXT_GEP9]], align 16
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE21]]
; CHECK: pred.store.continue21:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]]
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[DOTLR_PH:%.*]]
; CHECK: .lr.ph:
; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], [[LOOP9:!llvm.loop !.*]]
; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: ._crit_edge.loopexit:
; CHECK-NEXT: br label [[DOT_CRIT_EDGE]]
; CHECK: ._crit_edge:
@ -487,13 +415,13 @@ define void @example23b(i16* noalias nocapture %src, i32* noalias nocapture %dst
; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]]
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[TMP7:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[TMP6:%.*]]
; CHECK: 6:
; CHECK-NEXT: br i1 undef, label [[TMP7]], label [[TMP6]], [[LOOP11:!llvm.loop !.*]]
; CHECK-NEXT: br i1 undef, label [[TMP7]], label [[TMP6]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK: 7:
; CHECK-NEXT: ret void
;
@ -524,95 +452,71 @@ define void @example23c(i16* noalias nocapture %src, i32* noalias nocapture %dst
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE22:%.*]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE16:%.*]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = or <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i64> [[INDUCTION]], <i64 257, i64 257, i64 257, i64 257>
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK: pred.load.if:
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i16, i16* [[SRC:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = load i16, i16* [[NEXT_GEP]], align 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP4:%.*]] = phi i16 [ poison, [[VECTOR_BODY]] ], [ [[TMP3]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
; CHECK: pred.load.if11:
; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[INDEX]], 1
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i16, i16* [[SRC]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP7:%.*]] = load i16, i16* [[NEXT_GEP4]], align 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE12]]
; CHECK: pred.load.continue12:
; CHECK-NEXT: [[TMP8:%.*]] = phi i16 [ poison, [[PRED_LOAD_CONTINUE]] ], [ [[TMP7]], [[PRED_LOAD_IF11]] ]
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]]
; CHECK: pred.load.if13:
; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[INDEX]], 2
; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i16, i16* [[SRC]], i64 [[TMP10]]
; CHECK-NEXT: [[TMP11:%.*]] = load i16, i16* [[NEXT_GEP5]], align 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE14]]
; CHECK: pred.load.continue14:
; CHECK-NEXT: [[TMP12:%.*]] = phi i16 [ poison, [[PRED_LOAD_CONTINUE12]] ], [ [[TMP11]], [[PRED_LOAD_IF13]] ]
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]]
; CHECK: pred.load.if15:
; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[INDEX]], 3
; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i16, i16* [[SRC]], i64 [[TMP14]]
; CHECK-NEXT: [[TMP15:%.*]] = load i16, i16* [[NEXT_GEP6]], align 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE16]]
; CHECK: pred.load.continue16:
; CHECK-NEXT: [[TMP16:%.*]] = phi i16 [ poison, [[PRED_LOAD_CONTINUE14]] ], [ [[TMP15]], [[PRED_LOAD_IF15]] ]
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i32, i32* [[DST:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP18:%.*]] = zext i16 [[TMP4]] to i32
; CHECK-NEXT: [[TMP19:%.*]] = shl nuw nsw i32 [[TMP18]], 7
; CHECK-NEXT: store i32 [[TMP19]], i32* [[NEXT_GEP7]], align 4
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i16, i16* [[SRC:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = load i16, i16* [[NEXT_GEP]], align 2
; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP3]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i32 [[TMP4]], 7
; CHECK-NEXT: store i32 [[TMP5]], i32* [[NEXT_GEP7]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_STORE_IF17:%.*]], label [[PRED_STORE_CONTINUE18:%.*]]
; CHECK: pred.store.if17:
; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 1
; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i32, i32* [[DST]], i64 [[TMP21]]
; CHECK-NEXT: [[TMP22:%.*]] = zext i16 [[TMP8]] to i32
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]]
; CHECK: pred.store.if11:
; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[INDEX]], 1
; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i32, i32* [[DST]], i64 [[TMP7]]
; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[INDEX]], 1
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i16, i16* [[SRC]], i64 [[TMP8]]
; CHECK-NEXT: [[TMP9:%.*]] = load i16, i16* [[NEXT_GEP4]], align 2
; CHECK-NEXT: [[TMP10:%.*]] = zext i16 [[TMP9]] to i32
; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i32 [[TMP10]], 7
; CHECK-NEXT: store i32 [[TMP11]], i32* [[NEXT_GEP8]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]]
; CHECK: pred.store.continue12:
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]]
; CHECK: pred.store.if13:
; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[INDEX]], 2
; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i32, i32* [[DST]], i64 [[TMP13]]
; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[INDEX]], 2
; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i16, i16* [[SRC]], i64 [[TMP14]]
; CHECK-NEXT: [[TMP15:%.*]] = load i16, i16* [[NEXT_GEP5]], align 2
; CHECK-NEXT: [[TMP16:%.*]] = zext i16 [[TMP15]] to i32
; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i32 [[TMP16]], 7
; CHECK-NEXT: store i32 [[TMP17]], i32* [[NEXT_GEP9]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]]
; CHECK: pred.store.continue14:
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16]]
; CHECK: pred.store.if15:
; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[INDEX]], 3
; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i32, i32* [[DST]], i64 [[TMP19]]
; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[INDEX]], 3
; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i16, i16* [[SRC]], i64 [[TMP20]]
; CHECK-NEXT: [[TMP21:%.*]] = load i16, i16* [[NEXT_GEP6]], align 2
; CHECK-NEXT: [[TMP22:%.*]] = zext i16 [[TMP21]] to i32
; CHECK-NEXT: [[TMP23:%.*]] = shl nuw nsw i32 [[TMP22]], 7
; CHECK-NEXT: store i32 [[TMP23]], i32* [[NEXT_GEP8]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE18]]
; CHECK: pred.store.continue18:
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
; CHECK-NEXT: br i1 [[TMP24]], label [[PRED_STORE_IF19:%.*]], label [[PRED_STORE_CONTINUE20:%.*]]
; CHECK: pred.store.if19:
; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[INDEX]], 2
; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i32, i32* [[DST]], i64 [[TMP25]]
; CHECK-NEXT: [[TMP26:%.*]] = zext i16 [[TMP12]] to i32
; CHECK-NEXT: [[TMP27:%.*]] = shl nuw nsw i32 [[TMP26]], 7
; CHECK-NEXT: store i32 [[TMP27]], i32* [[NEXT_GEP9]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE20]]
; CHECK: pred.store.continue20:
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
; CHECK-NEXT: br i1 [[TMP28]], label [[PRED_STORE_IF21:%.*]], label [[PRED_STORE_CONTINUE22]]
; CHECK: pred.store.if21:
; CHECK-NEXT: [[TMP29:%.*]] = or i64 [[INDEX]], 3
; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i32, i32* [[DST]], i64 [[TMP29]]
; CHECK-NEXT: [[TMP30:%.*]] = zext i16 [[TMP16]] to i32
; CHECK-NEXT: [[TMP31:%.*]] = shl nuw nsw i32 [[TMP30]], 7
; CHECK-NEXT: store i32 [[TMP31]], i32* [[NEXT_GEP10]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE22]]
; CHECK: pred.store.continue22:
; CHECK-NEXT: store i32 [[TMP23]], i32* [[NEXT_GEP10]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]]
; CHECK: pred.store.continue16:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
; CHECK-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP12:!llvm.loop !.*]]
; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[TMP34:%.*]], label [[SCALAR_PH]]
; CHECK-NEXT: br i1 true, label [[TMP26:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[TMP33:%.*]]
; CHECK: 33:
; CHECK-NEXT: br i1 undef, label [[TMP34]], label [[TMP33]], [[LOOP13:!llvm.loop !.*]]
; CHECK: 34:
; CHECK-NEXT: br label [[TMP25:%.*]]
; CHECK: 25:
; CHECK-NEXT: br i1 undef, label [[TMP26]], label [[TMP25]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK: 26:
; CHECK-NEXT: ret void
;
br label %1

View File

@ -28,7 +28,7 @@ target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
;CHECK-NEXT: %[[VMASK:.+]] = icmp ugt <8 x i32> %[[VECIND]], %{{broadcast.splat*}}
;CHECK-NEXT: %{{.*}} = shl nuw nsw <8 x i32> %[[VECIND]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
;CHECK-NEXT: %[[M:.+]] = extractelement <8 x i1> %[[VMASK]], i32 0
;CHECK-NEXT: br i1 %[[M]], label %pred.load.if, label %pred.load.continue
;CHECK-NEXT: br i1 %[[M]], label %pred.store.if, label %pred.store.continue
;CHECK-NOT: %{{.+}} = load <16 x i8>, <16 x i8>* %{{.*}}, align 1
define dso_local void @masked_strided(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr {

View File

@ -64,14 +64,18 @@ for.end:
; instead scalarized if Cost-Model so decided as part of its
; sink-scalar-operands optimization for predicated instructions.
;
; SINK-GATHER: vector.body:
; SINK-GATHER: pred.load.if:
; SINK-GATHER: %[[T0:.+]] = load i32, i32* %{{.*}}, align 4
; SINK-GATHER: pred.load.continue:
; SINK-GATHER: %[[T1:.+]] = phi i32 [ poison, %vector.body ], [ %[[T0]], %pred.load.if ]
; SINK-GATHER: pred.udiv.if:
; SINK-GATHER: %{{.*}} = udiv i32 %[[T1]], %{{.*}}
; SINK-GATHER: pred.udiv.continue:
; SINK-GATHER-LABEL: @scalarize_and_sink_gather
; SINK-GATHER: vector.body:
; SINK-GATHER-LABEL: pred.udiv.if: ; preds = %vector.body
; SINK-GATHER-NEXT: [[EXT:%.+]] = extractelement <8 x i64> {{.*}}, i32 0
; SINK-GATHER-NEXT: [[GEP:%.+]] = getelementptr inbounds i32, i32* %a, i64 [[EXT]]
; SINK-GATHER-NEXT: [[LV:%.+]] = load i32, i32* [[GEP]], align 4
; SINK-GATHER-NEXT: [[UDIV:%.+]] = udiv i32 [[LV]], %x
; SINK-GATHER-NEXT: [[INS:%.+]] = insertelement <8 x i32> poison, i32 [[UDIV]], i32 0
; SINK-GATHER-NEXT: br label %pred.udiv.continue
; SINK-GATHER: pred.udiv.continue:
; SINK-GATHER-NEXT: phi i32 [ poison, %vector.body ], [ [[LV]], %pred.udiv.if ]
; SINK-GATHER-NEXT: phi <8 x i32> [ poison, %vector.body ], [ [[INS]], %pred.udiv.if ]
define i32 @scalarize_and_sink_gather(i32* %a, i1 %c, i32 %x, i64 %n) {
entry:
br label %for.body

View File

@ -90,40 +90,26 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, i32* %ptr) optsize {
; CHECK-EMPTY:
; CHECK-NEXT: loop.0:
; CHECK-NEXT: WIDEN ir<%recur.next> = sext ir<%y>
; CHECK-NEXT: Successor(s): pred.srem
; CHECK-EMPTY:
; CHECK-NEXT: <xVFxUF> pred.srem: {
; CHECK-NEXT: pred.srem.entry:
; CHECK-NEXT: BRANCH-ON-MASK vp<%3>
; CHECK-NEXT: Successor(s): pred.srem.if, pred.srem.continue
; CHECK-NEXT: CondBit: vp<%3> (loop)
; CHECK-EMPTY:
; CHECK-NEXT: pred.srem.if:
; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%recur>, ir<%x>
; CHECK-NEXT: Successor(s): pred.srem.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.srem.continue:
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%6> = ir<%rem>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): loop.0.split
; CHECK-EMPTY:
; CHECK-NEXT: loop.0.split:
; CHECK-NEXT: Successor(s): pred.store
; CHECK-NEXT: Successor(s): pred.store
; CHECK-EMPTY:
; CHECK-NEXT: <xVFxUF> pred.store: {
; CHECK-NEXT: pred.store.entry:
; CHECK-NEXT: BRANCH-ON-MASK vp<%3>
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
; CHECK-NEXT: CondBit: vp<%3> (loop)
; CHECK-NEXT: pred.store.entry:
; CHECK-NEXT: BRANCH-ON-MASK vp<%3>
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
; CHECK-NEXT: CondBit: vp<%3> (loop)
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.if:
; CHECK-NEXT: REPLICATE ir<%add> = add vp<%6>, ir<%recur.next>
; CHECK-NEXT: pred.store.if:
; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%recur>, ir<%x>
; CHECK-NEXT: REPLICATE ir<%add> = add ir<%rem>, ir<%recur.next>
; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, ir<%iv>
; CHECK-NEXT: REPLICATE store ir<%add>, ir<%gep>
; CHECK-NEXT: Successor(s): pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.continue:
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%9> = ir<%rem>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): loop.1
@ -241,45 +227,30 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, i8*
; CHECK-EMPTY:
; CHECK-NEXT: loop.1:
; CHECK-NEXT: WIDEN ir<%conv> = sext vp<%6>
; CHECK-NEXT: Successor(s): pred.srem
; CHECK-EMPTY:
; CHECK-NEXT: <xVFxUF> pred.srem: {
; CHECK-NEXT: pred.srem.entry:
; CHECK-NEXT: BRANCH-ON-MASK vp<%3>
; CHECK-NEXT: Successor(s): pred.srem.if, pred.srem.continue
; CHECK-NEXT: CondBit: vp<%3> (loop)
; CHECK-EMPTY:
; CHECK-NEXT: pred.srem.if:
; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%0>, ir<%x> (S->V)
; CHECK-NEXT: Successor(s): pred.srem.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.srem.continue:
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%9> = ir<%rem>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): loop.1.split
; CHECK-EMPTY:
; CHECK-NEXT: loop.1.split:
; CHECK: loop.1.split:
; CHECK-NEXT: Successor(s): pred.load
; CHECK-EMPTY:
; CHECK-NEXT: <xVFxUF> pred.load: {
; CHECK: <xVFxUF> pred.load: {
; CHECK-NEXT: pred.load.entry:
; CHECK-NEXT: BRANCH-ON-MASK vp<%3>
; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue
; CHECK-NEXT: CondBit: vp<%3> (loop)
; CHECK-EMPTY:
; CHECK-NEXT: pred.load.if:
; CHECK: pred.load.if:
; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%0>, ir<%x> (S->V)
; CHECK-NEXT: REPLICATE ir<%lv.2> = load ir<%gep> (S->V)
; CHECK-NEXT: Successor(s): pred.load.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.load.continue:
; CHECK: pred.load.continue:
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%10> = ir<%rem>
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%11> = ir<%lv.2>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): loop.2
; CHECK-EMPTY:
; CHECK-NEXT: loop.2:
; CHECK-NEXT: WIDEN ir<%add.1> = add ir<%conv>, vp<%9>
; CHECK: loop.2:
; CHECK-NEXT: WIDEN ir<%add.1> = add ir<%conv>, vp<%10>
; CHECK-NEXT: WIDEN ir<%conv.lv.2> = sext vp<%11>
; CHECK-NEXT: WIDEN ir<%add> = add ir<%add.1>, ir<%conv.lv.2>
; CHECK-NEXT: No successors
@ -338,21 +309,6 @@ define void @sink_replicate_region_after_replicate_region(i32* %ptr, i32 %x, i8
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%6> = ir<%rem>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK: <xVFxUF> pred.sdiv: {
; CHECK-NEXT: pred.sdiv.entry:
; CHECK-NEXT: BRANCH-ON-MASK vp<%3>
; CHECK-NEXT: Successor(s): pred.sdiv.if, pred.sdiv.continue
; CHECK-NEXT: CondBit: vp<%3> (loop)
; CHECK-EMPTY:
; CHECK-NEXT: pred.sdiv.if:
; CHECK-NEXT: REPLICATE ir<%rem.div> = sdiv ir<20>, vp<%6>
; CHECK-NEXT: Successor(s): pred.sdiv.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.sdiv.continue:
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%8> = ir<%rem.div>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): loop.1.split
; CHECK-EMPTY:
; CHECK-NEXT: loop.1.split:
@ -365,15 +321,18 @@ define void @sink_replicate_region_after_replicate_region(i32* %ptr, i32 %x, i8
; CHECK-NEXT: CondBit: vp<%3> (loop)
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.if:
; CHECK-NEXT: REPLICATE ir<%rem.div> = sdiv ir<20>, vp<%6>
; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, ir<%iv>
; CHECK-NEXT: REPLICATE store vp<%8>, ir<%gep>
; CHECK-NEXT: REPLICATE store ir<%rem.div>, ir<%gep>
; CHECK-NEXT: Successor(s): pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.continue:
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%10> = ir<%rem.div>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK: loop.2:
; CHECK-NEXT: Successor(s): loop.2
; CHECK-EMPTY:
; CHECK-NEXT: loop.2:
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;

View File

@ -5,9 +5,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Test predication of non-void instructions, specifically (i) that these
; instructions permit vectorization and (ii) the creation of an insertelement
; and a Phi node. We check the full 2-element sequence for the first
; instruction; For the rest we'll just make sure they get predicated based
; on the code generated for the first element.
; and a Phi node. We check the full 2-element sequence for all predicated instructions.
define void @test(i32* nocapture %asd, i32* nocapture %aud,
i32* nocapture %asr, i32* nocapture %aur) {
entry:
@ -25,53 +23,50 @@ for.cond.cleanup: ; preds = %if.end
; CHECK: %[[SDA1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
; CHECK: %[[SD0:[a-zA-Z0-9]+]] = sdiv i32 %[[SDA0]], %[[SDA1]]
; CHECK: %[[SD1:[a-zA-Z0-9]+]] = insertelement <2 x i32> poison, i32 %[[SD0]], i32 0
; CHECK: br label %[[ESD]]
; CHECK: [[ESD]]:
; CHECK: %[[SDR:[a-zA-Z0-9]+]] = phi <2 x i32> [ poison, %vector.body ], [ %[[SD1]], %[[CSD]] ]
; CHECK: %[[SDEEH:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 1
; CHECK: br i1 %[[SDEEH]], label %[[CSDH:[a-zA-Z0-9.]+]], label %[[ESDH:[a-zA-Z0-9.]+]]
; CHECK: [[CSDH]]:
; CHECK: %[[SDA0H:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1
; CHECK: %[[SDA1H:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1
; CHECK: %[[SD0H:[a-zA-Z0-9]+]] = sdiv i32 %[[SDA0H]], %[[SDA1H]]
; CHECK: %[[SD1H:[a-zA-Z0-9]+]] = insertelement <2 x i32> %[[SDR]], i32 %[[SD0H]], i32 1
; CHECK: br label %[[ESDH]]
; CHECK: [[ESDH]]:
; CHECK: %{{.*}} = phi <2 x i32> [ %[[SDR]], %[[ESD]] ], [ %[[SD1H]], %[[CSDH]] ]
; CHECK: %[[UDEE:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 0
; CHECK: br i1 %[[UDEE]], label %[[CUD:[a-zA-Z0-9.]+]], label %[[EUD:[a-zA-Z0-9.]+]]
; CHECK: [[CUD]]:
; CHECK: %[[UDA0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
; CHECK: %[[UDA1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
; CHECK: %[[UD0:[a-zA-Z0-9]+]] = udiv i32 %[[UDA0]], %[[UDA1]]
; CHECK: %[[UD1:[a-zA-Z0-9]+]] = insertelement <2 x i32> poison, i32 %[[UD0]], i32 0
; CHECK: br label %[[EUD]]
; CHECK: [[EUD]]:
; CHECK: %{{.*}} = phi <2 x i32> [ poison, %{{.*}} ], [ %[[UD1]], %[[CUD]] ]
; CHECK: %[[SREE:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 0
; CHECK: br i1 %[[SREE]], label %[[CSR:[a-zA-Z0-9.]+]], label %[[ESR:[a-zA-Z0-9.]+]]
; CHECK: [[CSR]]:
; CHECK: %[[SRA0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
; CHECK: %[[SRA1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
; CHECK: %[[SR0:[a-zA-Z0-9]+]] = srem i32 %[[SRA0]], %[[SRA1]]
; CHECK: %[[SR1:[a-zA-Z0-9]+]] = insertelement <2 x i32> poison, i32 %[[SR0]], i32 0
; CHECK: br label %[[ESR]]
; CHECK: [[ESR]]:
; CHECK: %{{.*}} = phi <2 x i32> [ poison, %{{.*}} ], [ %[[SR1]], %[[CSR]] ]
; CHECK: %[[UREE:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 0
; CHECK: br i1 %[[UREE]], label %[[CUR:[a-zA-Z0-9.]+]], label %[[EUR:[a-zA-Z0-9.]+]]
; CHECK: [[CUR]]:
; CHECK: %[[URA0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
; CHECK: %[[URA1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
; CHECK: %[[UR0:[a-zA-Z0-9]+]] = urem i32 %[[URA0]], %[[URA1]]
; CHECK: %[[UR1:[a-zA-Z0-9]+]] = insertelement <2 x i32> poison, i32 %[[UR0]], i32 0
; CHECK: br label %[[EUR]]
; CHECK: [[EUR]]:
; CHECK: %{{.*}} = phi <2 x i32> [ poison, %{{.*}} ], [ %[[UR1]], %[[CUR]] ]
; CHECK: br label %[[ESD]]
; CHECK: [[ESD]]:
; CHECK: [[SDR:%[a-zA-Z0-9]+]] = phi <2 x i32> [ poison, %vector.body ], [ %[[SD1]], %[[CSD]] ]
; CHECK: [[UDR:%.+]] = phi <2 x i32> [ poison, %{{.*}} ], [ %[[UD1]], %[[CSD]] ]
; CHECK: [[SRR:%.+]] = phi <2 x i32> [ poison, %{{.*}} ], [ %[[SR1]], %[[CSD]] ]
; CHECK: [[URR:%.+]] = phi <2 x i32> [ poison, %{{.*}} ], [ %[[UR1]], %[[CSD]] ]
; CHECK: %[[SDEEH:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 1
; CHECK: br i1 %[[SDEEH]], label %[[CSDH:[a-zA-Z0-9.]+]], label %[[ESDH:[a-zA-Z0-9.]+]]
; CHECK: [[CSDH]]:
; CHECK: %[[SD1_A0H:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1
; CHECK: %[[SD1_A1H:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1
; CHECK: %[[SD1_0H:[a-zA-Z0-9]+]] = sdiv i32 %[[SD1_A0H]], %[[SD1_A1H]]
; CHECK: %[[SD1_1H:[a-zA-Z0-9]+]] = insertelement <2 x i32> [[SDR]], i32 %[[SD1_0H]], i32 1
; CHECK: %[[UD1_A0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1
; CHECK: %[[UD1_A1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1
; CHECK: %[[UD1_0:[a-zA-Z0-9]+]] = udiv i32 %[[UD1_A0]], %[[UD1_A1]]
; CHECK: %[[UD1_1:[a-zA-Z0-9]+]] = insertelement <2 x i32> [[UDR]], i32 %[[UD1_0]], i32 1
; CHECK: %[[SR1_A0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1
; CHECK: %[[SR1_A1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1
; CHECK: %[[SR1_0:[a-zA-Z0-9]+]] = srem i32 %[[SR1_A0]], %[[SR1_A1]]
; CHECK: %[[SR1_1:[a-zA-Z0-9]+]] = insertelement <2 x i32> [[SRR]], i32 %[[SR1_0]], i32 1
; CHECK: %[[UR1_A0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1
; CHECK: %[[UR1_A1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1
; CHECK: %[[UR1_0:[a-zA-Z0-9]+]] = urem i32 %[[UR1_A0]], %[[UR1_A1]]
; CHECK: %[[UR1_1:[a-zA-Z0-9]+]] = insertelement <2 x i32> [[URR]], i32 %[[UR1_0]], i32 1
; CHECK: br label %[[ESDH]]
; CHECK: [[ESDH]]:
; CHECK: [[SDR1:%[a-zA-Z0-9]+]] = phi <2 x i32> [ [[SDR]], %[[ESD]] ], [ %[[SD1_1H]], %[[CSDH]] ]
; CHECK: [[UDR1:%.+]] = phi <2 x i32> [ [[UDR]], %{{.*}} ], [ %[[UD1_1]], %[[CSDH]] ]
; CHECK: [[SRR1:%.+]] = phi <2 x i32> [ [[SRR]], %{{.*}} ], [ %[[SR1_1]], %[[CSDH]] ]
; CHECK: [[URR1:%.+]] = phi <2 x i32> [ [[URR]], %{{.*}} ], [ %[[UR1_1]], %[[CSDH]] ]
;
for.body: ; preds = %if.end, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end ]
%isd = getelementptr inbounds i32, i32* %asd, i64 %indvars.iv

File diff suppressed because it is too large Load Diff

View File

@ -32,7 +32,7 @@ entry:
define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B) {
; CHECK-LABEL: @reduction_sum(
; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP47:%.*]], %pred.load.continue14 ]
; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP47:%.*]], %pred.load.continue6 ]
; CHECK: [[TMP44:%.*]] = add <4 x i32> [[VEC_PHI]], [[VEC_IND:%.*]]
; CHECK: [[TMP45:%.*]] = add <4 x i32> [[TMP44]], [[TMP23:%.*]]
; CHECK: [[TMP46:%.*]] = add <4 x i32> [[TMP45]], [[TMP43:%.*]]
@ -65,7 +65,7 @@ entry:
define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B) {
; CHECK-LABEL: @reduction_prod(
; CHECK: vector.body:
; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1, i32 1, i32 1, i32 1>, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue14 ]
; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1, i32 1, i32 1, i32 1>, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue6 ]
; CHECK: [[TMP44:%.*]] = mul <4 x i32> [[VEC_PHI]], [[TMP23:%.*]]
; CHECK: [[TMP45:%.*]] = mul <4 x i32> [[TMP44]], [[TMP43:%.*]]
; CHECK: [[TMP46]] = select <4 x i1> [[TMP3:%.*]], <4 x i32> [[TMP45]], <4 x i32> [[VEC_PHI]]
@ -96,7 +96,7 @@ entry:
define i32 @reduction_and(i32* nocapture %A, i32* nocapture %B) {
; CHECK-LABEL: @reduction_and(
; CHECK: vector.body:
; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 -1, i32 -1, i32 -1, i32 -1>, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue14 ]
; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 -1, i32 -1, i32 -1, i32 -1>, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue6 ]
; CHECK: [[TMP44:%.*]] = and <4 x i32> [[VEC_PHI]], [[TMP23:%.*]]
; CHECK: [[TMP45:%.*]] = and <4 x i32> [[TMP44]], [[TMP43:%.*]]
; CHECK: [[TMP46]] = select <4 x i1> [[TMP3:%.*]], <4 x i32> [[TMP45]], <4 x i32> [[VEC_PHI]]
@ -127,7 +127,7 @@ for.end: ; preds = %for.body, %entry
define i32 @reduction_or(i32* nocapture %A, i32* nocapture %B) {
; CHECK-LABEL: @reduction_or(
; CHECK: vector.body:
; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue14 ]
; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue6 ]
; CHECK: [[TMP45:%.*]] = select <4 x i1> [[TMP3:%.*]], <4 x i32> [[TMP44:%.*]], <4 x i32> zeroinitializer
; CHECK: [[TMP46]] = or <4 x i32> [[VEC_PHI]], [[TMP45]]
; CHECK: middle.block:
@ -157,7 +157,7 @@ for.end: ; preds = %for.body, %entry
define i32 @reduction_xor(i32* nocapture %A, i32* nocapture %B) {
; CHECK-LABEL: @reduction_xor(
; CHECK: vector.body:
; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue14 ]
; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue6 ]
; CHECK: [[TMP45:%.*]] = select <4 x i1> [[TMP3:%.*]], <4 x i32> [[TMP44:%.*]], <4 x i32> zeroinitializer
; CHECK: [[TMP46]] = xor <4 x i32> [[VEC_PHI]], [[TMP45]]
; CHECK: middle.block:
@ -187,7 +187,7 @@ for.end: ; preds = %for.body, %entry
define float @reduction_fadd(float* nocapture %A, float* nocapture %B) {
; CHECK-LABEL: @reduction_fadd(
; CHECK: vector.body:
; CHECK: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue14 ]
; CHECK: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue6 ]
; CHECK: [[TMP44:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[TMP23:%.*]]
; CHECK: [[TMP45:%.*]] = fadd fast <4 x float> [[TMP44]], [[TMP43:%.*]]
; CHECK: [[TMP46]] = select <4 x i1> [[TMP3:%.*]], <4 x float> [[TMP45]], <4 x float> [[VEC_PHI]]
@ -218,7 +218,7 @@ for.end: ; preds = %for.body, %entry
define float @reduction_fmul(float* nocapture %A, float* nocapture %B) {
; CHECK-LABEL: @reduction_fmul(
; CHECK: vector.body:
; CHECK: [[VEC_PHI:%.*]] = phi <4 x float> [ <float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue14 ]
; CHECK: [[VEC_PHI:%.*]] = phi <4 x float> [ <float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue6 ]
; CHECK: [[TMP44:%.*]] = fmul fast <4 x float> [[VEC_PHI]], [[TMP23:%.*]]
; CHECK: [[TMP45:%.*]] = fmul fast <4 x float> [[TMP44]], [[TMP43:%.*]]
; CHECK: [[TMP46]] = select <4 x i1> [[TMP3:%.*]], <4 x float> [[TMP45]], <4 x float> [[VEC_PHI]]

View File

@ -14,23 +14,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
; CHECK-NEXT: loop:
; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next
; CHECK-NEXT: EMIT vp<%2> = icmp ule ir<%iv> vp<%0>
; CHECK-NEXT: Successor(s): pred.load
; CHECK: <xVFxUF> pred.load: {
; CHECK-NEXT: pred.load.entry:
; CHECK-NEXT: BRANCH-ON-MASK vp<%2>
; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue
; CHECK-NEXT: CondBit: vp<%2> (loop)
; CHECK: pred.load.if:
; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, ir<%iv>
; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b>
; CHECK-NEXT: Successor(s): pred.load.continue
; CHECK: pred.load.continue:
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%5> = ir<%lv.b>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): loop.0
; CHECK: loop.0:
; CHECK-NEXT: Successor(s): pred.store
@ -42,13 +26,16 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
; CHECK-NEXT: CondBit: vp<%2> (loop)
; CHECK: pred.store.if:
; CHECK-NEXT: REPLICATE ir<%add> = add vp<%5>, ir<10>
; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, ir<%iv>
; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b>
; CHECK-NEXT: REPLICATE ir<%add> = add ir<%lv.b>, ir<10>
; CHECK-NEXT: REPLICATE ir<%mul> = mul ir<2>, ir<%add>
; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, ir<%iv>
; CHECK-NEXT: REPLICATE store ir<%mul>, ir<%gep.a>
; CHECK-NEXT: Successor(s): pred.store.continue
; CHECK: pred.store.continue:
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%9> = ir<%lv.b>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
@ -615,61 +602,12 @@ define void @merge_3_replicate_region(i32 %k, i32 %j) {
; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next
; CHECK-NEXT: EMIT vp<%2> = icmp ule ir<%iv> vp<%0>
; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, ir<%iv>
; CHECK-NEXT: Successor(s): pred.load
; CHECK-EMPTY:
; CHECK-NEXT: <xVFxUF> pred.load: {
; CHECK-NEXT: pred.load.entry:
; CHECK-NEXT: BRANCH-ON-MASK vp<%2>
; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue
; CHECK-NEXT: CondBit: vp<%2> (loop)
; CHECK-EMPTY:
; CHECK-NEXT: pred.load.if:
; CHECK-NEXT: REPLICATE ir<%lv.a> = load ir<%gep.a>
; CHECK-NEXT: Successor(s): pred.load.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.load.continue:
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%5> = ir<%lv.a>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): loop.0
; CHECK-EMPTY:
; CHECK-NEXT: loop.0:
; CHECK-NEXT: Successor(s): pred.load
; CHECK-EMPTY:
; CHECK-NEXT: <xVFxUF> pred.load: {
; CHECK-NEXT: pred.load.entry:
; CHECK-NEXT: BRANCH-ON-MASK vp<%2>
; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue
; CHECK-NEXT: CondBit: vp<%2> (loop)
; CHECK-EMPTY:
; CHECK-NEXT: pred.load.if:
; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, ir<%iv>
; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b>
; CHECK-NEXT: Successor(s): pred.load.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.load.continue:
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%8> = ir<%lv.b>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): loop.1
; CHECK-EMPTY:
; CHECK-NEXT: loop.1:
; CHECK-NEXT: Successor(s): pred.store
; CHECK-EMPTY:
; CHECK-NEXT: <xVFxUF> pred.store: {
; CHECK-NEXT: pred.store.entry:
; CHECK-NEXT: BRANCH-ON-MASK vp<%2>
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
; CHECK-NEXT: CondBit: vp<%2> (loop)
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.if:
; CHECK-NEXT: REPLICATE ir<%gep.c> = getelementptr ir<@c>, ir<0>, ir<%iv>
; CHECK-NEXT: REPLICATE store vp<%5>, ir<%gep.c>
; CHECK-NEXT: Successor(s): pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.continue:
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): loop.2
; CHECK-EMPTY:
; CHECK-NEXT: loop.2:
@ -682,10 +620,17 @@ define void @merge_3_replicate_region(i32 %k, i32 %j) {
; CHECK-NEXT: CondBit: vp<%2> (loop)
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.if:
; CHECK-NEXT: REPLICATE store vp<%8>, ir<%gep.a>
; CHECK-NEXT: REPLICATE ir<%lv.a> = load ir<%gep.a>
; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, ir<%iv>
; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b>
; CHECK-NEXT: REPLICATE ir<%gep.c> = getelementptr ir<@c>, ir<0>, ir<%iv>
; CHECK-NEXT: REPLICATE store ir<%lv.a>, ir<%gep.c>
; CHECK-NEXT: REPLICATE store ir<%lv.b>, ir<%gep.a>
; CHECK-NEXT: Successor(s): pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.continue:
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%10> = ir<%lv.a>
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%11> = ir<%lv.b>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): loop.3
@ -695,7 +640,7 @@ define void @merge_3_replicate_region(i32 %k, i32 %j) {
; CHECK-NEXT: Successor(s): then.0
; CHECK-EMPTY:
; CHECK-NEXT: then.0:
; CHECK-NEXT: WIDEN ir<%mul> = mul vp<%5>, vp<%8>
; CHECK-NEXT: WIDEN ir<%mul> = mul vp<%10>, vp<%11>
; CHECK-NEXT: EMIT vp<%14> = select vp<%2> ir<%c.0> ir<false>
; CHECK-NEXT: Successor(s): pred.store
; CHECK-EMPTY:
@ -764,41 +709,9 @@ define void @update_2_uses_in_same_recipe_in_merged_block(i32 %k) {
; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next
; CHECK-NEXT: EMIT vp<%2> = icmp ule ir<%iv> vp<%0>
; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, ir<%iv>
; CHECK-NEXT: Successor(s): pred.load
; CHECK-EMPTY:
; CHECK-NEXT: <xVFxUF> pred.load: {
; CHECK-NEXT: pred.load.entry:
; CHECK-NEXT: BRANCH-ON-MASK vp<%2>
; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue
; CHECK-NEXT: CondBit: vp<%2> (loop)
; CHECK-EMPTY:
; CHECK-NEXT: pred.load.if:
; CHECK-NEXT: REPLICATE ir<%lv.a> = load ir<%gep.a>
; CHECK-NEXT: Successor(s): pred.load.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.load.continue:
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%5> = ir<%lv.a>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): loop.0
; CHECK-EMPTY:
; CHECK-NEXT: loop.0:
; CHECK-NEXT: Successor(s): pred.sdiv
; CHECK-EMPTY:
; CHECK-NEXT: <xVFxUF> pred.sdiv: {
; CHECK-NEXT: pred.sdiv.entry:
; CHECK-NEXT: BRANCH-ON-MASK vp<%2>
; CHECK-NEXT: Successor(s): pred.sdiv.if, pred.sdiv.continue
; CHECK-NEXT: CondBit: vp<%2> (loop)
; CHECK-EMPTY:
; CHECK-NEXT: pred.sdiv.if:
; CHECK-NEXT: REPLICATE ir<%div> = sdiv vp<%5>, vp<%5>
; CHECK-NEXT: Successor(s): pred.sdiv.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.sdiv.continue:
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%7> = ir<%div>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): loop.1
; CHECK-EMPTY:
; CHECK-NEXT: loop.1:
@ -811,10 +724,14 @@ define void @update_2_uses_in_same_recipe_in_merged_block(i32 %k) {
; CHECK-NEXT: CondBit: vp<%2> (loop)
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.if:
; CHECK-NEXT: REPLICATE store vp<%7>, ir<%gep.a>
; CHECK-NEXT: REPLICATE ir<%lv.a> = load ir<%gep.a>
; CHECK-NEXT: REPLICATE ir<%div> = sdiv ir<%lv.a>, ir<%lv.a>
; CHECK-NEXT: REPLICATE store ir<%div>, ir<%gep.a>
; CHECK-NEXT: Successor(s): pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.continue:
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%7> = ir<%lv.a>
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%8> = ir<%div>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): loop.2
@ -871,22 +788,6 @@ define void @recipe_in_merge_candidate_used_by_first_order_recurrence(i32 %k) {
; CHECK-NEXT: Successor(s): loop.0
; CHECK-EMPTY:
; CHECK-NEXT: loop.0:
; CHECK-NEXT: Successor(s): pred.sdiv
; CHECK-EMPTY:
; CHECK-NEXT: <xVFxUF> pred.sdiv: {
; CHECK-NEXT: pred.sdiv.entry:
; CHECK-NEXT: BRANCH-ON-MASK vp<%3>
; CHECK-NEXT: Successor(s): pred.sdiv.if, pred.sdiv.continue
; CHECK-NEXT: CondBit: vp<%3> (loop)
; CHECK-EMPTY:
; CHECK-NEXT: pred.sdiv.if:
; CHECK-NEXT: REPLICATE ir<%div> = sdiv ir<%for>, vp<%6>
; CHECK-NEXT: Successor(s): pred.sdiv.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.sdiv.continue:
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%8> = ir<%div>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): loop.1
; CHECK-EMPTY:
; CHECK-NEXT: loop.1:
@ -899,12 +800,21 @@ define void @recipe_in_merge_candidate_used_by_first_order_recurrence(i32 %k) {
; CHECK-NEXT: CondBit: vp<%3> (loop)
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.if:
; CHECK-NEXT: REPLICATE store vp<%8>, ir<%gep.a>
; CHECK-NEXT: REPLICATE ir<%div> = sdiv ir<%for>, vp<%6>
; CHECK-NEXT: REPLICATE store ir<%div>, ir<%gep.a>
; CHECK-NEXT: Successor(s): pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.continue:
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%9> = ir<%div>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): loop.2
; CHECK-EMPTY:
; CHECK-NEXT: loop.2:
; CHECK-NEXT: CLONE ir<%large> = icmp ir<%iv>, ir<8>
; CHECK-NEXT: CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
br label %loop