mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 10:42:39 +01:00
[VPlan] Merge predicated-triangle regions, after sinking.
Sinking scalar operands into predicated-triangle regions may allow merging regions. This patch adds a VPlan-to-VPlan transform that tries to merge predicated-triangle regions after sinking.

Reviewed By: Ayal

Differential Revision: https://reviews.llvm.org/D100260
This commit is contained in:
parent
39ed7fe00e
commit
e51ded8676
@ -9298,6 +9298,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
|
||||
}
|
||||
|
||||
VPlanTransforms::sinkScalarOperands(*Plan);
|
||||
VPlanTransforms::mergeReplicateRegions(*Plan);
|
||||
|
||||
std::string PlanName;
|
||||
raw_string_ostream RSO(PlanName);
|
||||
|
@ -148,3 +148,138 @@ bool VPlanTransforms::sinkScalarOperands(VPlan &Plan) {
|
||||
}
|
||||
return Changed;
|
||||
}
|
||||
|
||||
/// If \p R is a region with a VPBranchOnMaskRecipe in the entry block, return
|
||||
/// the mask.
|
||||
VPValue *getPredicatedMask(VPRegionBlock *R) {
|
||||
auto *EntryBB = dyn_cast<VPBasicBlock>(R->getEntry());
|
||||
if (!EntryBB || EntryBB->size() != 1 ||
|
||||
!isa<VPBranchOnMaskRecipe>(EntryBB->begin()))
|
||||
return nullptr;
|
||||
|
||||
return cast<VPBranchOnMaskRecipe>(&*EntryBB->begin())->getOperand(0);
|
||||
}
|
||||
|
||||
/// If \p R is a triangle region, return the 'then' block of the triangle.
|
||||
static VPBasicBlock *getPredicatedThenBlock(VPRegionBlock *R) {
|
||||
auto *EntryBB = cast<VPBasicBlock>(R->getEntry());
|
||||
if (EntryBB->getNumSuccessors() != 2)
|
||||
return nullptr;
|
||||
|
||||
auto *Succ0 = dyn_cast<VPBasicBlock>(EntryBB->getSuccessors()[0]);
|
||||
auto *Succ1 = dyn_cast<VPBasicBlock>(EntryBB->getSuccessors()[1]);
|
||||
if (!Succ0 || !Succ1)
|
||||
return nullptr;
|
||||
|
||||
if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
|
||||
return nullptr;
|
||||
if (Succ0->getSingleSuccessor() == Succ1)
|
||||
return Succ0;
|
||||
if (Succ1->getSingleSuccessor() == Succ0)
|
||||
return Succ1;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// Try to merge pairs of predicated-triangle regions of \p Plan that are
/// guarded by the same mask and separated only by an empty VPBasicBlock. The
/// recipes of the first region are moved into the second region and the first
/// region is then unlinked from the CFG and deleted.
///
/// \returns true if at least one region was merged (and deleted).
bool VPlanTransforms::mergeReplicateRegions(VPlan &Plan) {
  SetVector<VPRegionBlock *> DeletedRegions;

  // Collect region blocks to process up-front, to avoid iterator invalidation
  // issues while merging regions.
  SmallVector<VPRegionBlock *, 8> CandidateRegions(
      VPBlockUtils::blocksOnly<VPRegionBlock>(depth_first(
          VPBlockRecursiveTraversalWrapper<VPBlockBase *>(Plan.getEntry()))));

  // Check if Region1 is a predicated triangle, followed by an empty block,
  // followed by another predicated triangle. If that's the case, move the
  // recipes from the first to the second triangle.
  for (VPRegionBlock *Region1 : CandidateRegions) {
    if (DeletedRegions.contains(Region1))
      continue;
    auto *MiddleBasicBlock =
        dyn_cast_or_null<VPBasicBlock>(Region1->getSingleSuccessor());
    if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
      continue;

    auto *Region2 =
        dyn_cast_or_null<VPRegionBlock>(MiddleBasicBlock->getSingleSuccessor());
    if (!Region2)
      continue;

    // Both regions must branch on the very same mask value.
    VPValue *Mask1 = getPredicatedMask(Region1);
    VPValue *Mask2 = getPredicatedMask(Region2);
    if (!Mask1 || Mask1 != Mask2)
      continue;
    VPBasicBlock *Then1 = getPredicatedThenBlock(Region1);
    VPBasicBlock *Then2 = getPredicatedThenBlock(Region2);
    if (!Then1 || !Then2)
      continue;

    assert(Mask1 && Mask2 && "both region must have conditions");

    // Note: No fusion-preventing memory dependencies are expected in either
    // region. Such dependencies should be rejected during earlier dependence
    // checks, which guarantee accesses can be re-ordered for vectorization.
    //
    // If a recipe is used by a first-order recurrence phi, we cannot move it at
    // the moment: a recipe R feeding a first order recurrence phi must allow
    // for a *vector* shuffle to be inserted immediately after it, and therefore
    // if R is *scalarized and predicated* it must appear last in its basic
    // block. In addition, other recipes may need to "sink after" R, so best if
    // R not be moved at all.
    auto IsImmovableRecipe = [](VPRecipeBase &R) {
      assert(R.getNumDefinedValues() <= 1 &&
             "no multi-defs are expected in predicated blocks");
      for (VPUser *U : R.getVPValue()->users()) {
        auto *UI = dyn_cast<VPRecipeBase>(U);
        if (!UI)
          continue;
        auto *PhiR = dyn_cast<VPWidenPHIRecipe>(UI);
        if (PhiR && !PhiR->getRecurrenceDescriptor())
          return true;
      }
      return false;
    };
    if (any_of(*Then1, IsImmovableRecipe))
      continue;

    // Move recipes to the successor region. Walking Then1 in reverse and
    // always inserting at the first non-phi position of Then2 keeps the
    // original relative order of the moved recipes.
    for (VPRecipeBase &ToMove : make_early_inc_range(reverse(*Then1)))
      ToMove.moveBefore(*Then2, Then2->getFirstNonPhi());

    auto *Merge1 = cast<VPBasicBlock>(Then1->getSingleSuccessor());
    auto *Merge2 = cast<VPBasicBlock>(Then2->getSingleSuccessor());

    // Move VPPredInstPHIRecipes from the merge block to the successor region's
    // merge block. Update all users inside the successor region to use the
    // original values.
    for (VPRecipeBase &Phi1ToMove : make_early_inc_range(reverse(*Merge1))) {
      VPValue *PredInst1 =
          cast<VPPredInstPHIRecipe>(&Phi1ToMove)->getOperand(0);
      VPValue *Phi1ToMoveV = Phi1ToMove.getVPValue();

      // VPUser::setOperand() unregisters the user from the old operand's user
      // list, so rewriting operands while iterating users() directly would
      // invalidate the iteration. Walk a snapshot of the users instead.
      SmallVector<VPUser *, 4> Phi1Users(Phi1ToMoveV->users());
      for (VPUser *U : Phi1Users) {
        auto *UI = dyn_cast<VPRecipeBase>(U);
        if (!UI || UI->getParent() != Then2)
          continue;
        for (unsigned I = 0, E = U->getNumOperands(); I != E; ++I) {
          if (Phi1ToMoveV != U->getOperand(I))
            continue;
          U->setOperand(I, PredInst1);
        }
      }

      Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
    }

    // Finally, remove the first region: re-route its predecessors to the
    // middle block and detach it. The actual deletion is deferred until all
    // candidates have been visited.
    for (VPBlockBase *Pred : make_early_inc_range(Region1->getPredecessors())) {
      VPBlockUtils::disconnectBlocks(Pred, Region1);
      VPBlockUtils::connectBlocks(Pred, MiddleBasicBlock);
    }
    VPBlockUtils::disconnectBlocks(Region1, MiddleBasicBlock);
    DeletedRegions.insert(Region1);
  }

  for (VPRegionBlock *ToDelete : DeletedRegions)
    delete ToDelete;

  // Every successful merge deletes exactly one region, so the plan changed iff
  // something was recorded for deletion.
  return !DeletedRegions.empty();
}
|
||||
|
@ -30,6 +30,8 @@ struct VPlanTransforms {
|
||||
SmallPtrSetImpl<Instruction *> &DeadInstructions, ScalarEvolution &SE);
|
||||
|
||||
static bool sinkScalarOperands(VPlan &Plan);
|
||||
|
||||
static bool mergeReplicateRegions(VPlan &Plan);
|
||||
};
|
||||
|
||||
} // namespace llvm
|
||||
|
@ -89,38 +89,28 @@ attributes #0 = { "target-cpu"="knl" }
|
||||
; FORCE: vector.body:
|
||||
; FORCE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE4:%.*]] ]
|
||||
; FORCE-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE4]] ]
|
||||
; FORCE-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
|
||||
; FORCE-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
|
||||
; FORCE-NEXT: [[TMP2:%.*]] = icmp ule <2 x i32> [[VEC_IND]], <i32 2, i32 2>
|
||||
; FORCE-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
|
||||
; FORCE-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
|
||||
; FORCE: pred.store.if:
|
||||
; FORCE-NEXT: store i32 [[TMP0]], i32* @b, align 1
|
||||
; FORCE-NEXT: br label [[PRED_STORE_CONTINUE]]
|
||||
; FORCE: pred.store.continue:
|
||||
; FORCE-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
|
||||
; FORCE-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
|
||||
; FORCE: pred.store.if1:
|
||||
; FORCE-NEXT: store i32 [[TMP1]], i32* @b, align 1
|
||||
; FORCE-NEXT: br label [[PRED_STORE_CONTINUE2]]
|
||||
; FORCE: pred.store.continue2:
|
||||
; FORCE-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
|
||||
; FORCE-NEXT: br i1 [[TMP5]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
|
||||
; FORCE-NEXT: br i1 [[TMP3]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
|
||||
; FORCE: pred.load.if:
|
||||
; FORCE-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
|
||||
; FORCE-NEXT: store i32 [[TMP0]], i32* @b, align 1
|
||||
; FORCE-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* @a, i32 0, i32 [[TMP0]]
|
||||
; FORCE-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 1
|
||||
; FORCE-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
|
||||
; FORCE-NEXT: br label [[PRED_LOAD_CONTINUE]]
|
||||
; FORCE: pred.load.continue:
|
||||
; FORCE-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, [[PRED_STORE_CONTINUE2]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ]
|
||||
; FORCE-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ]
|
||||
; FORCE-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
|
||||
; FORCE-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4]]
|
||||
; FORCE: pred.load.if3:
|
||||
; FORCE: pred.load.if1:
|
||||
; FORCE-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
|
||||
; FORCE-NEXT: store i32 [[TMP1]], i32* @b, align 1
|
||||
; FORCE-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* @a, i32 0, i32 [[TMP1]]
|
||||
; FORCE-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 1
|
||||
; FORCE-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP12]], i32 1
|
||||
; FORCE-NEXT: br label [[PRED_LOAD_CONTINUE4]]
|
||||
; FORCE: pred.load.continue4:
|
||||
; FORCE: pred.load.continue2:
|
||||
; FORCE-NEXT: [[TMP14:%.*]] = phi <2 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], [[PRED_LOAD_IF3]] ]
|
||||
; FORCE-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2
|
||||
; FORCE-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
|
||||
|
@ -40,13 +40,13 @@ define void @example1() optsize {
|
||||
; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 16
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
|
||||
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
|
||||
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK-NEXT: br i1 true, label [[TMP10:%.*]], label [[SCALAR_PH]]
|
||||
; CHECK: scalar.ph:
|
||||
; CHECK-NEXT: br label [[TMP9:%.*]]
|
||||
; CHECK: 9:
|
||||
; CHECK-NEXT: br i1 undef, label [[TMP10]], label [[TMP9]], [[LOOP2:!llvm.loop !.*]]
|
||||
; CHECK-NEXT: br i1 undef, label [[TMP10]], label [[TMP9]], !llvm.loop [[LOOP2:![0-9]+]]
|
||||
; CHECK: 10:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
@ -123,7 +123,7 @@ define void @example2(i32 %n, i32 %x) optsize {
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
|
||||
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]]
|
||||
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK-NEXT: br i1 true, label [[DOT_PREHEADER_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
|
||||
; CHECK: scalar.ph:
|
||||
@ -146,122 +146,74 @@ define void @example2(i32 %n, i32 %x) optsize {
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT21:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT20]], <4 x i64> poison, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY9:%.*]]
|
||||
; CHECK: vector.body9:
|
||||
; CHECK-NEXT: [[INDEX14:%.*]] = phi i64 [ 0, [[VECTOR_PH10]] ], [ [[INDEX_NEXT15:%.*]], [[PRED_STORE_CONTINUE51:%.*]] ]
|
||||
; CHECK-NEXT: [[INDEX14:%.*]] = phi i64 [ 0, [[VECTOR_PH10]] ], [ [[INDEX_NEXT15:%.*]], [[PRED_STORE_CONTINUE37:%.*]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[I_0_LCSSA]], [[INDEX14]]
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[OFFSET_IDX]], 1
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT28:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX14]], i32 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT29:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT28]], <4 x i64> poison, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[VEC_IV:%.*]] = or <4 x i64> [[BROADCAST_SPLAT29]], <i64 0, i64 1, i64 2, i64 3>
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT21]]
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP23]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP24]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
|
||||
; CHECK: pred.load.if:
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
|
||||
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
|
||||
; CHECK: pred.load.continue:
|
||||
; CHECK-NEXT: [[TMP27:%.*]] = phi i32 [ poison, [[VECTOR_BODY9]] ], [ [[TMP26]], [[PRED_LOAD_IF]] ]
|
||||
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP23]], i32 1
|
||||
; CHECK-NEXT: br i1 [[TMP28]], label [[PRED_LOAD_IF30:%.*]], label [[PRED_LOAD_CONTINUE31:%.*]]
|
||||
; CHECK: pred.load.if30:
|
||||
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP20]]
|
||||
; CHECK-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4
|
||||
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE31]]
|
||||
; CHECK: pred.load.continue31:
|
||||
; CHECK-NEXT: [[TMP31:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE]] ], [ [[TMP30]], [[PRED_LOAD_IF30]] ]
|
||||
; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x i1> [[TMP23]], i32 2
|
||||
; CHECK-NEXT: br i1 [[TMP32]], label [[PRED_LOAD_IF32:%.*]], label [[PRED_LOAD_CONTINUE33:%.*]]
|
||||
; CHECK: pred.load.if32:
|
||||
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP21]]
|
||||
; CHECK-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4
|
||||
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE33]]
|
||||
; CHECK: pred.load.continue33:
|
||||
; CHECK-NEXT: [[TMP35:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE31]] ], [ [[TMP34]], [[PRED_LOAD_IF32]] ]
|
||||
; CHECK-NEXT: [[TMP36:%.*]] = extractelement <4 x i1> [[TMP23]], i32 3
|
||||
; CHECK-NEXT: br i1 [[TMP36]], label [[PRED_LOAD_IF34:%.*]], label [[PRED_LOAD_CONTINUE35:%.*]]
|
||||
; CHECK: pred.load.if34:
|
||||
; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP22]]
|
||||
; CHECK-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4
|
||||
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE35]]
|
||||
; CHECK: pred.load.continue35:
|
||||
; CHECK-NEXT: [[TMP39:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE33]] ], [ [[TMP38]], [[PRED_LOAD_IF34]] ]
|
||||
; CHECK-NEXT: [[TMP40:%.*]] = extractelement <4 x i1> [[TMP23]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP40]], label [[PRED_LOAD_IF36:%.*]], label [[PRED_LOAD_CONTINUE37:%.*]]
|
||||
; CHECK: pred.load.if36:
|
||||
; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP42:%.*]] = load i32, i32* [[TMP41]], align 4
|
||||
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE37]]
|
||||
; CHECK: pred.load.continue37:
|
||||
; CHECK-NEXT: [[TMP43:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE35]] ], [ [[TMP42]], [[PRED_LOAD_IF36]] ]
|
||||
; CHECK-NEXT: [[TMP44:%.*]] = extractelement <4 x i1> [[TMP23]], i32 1
|
||||
; CHECK-NEXT: br i1 [[TMP44]], label [[PRED_LOAD_IF38:%.*]], label [[PRED_LOAD_CONTINUE39:%.*]]
|
||||
; CHECK: pred.load.if38:
|
||||
; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[TMP20]]
|
||||
; CHECK-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4
|
||||
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE39]]
|
||||
; CHECK: pred.load.continue39:
|
||||
; CHECK-NEXT: [[TMP47:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE37]] ], [ [[TMP46]], [[PRED_LOAD_IF38]] ]
|
||||
; CHECK-NEXT: [[TMP48:%.*]] = extractelement <4 x i1> [[TMP23]], i32 2
|
||||
; CHECK-NEXT: br i1 [[TMP48]], label [[PRED_LOAD_IF40:%.*]], label [[PRED_LOAD_CONTINUE41:%.*]]
|
||||
; CHECK: pred.load.if40:
|
||||
; CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[TMP21]]
|
||||
; CHECK-NEXT: [[TMP50:%.*]] = load i32, i32* [[TMP49]], align 4
|
||||
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE41]]
|
||||
; CHECK: pred.load.continue41:
|
||||
; CHECK-NEXT: [[TMP51:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE39]] ], [ [[TMP50]], [[PRED_LOAD_IF40]] ]
|
||||
; CHECK-NEXT: [[TMP52:%.*]] = extractelement <4 x i1> [[TMP23]], i32 3
|
||||
; CHECK-NEXT: br i1 [[TMP52]], label [[PRED_LOAD_IF42:%.*]], label [[PRED_LOAD_CONTINUE43:%.*]]
|
||||
; CHECK: pred.load.if42:
|
||||
; CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[TMP22]]
|
||||
; CHECK-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4
|
||||
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE43]]
|
||||
; CHECK: pred.load.continue43:
|
||||
; CHECK-NEXT: [[TMP55:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE41]] ], [ [[TMP54]], [[PRED_LOAD_IF42]] ]
|
||||
; CHECK-NEXT: [[TMP56:%.*]] = extractelement <4 x i1> [[TMP23]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP56]], label [[PRED_STORE_IF44:%.*]], label [[PRED_STORE_CONTINUE45:%.*]]
|
||||
; CHECK: pred.store.if44:
|
||||
; CHECK-NEXT: [[TMP57:%.*]] = and i32 [[TMP43]], [[TMP27]]
|
||||
; CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: store i32 [[TMP57]], i32* [[TMP58]], align 4
|
||||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE45]]
|
||||
; CHECK: pred.store.continue45:
|
||||
; CHECK-NEXT: [[TMP59:%.*]] = extractelement <4 x i1> [[TMP23]], i32 1
|
||||
; CHECK-NEXT: br i1 [[TMP59]], label [[PRED_STORE_IF46:%.*]], label [[PRED_STORE_CONTINUE47:%.*]]
|
||||
; CHECK: pred.store.if46:
|
||||
; CHECK-NEXT: [[TMP60:%.*]] = and i32 [[TMP47]], [[TMP31]]
|
||||
; CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP20]]
|
||||
; CHECK-NEXT: store i32 [[TMP60]], i32* [[TMP61]], align 4
|
||||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE47]]
|
||||
; CHECK: pred.store.continue47:
|
||||
; CHECK-NEXT: [[TMP62:%.*]] = extractelement <4 x i1> [[TMP23]], i32 2
|
||||
; CHECK-NEXT: br i1 [[TMP62]], label [[PRED_STORE_IF48:%.*]], label [[PRED_STORE_CONTINUE49:%.*]]
|
||||
; CHECK: pred.store.if48:
|
||||
; CHECK-NEXT: [[TMP63:%.*]] = and i32 [[TMP51]], [[TMP35]]
|
||||
; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP21]]
|
||||
; CHECK-NEXT: store i32 [[TMP63]], i32* [[TMP64]], align 4
|
||||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE49]]
|
||||
; CHECK: pred.store.continue49:
|
||||
; CHECK-NEXT: [[TMP65:%.*]] = extractelement <4 x i1> [[TMP23]], i32 3
|
||||
; CHECK-NEXT: br i1 [[TMP65]], label [[PRED_STORE_IF50:%.*]], label [[PRED_STORE_CONTINUE51]]
|
||||
; CHECK: pred.store.if50:
|
||||
; CHECK-NEXT: [[TMP66:%.*]] = and i32 [[TMP55]], [[TMP39]]
|
||||
; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP22]]
|
||||
; CHECK-NEXT: store i32 [[TMP66]], i32* [[TMP67]], align 4
|
||||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE51]]
|
||||
; CHECK: pred.store.continue51:
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT21]]
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP20]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF30:%.*]], label [[PRED_STORE_CONTINUE31:%.*]]
|
||||
; CHECK: pred.store.if30:
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = and i32 [[TMP25]], [[TMP23]]
|
||||
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: store i32 [[TMP26]], i32* [[TMP27]], align 4
|
||||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE31]]
|
||||
; CHECK: pred.store.continue31:
|
||||
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP20]], i32 1
|
||||
; CHECK-NEXT: br i1 [[TMP28]], label [[PRED_STORE_IF32:%.*]], label [[PRED_STORE_CONTINUE33:%.*]]
|
||||
; CHECK: pred.store.if32:
|
||||
; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[OFFSET_IDX]], 1
|
||||
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP29]]
|
||||
; CHECK-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4
|
||||
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[TMP29]]
|
||||
; CHECK-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4
|
||||
; CHECK-NEXT: [[TMP34:%.*]] = and i32 [[TMP33]], [[TMP31]]
|
||||
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP29]]
|
||||
; CHECK-NEXT: store i32 [[TMP34]], i32* [[TMP35]], align 4
|
||||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE33]]
|
||||
; CHECK: pred.store.continue33:
|
||||
; CHECK-NEXT: [[TMP36:%.*]] = extractelement <4 x i1> [[TMP20]], i32 2
|
||||
; CHECK-NEXT: br i1 [[TMP36]], label [[PRED_STORE_IF34:%.*]], label [[PRED_STORE_CONTINUE35:%.*]]
|
||||
; CHECK: pred.store.if34:
|
||||
; CHECK-NEXT: [[TMP37:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP37]]
|
||||
; CHECK-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4
|
||||
; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[TMP37]]
|
||||
; CHECK-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4
|
||||
; CHECK-NEXT: [[TMP42:%.*]] = and i32 [[TMP41]], [[TMP39]]
|
||||
; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP37]]
|
||||
; CHECK-NEXT: store i32 [[TMP42]], i32* [[TMP43]], align 4
|
||||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE35]]
|
||||
; CHECK: pred.store.continue35:
|
||||
; CHECK-NEXT: [[TMP44:%.*]] = extractelement <4 x i1> [[TMP20]], i32 3
|
||||
; CHECK-NEXT: br i1 [[TMP44]], label [[PRED_STORE_IF36:%.*]], label [[PRED_STORE_CONTINUE37]]
|
||||
; CHECK: pred.store.if36:
|
||||
; CHECK-NEXT: [[TMP45:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP45]]
|
||||
; CHECK-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4
|
||||
; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[TMP45]]
|
||||
; CHECK-NEXT: [[TMP49:%.*]] = load i32, i32* [[TMP48]], align 4
|
||||
; CHECK-NEXT: [[TMP50:%.*]] = and i32 [[TMP49]], [[TMP47]]
|
||||
; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP45]]
|
||||
; CHECK-NEXT: store i32 [[TMP50]], i32* [[TMP51]], align 4
|
||||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE37]]
|
||||
; CHECK: pred.store.continue37:
|
||||
; CHECK-NEXT: [[INDEX_NEXT15]] = add i64 [[INDEX14]], 4
|
||||
; CHECK-NEXT: [[TMP68:%.*]] = icmp eq i64 [[INDEX_NEXT15]], [[N_VEC13]]
|
||||
; CHECK-NEXT: br i1 [[TMP68]], label [[MIDDLE_BLOCK7:%.*]], label [[VECTOR_BODY9]], [[LOOP5:!llvm.loop !.*]]
|
||||
; CHECK-NEXT: [[TMP52:%.*]] = icmp eq i64 [[INDEX_NEXT15]], [[N_VEC13]]
|
||||
; CHECK-NEXT: br i1 [[TMP52]], label [[MIDDLE_BLOCK7:%.*]], label [[VECTOR_BODY9]], !llvm.loop [[LOOP5:![0-9]+]]
|
||||
; CHECK: middle.block7:
|
||||
; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH8]]
|
||||
; CHECK: scalar.ph8:
|
||||
; CHECK-NEXT: br label [[DOTLR_PH:%.*]]
|
||||
; CHECK: .lr.ph5:
|
||||
; CHECK-NEXT: br i1 undef, label [[DOT_PREHEADER_CRIT_EDGE]], label [[DOTLR_PH5]], [[LOOP6:!llvm.loop !.*]]
|
||||
; CHECK-NEXT: br i1 undef, label [[DOT_PREHEADER_CRIT_EDGE]], label [[DOTLR_PH5]], !llvm.loop [[LOOP6:![0-9]+]]
|
||||
; CHECK: .lr.ph:
|
||||
; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], [[LOOP7:!llvm.loop !.*]]
|
||||
; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop [[LOOP7:![0-9]+]]
|
||||
; CHECK: ._crit_edge.loopexit:
|
||||
; CHECK-NEXT: br label [[DOT_CRIT_EDGE]]
|
||||
; CHECK: ._crit_edge:
|
||||
@ -324,86 +276,62 @@ define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE27:%.*]] ]
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE21:%.*]] ]
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT15:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT14]], <4 x i64> poison, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[VEC_IV:%.*]] = or <4 x i64> [[BROADCAST_SPLAT15]], <i64 0, i64 1, i64 2, i64 3>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
|
||||
; CHECK: pred.load.if:
|
||||
; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
|
||||
; CHECK: pred.store.if:
|
||||
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[NEXT_GEP10]], align 16
|
||||
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
|
||||
; CHECK: pred.load.continue:
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1
|
||||
; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF16:%.*]], label [[PRED_LOAD_CONTINUE17:%.*]]
|
||||
; CHECK: pred.load.if16:
|
||||
; CHECK-NEXT: store i32 [[TMP6]], i32* [[NEXT_GEP]], align 16
|
||||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
|
||||
; CHECK: pred.store.continue:
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1
|
||||
; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17:%.*]]
|
||||
; CHECK: pred.store.if16:
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[NEXT_GEP11:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[NEXT_GEP11]], align 16
|
||||
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE17]]
|
||||
; CHECK: pred.load.continue17:
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE]] ], [ [[TMP10]], [[PRED_LOAD_IF16]] ]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2
|
||||
; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_LOAD_IF18:%.*]], label [[PRED_LOAD_CONTINUE19:%.*]]
|
||||
; CHECK: pred.load.if18:
|
||||
; CHECK-NEXT: store i32 [[TMP10]], i32* [[NEXT_GEP7]], align 16
|
||||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE17]]
|
||||
; CHECK: pred.store.continue17:
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2
|
||||
; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF18:%.*]], label [[PRED_STORE_CONTINUE19:%.*]]
|
||||
; CHECK: pred.store.if18:
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP12]]
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[NEXT_GEP12:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP13]]
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[NEXT_GEP12]], align 16
|
||||
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE19]]
|
||||
; CHECK: pred.load.continue19:
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE17]] ], [ [[TMP14]], [[PRED_LOAD_IF18]] ]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3
|
||||
; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_LOAD_IF20:%.*]], label [[PRED_LOAD_CONTINUE21:%.*]]
|
||||
; CHECK: pred.load.if20:
|
||||
; CHECK-NEXT: store i32 [[TMP14]], i32* [[NEXT_GEP8]], align 16
|
||||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE19]]
|
||||
; CHECK: pred.store.continue19:
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3
|
||||
; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF20:%.*]], label [[PRED_STORE_CONTINUE21]]
|
||||
; CHECK: pred.store.if20:
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP16]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[NEXT_GEP13:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP17]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[NEXT_GEP13]], align 16
|
||||
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE21]]
|
||||
; CHECK: pred.load.continue21:
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE19]] ], [ [[TMP18]], [[PRED_LOAD_IF20]] ]
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
|
||||
; CHECK: pred.store.if:
|
||||
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: store i32 [[TMP7]], i32* [[NEXT_GEP]], align 16
|
||||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
|
||||
; CHECK: pred.store.continue:
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1
|
||||
; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF22:%.*]], label [[PRED_STORE_CONTINUE23:%.*]]
|
||||
; CHECK: pred.store.if22:
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = or i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP22]]
|
||||
; CHECK-NEXT: store i32 [[TMP11]], i32* [[NEXT_GEP7]], align 16
|
||||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE23]]
|
||||
; CHECK: pred.store.continue23:
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2
|
||||
; CHECK-NEXT: br i1 [[TMP23]], label [[PRED_STORE_IF24:%.*]], label [[PRED_STORE_CONTINUE25:%.*]]
|
||||
; CHECK: pred.store.if24:
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP24]]
|
||||
; CHECK-NEXT: store i32 [[TMP15]], i32* [[NEXT_GEP8]], align 16
|
||||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE25]]
|
||||
; CHECK: pred.store.continue25:
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3
|
||||
; CHECK-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF26:%.*]], label [[PRED_STORE_CONTINUE27]]
|
||||
; CHECK: pred.store.if26:
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = or i64 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP26]]
|
||||
; CHECK-NEXT: store i32 [[TMP19]], i32* [[NEXT_GEP9]], align 16
|
||||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE27]]
|
||||
; CHECK: pred.store.continue27:
|
||||
; CHECK-NEXT: store i32 [[TMP18]], i32* [[NEXT_GEP9]], align 16
|
||||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE21]]
|
||||
; CHECK: pred.store.continue21:
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
|
||||
; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]]
|
||||
; CHECK: scalar.ph:
|
||||
; CHECK-NEXT: br label [[DOTLR_PH:%.*]]
|
||||
; CHECK: .lr.ph:
|
||||
; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], [[LOOP9:!llvm.loop !.*]]
|
||||
; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop [[LOOP9:![0-9]+]]
|
||||
; CHECK: ._crit_edge.loopexit:
|
||||
; CHECK-NEXT: br label [[DOT_CRIT_EDGE]]
|
||||
; CHECK: ._crit_edge:
|
||||
@ -487,13 +415,13 @@ define void @example23b(i16* noalias nocapture %src, i32* noalias nocapture %dst
|
||||
; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
|
||||
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]]
|
||||
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK-NEXT: br i1 true, label [[TMP7:%.*]], label [[SCALAR_PH]]
|
||||
; CHECK: scalar.ph:
|
||||
; CHECK-NEXT: br label [[TMP6:%.*]]
|
||||
; CHECK: 6:
|
||||
; CHECK-NEXT: br i1 undef, label [[TMP7]], label [[TMP6]], [[LOOP11:!llvm.loop !.*]]
|
||||
; CHECK-NEXT: br i1 undef, label [[TMP7]], label [[TMP6]], !llvm.loop [[LOOP11:![0-9]+]]
|
||||
; CHECK: 7:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
@ -524,95 +452,71 @@ define void @example23c(i16* noalias nocapture %src, i32* noalias nocapture %dst
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE22:%.*]] ]
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE16:%.*]] ]
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[INDUCTION:%.*]] = or <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i64> [[INDUCTION]], <i64 257, i64 257, i64 257, i64 257>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
|
||||
; CHECK: pred.load.if:
|
||||
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i16, i16* [[SRC:%.*]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = load i16, i16* [[NEXT_GEP]], align 2
|
||||
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
|
||||
; CHECK: pred.load.continue:
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = phi i16 [ poison, [[VECTOR_BODY]] ], [ [[TMP3]], [[PRED_LOAD_IF]] ]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
|
||||
; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
|
||||
; CHECK: pred.load.if11:
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i16, i16* [[SRC]], i64 [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = load i16, i16* [[NEXT_GEP4]], align 2
|
||||
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE12]]
|
||||
; CHECK: pred.load.continue12:
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = phi i16 [ poison, [[PRED_LOAD_CONTINUE]] ], [ [[TMP7]], [[PRED_LOAD_IF11]] ]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
|
||||
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]]
|
||||
; CHECK: pred.load.if13:
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i16, i16* [[SRC]], i64 [[TMP10]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = load i16, i16* [[NEXT_GEP5]], align 2
|
||||
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE14]]
|
||||
; CHECK: pred.load.continue14:
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = phi i16 [ poison, [[PRED_LOAD_CONTINUE12]] ], [ [[TMP11]], [[PRED_LOAD_IF13]] ]
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
|
||||
; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]]
|
||||
; CHECK: pred.load.if15:
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i16, i16* [[SRC]], i64 [[TMP14]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = load i16, i16* [[NEXT_GEP6]], align 2
|
||||
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE16]]
|
||||
; CHECK: pred.load.continue16:
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = phi i16 [ poison, [[PRED_LOAD_CONTINUE14]] ], [ [[TMP15]], [[PRED_LOAD_IF15]] ]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
|
||||
; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
|
||||
; CHECK: pred.store.if:
|
||||
; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i32, i32* [[DST:%.*]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = zext i16 [[TMP4]] to i32
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = shl nuw nsw i32 [[TMP18]], 7
|
||||
; CHECK-NEXT: store i32 [[TMP19]], i32* [[NEXT_GEP7]], align 4
|
||||
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i16, i16* [[SRC:%.*]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = load i16, i16* [[NEXT_GEP]], align 2
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP3]] to i32
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i32 [[TMP4]], 7
|
||||
; CHECK-NEXT: store i32 [[TMP5]], i32* [[NEXT_GEP7]], align 4
|
||||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
|
||||
; CHECK: pred.store.continue:
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
|
||||
; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_STORE_IF17:%.*]], label [[PRED_STORE_CONTINUE18:%.*]]
|
||||
; CHECK: pred.store.if17:
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i32, i32* [[DST]], i64 [[TMP21]]
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = zext i16 [[TMP8]] to i32
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
|
||||
; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]]
|
||||
; CHECK: pred.store.if11:
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i32, i32* [[DST]], i64 [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i16, i16* [[SRC]], i64 [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = load i16, i16* [[NEXT_GEP4]], align 2
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = zext i16 [[TMP9]] to i32
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i32 [[TMP10]], 7
|
||||
; CHECK-NEXT: store i32 [[TMP11]], i32* [[NEXT_GEP8]], align 4
|
||||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]]
|
||||
; CHECK: pred.store.continue12:
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
|
||||
; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]]
|
||||
; CHECK: pred.store.if13:
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i32, i32* [[DST]], i64 [[TMP13]]
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i16, i16* [[SRC]], i64 [[TMP14]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = load i16, i16* [[NEXT_GEP5]], align 2
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = zext i16 [[TMP15]] to i32
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i32 [[TMP16]], 7
|
||||
; CHECK-NEXT: store i32 [[TMP17]], i32* [[NEXT_GEP9]], align 4
|
||||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]]
|
||||
; CHECK: pred.store.continue14:
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
|
||||
; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16]]
|
||||
; CHECK: pred.store.if15:
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i32, i32* [[DST]], i64 [[TMP19]]
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i16, i16* [[SRC]], i64 [[TMP20]]
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = load i16, i16* [[NEXT_GEP6]], align 2
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = zext i16 [[TMP21]] to i32
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = shl nuw nsw i32 [[TMP22]], 7
|
||||
; CHECK-NEXT: store i32 [[TMP23]], i32* [[NEXT_GEP8]], align 4
|
||||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE18]]
|
||||
; CHECK: pred.store.continue18:
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
|
||||
; CHECK-NEXT: br i1 [[TMP24]], label [[PRED_STORE_IF19:%.*]], label [[PRED_STORE_CONTINUE20:%.*]]
|
||||
; CHECK: pred.store.if19:
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i32, i32* [[DST]], i64 [[TMP25]]
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = zext i16 [[TMP12]] to i32
|
||||
; CHECK-NEXT: [[TMP27:%.*]] = shl nuw nsw i32 [[TMP26]], 7
|
||||
; CHECK-NEXT: store i32 [[TMP27]], i32* [[NEXT_GEP9]], align 4
|
||||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE20]]
|
||||
; CHECK: pred.store.continue20:
|
||||
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
|
||||
; CHECK-NEXT: br i1 [[TMP28]], label [[PRED_STORE_IF21:%.*]], label [[PRED_STORE_CONTINUE22]]
|
||||
; CHECK: pred.store.if21:
|
||||
; CHECK-NEXT: [[TMP29:%.*]] = or i64 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i32, i32* [[DST]], i64 [[TMP29]]
|
||||
; CHECK-NEXT: [[TMP30:%.*]] = zext i16 [[TMP16]] to i32
|
||||
; CHECK-NEXT: [[TMP31:%.*]] = shl nuw nsw i32 [[TMP30]], 7
|
||||
; CHECK-NEXT: store i32 [[TMP31]], i32* [[NEXT_GEP10]], align 4
|
||||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE22]]
|
||||
; CHECK: pred.store.continue22:
|
||||
; CHECK-NEXT: store i32 [[TMP23]], i32* [[NEXT_GEP10]], align 4
|
||||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]]
|
||||
; CHECK: pred.store.continue16:
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
|
||||
; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
|
||||
; CHECK-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP12:!llvm.loop !.*]]
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
|
||||
; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK-NEXT: br i1 true, label [[TMP34:%.*]], label [[SCALAR_PH]]
|
||||
; CHECK-NEXT: br i1 true, label [[TMP26:%.*]], label [[SCALAR_PH]]
|
||||
; CHECK: scalar.ph:
|
||||
; CHECK-NEXT: br label [[TMP33:%.*]]
|
||||
; CHECK: 33:
|
||||
; CHECK-NEXT: br i1 undef, label [[TMP34]], label [[TMP33]], [[LOOP13:!llvm.loop !.*]]
|
||||
; CHECK: 34:
|
||||
; CHECK-NEXT: br label [[TMP25:%.*]]
|
||||
; CHECK: 25:
|
||||
; CHECK-NEXT: br i1 undef, label [[TMP26]], label [[TMP25]], !llvm.loop [[LOOP13:![0-9]+]]
|
||||
; CHECK: 26:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
br label %1
|
||||
|
@ -28,7 +28,7 @@ target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
|
||||
;CHECK-NEXT: %[[VMASK:.+]] = icmp ugt <8 x i32> %[[VECIND]], %{{broadcast.splat*}}
|
||||
;CHECK-NEXT: %{{.*}} = shl nuw nsw <8 x i32> %[[VECIND]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
|
||||
;CHECK-NEXT: %[[M:.+]] = extractelement <8 x i1> %[[VMASK]], i32 0
|
||||
;CHECK-NEXT: br i1 %[[M]], label %pred.load.if, label %pred.load.continue
|
||||
;CHECK-NEXT: br i1 %[[M]], label %pred.store.if, label %pred.store.continue
|
||||
;CHECK-NOT: %{{.+}} = load <16 x i8>, <16 x i8>* %{{.*}}, align 1
|
||||
|
||||
define dso_local void @masked_strided(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr {
|
||||
|
@ -64,14 +64,18 @@ for.end:
|
||||
; instead scalarized if Cost-Model so decided as part of its
|
||||
; sink-scalar-operands optimization for predicated instructions.
|
||||
;
|
||||
; SINK-GATHER-LABEL: @scalarize_and_sink_gather
|
||||
; SINK-GATHER: vector.body:
|
||||
; SINK-GATHER: pred.load.if:
|
||||
; SINK-GATHER: %[[T0:.+]] = load i32, i32* %{{.*}}, align 4
|
||||
; SINK-GATHER: pred.load.continue:
|
||||
; SINK-GATHER: %[[T1:.+]] = phi i32 [ poison, %vector.body ], [ %[[T0]], %pred.load.if ]
|
||||
; SINK-GATHER: pred.udiv.if:
|
||||
; SINK-GATHER: %{{.*}} = udiv i32 %[[T1]], %{{.*}}
|
||||
; SINK-GATHER-LABEL: pred.udiv.if: ; preds = %vector.body
|
||||
; SINK-GATHER-NEXT: [[EXT:%.+]] = extractelement <8 x i64> {{.*}}, i32 0
|
||||
; SINK-GATHER-NEXT: [[GEP:%.+]] = getelementptr inbounds i32, i32* %a, i64 [[EXT]]
|
||||
; SINK-GATHER-NEXT: [[LV:%.+]] = load i32, i32* [[GEP]], align 4
|
||||
; SINK-GATHER-NEXT: [[UDIV:%.+]] = udiv i32 [[LV]], %x
|
||||
; SINK-GATHER-NEXT: [[INS:%.+]] = insertelement <8 x i32> poison, i32 [[UDIV]], i32 0
|
||||
; SINK-GATHER-NEXT: br label %pred.udiv.continue
|
||||
; SINK-GATHER: pred.udiv.continue:
|
||||
; SINK-GATHER-NEXT: phi i32 [ poison, %vector.body ], [ [[LV]], %pred.udiv.if ]
|
||||
; SINK-GATHER-NEXT: phi <8 x i32> [ poison, %vector.body ], [ [[INS]], %pred.udiv.if ]
|
||||
define i32 @scalarize_and_sink_gather(i32* %a, i1 %c, i32 %x, i64 %n) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
@ -90,22 +90,6 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, i32* %ptr) optsize {
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: loop.0:
|
||||
; CHECK-NEXT: WIDEN ir<%recur.next> = sext ir<%y>
|
||||
; CHECK-NEXT: Successor(s): pred.srem
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: <xVFxUF> pred.srem: {
|
||||
; CHECK-NEXT: pred.srem.entry:
|
||||
; CHECK-NEXT: BRANCH-ON-MASK vp<%3>
|
||||
; CHECK-NEXT: Successor(s): pred.srem.if, pred.srem.continue
|
||||
; CHECK-NEXT: CondBit: vp<%3> (loop)
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.srem.if:
|
||||
; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%recur>, ir<%x>
|
||||
; CHECK-NEXT: Successor(s): pred.srem.continue
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.srem.continue:
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%6> = ir<%rem>
|
||||
; CHECK-NEXT: No successors
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: Successor(s): loop.0.split
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: loop.0.split:
|
||||
@ -118,12 +102,14 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, i32* %ptr) optsize {
|
||||
; CHECK-NEXT: CondBit: vp<%3> (loop)
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.store.if:
|
||||
; CHECK-NEXT: REPLICATE ir<%add> = add vp<%6>, ir<%recur.next>
|
||||
; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%recur>, ir<%x>
|
||||
; CHECK-NEXT: REPLICATE ir<%add> = add ir<%rem>, ir<%recur.next>
|
||||
; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, ir<%iv>
|
||||
; CHECK-NEXT: REPLICATE store ir<%add>, ir<%gep>
|
||||
; CHECK-NEXT: Successor(s): pred.store.continue
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.store.continue:
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%9> = ir<%rem>
|
||||
; CHECK-NEXT: No successors
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: Successor(s): loop.1
|
||||
@ -241,45 +227,30 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, i8*
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: loop.1:
|
||||
; CHECK-NEXT: WIDEN ir<%conv> = sext vp<%6>
|
||||
; CHECK-NEXT: Successor(s): pred.srem
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: <xVFxUF> pred.srem: {
|
||||
; CHECK-NEXT: pred.srem.entry:
|
||||
; CHECK-NEXT: BRANCH-ON-MASK vp<%3>
|
||||
; CHECK-NEXT: Successor(s): pred.srem.if, pred.srem.continue
|
||||
; CHECK-NEXT: CondBit: vp<%3> (loop)
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.srem.if:
|
||||
; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%0>, ir<%x> (S->V)
|
||||
; CHECK-NEXT: Successor(s): pred.srem.continue
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.srem.continue:
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%9> = ir<%rem>
|
||||
; CHECK-NEXT: No successors
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: Successor(s): loop.1.split
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: loop.1.split:
|
||||
|
||||
; CHECK: loop.1.split:
|
||||
; CHECK-NEXT: Successor(s): pred.load
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: <xVFxUF> pred.load: {
|
||||
|
||||
; CHECK: <xVFxUF> pred.load: {
|
||||
; CHECK-NEXT: pred.load.entry:
|
||||
; CHECK-NEXT: BRANCH-ON-MASK vp<%3>
|
||||
; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue
|
||||
; CHECK-NEXT: CondBit: vp<%3> (loop)
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.load.if:
|
||||
|
||||
; CHECK: pred.load.if:
|
||||
; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%0>, ir<%x> (S->V)
|
||||
; CHECK-NEXT: REPLICATE ir<%lv.2> = load ir<%gep> (S->V)
|
||||
; CHECK-NEXT: Successor(s): pred.load.continue
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.load.continue:
|
||||
|
||||
; CHECK: pred.load.continue:
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%10> = ir<%rem>
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%11> = ir<%lv.2>
|
||||
; CHECK-NEXT: No successors
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: Successor(s): loop.2
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: loop.2:
|
||||
; CHECK-NEXT: WIDEN ir<%add.1> = add ir<%conv>, vp<%9>
|
||||
|
||||
; CHECK: loop.2:
|
||||
; CHECK-NEXT: WIDEN ir<%add.1> = add ir<%conv>, vp<%10>
|
||||
; CHECK-NEXT: WIDEN ir<%conv.lv.2> = sext vp<%11>
|
||||
; CHECK-NEXT: WIDEN ir<%add> = add ir<%add.1>, ir<%conv.lv.2>
|
||||
; CHECK-NEXT: No successors
|
||||
@ -338,21 +309,6 @@ define void @sink_replicate_region_after_replicate_region(i32* %ptr, i32 %x, i8
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%6> = ir<%rem>
|
||||
; CHECK-NEXT: No successors
|
||||
; CHECK-NEXT: }
|
||||
|
||||
; CHECK: <xVFxUF> pred.sdiv: {
|
||||
; CHECK-NEXT: pred.sdiv.entry:
|
||||
; CHECK-NEXT: BRANCH-ON-MASK vp<%3>
|
||||
; CHECK-NEXT: Successor(s): pred.sdiv.if, pred.sdiv.continue
|
||||
; CHECK-NEXT: CondBit: vp<%3> (loop)
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.sdiv.if:
|
||||
; CHECK-NEXT: REPLICATE ir<%rem.div> = sdiv ir<20>, vp<%6>
|
||||
; CHECK-NEXT: Successor(s): pred.sdiv.continue
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.sdiv.continue:
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%8> = ir<%rem.div>
|
||||
; CHECK-NEXT: No successors
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: Successor(s): loop.1.split
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: loop.1.split:
|
||||
@ -365,15 +321,18 @@ define void @sink_replicate_region_after_replicate_region(i32* %ptr, i32 %x, i8
|
||||
; CHECK-NEXT: CondBit: vp<%3> (loop)
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.store.if:
|
||||
; CHECK-NEXT: REPLICATE ir<%rem.div> = sdiv ir<20>, vp<%6>
|
||||
; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, ir<%iv>
|
||||
; CHECK-NEXT: REPLICATE store vp<%8>, ir<%gep>
|
||||
; CHECK-NEXT: REPLICATE store ir<%rem.div>, ir<%gep>
|
||||
; CHECK-NEXT: Successor(s): pred.store.continue
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.store.continue:
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%10> = ir<%rem.div>
|
||||
; CHECK-NEXT: No successors
|
||||
; CHECK-NEXT: }
|
||||
|
||||
; CHECK: loop.2:
|
||||
; CHECK-NEXT: Successor(s): loop.2
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: loop.2:
|
||||
; CHECK-NEXT: No successors
|
||||
; CHECK-NEXT: }
|
||||
;
|
||||
|
@ -5,9 +5,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
; Test predication of non-void instructions, specifically (i) that these
|
||||
; instructions permit vectorization and (ii) the creation of an insertelement
|
||||
; and a Phi node. We check the full 2-element sequence for the first
|
||||
; instruction; For the rest we'll just make sure they get predicated based
|
||||
; on the code generated for the first element.
|
||||
; and a Phi node. We check the full 2-element sequence for all predicated instructions.
|
||||
define void @test(i32* nocapture %asd, i32* nocapture %aud,
|
||||
i32* nocapture %asr, i32* nocapture %aur) {
|
||||
entry:
|
||||
@ -25,53 +23,50 @@ for.cond.cleanup: ; preds = %if.end
|
||||
; CHECK: %[[SDA1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
|
||||
; CHECK: %[[SD0:[a-zA-Z0-9]+]] = sdiv i32 %[[SDA0]], %[[SDA1]]
|
||||
; CHECK: %[[SD1:[a-zA-Z0-9]+]] = insertelement <2 x i32> poison, i32 %[[SD0]], i32 0
|
||||
; CHECK: br label %[[ESD]]
|
||||
; CHECK: [[ESD]]:
|
||||
; CHECK: %[[SDR:[a-zA-Z0-9]+]] = phi <2 x i32> [ poison, %vector.body ], [ %[[SD1]], %[[CSD]] ]
|
||||
; CHECK: %[[SDEEH:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 1
|
||||
; CHECK: br i1 %[[SDEEH]], label %[[CSDH:[a-zA-Z0-9.]+]], label %[[ESDH:[a-zA-Z0-9.]+]]
|
||||
; CHECK: [[CSDH]]:
|
||||
; CHECK: %[[SDA0H:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1
|
||||
; CHECK: %[[SDA1H:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1
|
||||
; CHECK: %[[SD0H:[a-zA-Z0-9]+]] = sdiv i32 %[[SDA0H]], %[[SDA1H]]
|
||||
; CHECK: %[[SD1H:[a-zA-Z0-9]+]] = insertelement <2 x i32> %[[SDR]], i32 %[[SD0H]], i32 1
|
||||
; CHECK: br label %[[ESDH]]
|
||||
; CHECK: [[ESDH]]:
|
||||
; CHECK: %{{.*}} = phi <2 x i32> [ %[[SDR]], %[[ESD]] ], [ %[[SD1H]], %[[CSDH]] ]
|
||||
|
||||
; CHECK: %[[UDEE:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 0
|
||||
; CHECK: br i1 %[[UDEE]], label %[[CUD:[a-zA-Z0-9.]+]], label %[[EUD:[a-zA-Z0-9.]+]]
|
||||
; CHECK: [[CUD]]:
|
||||
; CHECK: %[[UDA0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
|
||||
; CHECK: %[[UDA1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
|
||||
; CHECK: %[[UD0:[a-zA-Z0-9]+]] = udiv i32 %[[UDA0]], %[[UDA1]]
|
||||
; CHECK: %[[UD1:[a-zA-Z0-9]+]] = insertelement <2 x i32> poison, i32 %[[UD0]], i32 0
|
||||
; CHECK: br label %[[EUD]]
|
||||
; CHECK: [[EUD]]:
|
||||
; CHECK: %{{.*}} = phi <2 x i32> [ poison, %{{.*}} ], [ %[[UD1]], %[[CUD]] ]
|
||||
|
||||
; CHECK: %[[SREE:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 0
|
||||
; CHECK: br i1 %[[SREE]], label %[[CSR:[a-zA-Z0-9.]+]], label %[[ESR:[a-zA-Z0-9.]+]]
|
||||
; CHECK: [[CSR]]:
|
||||
; CHECK: %[[SRA0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
|
||||
; CHECK: %[[SRA1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
|
||||
; CHECK: %[[SR0:[a-zA-Z0-9]+]] = srem i32 %[[SRA0]], %[[SRA1]]
|
||||
; CHECK: %[[SR1:[a-zA-Z0-9]+]] = insertelement <2 x i32> poison, i32 %[[SR0]], i32 0
|
||||
; CHECK: br label %[[ESR]]
|
||||
; CHECK: [[ESR]]:
|
||||
; CHECK: %{{.*}} = phi <2 x i32> [ poison, %{{.*}} ], [ %[[SR1]], %[[CSR]] ]
|
||||
|
||||
; CHECK: %[[UREE:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 0
|
||||
; CHECK: br i1 %[[UREE]], label %[[CUR:[a-zA-Z0-9.]+]], label %[[EUR:[a-zA-Z0-9.]+]]
|
||||
; CHECK: [[CUR]]:
|
||||
; CHECK: %[[URA0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
|
||||
; CHECK: %[[URA1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
|
||||
; CHECK: %[[UR0:[a-zA-Z0-9]+]] = urem i32 %[[URA0]], %[[URA1]]
|
||||
; CHECK: %[[UR1:[a-zA-Z0-9]+]] = insertelement <2 x i32> poison, i32 %[[UR0]], i32 0
|
||||
; CHECK: br label %[[EUR]]
|
||||
; CHECK: [[EUR]]:
|
||||
; CHECK: %{{.*}} = phi <2 x i32> [ poison, %{{.*}} ], [ %[[UR1]], %[[CUR]] ]
|
||||
|
||||
; CHECK: br label %[[ESD]]
|
||||
; CHECK: [[ESD]]:
|
||||
; CHECK: [[SDR:%[a-zA-Z0-9]+]] = phi <2 x i32> [ poison, %vector.body ], [ %[[SD1]], %[[CSD]] ]
|
||||
; CHECK: [[UDR:%.+]] = phi <2 x i32> [ poison, %{{.*}} ], [ %[[UD1]], %[[CSD]] ]
|
||||
; CHECK: [[SRR:%.+]] = phi <2 x i32> [ poison, %{{.*}} ], [ %[[SR1]], %[[CSD]] ]
|
||||
; CHECK: [[URR:%.+]] = phi <2 x i32> [ poison, %{{.*}} ], [ %[[UR1]], %[[CSD]] ]
|
||||
; CHECK: %[[SDEEH:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 1
|
||||
; CHECK: br i1 %[[SDEEH]], label %[[CSDH:[a-zA-Z0-9.]+]], label %[[ESDH:[a-zA-Z0-9.]+]]
|
||||
; CHECK: [[CSDH]]:
|
||||
; CHECK: %[[SD1_A0H:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1
|
||||
; CHECK: %[[SD1_A1H:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1
|
||||
; CHECK: %[[SD1_0H:[a-zA-Z0-9]+]] = sdiv i32 %[[SD1_A0H]], %[[SD1_A1H]]
|
||||
; CHECK: %[[SD1_1H:[a-zA-Z0-9]+]] = insertelement <2 x i32> [[SDR]], i32 %[[SD1_0H]], i32 1
|
||||
; CHECK: %[[UD1_A0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1
|
||||
; CHECK: %[[UD1_A1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1
|
||||
; CHECK: %[[UD1_0:[a-zA-Z0-9]+]] = udiv i32 %[[UD1_A0]], %[[UD1_A1]]
|
||||
; CHECK: %[[UD1_1:[a-zA-Z0-9]+]] = insertelement <2 x i32> [[UDR]], i32 %[[UD1_0]], i32 1
|
||||
; CHECK: %[[SR1_A0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1
|
||||
; CHECK: %[[SR1_A1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1
|
||||
; CHECK: %[[SR1_0:[a-zA-Z0-9]+]] = srem i32 %[[SR1_A0]], %[[SR1_A1]]
|
||||
; CHECK: %[[SR1_1:[a-zA-Z0-9]+]] = insertelement <2 x i32> [[SRR]], i32 %[[SR1_0]], i32 1
|
||||
; CHECK: %[[UR1_A0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1
|
||||
; CHECK: %[[UR1_A1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1
|
||||
; CHECK: %[[UR1_0:[a-zA-Z0-9]+]] = urem i32 %[[UR1_A0]], %[[UR1_A1]]
|
||||
; CHECK: %[[UR1_1:[a-zA-Z0-9]+]] = insertelement <2 x i32> [[URR]], i32 %[[UR1_0]], i32 1
|
||||
; CHECK: br label %[[ESDH]]
|
||||
; CHECK: [[ESDH]]:
|
||||
; CHECK: [[SDR1:%[a-zA-Z0-9]+]] = phi <2 x i32> [ [[SDR]], %[[ESD]] ], [ %[[SD1_1H]], %[[CSDH]] ]
|
||||
; CHECK: [[UDR1:%.+]] = phi <2 x i32> [ [[UDR]], %{{.*}} ], [ %[[UD1_1]], %[[CSDH]] ]
|
||||
; CHECK: [[SRR1:%.+]] = phi <2 x i32> [ [[SRR]], %{{.*}} ], [ %[[SR1_1]], %[[CSDH]] ]
|
||||
; CHECK: [[URR1:%.+]] = phi <2 x i32> [ [[URR]], %{{.*}} ], [ %[[UR1_1]], %[[CSDH]] ]
|
||||
;
|
||||
for.body: ; preds = %if.end, %entry
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end ]
|
||||
%isd = getelementptr inbounds i32, i32* %asd, i64 %indvars.iv
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -32,7 +32,7 @@ entry:
|
||||
|
||||
define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B) {
|
||||
; CHECK-LABEL: @reduction_sum(
|
||||
; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP47:%.*]], %pred.load.continue14 ]
|
||||
; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP47:%.*]], %pred.load.continue6 ]
|
||||
; CHECK: [[TMP44:%.*]] = add <4 x i32> [[VEC_PHI]], [[VEC_IND:%.*]]
|
||||
; CHECK: [[TMP45:%.*]] = add <4 x i32> [[TMP44]], [[TMP23:%.*]]
|
||||
; CHECK: [[TMP46:%.*]] = add <4 x i32> [[TMP45]], [[TMP43:%.*]]
|
||||
@ -65,7 +65,7 @@ entry:
|
||||
define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B) {
|
||||
; CHECK-LABEL: @reduction_prod(
|
||||
; CHECK: vector.body:
|
||||
; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1, i32 1, i32 1, i32 1>, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue14 ]
|
||||
; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1, i32 1, i32 1, i32 1>, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue6 ]
|
||||
; CHECK: [[TMP44:%.*]] = mul <4 x i32> [[VEC_PHI]], [[TMP23:%.*]]
|
||||
; CHECK: [[TMP45:%.*]] = mul <4 x i32> [[TMP44]], [[TMP43:%.*]]
|
||||
; CHECK: [[TMP46]] = select <4 x i1> [[TMP3:%.*]], <4 x i32> [[TMP45]], <4 x i32> [[VEC_PHI]]
|
||||
@ -96,7 +96,7 @@ entry:
|
||||
define i32 @reduction_and(i32* nocapture %A, i32* nocapture %B) {
|
||||
; CHECK-LABEL: @reduction_and(
|
||||
; CHECK: vector.body:
|
||||
; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 -1, i32 -1, i32 -1, i32 -1>, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue14 ]
|
||||
; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 -1, i32 -1, i32 -1, i32 -1>, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue6 ]
|
||||
; CHECK: [[TMP44:%.*]] = and <4 x i32> [[VEC_PHI]], [[TMP23:%.*]]
|
||||
; CHECK: [[TMP45:%.*]] = and <4 x i32> [[TMP44]], [[TMP43:%.*]]
|
||||
; CHECK: [[TMP46]] = select <4 x i1> [[TMP3:%.*]], <4 x i32> [[TMP45]], <4 x i32> [[VEC_PHI]]
|
||||
@ -127,7 +127,7 @@ for.end: ; preds = %for.body, %entry
|
||||
define i32 @reduction_or(i32* nocapture %A, i32* nocapture %B) {
|
||||
; CHECK-LABEL: @reduction_or(
|
||||
; CHECK: vector.body:
|
||||
; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue14 ]
|
||||
; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue6 ]
|
||||
; CHECK: [[TMP45:%.*]] = select <4 x i1> [[TMP3:%.*]], <4 x i32> [[TMP44:%.*]], <4 x i32> zeroinitializer
|
||||
; CHECK: [[TMP46]] = or <4 x i32> [[VEC_PHI]], [[TMP45]]
|
||||
; CHECK: middle.block:
|
||||
@ -157,7 +157,7 @@ for.end: ; preds = %for.body, %entry
|
||||
define i32 @reduction_xor(i32* nocapture %A, i32* nocapture %B) {
|
||||
; CHECK-LABEL: @reduction_xor(
|
||||
; CHECK: vector.body:
|
||||
; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue14 ]
|
||||
; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue6 ]
|
||||
; CHECK: [[TMP45:%.*]] = select <4 x i1> [[TMP3:%.*]], <4 x i32> [[TMP44:%.*]], <4 x i32> zeroinitializer
|
||||
; CHECK: [[TMP46]] = xor <4 x i32> [[VEC_PHI]], [[TMP45]]
|
||||
; CHECK: middle.block:
|
||||
@ -187,7 +187,7 @@ for.end: ; preds = %for.body, %entry
|
||||
define float @reduction_fadd(float* nocapture %A, float* nocapture %B) {
|
||||
; CHECK-LABEL: @reduction_fadd(
|
||||
; CHECK: vector.body:
|
||||
; CHECK: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue14 ]
|
||||
; CHECK: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue6 ]
|
||||
; CHECK: [[TMP44:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[TMP23:%.*]]
|
||||
; CHECK: [[TMP45:%.*]] = fadd fast <4 x float> [[TMP44]], [[TMP43:%.*]]
|
||||
; CHECK: [[TMP46]] = select <4 x i1> [[TMP3:%.*]], <4 x float> [[TMP45]], <4 x float> [[VEC_PHI]]
|
||||
@ -218,7 +218,7 @@ for.end: ; preds = %for.body, %entry
|
||||
define float @reduction_fmul(float* nocapture %A, float* nocapture %B) {
|
||||
; CHECK-LABEL: @reduction_fmul(
|
||||
; CHECK: vector.body:
|
||||
; CHECK: [[VEC_PHI:%.*]] = phi <4 x float> [ <float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue14 ]
|
||||
; CHECK: [[VEC_PHI:%.*]] = phi <4 x float> [ <float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue6 ]
|
||||
; CHECK: [[TMP44:%.*]] = fmul fast <4 x float> [[VEC_PHI]], [[TMP23:%.*]]
|
||||
; CHECK: [[TMP45:%.*]] = fmul fast <4 x float> [[TMP44]], [[TMP43:%.*]]
|
||||
; CHECK: [[TMP46]] = select <4 x i1> [[TMP3:%.*]], <4 x float> [[TMP45]], <4 x float> [[VEC_PHI]]
|
||||
|
@ -14,23 +14,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
|
||||
; CHECK-NEXT: loop:
|
||||
; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next
|
||||
; CHECK-NEXT: EMIT vp<%2> = icmp ule ir<%iv> vp<%0>
|
||||
; CHECK-NEXT: Successor(s): pred.load
|
||||
|
||||
; CHECK: <xVFxUF> pred.load: {
|
||||
; CHECK-NEXT: pred.load.entry:
|
||||
; CHECK-NEXT: BRANCH-ON-MASK vp<%2>
|
||||
; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue
|
||||
; CHECK-NEXT: CondBit: vp<%2> (loop)
|
||||
|
||||
; CHECK: pred.load.if:
|
||||
; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, ir<%iv>
|
||||
; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b>
|
||||
; CHECK-NEXT: Successor(s): pred.load.continue
|
||||
|
||||
; CHECK: pred.load.continue:
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%5> = ir<%lv.b>
|
||||
; CHECK-NEXT: No successors
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: Successor(s): loop.0
|
||||
|
||||
; CHECK: loop.0:
|
||||
; CHECK-NEXT: Successor(s): pred.store
|
||||
@ -42,13 +26,16 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
|
||||
; CHECK-NEXT: CondBit: vp<%2> (loop)
|
||||
|
||||
; CHECK: pred.store.if:
|
||||
; CHECK-NEXT: REPLICATE ir<%add> = add vp<%5>, ir<10>
|
||||
; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, ir<%iv>
|
||||
; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b>
|
||||
; CHECK-NEXT: REPLICATE ir<%add> = add ir<%lv.b>, ir<10>
|
||||
; CHECK-NEXT: REPLICATE ir<%mul> = mul ir<2>, ir<%add>
|
||||
; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, ir<%iv>
|
||||
; CHECK-NEXT: REPLICATE store ir<%mul>, ir<%gep.a>
|
||||
; CHECK-NEXT: Successor(s): pred.store.continue
|
||||
|
||||
; CHECK: pred.store.continue:
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%9> = ir<%lv.b>
|
||||
; CHECK-NEXT: No successors
|
||||
; CHECK-NEXT: }
|
||||
|
||||
@ -615,61 +602,12 @@ define void @merge_3_replicate_region(i32 %k, i32 %j) {
|
||||
; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next
|
||||
; CHECK-NEXT: EMIT vp<%2> = icmp ule ir<%iv> vp<%0>
|
||||
; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, ir<%iv>
|
||||
; CHECK-NEXT: Successor(s): pred.load
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: <xVFxUF> pred.load: {
|
||||
; CHECK-NEXT: pred.load.entry:
|
||||
; CHECK-NEXT: BRANCH-ON-MASK vp<%2>
|
||||
; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue
|
||||
; CHECK-NEXT: CondBit: vp<%2> (loop)
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.load.if:
|
||||
; CHECK-NEXT: REPLICATE ir<%lv.a> = load ir<%gep.a>
|
||||
; CHECK-NEXT: Successor(s): pred.load.continue
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.load.continue:
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%5> = ir<%lv.a>
|
||||
; CHECK-NEXT: No successors
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: Successor(s): loop.0
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: loop.0:
|
||||
; CHECK-NEXT: Successor(s): pred.load
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: <xVFxUF> pred.load: {
|
||||
; CHECK-NEXT: pred.load.entry:
|
||||
; CHECK-NEXT: BRANCH-ON-MASK vp<%2>
|
||||
; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue
|
||||
; CHECK-NEXT: CondBit: vp<%2> (loop)
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.load.if:
|
||||
; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, ir<%iv>
|
||||
; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b>
|
||||
; CHECK-NEXT: Successor(s): pred.load.continue
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.load.continue:
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%8> = ir<%lv.b>
|
||||
; CHECK-NEXT: No successors
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: Successor(s): loop.1
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: loop.1:
|
||||
; CHECK-NEXT: Successor(s): pred.store
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: <xVFxUF> pred.store: {
|
||||
; CHECK-NEXT: pred.store.entry:
|
||||
; CHECK-NEXT: BRANCH-ON-MASK vp<%2>
|
||||
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
|
||||
; CHECK-NEXT: CondBit: vp<%2> (loop)
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.store.if:
|
||||
; CHECK-NEXT: REPLICATE ir<%gep.c> = getelementptr ir<@c>, ir<0>, ir<%iv>
|
||||
; CHECK-NEXT: REPLICATE store vp<%5>, ir<%gep.c>
|
||||
; CHECK-NEXT: Successor(s): pred.store.continue
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.store.continue:
|
||||
; CHECK-NEXT: No successors
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: Successor(s): loop.2
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: loop.2:
|
||||
@ -682,10 +620,17 @@ define void @merge_3_replicate_region(i32 %k, i32 %j) {
|
||||
; CHECK-NEXT: CondBit: vp<%2> (loop)
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.store.if:
|
||||
; CHECK-NEXT: REPLICATE store vp<%8>, ir<%gep.a>
|
||||
; CHECK-NEXT: REPLICATE ir<%lv.a> = load ir<%gep.a>
|
||||
; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, ir<%iv>
|
||||
; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b>
|
||||
; CHECK-NEXT: REPLICATE ir<%gep.c> = getelementptr ir<@c>, ir<0>, ir<%iv>
|
||||
; CHECK-NEXT: REPLICATE store ir<%lv.a>, ir<%gep.c>
|
||||
; CHECK-NEXT: REPLICATE store ir<%lv.b>, ir<%gep.a>
|
||||
; CHECK-NEXT: Successor(s): pred.store.continue
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.store.continue:
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%10> = ir<%lv.a>
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%11> = ir<%lv.b>
|
||||
; CHECK-NEXT: No successors
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: Successor(s): loop.3
|
||||
@ -695,7 +640,7 @@ define void @merge_3_replicate_region(i32 %k, i32 %j) {
|
||||
; CHECK-NEXT: Successor(s): then.0
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: then.0:
|
||||
; CHECK-NEXT: WIDEN ir<%mul> = mul vp<%5>, vp<%8>
|
||||
; CHECK-NEXT: WIDEN ir<%mul> = mul vp<%10>, vp<%11>
|
||||
; CHECK-NEXT: EMIT vp<%14> = select vp<%2> ir<%c.0> ir<false>
|
||||
; CHECK-NEXT: Successor(s): pred.store
|
||||
; CHECK-EMPTY:
|
||||
@ -764,41 +709,9 @@ define void @update_2_uses_in_same_recipe_in_merged_block(i32 %k) {
|
||||
; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next
|
||||
; CHECK-NEXT: EMIT vp<%2> = icmp ule ir<%iv> vp<%0>
|
||||
; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, ir<%iv>
|
||||
; CHECK-NEXT: Successor(s): pred.load
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: <xVFxUF> pred.load: {
|
||||
; CHECK-NEXT: pred.load.entry:
|
||||
; CHECK-NEXT: BRANCH-ON-MASK vp<%2>
|
||||
; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue
|
||||
; CHECK-NEXT: CondBit: vp<%2> (loop)
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.load.if:
|
||||
; CHECK-NEXT: REPLICATE ir<%lv.a> = load ir<%gep.a>
|
||||
; CHECK-NEXT: Successor(s): pred.load.continue
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.load.continue:
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%5> = ir<%lv.a>
|
||||
; CHECK-NEXT: No successors
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: Successor(s): loop.0
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: loop.0:
|
||||
; CHECK-NEXT: Successor(s): pred.sdiv
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: <xVFxUF> pred.sdiv: {
|
||||
; CHECK-NEXT: pred.sdiv.entry:
|
||||
; CHECK-NEXT: BRANCH-ON-MASK vp<%2>
|
||||
; CHECK-NEXT: Successor(s): pred.sdiv.if, pred.sdiv.continue
|
||||
; CHECK-NEXT: CondBit: vp<%2> (loop)
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.sdiv.if:
|
||||
; CHECK-NEXT: REPLICATE ir<%div> = sdiv vp<%5>, vp<%5>
|
||||
; CHECK-NEXT: Successor(s): pred.sdiv.continue
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.sdiv.continue:
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%7> = ir<%div>
|
||||
; CHECK-NEXT: No successors
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: Successor(s): loop.1
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: loop.1:
|
||||
@ -811,10 +724,14 @@ define void @update_2_uses_in_same_recipe_in_merged_block(i32 %k) {
|
||||
; CHECK-NEXT: CondBit: vp<%2> (loop)
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.store.if:
|
||||
; CHECK-NEXT: REPLICATE store vp<%7>, ir<%gep.a>
|
||||
; CHECK-NEXT: REPLICATE ir<%lv.a> = load ir<%gep.a>
|
||||
; CHECK-NEXT: REPLICATE ir<%div> = sdiv ir<%lv.a>, ir<%lv.a>
|
||||
; CHECK-NEXT: REPLICATE store ir<%div>, ir<%gep.a>
|
||||
; CHECK-NEXT: Successor(s): pred.store.continue
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.store.continue:
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%7> = ir<%lv.a>
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%8> = ir<%div>
|
||||
; CHECK-NEXT: No successors
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: Successor(s): loop.2
|
||||
@ -871,22 +788,6 @@ define void @recipe_in_merge_candidate_used_by_first_order_recurrence(i32 %k) {
|
||||
; CHECK-NEXT: Successor(s): loop.0
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: loop.0:
|
||||
; CHECK-NEXT: Successor(s): pred.sdiv
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: <xVFxUF> pred.sdiv: {
|
||||
; CHECK-NEXT: pred.sdiv.entry:
|
||||
; CHECK-NEXT: BRANCH-ON-MASK vp<%3>
|
||||
; CHECK-NEXT: Successor(s): pred.sdiv.if, pred.sdiv.continue
|
||||
; CHECK-NEXT: CondBit: vp<%3> (loop)
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.sdiv.if:
|
||||
; CHECK-NEXT: REPLICATE ir<%div> = sdiv ir<%for>, vp<%6>
|
||||
; CHECK-NEXT: Successor(s): pred.sdiv.continue
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.sdiv.continue:
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%8> = ir<%div>
|
||||
; CHECK-NEXT: No successors
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: Successor(s): loop.1
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: loop.1:
|
||||
@ -899,10 +800,19 @@ define void @recipe_in_merge_candidate_used_by_first_order_recurrence(i32 %k) {
|
||||
; CHECK-NEXT: CondBit: vp<%3> (loop)
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.store.if:
|
||||
; CHECK-NEXT: REPLICATE store vp<%8>, ir<%gep.a>
|
||||
; CHECK-NEXT: REPLICATE ir<%div> = sdiv ir<%for>, vp<%6>
|
||||
; CHECK-NEXT: REPLICATE store ir<%div>, ir<%gep.a>
|
||||
; CHECK-NEXT: Successor(s): pred.store.continue
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: pred.store.continue:
|
||||
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%9> = ir<%div>
|
||||
; CHECK-NEXT: No successors
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: Successor(s): loop.2
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: loop.2:
|
||||
; CHECK-NEXT: CLONE ir<%large> = icmp ir<%iv>, ir<8>
|
||||
; CHECK-NEXT: CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k>
|
||||
; CHECK-NEXT: No successors
|
||||
; CHECK-NEXT: }
|
||||
;
|
||||
|
Loading…
Reference in New Issue
Block a user