[LV] Test once if vector trip count is zero, instead of twice
Generate a single test to decide if there are enough iterations to jump to the vectorized loop, or else go to the scalar remainder loop. This test compares the Scalar Trip Count (STC): if STC < VF * UF, go to the scalar loop. If requiresScalarEpilogue() holds, at least one iteration must remain scalar and only the rest can be used to form vector iterations, so in that case the test instead checks whether (STC - 1) < VF * UF by comparing STC <= VF * UF, and goes to the scalar loop if so. Otherwise the vector loop is entered for at least one vector iteration. For example, with VF * UF = 8 and a required scalar epilogue, STC = 8 leaves only 7 vectorizable iterations, so STC <= 8 correctly branches to the scalar loop.

This single test also covers the case where incrementing the backedge-taken count overflows, producing an incorrect trip count of zero. In this (rare) case we likewise avoid the vector loop and jump to the scalar loop.

This patch simplifies the existing tests and effectively removes the basic block originally named "min.iters.checked", leaving the single test in block "vector.ph".

Original observation and initial patch by Evgeny Stupachenko.

Differential Revision: https://reviews.llvm.org/D34150

llvm-svn: 308421
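To make the reasoning concrete, here is a minimal standalone C++ sketch of the decision (illustrative only, not the LoopVectorize.cpp code itself): the values VF = 4, UF = 2, the 8-bit counter type, and the enterVectorLoop helper are hypothetical choices that make the epilogue and overflow cases easy to exercise.

#include <cstdint>
#include <cstdio>

// Hypothetical vectorization and unroll factors.
constexpr unsigned VF = 4;
constexpr unsigned UF = 2;

// Returns true when the vector loop should be entered, given the
// backedge-taken count (BTC) and whether a scalar epilogue is required.
// An 8-bit counter makes the overflow of BTC + 1 easy to demonstrate.
bool enterVectorLoop(uint8_t BTC, bool RequiresScalarEpilogue) {
  // Scalar trip count; wraps to 0 when BTC is 255 (the overflow case).
  uint8_t STC = static_cast<uint8_t>(BTC + 1);
  // The single test: <= when one iteration must stay scalar, < otherwise.
  // A wrapped STC of 0 always fails, so overflow also goes to the scalar loop.
  bool GoToScalarLoop = RequiresScalarEpilogue ? (STC <= VF * UF)
                                               : (STC < VF * UF);
  return !GoToScalarLoop;
}

int main() {
  // STC == VF * UF (= 8) with a required epilogue: only 7 iterations can be
  // vectorized, so the comparison sends us to the scalar loop.
  std::printf("%d\n", enterVectorLoop(7, true));   // prints 0
  // Without a scalar epilogue the same trip count enters the vector loop.
  std::printf("%d\n", enterVectorLoop(7, false));  // prints 1
  // BTC == 255: STC wraps to 0, and the same single test goes scalar.
  std::printf("%d\n", enterVectorLoop(255, true)); // prints 0
  return 0;
}

Folding the overflow case into the same comparison is what allows the separate cmp.zero test and the min.iters.checked block to be removed.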
parent 58072411f5
commit fd3659df66
@@ -574,11 +574,9 @@ protected:
/// Returns (and creates if needed) the trip count of the widened loop.
Value *getOrCreateVectorTripCount(Loop *NewLoop);

/// Emit a bypass check to see if the trip count would overflow, or we
/// wouldn't have enough iterations to execute one vector loop.
/// Emit a bypass check to see if the vector trip count is zero, including if
/// it overflows.
void emitMinimumIterationCountCheck(Loop *L, BasicBlock *Bypass);
/// Emit a bypass check to see if the vector trip count is nonzero.
void emitVectorLoopEnteredCheck(Loop *L, BasicBlock *Bypass);
/// Emit a bypass check to see if all of the SCEV assumptions we've
/// had to make are correct.
void emitSCEVChecks(Loop *L, BasicBlock *Bypass);
@@ -3289,37 +3287,16 @@ void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L,
BasicBlock *BB = L->getLoopPreheader();
IRBuilder<> Builder(BB->getTerminator());

// Generate code to check that the loop's trip count that we computed by
// adding one to the backedge-taken count will not overflow.
Value *CheckMinIters = Builder.CreateICmpULT(
Count, ConstantInt::get(Count->getType(), VF * UF), "min.iters.check");
// Generate code to check if the loop's trip count is less than VF * UF, or
// equal to it in case a scalar epilogue is required; this implies that the
// vector trip count is zero. This check also covers the case where adding one
// to the backedge-taken count overflowed leading to an incorrect trip count
// of zero. In this case we will also jump to the scalar loop.
auto P = Legal->requiresScalarEpilogue() ? ICmpInst::ICMP_ULE
: ICmpInst::ICMP_ULT;
Value *CheckMinIters = Builder.CreateICmp(
P, Count, ConstantInt::get(Count->getType(), VF * UF), "min.iters.check");

BasicBlock *NewBB =
BB->splitBasicBlock(BB->getTerminator(), "min.iters.checked");
// Update dominator tree immediately if the generated block is a
// LoopBypassBlock because SCEV expansions to generate loop bypass
// checks may query it before the current function is finished.
DT->addNewBlock(NewBB, BB);
if (L->getParentLoop())
L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI);
ReplaceInstWithInst(BB->getTerminator(),
BranchInst::Create(Bypass, NewBB, CheckMinIters));
LoopBypassBlocks.push_back(BB);
}

void InnerLoopVectorizer::emitVectorLoopEnteredCheck(Loop *L,
BasicBlock *Bypass) {
Value *TC = getOrCreateVectorTripCount(L);
BasicBlock *BB = L->getLoopPreheader();
IRBuilder<> Builder(BB->getTerminator());

// Now, compare the new count to zero. If it is zero skip the vector loop and
// jump to the scalar loop.
Value *Cmp = Builder.CreateICmpEQ(TC, Constant::getNullValue(TC->getType()),
"cmp.zero");

// Generate code to check that the loop's trip count that we computed by
// adding one to the backedge-taken count will not overflow.
BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph");
// Update dominator tree immediately if the generated block is a
// LoopBypassBlock because SCEV expansions to generate loop bypass
@@ -3328,7 +3305,7 @@ void InnerLoopVectorizer::emitVectorLoopEnteredCheck(Loop *L,
if (L->getParentLoop())
L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI);
ReplaceInstWithInst(BB->getTerminator(),
BranchInst::Create(Bypass, NewBB, Cmp));
BranchInst::Create(Bypass, NewBB, CheckMinIters));
LoopBypassBlocks.push_back(BB);
}
@@ -3477,14 +3454,13 @@ void InnerLoopVectorizer::createVectorizedLoopSkeleton() {

Value *StartIdx = ConstantInt::get(IdxTy, 0);

// We need to test whether the backedge-taken count is uint##_max. Adding one
// to it will cause overflow and an incorrect loop trip count in the vector
// body. In case of overflow we want to directly jump to the scalar remainder
// loop.
emitMinimumIterationCountCheck(Lp, ScalarPH);
// Now, compare the new count to zero. If it is zero skip the vector loop and
// jump to the scalar loop.
emitVectorLoopEnteredCheck(Lp, ScalarPH);
// jump to the scalar loop. This check also covers the case where the
// backedge-taken count is uint##_max: adding one to it will overflow leading
// to an incorrect trip count of zero. In this (rare) case we will also jump
// to the scalar loop.
emitMinimumIterationCountCheck(Lp, ScalarPH);

// Generate the code to check any assumptions that we've made for SCEV
// expressions.
emitSCEVChecks(Lp, ScalarPH);
@@ -3527,7 +3503,7 @@ void InnerLoopVectorizer::createVectorizedLoopSkeleton() {
// We know what the end value is.
EndValue = CountRoundDown;
} else {
IRBuilder<> B(LoopBypassBlocks.back()->getTerminator());
IRBuilder<> B(Lp->getLoopPreheader()->getTerminator());
Type *StepType = II.getStep()->getType();
Instruction::CastOps CastOp =
CastInst::getCastOpcode(CountRoundDown, true, StepType, true);
@@ -4168,7 +4144,7 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
// To do so, we need to generate the 'identity' vector and override
// one of the elements with the incoming scalar reduction. We need
// to do it in the vector-loop preheader.
Builder.SetInsertPoint(LoopBypassBlocks[1]->getTerminator());
Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());

// This is the vector-clone of the value that leaves the loop.
Type *VecTy = getOrCreateVectorValue(LoopExitInst, 0)->getType();
@@ -86,10 +86,10 @@ for.end: ; preds = %for.end.loopexit, %
; AUTO_VEC-NEXT: entry:
; AUTO_VEC-NEXT: [[TMP0:%.*]] = icmp sgt i64 %n, 1
; AUTO_VEC-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 %n, i64 1
; AUTO_VEC: br i1 {{.*}}, label %for.body, label %min.iters.checked
; AUTO_VEC: min.iters.checked:
; AUTO_VEC: br i1 {{.*}}, label %for.body, label %vector.ph
; AUTO_VEC: vector.ph:
; AUTO_VEC-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775792
; AUTO_VEC: br i1 {{.*}}, label %for.body, label %vector.body
; AUTO_VEC: br label %vector.body
; AUTO_VEC: middle.block:
; AUTO_VEC: [[TMP11:%.*]] = add nsw i64 [[N_VEC]], -1
; AUTO_VEC-NEXT: [[CAST_CMO:%.*]] = sitofp i64 [[TMP11]] to double
@@ -5,7 +5,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
; Make sure we are preserving debug info in the vectorized code.

; CHECK: for.body.lr.ph
; CHECK: cmp.zero = icmp eq i64 {{.*}}, 0, !dbg !{{[0-9]+}}
; CHECK: min.iters.check = icmp ult i64 {{.*}}, 2, !dbg !{{[0-9]+}}
; CHECK: vector.body
; CHECK: index {{.*}}, !dbg ![[LOC:[0-9]+]]
; CHECK: getelementptr inbounds i32, i32* %a, {{.*}}, !dbg ![[LOC]]
@@ -22,7 +22,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
; CHECK: middle.block:
; CHECK: %vector.recur.extract = extractelement <4 x i32> [[L1]], i32 3
; CHECK: scalar.ph:
; CHECK: %scalar.recur.init = phi i32 [ %vector.recur.extract, %middle.block ], [ %pre_load, %vector.memcheck ], [ %pre_load, %min.iters.checked ], [ %pre_load, %for.preheader ]
; CHECK: %scalar.recur.init = phi i32 [ %vector.recur.extract, %middle.block ], [ %pre_load, %vector.memcheck ], [ %pre_load, %for.preheader ]
; CHECK: scalar.body:
; CHECK: %scalar.recur = phi i32 [ %scalar.recur.init, %scalar.ph ], [ {{.*}}, %scalar.body ]
;
@@ -79,7 +79,7 @@ for.exit:
; CHECK: middle.block:
; CHECK: %vector.recur.extract = extractelement <4 x i32> [[L1]], i32 3
; CHECK: scalar.ph:
; CHECK: %scalar.recur.init = phi i32 [ %vector.recur.extract, %middle.block ], [ %.pre, %min.iters.checked ], [ %.pre, %for.preheader ]
; CHECK: %scalar.recur.init = phi i32 [ %vector.recur.extract, %middle.block ], [ %.pre, %for.preheader ]
; CHECK: scalar.body:
; CHECK: %scalar.recur = phi i32 [ %scalar.recur.init, %scalar.ph ], [ {{.*}}, %scalar.body ]
;
@@ -144,7 +144,7 @@ scalar.body:
; CHECK: middle.block:
; CHECK: %vector.recur.extract = extractelement <4 x i16> [[L1]], i32 3
; CHECK: scalar.ph:
; CHECK: %scalar.recur.init = phi i16 [ %vector.recur.extract, %middle.block ], [ %0, %vector.memcheck ], [ %0, %min.iters.checked ], [ %0, %for.preheader ]
; CHECK: %scalar.recur.init = phi i16 [ %vector.recur.extract, %middle.block ], [ %0, %vector.memcheck ], [ %0, %for.preheader ]
; CHECK: scalar.body:
; CHECK: %scalar.recur = phi i16 [ %scalar.recur.init, %scalar.ph ], [ {{.*}}, %scalar.body ]
;
@@ -288,7 +288,7 @@ for.cond.cleanup3:

; UNROLL-NO-IC-LABEL: @PR30183(
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> undef, i32 [[PRE_LOAD:%.*]], i32 3
; UNROLL-NO-IC: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> undef, i32 [[PRE_LOAD:%.*]], i32 3
; UNROLL-NO-IC-NEXT: br label %vector.body
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
@@ -15,7 +15,7 @@

; VEC4_INTERL1-LABEL: @fp_iv_loop1(
; VEC4_INTERL1: vector.ph:
; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float %init, i32 0
; VEC4_INTERL1: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float %init, i32 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> undef, float %fpinc, i32 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> undef, <4 x i32> zeroinitializer
@@ -37,7 +37,7 @@

; VEC4_INTERL2-LABEL: @fp_iv_loop1(
; VEC4_INTERL2: vector.ph:
; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float %init, i32 0
; VEC4_INTERL2: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float %init, i32 0
; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <4 x float> undef, float %fpinc, i32 0
; VEC4_INTERL2-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT3]], <4 x float> undef, <4 x i32> zeroinitializer
@@ -63,7 +63,7 @@

; VEC1_INTERL2-LABEL: @fp_iv_loop1(
; VEC1_INTERL2: vector.ph:
; VEC1_INTERL2-NEXT: br label %vector.body
; VEC1_INTERL2: br label %vector.body
; VEC1_INTERL2: vector.body:
; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
; VEC1_INTERL2-NEXT: [[INDUCTION2:%.*]] = or i64 [[INDEX]], 1
@@ -115,7 +115,7 @@ for.end: ; preds = %for.end.loopexit, %

; VEC4_INTERL1-LABEL: @fp_iv_loop2(
; VEC4_INTERL1: vector.ph:
; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float %init, i32 0
; VEC4_INTERL1: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float %init, i32 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: [[INDUCTION2:%.*]] = fadd fast <4 x float> [[DOTSPLAT]], <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float 1.500000e+00>
; VEC4_INTERL1-NEXT: br label %vector.body
@@ -172,7 +172,7 @@ for.end: ; preds = %for.end.loopexit, %
; VEC4_INTERL1: for.body.lr.ph:
; VEC4_INTERL1: [[TMP0:%.*]] = load float, float* @fp_inc, align 4
; VEC4_INTERL1: vector.ph:
; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float %init, i32 0
; VEC4_INTERL1: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float %init, i32 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <4 x float> undef, float [[TMP0]], i32 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT5]], <4 x float> undef, <4 x i32> zeroinitializer
@@ -250,7 +250,7 @@ for.end:

; VEC4_INTERL1-LABEL: @fp_iv_loop4(
; VEC4_INTERL1: vector.ph:
; VEC4_INTERL1-NEXT: br label %vector.body
; VEC4_INTERL1: br label %vector.body
; VEC4_INTERL1: vector.body:
; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
; VEC4_INTERL1-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ <float 1.000000e+00, float 1.500000e+00, float 2.000000e+00, float 2.500000e+00>, %vector.ph ], [ [[VEC_IND_NEXT:%.*]], %vector.body ]
@@ -289,7 +289,7 @@ for.end: ; preds = %for.end.loopexit, %

; VEC2_INTERL1_PRED_STORE-LABEL: @non_primary_iv_float_scalar(
; VEC2_INTERL1_PRED_STORE: vector.body:
; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ], [ 0, %min.iters.checked ]
; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ]
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = sitofp i64 [[INDEX]] to float
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* %A, i64 [[INDEX]]
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <2 x float>*
@@ -13,24 +13,21 @@ define i32 @foo(i32* nocapture %A, i32* nocapture %B, i32 %n) {
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[MIN_ITERS_CHECKED:%.*]]
; CHECK: min.iters.checked:
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[N]], 3
; CHECK-NEXT: [[N_MOD_VF:%.*]] = zext i32 [[TMP3]] to i64
; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[TMP2]], [[N_MOD_VF]]
; CHECK-NEXT: [[CMP_ZERO:%.*]] = icmp eq i64 [[N_VEC]], 0
; CHECK-NEXT: br i1 [[CMP_ZERO]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[N]], -1
; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP5]], 1
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP6]]
; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[N]], -1
; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP5]]
; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[TMP5]]
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i32* [[SCEVGEP4]], [[A]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[SCEVGEP]], [[B]]
; CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[N]], 3
; CHECK-NEXT: [[N_MOD_VF:%.*]] = zext i32 [[TMP6]] to i64
; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[TMP2]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -55,10 +52,10 @@ define i32 @foo(i32* nocapture %A, i32* nocapture %B, i32 %n) {
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !5
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], 0
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP6]], 0
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[MIN_ITERS_CHECKED]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END14:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
@@ -15,7 +15,7 @@
; CHECK: for.body.lr.ph:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @int_inc, align 4
; CHECK: vector.ph:
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 %init, i32 0
; CHECK: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 %init, i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> undef, i32 [[TMP0]], i32 0
; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> undef, <8 x i32> zeroinitializer
@@ -86,7 +86,7 @@ for.end: ; preds = %for.end.loopexit, %

; CHECK-LABEL: @induction_with_loop_inv(
; CHECK: vector.ph:
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 %x.011, i32 0
; CHECK: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 %x.011, i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> undef, i32 %j.012, i32 0
; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> undef, <8 x i32> zeroinitializer
@@ -501,13 +501,13 @@ define i32 @i16_loop() nounwind readnone ssp uwtable {
; condition and branch directly to the scalar loop.

; CHECK-LABEL: max_i32_backedgetaken
; CHECK: br i1 true, label %scalar.ph, label %min.iters.checked
; CHECK: br i1 true, label %scalar.ph, label %vector.ph

; CHECK: middle.block:
; CHECK: %[[v9:.+]] = extractelement <2 x i32> %bin.rdx, i32 0
; CHECK: scalar.ph:
; CHECK: %bc.resume.val = phi i32 [ 0, %middle.block ], [ 0, %[[v0:.+]] ]
; CHECK: %bc.merge.rdx = phi i32 [ 1, %[[v0:.+]] ], [ 1, %min.iters.checked ], [ %[[v9]], %middle.block ]
; CHECK: %bc.merge.rdx = phi i32 [ 1, %[[v0:.+]] ], [ %[[v9]], %middle.block ]

define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable {
@@ -9,7 +9,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
;
; CHECK-LABEL: @interleaved_with_cond_store_0(
;
; CHECK: min.iters.checked
; CHECK: vector.ph
; CHECK: %n.mod.vf = and i64 %[[N:.+]], 1
; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
; CHECK: %[[R:.+]] = select i1 %[[IsZero]], i64 2, i64 %n.mod.vf
@@ -58,7 +58,7 @@ for.end:
;
; CHECK-LABEL: @interleaved_with_cond_store_1(
;
; CHECK: min.iters.checked
; CHECK: vector.ph
; CHECK: %n.mod.vf = and i64 %[[N:.+]], 1
; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
; CHECK: %[[R:.+]] = select i1 %[[IsZero]], i64 2, i64 %n.mod.vf
@@ -117,7 +117,7 @@ for.end:
;
; CHECK-LABEL: @interleaved_with_cond_store_2(
;
; CHECK: min.iters.checked
; CHECK: vector.ph
; CHECK: %n.mod.vf = and i64 %[[N:.+]], 1
; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
; CHECK: %[[R:.+]] = select i1 %[[IsZero]], i64 2, i64 %n.mod.vf
@@ -338,7 +338,7 @@ for.body: ; preds = %for.body, %entry
; }

; CHECK-LABEL: @even_load_dynamic_tc(
; CHECK: min.iters.checked:
; CHECK: vector.ph:
; CHECK: %n.mod.vf = and i64 %[[N:[a-zA-Z0-9]+]], 3
; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
; CHECK: %[[R:[a-zA-Z0-9]+]] = select i1 %[[IsZero]], i64 4, i64 %n.mod.vf
@@ -579,7 +579,7 @@ for.body: ; preds = %for.body, %entry
; }

; CHECK-LABEL: @PR27626_0(
; CHECK: min.iters.checked:
; CHECK: vector.ph:
; CHECK: %n.mod.vf = and i64 %[[N:.+]], 3
; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
; CHECK: %[[R:[a-zA-Z0-9]+]] = select i1 %[[IsZero]], i64 4, i64 %n.mod.vf
@@ -627,7 +627,7 @@ for.end:
; }

; CHECK-LABEL: @PR27626_1(
; CHECK: min.iters.checked:
; CHECK: vector.ph:
; CHECK: %n.mod.vf = and i64 %[[N:.+]], 3
; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
; CHECK: %[[R:[a-zA-Z0-9]+]] = select i1 %[[IsZero]], i64 4, i64 %n.mod.vf
@@ -680,7 +680,7 @@ for.end:
; }

; CHECK-LABEL: @PR27626_2(
; CHECK: min.iters.checked:
; CHECK: vector.ph:
; CHECK: %n.mod.vf = and i64 %[[N:.+]], 3
; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
; CHECK: %[[R:[a-zA-Z0-9]+]] = select i1 %[[IsZero]], i64 4, i64 %n.mod.vf
@@ -728,7 +728,7 @@ for.end:
; }

; CHECK-LABEL: @PR27626_3(
; CHECK: min.iters.checked:
; CHECK: vector.ph:
; CHECK: %n.mod.vf = and i64 %[[N:.+]], 3
; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
; CHECK: %[[R:[a-zA-Z0-9]+]] = select i1 %[[IsZero]], i64 4, i64 %n.mod.vf
@@ -135,7 +135,7 @@ for.end:
}

; CHECK-LABEL: @PR30742
; CHECK: min.iters.checked
; CHECK: vector.ph
; CHECK: %[[N_MOD_VF:.+]] = urem i32 %[[T5:.+]], 2
; CHECK: %[[N_VEC:.+]] = sub i32 %[[T5]], %[[N_MOD_VF]]
; CHECK: middle.block
@@ -10,10 +10,10 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Generate min.iters.check to skip the vector loop and jump to scalar.ph directly when loop iteration number is less than VF * UF.
; CHECK-LABEL: foo(
; CHECK: %min.iters.check = icmp ult i64 %N, 4
; CHECK: br i1 %min.iters.check, label %scalar.ph, label %min.iters.checked
; CHECK: br i1 %min.iters.check, label %scalar.ph, label %vector.ph
; UNROLL-LABEL: foo(
; UNROLL: %min.iters.check = icmp ult i64 %N, 8
; UNROLL: br i1 %min.iters.check, label %scalar.ph, label %min.iters.checked
; UNROLL: br i1 %min.iters.check, label %scalar.ph, label %vector.ph

define void @foo(i64 %N) {
entry:
@@ -4,7 +4,6 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3

;CHECK-LABEL: @add_ints(
;CHECK: br
;CHECK: br
;CHECK: getelementptr
;CHECK-DAG: getelementptr
;CHECK-DAG: icmp ugt
@@ -10,7 +10,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3

;CHECK-LABEL: define i32 @foo
;CHECK: for.body.preheader:
;CHECK: br i1 %cmp.zero, label %scalar.ph, label %vector.memcheck, !dbg [[BODY_LOC:![0-9]+]]
;CHECK: br i1 %min.iters.check, label %scalar.ph, label %vector.memcheck, !dbg [[BODY_LOC:![0-9]+]]
;CHECK: vector.memcheck:
;CHECK: br i1 %memcheck.conflict, label %scalar.ph, label %vector.ph, !dbg [[BODY_LOC]]
;CHECK: load <4 x float>