mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
[LV] Fold-tail flag
This is the compiler-flag equivalent of the Predicate pragma (https://reviews.llvm.org/D65197); it directs the vectorizer to fold the remainder loop into the main loop using predication.
Differential Revision: https://reviews.llvm.org/D66108
Reviewers: Ayal, hsaito, fhahn, SjoerdMeije
llvm-svn: 368801
This commit is contained in:
parent
c2f2511312
commit
26369ba67b
@ -177,6 +177,14 @@ static cl::opt<unsigned> TinyTripCountVectorThreshold(
|
||||
"value are vectorized only if no scalar iteration overheads "
|
||||
"are incurred."));
|
||||
|
||||
// Indicates that an epilogue is undesired, predication is preferred.
|
||||
// This means that the vectorizer will try to fold the loop-tail (epilogue)
|
||||
// into the loop and predicate the loop body accordingly.
|
||||
static cl::opt<bool> PreferPredicateOverEpilog(
|
||||
"prefer-predicate-over-epilog", cl::init(false), cl::Hidden,
|
||||
cl::desc("Indicate that an epilogue is undesired, predication should be "
|
||||
"used instead."));
|
||||
|
||||
static cl::opt<bool> MaximizeBandwidth(
|
||||
"vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden,
|
||||
cl::desc("Maximize bandwidth when selecting vectorization factor which "
|
||||
@ -906,7 +914,7 @@ enum ScalarEpilogueLowering {
|
||||
CM_ScalarEpilogueNotAllowedLowTripLoop,
|
||||
|
||||
// Loop hint predicate indicating an epilogue is undesired.
|
||||
CM_ScalarEpilogueNotNeededPredicatePragma
|
||||
CM_ScalarEpilogueNotNeededUsePredicate
|
||||
};
|
||||
|
||||
/// LoopVectorizationCostModel - estimates the expected speedups due to
|
||||
@ -4804,9 +4812,9 @@ Optional<unsigned> LoopVectorizationCostModel::computeMaxVF() {
|
||||
switch (ScalarEpilogueStatus) {
|
||||
case CM_ScalarEpilogueAllowed:
|
||||
return computeFeasibleMaxVF(TC);
|
||||
case CM_ScalarEpilogueNotNeededPredicatePragma:
|
||||
case CM_ScalarEpilogueNotNeededUsePredicate:
|
||||
LLVM_DEBUG(
|
||||
dbgs() << "LV: vector predicate hint found.\n"
|
||||
dbgs() << "LV: vector predicate hint/switch found.\n"
|
||||
<< "LV: Not allowing scalar epilogue, creating predicated "
|
||||
<< "vector loop.\n");
|
||||
break;
|
||||
@ -7298,8 +7306,8 @@ getScalarEpilogueLowering(Function *F, Loop *L, LoopVectorizeHints &Hints,
|
||||
(F->hasOptSize() ||
|
||||
llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI)))
|
||||
SEL = CM_ScalarEpilogueNotAllowedOptSize;
|
||||
else if (Hints.getPredicate())
|
||||
SEL = CM_ScalarEpilogueNotNeededPredicatePragma;
|
||||
else if (PreferPredicateOverEpilog || Hints.getPredicate())
|
||||
SEL = CM_ScalarEpilogueNotNeededUsePredicate;
|
||||
|
||||
return SEL;
|
||||
}
|
||||
|
@ -1,4 +1,5 @@
|
||||
; RUN: opt < %s -loop-vectorize -S | FileCheck %s
|
||||
; RUN: opt < %s -loop-vectorize -prefer-predicate-over-epilog -S | FileCheck -check-prefix=PREDFLAG %s
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
@ -13,7 +14,15 @@ define dso_local void @tail_folding_enabled(i32* noalias nocapture %A, i32* noal
|
||||
; CHECK: %index.next = add i64 %index, 8
|
||||
; CHECK: %12 = icmp eq i64 %index.next, 432
|
||||
; CHECK: br i1 %12, label %middle.block, label %vector.body, !llvm.loop !0
|
||||
|
||||
; PREDFLAG-LABEL: tail_folding_enabled(
|
||||
; PREDFLAG: vector.body:
|
||||
; PREDFLAG: %wide.masked.load = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(
|
||||
; PREDFLAG: %wide.masked.load1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(
|
||||
; PREDFLAG: %8 = add nsw <8 x i32> %wide.masked.load1, %wide.masked.load
|
||||
; PREDFLAG: call void @llvm.masked.store.v8i32.p0v8i32(
|
||||
; PREDFLAG: %index.next = add i64 %index, 8
|
||||
; PREDFLAG: %12 = icmp eq i64 %index.next, 432
|
||||
; PREDFLAG: br i1 %12, label %middle.block, label %vector.body, !llvm.loop !0
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@ -40,6 +49,15 @@ define dso_local void @tail_folding_disabled(i32* noalias nocapture %A, i32* noa
|
||||
; CHECK-NOT: @llvm.masked.load.v8i32.p0v8i32(
|
||||
; CHECK-NOT: @llvm.masked.store.v8i32.p0v8i32(
|
||||
; CHECK: br i1 %44, label {{.*}}, label %vector.body
|
||||
; PREDFLAG-LABEL: tail_folding_disabled(
|
||||
; PREDFLAG: vector.body:
|
||||
; PREDFLAG: %wide.masked.load = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(
|
||||
; PREDFLAG: %wide.masked.load1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(
|
||||
; PREDFLAG: %8 = add nsw <8 x i32> %wide.masked.load1, %wide.masked.load
|
||||
; PREDFLAG: call void @llvm.masked.store.v8i32.p0v8i32(
|
||||
; PREDFLAG: %index.next = add i64 %index, 8
|
||||
; PREDFLAG: %12 = icmp eq i64 %index.next, 432
|
||||
; PREDFLAG: br i1 %12, label %middle.block, label %vector.body, !llvm.loop !4
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user