mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
Revert "[PGO][PGSO] Add profile guided size optimization to loop vectorization legality."
This reverts commit 30c382a7c6607a7d898730f8d288768110cdf1d2. See https://crbug.com/1106813.
This commit is contained in:
parent
571c882dda
commit
9627b7997d
@ -202,10 +202,9 @@ public:
|
|||||||
Function *F, std::function<const LoopAccessInfo &(Loop &)> *GetLAA,
|
Function *F, std::function<const LoopAccessInfo &(Loop &)> *GetLAA,
|
||||||
LoopInfo *LI, OptimizationRemarkEmitter *ORE,
|
LoopInfo *LI, OptimizationRemarkEmitter *ORE,
|
||||||
LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB,
|
LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB,
|
||||||
AssumptionCache *AC, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI)
|
AssumptionCache *AC)
|
||||||
: TheLoop(L), LI(LI), PSE(PSE), TTI(TTI), TLI(TLI), DT(DT),
|
: TheLoop(L), LI(LI), PSE(PSE), TTI(TTI), TLI(TLI), DT(DT),
|
||||||
GetLAA(GetLAA), ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC),
|
GetLAA(GetLAA), ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC) {}
|
||||||
BFI(BFI), PSI(PSI) {}
|
|
||||||
|
|
||||||
/// ReductionList contains the reduction descriptors for all
|
/// ReductionList contains the reduction descriptors for all
|
||||||
/// of the reductions that were found in the loop.
|
/// of the reductions that were found in the loop.
|
||||||
@ -479,10 +478,6 @@ private:
|
|||||||
/// Assume instructions in predicated blocks must be dropped if the CFG gets
|
/// Assume instructions in predicated blocks must be dropped if the CFG gets
|
||||||
/// flattened.
|
/// flattened.
|
||||||
SmallPtrSet<Instruction *, 8> ConditionalAssumes;
|
SmallPtrSet<Instruction *, 8> ConditionalAssumes;
|
||||||
|
|
||||||
/// BFI and PSI are used to check for profile guided size optimizations.
|
|
||||||
BlockFrequencyInfo *BFI;
|
|
||||||
ProfileSummaryInfo *PSI;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace llvm
|
} // namespace llvm
|
||||||
|
@ -20,7 +20,6 @@
|
|||||||
#include "llvm/Analysis/VectorUtils.h"
|
#include "llvm/Analysis/VectorUtils.h"
|
||||||
#include "llvm/IR/IntrinsicInst.h"
|
#include "llvm/IR/IntrinsicInst.h"
|
||||||
#include "llvm/IR/PatternMatch.h"
|
#include "llvm/IR/PatternMatch.h"
|
||||||
#include "llvm/Transforms/Utils/SizeOpts.h"
|
|
||||||
#include "llvm/Transforms/Vectorize/LoopVectorize.h"
|
#include "llvm/Transforms/Vectorize/LoopVectorize.h"
|
||||||
|
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
@ -413,11 +412,7 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
|
|||||||
const ValueToValueMap &Strides =
|
const ValueToValueMap &Strides =
|
||||||
getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
|
getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
|
||||||
|
|
||||||
Function *F = TheLoop->getHeader()->getParent();
|
bool CanAddPredicate = !TheLoop->getHeader()->getParent()->hasOptSize();
|
||||||
bool OptForSize = F->hasOptSize() ||
|
|
||||||
llvm::shouldOptimizeForSize(TheLoop->getHeader(), PSI, BFI,
|
|
||||||
PGSOQueryType::IRPass);
|
|
||||||
bool CanAddPredicate = !OptForSize;
|
|
||||||
int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false);
|
int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false);
|
||||||
if (Stride == 1 || Stride == -1)
|
if (Stride == 1 || Stride == -1)
|
||||||
return Stride;
|
return Stride;
|
||||||
|
@ -395,13 +395,11 @@ public:
|
|||||||
const TargetTransformInfo *TTI, AssumptionCache *AC,
|
const TargetTransformInfo *TTI, AssumptionCache *AC,
|
||||||
OptimizationRemarkEmitter *ORE, unsigned VecWidth,
|
OptimizationRemarkEmitter *ORE, unsigned VecWidth,
|
||||||
unsigned UnrollFactor, LoopVectorizationLegality *LVL,
|
unsigned UnrollFactor, LoopVectorizationLegality *LVL,
|
||||||
LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI,
|
LoopVectorizationCostModel *CM)
|
||||||
ProfileSummaryInfo *PSI)
|
|
||||||
: OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI),
|
: OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI),
|
||||||
AC(AC), ORE(ORE), VF(VecWidth), UF(UnrollFactor),
|
AC(AC), ORE(ORE), VF(VecWidth), UF(UnrollFactor),
|
||||||
Builder(PSE.getSE()->getContext()),
|
Builder(PSE.getSE()->getContext()),
|
||||||
VectorLoopValueMap(UnrollFactor, VecWidth), Legal(LVL), Cost(CM),
|
VectorLoopValueMap(UnrollFactor, VecWidth), Legal(LVL), Cost(CM) {}
|
||||||
BFI(BFI), PSI(PSI) {}
|
|
||||||
virtual ~InnerLoopVectorizer() = default;
|
virtual ~InnerLoopVectorizer() = default;
|
||||||
|
|
||||||
/// Create a new empty loop. Unlink the old loop and connect the new one.
|
/// Create a new empty loop. Unlink the old loop and connect the new one.
|
||||||
@ -781,10 +779,6 @@ protected:
|
|||||||
// Vector of original scalar PHIs whose corresponding widened PHIs need to be
|
// Vector of original scalar PHIs whose corresponding widened PHIs need to be
|
||||||
// fixed up at the end of vector code generation.
|
// fixed up at the end of vector code generation.
|
||||||
SmallVector<PHINode *, 8> OrigPHIsToFix;
|
SmallVector<PHINode *, 8> OrigPHIsToFix;
|
||||||
|
|
||||||
/// BFI and PSI are used to check for profile guided size optimizations.
|
|
||||||
BlockFrequencyInfo *BFI;
|
|
||||||
ProfileSummaryInfo *PSI;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class InnerLoopUnroller : public InnerLoopVectorizer {
|
class InnerLoopUnroller : public InnerLoopVectorizer {
|
||||||
@ -795,10 +789,9 @@ public:
|
|||||||
const TargetTransformInfo *TTI, AssumptionCache *AC,
|
const TargetTransformInfo *TTI, AssumptionCache *AC,
|
||||||
OptimizationRemarkEmitter *ORE, unsigned UnrollFactor,
|
OptimizationRemarkEmitter *ORE, unsigned UnrollFactor,
|
||||||
LoopVectorizationLegality *LVL,
|
LoopVectorizationLegality *LVL,
|
||||||
LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI,
|
LoopVectorizationCostModel *CM)
|
||||||
ProfileSummaryInfo *PSI)
|
|
||||||
: InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE, 1,
|
: InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE, 1,
|
||||||
UnrollFactor, LVL, CM, BFI, PSI) {}
|
UnrollFactor, LVL, CM) {}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Value *getBroadcastInstrs(Value *V) override;
|
Value *getBroadcastInstrs(Value *V) override;
|
||||||
@ -2761,9 +2754,7 @@ void InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) {
|
|||||||
if (C->isZero())
|
if (C->isZero())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
assert(!(SCEVCheckBlock->getParent()->hasOptSize() ||
|
assert(!SCEVCheckBlock->getParent()->hasOptSize() &&
|
||||||
llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI,
|
|
||||||
PGSOQueryType::IRPass)) &&
|
|
||||||
"Cannot SCEV check stride or overflow when optimizing for size");
|
"Cannot SCEV check stride or overflow when optimizing for size");
|
||||||
|
|
||||||
SCEVCheckBlock->setName("vector.scevcheck");
|
SCEVCheckBlock->setName("vector.scevcheck");
|
||||||
@ -2809,9 +2800,7 @@ void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) {
|
|||||||
assert(MemRuntimeCheck && "no RT checks generated although RtPtrChecking "
|
assert(MemRuntimeCheck && "no RT checks generated although RtPtrChecking "
|
||||||
"claimed checks are required");
|
"claimed checks are required");
|
||||||
|
|
||||||
if (MemCheckBlock->getParent()->hasOptSize() ||
|
if (MemCheckBlock->getParent()->hasOptSize()) {
|
||||||
llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI,
|
|
||||||
PGSOQueryType::IRPass)) {
|
|
||||||
assert(Cost->Hints->getForce() == LoopVectorizeHints::FK_Enabled &&
|
assert(Cost->Hints->getForce() == LoopVectorizeHints::FK_Enabled &&
|
||||||
"Cannot emit memory checks when optimizing for size, unless forced "
|
"Cannot emit memory checks when optimizing for size, unless forced "
|
||||||
"to vectorize.");
|
"to vectorize.");
|
||||||
@ -7740,7 +7729,7 @@ static bool processLoopInVPlanNativePath(
|
|||||||
LVP.setBestPlan(VF.Width, 1);
|
LVP.setBestPlan(VF.Width, 1);
|
||||||
|
|
||||||
InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, 1, LVL,
|
InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, 1, LVL,
|
||||||
&CM, BFI, PSI);
|
&CM);
|
||||||
LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \""
|
LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \""
|
||||||
<< L->getHeader()->getParent()->getName() << "\"\n");
|
<< L->getHeader()->getParent()->getName() << "\"\n");
|
||||||
LVP.executePlan(LB, DT);
|
LVP.executePlan(LB, DT);
|
||||||
@ -7804,7 +7793,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
|
|||||||
// Check if it is legal to vectorize the loop.
|
// Check if it is legal to vectorize the loop.
|
||||||
LoopVectorizationRequirements Requirements(*ORE);
|
LoopVectorizationRequirements Requirements(*ORE);
|
||||||
LoopVectorizationLegality LVL(L, PSE, DT, TTI, TLI, AA, F, GetLAA, LI, ORE,
|
LoopVectorizationLegality LVL(L, PSE, DT, TTI, TLI, AA, F, GetLAA, LI, ORE,
|
||||||
&Requirements, &Hints, DB, AC, BFI, PSI);
|
&Requirements, &Hints, DB, AC);
|
||||||
if (!LVL.canVectorize(EnableVPlanNativePath)) {
|
if (!LVL.canVectorize(EnableVPlanNativePath)) {
|
||||||
LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
|
LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
|
||||||
Hints.emitRemarkWithHints();
|
Hints.emitRemarkWithHints();
|
||||||
@ -8004,8 +7993,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
|
|||||||
assert(IC > 1 && "interleave count should not be 1 or 0");
|
assert(IC > 1 && "interleave count should not be 1 or 0");
|
||||||
// If we decided that it is not legal to vectorize the loop, then
|
// If we decided that it is not legal to vectorize the loop, then
|
||||||
// interleave it.
|
// interleave it.
|
||||||
InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL, &CM,
|
InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL,
|
||||||
BFI, PSI);
|
&CM);
|
||||||
LVP.executePlan(Unroller, DT);
|
LVP.executePlan(Unroller, DT);
|
||||||
|
|
||||||
ORE->emit([&]() {
|
ORE->emit([&]() {
|
||||||
@ -8017,7 +8006,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
|
|||||||
} else {
|
} else {
|
||||||
// If we decided that it is *legal* to vectorize the loop, then do it.
|
// If we decided that it is *legal* to vectorize the loop, then do it.
|
||||||
InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, IC,
|
InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, IC,
|
||||||
&LVL, &CM, BFI, PSI);
|
&LVL, &CM);
|
||||||
LVP.executePlan(LB, DT);
|
LVP.executePlan(LB, DT);
|
||||||
++LoopsVectorized;
|
++LoopsVectorized;
|
||||||
|
|
||||||
|
@ -121,38 +121,6 @@ for.body29:
|
|||||||
br i1 %cmp26, label %for.body29, label %for.cond.cleanup28
|
br i1 %cmp26, label %for.body29, label %for.cond.cleanup28
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @pr43371_pgso() !prof !14 {
|
|
||||||
;
|
|
||||||
; CHECK-LABEL: @pr43371_pgso
|
|
||||||
; CHECK-NOT: vector.scevcheck
|
|
||||||
;
|
|
||||||
; We do not want to generate SCEV predicates when optimising for size, because
|
|
||||||
; that will lead to extra code generation such as the SCEV overflow runtime
|
|
||||||
; checks. Not generating SCEV predicates can still result in vectorisation as
|
|
||||||
; the non-consecutive loads/stores can be scalarized:
|
|
||||||
;
|
|
||||||
; CHECK: vector.body:
|
|
||||||
; CHECK: store i16 0, i16* %{{.*}}, align 1
|
|
||||||
; CHECK: store i16 0, i16* %{{.*}}, align 1
|
|
||||||
; CHECK: br i1 {{.*}}, label %vector.body
|
|
||||||
;
|
|
||||||
entry:
|
|
||||||
br label %for.body29
|
|
||||||
|
|
||||||
for.cond.cleanup28:
|
|
||||||
unreachable
|
|
||||||
|
|
||||||
for.body29:
|
|
||||||
%i24.0170 = phi i16 [ 0, %entry], [ %inc37, %for.body29]
|
|
||||||
%add33 = add i16 undef, %i24.0170
|
|
||||||
%idxprom34 = zext i16 %add33 to i32
|
|
||||||
%arrayidx35 = getelementptr [2592 x i16], [2592 x i16] * @cm_array, i32 0, i32 %idxprom34
|
|
||||||
store i16 0, i16 * %arrayidx35, align 1
|
|
||||||
%inc37 = add i16 %i24.0170, 1
|
|
||||||
%cmp26 = icmp ult i16 %inc37, 756
|
|
||||||
br i1 %cmp26, label %for.body29, label %for.cond.cleanup28
|
|
||||||
}
|
|
||||||
|
|
||||||
; PR45526: don't vectorize with fold-tail if first-order-recurrence is live-out.
|
; PR45526: don't vectorize with fold-tail if first-order-recurrence is live-out.
|
||||||
;
|
;
|
||||||
define i32 @pr45526() optsize {
|
define i32 @pr45526() optsize {
|
||||||
@ -186,37 +154,6 @@ exit:
|
|||||||
ret i32 %for
|
ret i32 %for
|
||||||
}
|
}
|
||||||
|
|
||||||
define i32 @pr45526_pgso() !prof !14 {
|
|
||||||
;
|
|
||||||
; CHECK-LABEL: @pr45526_pgso
|
|
||||||
; CHECK-NEXT: entry:
|
|
||||||
; CHECK-NEXT: br label %loop
|
|
||||||
; CHECK-EMPTY:
|
|
||||||
; CHECK-NEXT: loop:
|
|
||||||
; CHECK-NEXT: %piv = phi i32 [ 0, %entry ], [ %pivPlus1, %loop ]
|
|
||||||
; CHECK-NEXT: %for = phi i32 [ 5, %entry ], [ %pivPlus1, %loop ]
|
|
||||||
; CHECK-NEXT: %pivPlus1 = add nuw nsw i32 %piv, 1
|
|
||||||
; CHECK-NEXT: %cond = icmp ult i32 %piv, 510
|
|
||||||
; CHECK-NEXT: br i1 %cond, label %loop, label %exit
|
|
||||||
; CHECK-EMPTY:
|
|
||||||
; CHECK-NEXT: exit:
|
|
||||||
; CHECK-NEXT: %for.lcssa = phi i32 [ %for, %loop ]
|
|
||||||
; CHECK-NEXT: ret i32 %for.lcssa
|
|
||||||
;
|
|
||||||
entry:
|
|
||||||
br label %loop
|
|
||||||
|
|
||||||
loop:
|
|
||||||
%piv = phi i32 [ 0, %entry ], [ %pivPlus1, %loop ]
|
|
||||||
%for = phi i32 [ 5, %entry ], [ %pivPlus1, %loop ]
|
|
||||||
%pivPlus1 = add nuw nsw i32 %piv, 1
|
|
||||||
%cond = icmp ult i32 %piv, 510
|
|
||||||
br i1 %cond, label %loop, label %exit
|
|
||||||
|
|
||||||
exit:
|
|
||||||
ret i32 %for
|
|
||||||
}
|
|
||||||
|
|
||||||
; PR46228: Vectorize w/o versioning for unit stride under optsize and enabled
|
; PR46228: Vectorize w/o versioning for unit stride under optsize and enabled
|
||||||
; vectorization.
|
; vectorization.
|
||||||
|
|
||||||
@ -253,7 +190,7 @@ define void @stride1(i16* noalias %B, i32 %BStride) optsize {
|
|||||||
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2
|
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2
|
||||||
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
|
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
|
||||||
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1026
|
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1026
|
||||||
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !21
|
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !19
|
||||||
; CHECK: middle.block:
|
; CHECK: middle.block:
|
||||||
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
|
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
|
||||||
; CHECK: scalar.ph:
|
; CHECK: scalar.ph:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user