1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 19:12:56 +02:00

[SLP] Added more missed optimization remarks

Summary:
Added more remarks to the SLP pass, in particular "missed" optimization remarks.
Also added several tests for the new functionality.

Patch by Vladimir Miloserdov!

For reference you may look at: https://reviews.llvm.org/rL302811

Reviewers: anemet, fhahn

Reviewed By: anemet

Subscribers: javed.absar, lattner, petecoup, yakush, llvm-commits

Differential Revision: https://reviews.llvm.org/D38367

llvm-svn: 318307
This commit is contained in:
Adam Nemet 2017-11-15 17:04:53 +00:00
parent 71ec02692c
commit 01d159b6a1
7 changed files with 303 additions and 21 deletions

View File

@ -4452,19 +4452,51 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
unsigned Sz = R.getVectorElementSize(I0);
unsigned MinVF = std::max(2U, R.getMinVecRegSize() / Sz);
unsigned MaxVF = std::max<unsigned>(PowerOf2Floor(VL.size()), MinVF);
if (MaxVF < 2)
return false;
if (MaxVF < 2) {
R.getORE()->emit([&]() {
return OptimizationRemarkMissed(
SV_NAME, "SmallVF", I0)
<< "Cannot SLP vectorize list: vectorization factor "
<< "less than 2 is not supported";
});
return false;
}
for (Value *V : VL) {
Type *Ty = V->getType();
if (!isValidElementType(Ty))
if (!isValidElementType(Ty)) {
// NOTE: the following will give the user the internal LLVM type name, which may not be useful
R.getORE()->emit([&]() {
std::string type_str;
llvm::raw_string_ostream rso(type_str);
Ty->print(rso);
return OptimizationRemarkMissed(
SV_NAME, "UnsupportedType", I0)
<< "Cannot SLP vectorize list: type "
<< rso.str() + " is unsupported by vectorizer";
});
return false;
}
Instruction *Inst = dyn_cast<Instruction>(V);
if (!Inst || Inst->getOpcode() != Opcode0)
if (!Inst)
return false;
if (Inst->getOpcode() != Opcode0) {
R.getORE()->emit([&]() {
return OptimizationRemarkMissed(
SV_NAME, "InequableTypes", I0)
<< "Cannot SLP vectorize list: not all of the "
<< "parts of scalar instructions are of the same type: "
<< ore::NV("Instruction1Opcode", I0) << " and "
<< ore::NV("Instruction2Opcode", Inst);
});
return false;
}
}
bool Changed = false;
bool CandidateFound = false;
int MinCost = SLPCostThreshold;
// Keep track of values that were deleted by vectorizing in the loop below.
SmallVector<WeakTrackingVH, 8> TrackValues(VL.begin(), VL.end());
@ -4518,14 +4550,16 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
R.computeMinimumValueSizes();
int Cost = R.getTreeCost();
CandidateFound = true;
MinCost = std::min(MinCost, Cost);
if (Cost < -SLPCostThreshold) {
DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");
R.getORE()->emit(OptimizationRemark(SV_NAME, "VectorizedList",
cast<Instruction>(Ops[0]))
<< "SLP vectorized with cost " << ore::NV("Cost", Cost)
<< " and with tree size "
<< ore::NV("TreeSize", R.getTreeSize()));
cast<Instruction>(Ops[0]))
<< "SLP vectorized with cost " << ore::NV("Cost", Cost)
<< " and with tree size "
<< ore::NV("TreeSize", R.getTreeSize()));
Value *VectorizedRoot = R.vectorizeTree();
@ -4560,6 +4594,22 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
}
}
if (!Changed && CandidateFound) {
R.getORE()->emit([&]() {
return OptimizationRemarkMissed(
SV_NAME, "NotBeneficial", I0)
<< "List vectorization was possible but not beneficial with cost "
<< ore::NV("Cost", MinCost) << " >= "
<< ore::NV("Treshold", -SLPCostThreshold);
});
} else if (!Changed) {
R.getORE()->emit([&]() {
return OptimizationRemarkMissed(
SV_NAME, "NotPossible", I0)
<< "Cannot SLP vectorize list: vectorization was impossible"
<< " with available vectorization factors";
});
}
return Changed;
}
@ -5268,17 +5318,27 @@ public:
// Estimate cost.
int Cost =
V.getTreeCost() + getReductionCost(TTI, ReducedVals[i], ReduxWidth);
if (Cost >= -SLPCostThreshold)
break;
if (Cost >= -SLPCostThreshold) {
V.getORE()->emit([&]() {
return OptimizationRemarkMissed(
SV_NAME, "HorSLPNotBeneficial", cast<Instruction>(VL[0]))
<< "Vectorizing horizontal reduction is possible"
<< "but not beneficial with cost "
<< ore::NV("Cost", Cost) << " and threshold "
<< ore::NV("Threshold", -SLPCostThreshold);
});
break;
}
DEBUG(dbgs() << "SLP: Vectorizing horizontal reduction at cost:" << Cost
<< ". (HorRdx)\n");
auto *I0 = cast<Instruction>(VL[0]);
V.getORE()->emit(
OptimizationRemark(SV_NAME, "VectorizedHorizontalReduction", I0)
V.getORE()->emit([&]() {
return OptimizationRemark(
SV_NAME, "VectorizedHorizontalReduction", cast<Instruction>(VL[0]))
<< "Vectorized horizontal reduction with cost "
<< ore::NV("Cost", Cost) << " and with tree size "
<< ore::NV("TreeSize", V.getTreeSize()));
<< ore::NV("TreeSize", V.getTreeSize());
});
// Vectorize a tree.
DebugLoc Loc = cast<Instruction>(ReducedVals[i])->getDebugLoc();

View File

@ -28,7 +28,8 @@ target triple = "aarch64--linux-gnu"
; CHECK: [[X:%[a-zA-Z0-9.]+]] = extractelement <4 x i32> [[A]]
; CHECK: sext i32 [[X]] to i64
; YAML: Pass: slp-vectorizer
; YAML: --- !Passed
; YAML-NEXT: Pass: slp-vectorizer
; YAML-NEXT: Name: VectorizedList
; YAML-NEXT: Function: getelementptr_4x32
; YAML-NEXT: Args:
@ -37,7 +38,8 @@ target triple = "aarch64--linux-gnu"
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '5'
; YAML: Pass: slp-vectorizer
; YAML: --- !Passed
; YAML-NEXT: Pass: slp-vectorizer
; YAML-NEXT: Name: VectorizedList
; YAML-NEXT: Function: getelementptr_4x32
; YAML-NEXT: Args:
@ -92,7 +94,8 @@ for.body:
; CHECK: [[X:%[a-zA-Z0-9.]+]] = extractelement <2 x i32> [[A]]
; CHECK: sext i32 [[X]] to i64
; YAML: Pass: slp-vectorizer
; YAML: --- !Passed
; YAML-NEXT: Pass: slp-vectorizer
; YAML-NEXT: Name: VectorizedList
; YAML-NEXT: Function: getelementptr_2x32
; YAML-NEXT: Args:
@ -101,7 +104,8 @@ for.body:
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '5'
; YAML: Pass: slp-vectorizer
; YAML: --- !Passed
; YAML-NEXT: Pass: slp-vectorizer
; YAML-NEXT: Name: VectorizedList
; YAML-NEXT: Function: getelementptr_2x32
; YAML-NEXT: Args:

View File

@ -15,7 +15,8 @@ target triple = "aarch64--linux"
; CHECK: load <4 x i32>
; CHECK: select <4 x i1>
; YAML: Pass: slp-vectorizer
; YAML: --- !Passed
; YAML-NEXT: Pass: slp-vectorizer
; YAML-NEXT: Name: VectorizedHorizontalReduction
; YAML-NEXT: Function: test_select
; YAML-NEXT: Args:
@ -108,7 +109,8 @@ define i32 @reduction_with_br(i32* noalias nocapture readonly %blk1, i32* noalia
; CHECK: load <4 x i32>
; CHECK: mul nsw <4 x i32>
; YAML: Pass: slp-vectorizer
; YAML: --- !Passed
; YAML-NEXT: Pass: slp-vectorizer
; YAML-NEXT: Name: VectorizedHorizontalReduction
; YAML-NEXT: Function: reduction_with_br
; YAML-NEXT: Args:
@ -175,7 +177,8 @@ for.end: ; preds = %for.end.loopexit, %
; CHECK: load <8 x i8>
; CHECK: select <8 x i1>
; YAML: Pass: slp-vectorizer
; YAML: --- !Passed
; YAML-NEXT: Pass: slp-vectorizer
; YAML-NEXT: Name: VectorizedHorizontalReduction
; YAML-NEXT: Function: test_unrolled_select
; YAML-NEXT: Args:

View File

@ -0,0 +1,78 @@
; RUN: opt -S -mtriple=x86_64-pc-linux-gnu -mcpu=generic -slp-vectorizer -pass-remarks-output=%t < %s | FileCheck %s
; RUN: FileCheck --input-file=%t --check-prefix=YAML %s
; Test input for the horizontal-reduction "missed" remark.
; Each of the 8 loop iterations loads four pairs of i32 values from %diff
; (elements k and k+4 of the row starting at index iv*8, for k = 0..3), adds
; each pair, stores the four sums into row iv of the local array %m2, and also
; folds the four sums into a running scalar total carried by the %a.088 phi.
; The function returns that total.
define i32 @foo(i32* %diff) #0 {
entry:
%m2 = alloca [8 x [8 x i32]], align 16
%0 = bitcast [8 x [8 x i32]]* %m2 to i8*
br label %for.body
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%a.088 = phi i32 [ 0, %entry ], [ %add52, %for.body ]
; %1 = iv * 8 is the base index of the current row in %diff.
%1 = shl i64 %indvars.iv, 3
; Pair 0: diff[base + 4] + diff[base + 0] -> m2[iv][0]
%arrayidx = getelementptr inbounds i32, i32* %diff, i64 %1
%2 = load i32, i32* %arrayidx, align 4
%3 = or i64 %1, 4
%arrayidx2 = getelementptr inbounds i32, i32* %diff, i64 %3
%4 = load i32, i32* %arrayidx2, align 4
%add3 = add nsw i32 %4, %2
%arrayidx6 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 0
store i32 %add3, i32* %arrayidx6, align 16
%add10 = add nsw i32 %add3, %a.088
; Pair 1: diff[base + 5] + diff[base + 1] -> m2[iv][1]
%5 = or i64 %1, 1
%arrayidx13 = getelementptr inbounds i32, i32* %diff, i64 %5
%6 = load i32, i32* %arrayidx13, align 4
%7 = or i64 %1, 5
%arrayidx16 = getelementptr inbounds i32, i32* %diff, i64 %7
%8 = load i32, i32* %arrayidx16, align 4
%add17 = add nsw i32 %8, %6
%arrayidx20 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 1
store i32 %add17, i32* %arrayidx20, align 4
%add24 = add nsw i32 %add10, %add17
; Pair 2: diff[base + 6] + diff[base + 2] -> m2[iv][2]
%9 = or i64 %1, 2
%arrayidx27 = getelementptr inbounds i32, i32* %diff, i64 %9
%10 = load i32, i32* %arrayidx27, align 4
%11 = or i64 %1, 6
%arrayidx30 = getelementptr inbounds i32, i32* %diff, i64 %11
%12 = load i32, i32* %arrayidx30, align 4
%add31 = add nsw i32 %12, %10
%arrayidx34 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 2
store i32 %add31, i32* %arrayidx34, align 8
%add38 = add nsw i32 %add24, %add31
; Pair 3: diff[base + 7] + diff[base + 3] -> m2[iv][3]
%13 = or i64 %1, 3
%arrayidx41 = getelementptr inbounds i32, i32* %diff, i64 %13
%14 = load i32, i32* %arrayidx41, align 4
%15 = or i64 %1, 7
%arrayidx44 = getelementptr inbounds i32, i32* %diff, i64 %15
%16 = load i32, i32* %arrayidx44, align 4
%add45 = add nsw i32 %16, %14
%arrayidx48 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 3
store i32 %add45, i32* %arrayidx48, align 4
%add52 = add nsw i32 %add38, %add45
; Output IR expectation: one vector add appears, and no further vector add
; follows it.
; CHECK: add nsw <{{[0-9]+}} x i32>
; CHECK-NOT: add nsw <{{[0-9]+}} x i32>
; Remarks-file expectation: a "Missed" remark named HorSLPNotBeneficial for
; function foo, reporting cost 1 against threshold 0. The two adjacent String
; entries mirror the two string pieces streamed by the pass.
; YAML: --- !Missed
; YAML-NEXT: Pass: slp-vectorizer
; YAML-NEXT: Name: HorSLPNotBeneficial
; YAML-NEXT: Function: foo
; YAML-NEXT: Args:
; YAML-NEXT: - String: Vectorizing horizontal reduction is possible
; YAML-NEXT: - String: 'but not beneficial with cost '
; YAML-NEXT: - Cost: '1'
; YAML-NEXT: - String: ' and threshold '
; YAML-NEXT: - Threshold: '0'
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 8
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
ret i32 %add52
}

View File

@ -0,0 +1,43 @@
; RUN: opt -S -mtriple=x86_64-pc-linux-gnu -mcpu=generic -slp-vectorizer -pass-remarks-output=%t < %s | FileCheck %s
; RUN: FileCheck --input-file=%t --check-prefix=YAML %s
; Test input for the list-vectorization "NotBeneficial" remark.
; The loop runs until the counter %8 reaches 64. Each iteration loads one i32
; through each of the two input pointers, subtracts the second from the first,
; stores the difference through the output pointer, and advances all three
; pointers by one element.
define void @vsub2_test(i32* %pin1, i32* %pin2, i32* %pout) #0 {
br label %1
; Loop block; the branches and phis refer to it as %1, and to the entry block
; as %0.
%idx.04 = phi i32 [ 0, %0 ], [ %8, %1 ]
%po.03 = phi i32* [ %pout, %0 ], [ %7, %1 ]
%ptmpi2.02 = phi i32* [ %pin2, %0 ], [ %4, %1 ]
%ptmpi1.01 = phi i32* [ %pin1, %0 ], [ %2, %1 ]
%2 = getelementptr inbounds i32, i32* %ptmpi1.01, i64 1
%3 = load i32, i32* %ptmpi1.01, align 4, !tbaa !1
%4 = getelementptr inbounds i32, i32* %ptmpi2.02, i64 1
%5 = load i32, i32* %ptmpi2.02, align 4, !tbaa !1
%6 = sub nsw i32 %3, %5
%7 = getelementptr inbounds i32, i32* %po.03, i64 1
; Output IR expectation: no vector-of-i32 type appears.
; CHECK-NOT: <{{[0-9]+}} x i32>
; Remarks-file expectation: a NotBeneficial remark for vsub2_test reporting
; cost 0 against threshold 0. The argument key is spelled 'Treshold' because
; that is the key the pass emits; it must match the pass output exactly.
; YAML: Pass: slp-vectorizer
; YAML-NEXT: Name: NotBeneficial
; YAML-NEXT: Function: vsub2_test
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'List vectorization was possible but not beneficial with cost '
; YAML-NEXT: - Cost: '0'
; YAML-NEXT: - String: ' >= '
; YAML-NEXT: - Treshold: '0'
store i32 %6, i32* %po.03, align 4, !tbaa !1
%8 = add nuw nsw i32 %idx.04, 1
%exitcond = icmp eq i32 %8, 64
br i1 %exitcond, label %9, label %1, !llvm.loop !5
; Exit block; the loop branch refers to it as %9.
ret void
}
!llvm.ident = !{!0}
!0 = !{!"clang version 3.8.0-2ubuntu4 (tags/RELEASE_380/final)"}
!1 = !{!2, !2, i64 0}
!2 = !{!"int", !3, i64 0}
!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C/C++ TBAA"}
!5 = distinct !{!5, !6, !7}
!6 = !{!"llvm.loop.vectorize.width", i32 1}
!7 = !{!"llvm.loop.interleave.count", i32 1}

View File

@ -0,0 +1,60 @@
; RUN: opt -S -mtriple=x86_64-pc-linux-gnu -mcpu=generic -slp-vectorizer -pass-remarks-output=%t < %s | FileCheck %s
; RUN: FileCheck --input-file=%t --check-prefix=YAML %s
; Test input for the "InequableTypes" and "NotPossible" missed remarks.
; Each of the 8 loop iterations loads two pairs of i32 values from %diff
; (elements k and k+4 of the row starting at index iv*8, for k = 0..1), adds
; each pair, stores the two sums into row iv of the local array %m2, and folds
; both sums into a running scalar total carried by the %a.088 phi. The function
; returns that total.
define i32 @foo(i32* nocapture readonly %diff) #0 {
entry:
%m2 = alloca [8 x [8 x i32]], align 16
%0 = bitcast [8 x [8 x i32]]* %m2 to i8*
br label %for.body
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%a.088 = phi i32 [ 0, %entry ], [ %add24, %for.body ]
; %1 = iv * 8 is the base index of the current row in %diff.
%1 = shl i64 %indvars.iv, 3
; Pair 0: diff[base + 4] + diff[base + 0] -> m2[iv][0]
%arrayidx = getelementptr inbounds i32, i32* %diff, i64 %1
%2 = load i32, i32* %arrayidx, align 4
%3 = or i64 %1, 4
%arrayidx2 = getelementptr inbounds i32, i32* %diff, i64 %3
%4 = load i32, i32* %arrayidx2, align 4
%add3 = add nsw i32 %4, %2
%arrayidx6 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 0
store i32 %add3, i32* %arrayidx6, align 16
%add10 = add nsw i32 %add3, %a.088
; Pair 1: diff[base + 5] + diff[base + 1] -> m2[iv][1]
%5 = or i64 %1, 1
%arrayidx13 = getelementptr inbounds i32, i32* %diff, i64 %5
%6 = load i32, i32* %arrayidx13, align 4
%7 = or i64 %1, 5
%arrayidx16 = getelementptr inbounds i32, i32* %diff, i64 %7
%8 = load i32, i32* %arrayidx16, align 4
%add17 = add nsw i32 %8, %6
%arrayidx20 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 1
store i32 %add17, i32* %arrayidx20, align 4
%add24 = add nsw i32 %add10, %add17
; Output IR expectation: no vector add appears.
; CHECK-NOT: add nsw <{{[0-9]+}} x i32>
; Remarks-file expectation 1: an InequableTypes remark for foo whose two
; instruction arguments are reported as an add and a phi.
; YAML: Pass: slp-vectorizer
; YAML-NEXT: Name: InequableTypes
; YAML-NEXT: Function: foo
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Cannot SLP vectorize list: not all of the '
; YAML-NEXT: - String: 'parts of scalar instructions are of the same type: '
; YAML-NEXT: - Instruction1Opcode: add
; YAML-NEXT: - String: ' and '
; YAML-NEXT: - Instruction2Opcode: phi
; Remarks-file expectation 2: a later NotPossible remark for foo.
; YAML: Pass: slp-vectorizer
; YAML-NEXT: Name: NotPossible
; YAML-NEXT: Function: foo
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Cannot SLP vectorize list: vectorization was impossible'
; YAML-NEXT: - String: ' with available vectorization factors'
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 8
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
; %arraydecay is computed but not used by any instruction in this function.
%arraydecay = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 0
ret i32 %add24
}

View File

@ -0,0 +1,34 @@
; RUN: opt -S -mtriple=x86_64-pc-linux-gnu -mcpu=generic -slp-vectorizer -pass-remarks-output=%t < %s | FileCheck %s
; RUN: FileCheck --input-file=%t --check-prefix=YAML %s
; Test input for the "UnsupportedType" missed remark.
; This type (x86_fp80) is not supported by SLP.
; The function loads two consecutive x86_fp80 values from %i1, optionally (on
; an undef condition) loads two consecutive values from %i2 in the 'then'
; block, merges each pair with a phi in 'end', and stores the two merged values
; to consecutive slots of %o.
define void @test(x86_fp80* %i1, x86_fp80* %i2, x86_fp80* %o) {
entry:
%i1.0 = load x86_fp80, x86_fp80* %i1, align 16
%i1.gep1 = getelementptr x86_fp80, x86_fp80* %i1, i64 1
%i1.1 = load x86_fp80, x86_fp80* %i1.gep1, align 16
br i1 undef, label %then, label %end
then:
%i2.gep0 = getelementptr inbounds x86_fp80, x86_fp80* %i2, i64 0
%i2.0 = load x86_fp80, x86_fp80* %i2.gep0, align 16
%i2.gep1 = getelementptr inbounds x86_fp80, x86_fp80* %i2, i64 1
%i2.1 = load x86_fp80, x86_fp80* %i2.gep1, align 16
br label %end
end:
; The two phis select between the %i1 loads (from entry) and the %i2 loads
; (from then).
%phi0 = phi x86_fp80 [ %i1.0, %entry ], [ %i2.0, %then ]
%phi1 = phi x86_fp80 [ %i1.1, %entry ], [ %i2.1, %then ]
store x86_fp80 %phi0, x86_fp80* %o, align 16
%o.gep1 = getelementptr inbounds x86_fp80, x86_fp80* %o, i64 1
store x86_fp80 %phi1, x86_fp80* %o.gep1, align 16
; Output IR expectation: no vector-of-x86_fp80 type appears.
; CHECK-NOT: <{{[0-9]+}} x x86_fp80>
; Remarks-file expectation: an UnsupportedType remark for test that names the
; x86_fp80 type.
; YAML: Pass: slp-vectorizer
; YAML-NEXT: Name: UnsupportedType
; YAML-NEXT: Function: test
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Cannot SLP vectorize list: type '
; YAML-NEXT: - String: x86_fp80 is unsupported by vectorizer
ret void
}