mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 10:42:39 +01:00
[NewPM/Inliner] Reduce threshold for cold callsites in the non-PGO case
Differential Revision: https://reviews.llvm.org/D34312 llvm-svn: 306484
This commit is contained in:
parent
1182731c7c
commit
ba7953456e
@ -66,6 +66,12 @@ static cl::opt<int>
|
||||
cl::ZeroOrMore,
|
||||
cl::desc("Threshold for hot callsites "));
|
||||
|
||||
// Hidden tuning knob for the non-profile case: the maximum block frequency a
// callsite may have, expressed as a percentage of the caller's entry
// frequency, for it to be considered cold. Defaults to 2.
static cl::opt<int> ColdCallSiteRelFreq(
    "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
    cl::desc("Maximum block frequency, expressed as a percentage of caller's "
             "entry frequency, for a callsite to be cold in the absence of "
             "profile information."));
|
||||
|
||||
namespace {
|
||||
|
||||
class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
|
||||
@ -172,6 +178,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
|
||||
/// Return true if size growth is allowed when inlining the callee at CS.
|
||||
bool allowSizeGrowth(CallSite CS);
|
||||
|
||||
/// Return true if \p CS is a cold callsite.
|
||||
bool isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI);
|
||||
|
||||
// Custom analysis routines.
|
||||
bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
|
||||
|
||||
@ -631,6 +640,26 @@ bool CallAnalyzer::allowSizeGrowth(CallSite CS) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Decide whether the callsite \p CS should be treated as cold.
///
/// \param CS        The callsite under consideration.
/// \param CallerBFI Block frequency info for the caller; may be null, in
///                  which case the local (non-profile) heuristic reports
///                  "not cold".
/// \returns true if the callsite is cold according to the profile summary
///          when one exists, or otherwise according to its block frequency
///          relative to the caller's entry block.
bool CallAnalyzer::isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI) {
  // A global profile summary, when present, is the sole authority on
  // coldness.
  if (PSI->hasProfileSummary())
    return PSI->isColdCallSite(CS, CallerBFI);

  // Without block frequencies for the caller there is nothing to compare.
  if (!CallerBFI)
    return false;

  // No profile summary: compare the callsite's block frequency against a
  // fraction (ColdCallSiteRelFreq percent) of the caller's entry frequency.
  // The scaled entry frequency is recomputed on every query; caching it is
  // not worth the extra complexity unless this shows up in profiles.
  const BranchProbability ColdFraction(ColdCallSiteRelFreq, 100);
  BasicBlock *CallBlock = CS.getInstruction()->getParent();
  BasicBlock *EntryBlock = &(CS.getCaller()->getEntryBlock());
  const auto CallBlockFreq = CallerBFI->getBlockFreq(CallBlock);
  const auto ColdCutoff = CallerBFI->getBlockFreq(EntryBlock) * ColdFraction;
  return CallBlockFreq < ColdCutoff;
}
|
||||
|
||||
void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
|
||||
// If no size growth is allowed for this inlining, set Threshold to 0.
|
||||
if (!allowSizeGrowth(CS)) {
|
||||
@ -676,7 +705,7 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
|
||||
if (PSI->isHotCallSite(CS, CallerBFI)) {
|
||||
DEBUG(dbgs() << "Hot callsite.\n");
|
||||
Threshold = Params.HotCallSiteThreshold.getValue();
|
||||
} else if (PSI->isColdCallSite(CS, CallerBFI)) {
|
||||
} else if (isColdCallSite(CS, CallerBFI)) {
|
||||
DEBUG(dbgs() << "Cold callsite.\n");
|
||||
Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
|
||||
}
|
||||
|
54
test/Transforms/Inline/inline-cold-callsite-pgo.ll
Normal file
54
test/Transforms/Inline/inline-cold-callsite-pgo.ll
Normal file
@ -0,0 +1,54 @@
|
||||
; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inline-threshold=100 -inline-cold-callsite-threshold=0 -S | FileCheck %s
|
||||
|
||||
; This tests that a cold callsite gets the inline-cold-callsite-threshold
|
||||
; and does not get inlined. Another callsite to an identical callee that
|
||||
; is not cold gets inlined because cost is below the inline-threshold.
|
||||
|
||||
; Callee used by both callsites in @caller: adds 1 to %x three times, calls
; the external function @extern, and returns the incremented value. Carries
; the function entry count attached via !prof !21.
define i32 @callee1(i32 %x) !prof !21 {
|
||||
%x1 = add i32 %x, 1
|
||||
%x2 = add i32 %x1, 1
|
||||
%x3 = add i32 %x2, 1
|
||||
call void @extern()
|
||||
ret i32 %x3
|
||||
}
|
||||
|
||||
; Caller with two identical calls to @callee1, one on each side of a branch
; on (%n <= 100). The branch carries the !prof !0 weights, which make the two
; successors differ in frequency. The test expects the call in cond_true to
; be inlined (no call remains, the inlined %x3.i is returned) and the call in
; cond_false to be left in place.
define i32 @caller(i32 %n) !prof !22 {
|
||||
; CHECK-LABEL: @caller(
|
||||
%cond = icmp sle i32 %n, 100
|
||||
br i1 %cond, label %cond_true, label %cond_false, !prof !0
|
||||
|
||||
cond_true:
|
||||
; CHECK-LABEL: cond_true:
|
||||
; CHECK-NOT: call i32 @callee1
|
||||
; CHECK: ret i32 %x3.i
|
||||
%i = call i32 @callee1(i32 %n)
|
||||
ret i32 %i
|
||||
cond_false:
|
||||
; CHECK-LABEL: cond_false:
|
||||
; CHECK: call i32 @callee1
|
||||
; CHECK: ret i32 %j
|
||||
%j = call i32 @callee1(i32 %n)
|
||||
ret i32 %j
|
||||
}
|
||||
declare void @extern()
|
||||
|
||||
!0 = !{!"branch_weights", i32 200, i32 1}
|
||||
|
||||
!llvm.module.flags = !{!1}
|
||||
!21 = !{!"function_entry_count", i64 200}
|
||||
!22 = !{!"function_entry_count", i64 200}
|
||||
|
||||
!1 = !{i32 1, !"ProfileSummary", !2}
|
||||
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
|
||||
!3 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!4 = !{!"TotalCount", i64 10000}
|
||||
!5 = !{!"MaxCount", i64 1000}
|
||||
!6 = !{!"MaxInternalCount", i64 1}
|
||||
!7 = !{!"MaxFunctionCount", i64 1000}
|
||||
!8 = !{!"NumCounts", i64 3}
|
||||
!9 = !{!"NumFunctions", i64 3}
|
||||
!10 = !{!"DetailedSummary", !11}
|
||||
!11 = !{!12, !13, !14}
|
||||
!12 = !{i32 10000, i64 1000, i32 1}
|
||||
!13 = !{i32 999000, i64 1000, i32 1}
|
||||
!14 = !{i32 999999, i64 1, i32 2}
|
@ -1,54 +1,47 @@
|
||||
|
||||
; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inline-threshold=100 -inline-cold-callsite-threshold=0 -S | FileCheck %s
|
||||
|
||||
; This tests that a cold callsite gets the inline-cold-callsite-threshold
|
||||
; and does not get inlined. Another callsite to an identical callee that
|
||||
; is not cold gets inlined because cost is below the inline-threshold.
|
||||
|
||||
define i32 @callee1(i32 %x) !prof !21 {
|
||||
%x1 = add i32 %x, 1
|
||||
%x2 = add i32 %x1, 1
|
||||
%x3 = add i32 %x2, 1
|
||||
define void @callee() {
|
||||
call void @extern()
|
||||
ret i32 %x3
|
||||
call void @extern()
|
||||
ret void
|
||||
}
|
||||
|
||||
define i32 @caller(i32 %n) !prof !22 {
|
||||
; CHECK-LABEL: @caller(
|
||||
%cond = icmp sle i32 %n, 100
|
||||
br i1 %cond, label %cond_true, label %cond_false, !prof !0
|
||||
|
||||
cond_true:
|
||||
; CHECK-LABEL: cond_true:
|
||||
; CHECK-NOT: call i32 @callee1
|
||||
; CHECK: ret i32 %x3.i
|
||||
%i = call i32 @callee1(i32 %n)
|
||||
ret i32 %i
|
||||
cond_false:
|
||||
; CHECK-LABEL: cond_false:
|
||||
; CHECK: call i32 @callee1
|
||||
; CHECK: ret i32 %j
|
||||
%j = call i32 @callee1(i32 %n)
|
||||
ret i32 %j
|
||||
}
|
||||
declare void @extern()
|
||||
declare i1 @ext(i32)
|
||||
|
||||
!0 = !{!"branch_weights", i32 200, i32 1}
|
||||
; CHECK-LABEL: caller
|
||||
define i32 @caller(i32 %n) {
|
||||
entry:
|
||||
%cmp4 = icmp sgt i32 %n, 0
|
||||
br i1 %cmp4, label %for.body, label %for.cond.cleanup
|
||||
|
||||
!llvm.module.flags = !{!1}
|
||||
!21 = !{!"function_entry_count", i64 200}
|
||||
!22 = !{!"function_entry_count", i64 200}
|
||||
for.cond.cleanup:
|
||||
ret i32 0
|
||||
|
||||
!1 = !{i32 1, !"ProfileSummary", !2}
|
||||
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
|
||||
!3 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!4 = !{!"TotalCount", i64 10000}
|
||||
!5 = !{!"MaxCount", i64 1000}
|
||||
!6 = !{!"MaxInternalCount", i64 1}
|
||||
!7 = !{!"MaxFunctionCount", i64 1000}
|
||||
!8 = !{!"NumCounts", i64 3}
|
||||
!9 = !{!"NumFunctions", i64 3}
|
||||
!10 = !{!"DetailedSummary", !11}
|
||||
!11 = !{!12, !13, !14}
|
||||
!12 = !{i32 10000, i64 1000, i32 1}
|
||||
!13 = !{i32 999000, i64 1000, i32 1}
|
||||
!14 = !{i32 999999, i64 1, i32 2}
|
||||
for.body:
|
||||
%i.05 = phi i32 [ %inc, %for.inc ], [ 0, %entry ]
|
||||
; CHECK: %call = tail call
|
||||
%call = tail call zeroext i1 @ext(i32 %i.05)
|
||||
; CHECK-NOT: call void @callee
|
||||
; CHECK-NEXT: call void @extern
|
||||
call void @callee()
|
||||
br i1 %call, label %cold, label %for.inc, !prof !0
|
||||
|
||||
cold:
|
||||
; CHECK: call void @callee
|
||||
call void @callee()
|
||||
br label %for.inc
|
||||
|
||||
for.inc:
|
||||
%inc = add nuw nsw i32 %i.05, 1
|
||||
%exitcond = icmp eq i32 %inc, %n
|
||||
br i1 %exitcond, label %for.cond.cleanup, label %for.body
|
||||
}
|
||||
|
||||
|
||||
!0 = !{!"branch_weights", i32 1, i32 2000}
|
||||
|
Loading…
Reference in New Issue
Block a user