1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 10:42:39 +01:00

[NewPM/Inliner] Reduce threshold for cold callsites in the non-PGO case

Differential Revision: https://reviews.llvm.org/D34312

llvm-svn: 306484
This commit is contained in:
Easwaran Raman 2017-06-27 23:11:18 +00:00
parent 1182731c7c
commit ba7953456e
3 changed files with 118 additions and 42 deletions

View File

@ -66,6 +66,12 @@ static cl::opt<int>
cl::ZeroOrMore,
cl::desc("Threshold for hot callsites "));
// Cutoff used to classify a callsite as cold when no profile summary is
// available: the callsite's block frequency is compared against this
// percentage of the caller's entry block frequency. Defaults to 2 (percent).
static cl::opt<int> ColdCallSiteRelFreq(
    "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
    cl::desc("Maximum block frequency, expressed as a percentage of caller's "
             "entry frequency, for a callsite to be cold in the absence of "
             "profile information."));
namespace {
class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
@ -172,6 +178,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// Return true if size growth is allowed when inlining the callee at CS.
bool allowSizeGrowth(CallSite CS);
/// Return true if \p CS is a cold callsite.
///
/// When a global profile summary is available it decides coldness; otherwise
/// the callsite's block frequency is compared against the caller's entry
/// frequency using \p CallerBFI. \p CallerBFI may be null, in which case the
/// callsite is not considered cold (absent a profile summary).
bool isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI);
// Custom analysis routines.
bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
@ -631,6 +640,26 @@ bool CallAnalyzer::allowSizeGrowth(CallSite CS) {
return true;
}
/// Decide whether the callsite \p CS should be treated as cold.
///
/// \param CS        The callsite being considered for inlining.
/// \param CallerBFI Block frequency info for the caller; may be null.
/// \returns true if the callsite is cold, either according to the global
///          profile summary or, when there is none, relative to the caller's
///          entry block frequency.
bool CallAnalyzer::isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI) {
  // A global profile summary, when present, is the sole authority on
  // coldness.
  if (PSI->hasProfileSummary())
    return PSI->isColdCallSite(CS, CallerBFI);

  // Without block frequencies for the caller there is nothing to compare
  // against, so the callsite is not classified as cold.
  if (!CallerBFI)
    return false;

  // No profile summary: call the site cold when its block runs less often
  // than ColdCallSiteRelFreq percent of the caller's entry block. The scaled
  // entry frequency is recomputed on every query; caching it is not worth the
  // extra complexity unless this shows up high in profiles.
  BasicBlock *CallBlock = CS.getInstruction()->getParent();
  BasicBlock *EntryBlock = &CS.getCaller()->getEntryBlock();
  const BranchProbability ColdFraction(ColdCallSiteRelFreq, 100);
  const auto SiteFreq = CallerBFI->getBlockFreq(CallBlock);
  const auto EntryFreq = CallerBFI->getBlockFreq(EntryBlock);
  return SiteFreq < EntryFreq * ColdFraction;
}
void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
// If no size growth is allowed for this inlining, set Threshold to 0.
if (!allowSizeGrowth(CS)) {
@ -676,7 +705,7 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
if (PSI->isHotCallSite(CS, CallerBFI)) {
DEBUG(dbgs() << "Hot callsite.\n");
Threshold = Params.HotCallSiteThreshold.getValue();
} else if (PSI->isColdCallSite(CS, CallerBFI)) {
} else if (isColdCallSite(CS, CallerBFI)) {
DEBUG(dbgs() << "Cold callsite.\n");
Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
}

View File

@ -0,0 +1,54 @@
; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inline-threshold=100 -inline-cold-callsite-threshold=0 -S | FileCheck %s
; This tests that a cold callsite gets the inline-cold-callsite-threshold
; and does not get inlined. Another callsite to an identical callee that
; is not cold gets inlined because cost is below the inline-threshold.
; Callee shared by both callsites in @caller: adds 1 to %x three times, calls
; the external function @extern, and returns the final sum %x3. Its function
; entry count is supplied by !21.
define i32 @callee1(i32 %x) !prof !21 {
%x1 = add i32 %x, 1
%x2 = add i32 %x1, 1
%x3 = add i32 %x2, 1
call void @extern()
ret i32 %x3
}
; Branches on %n <= 100 and calls @callee1 from each successor. The branch
; weights in !0 (200 for cond_true, 1 for cond_false) make cond_true the
; frequently taken successor and cond_false the rarely taken one.
define i32 @caller(i32 %n) !prof !22 {
; CHECK-LABEL: @caller(
%cond = icmp sle i32 %n, 100
br i1 %cond, label %cond_true, label %cond_false, !prof !0
cond_true:
; Frequent path: the call is expected to be inlined, so no call to @callee1
; remains here and the returned value is the inlined copy %x3.i.
; CHECK-LABEL: cond_true:
; CHECK-NOT: call i32 @callee1
; CHECK: ret i32 %x3.i
%i = call i32 @callee1(i32 %n)
ret i32 %i
cond_false:
; Rare path: the callsite is expected to be treated as cold and, with the
; cold callsite threshold set to 0 on the command line, left as a call.
; CHECK-LABEL: cond_false:
; CHECK: call i32 @callee1
; CHECK: ret i32 %j
%j = call i32 @callee1(i32 %n)
ret i32 %j
}
declare void @extern()
!0 = !{!"branch_weights", i32 200, i32 1}
!llvm.module.flags = !{!1}
!21 = !{!"function_entry_count", i64 200}
!22 = !{!"function_entry_count", i64 200}
!1 = !{i32 1, !"ProfileSummary", !2}
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
!3 = !{!"ProfileFormat", !"InstrProf"}
!4 = !{!"TotalCount", i64 10000}
!5 = !{!"MaxCount", i64 1000}
!6 = !{!"MaxInternalCount", i64 1}
!7 = !{!"MaxFunctionCount", i64 1000}
!8 = !{!"NumCounts", i64 3}
!9 = !{!"NumFunctions", i64 3}
!10 = !{!"DetailedSummary", !11}
!11 = !{!12, !13, !14}
!12 = !{i32 10000, i64 1000, i32 1}
!13 = !{i32 999000, i64 1000, i32 1}
!14 = !{i32 999999, i64 1, i32 2}

View File

@ -1,54 +1,47 @@
; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inline-threshold=100 -inline-cold-callsite-threshold=0 -S | FileCheck %s
; This tests that a cold callsite gets the inline-cold-callsite-threshold
; and does not get inlined. Another callsite to an identical callee that
; is not cold gets inlined because cost is below the inline-threshold.
define i32 @callee1(i32 %x) !prof !21 {
%x1 = add i32 %x, 1
%x2 = add i32 %x1, 1
%x3 = add i32 %x2, 1
define void @callee() {
call void @extern()
ret i32 %x3
call void @extern()
ret void
}
define i32 @caller(i32 %n) !prof !22 {
; CHECK-LABEL: @caller(
%cond = icmp sle i32 %n, 100
br i1 %cond, label %cond_true, label %cond_false, !prof !0
cond_true:
; CHECK-LABEL: cond_true:
; CHECK-NOT: call i32 @callee1
; CHECK: ret i32 %x3.i
%i = call i32 @callee1(i32 %n)
ret i32 %i
cond_false:
; CHECK-LABEL: cond_false:
; CHECK: call i32 @callee1
; CHECK: ret i32 %j
%j = call i32 @callee1(i32 %n)
ret i32 %j
}
declare void @extern()
declare i1 @ext(i32)
!0 = !{!"branch_weights", i32 200, i32 1}
; CHECK-LABEL: caller
define i32 @caller(i32 %n) {
entry:
%cmp4 = icmp sgt i32 %n, 0
br i1 %cmp4, label %for.body, label %for.cond.cleanup
!llvm.module.flags = !{!1}
!21 = !{!"function_entry_count", i64 200}
!22 = !{!"function_entry_count", i64 200}
for.cond.cleanup:
ret i32 0
!1 = !{i32 1, !"ProfileSummary", !2}
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
!3 = !{!"ProfileFormat", !"InstrProf"}
!4 = !{!"TotalCount", i64 10000}
!5 = !{!"MaxCount", i64 1000}
!6 = !{!"MaxInternalCount", i64 1}
!7 = !{!"MaxFunctionCount", i64 1000}
!8 = !{!"NumCounts", i64 3}
!9 = !{!"NumFunctions", i64 3}
!10 = !{!"DetailedSummary", !11}
!11 = !{!12, !13, !14}
!12 = !{i32 10000, i64 1000, i32 1}
!13 = !{i32 999000, i64 1000, i32 1}
!14 = !{i32 999999, i64 1, i32 2}
for.body:
%i.05 = phi i32 [ %inc, %for.inc ], [ 0, %entry ]
; CHECK: %call = tail call
%call = tail call zeroext i1 @ext(i32 %i.05)
; CHECK-NOT: call void @callee
; CHECK-NEXT: call void @extern
call void @callee()
br i1 %call, label %cold, label %for.inc, !prof !0
cold:
; CHECK: call void @callee
call void @callee()
br label %for.inc
for.inc:
%inc = add nuw nsw i32 %i.05, 1
%exitcond = icmp eq i32 %inc, %n
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
!0 = !{!"branch_weights", i32 1, i32 2000}