1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 12:12:47 +01:00

[SampleFDO] handle ProfileSampleAccurate when initializing function entry count

ProfileSampleAccurate is used to indicate that the profile exactly matches the
code to be optimized.

Previously, ProfileSampleAccurate was handled in ProfileSummaryInfo::isColdCallSite
and ProfileSummaryInfo::isColdBlock. A better solution is to initialize the function
entry count to 0 when ProfileSampleAccurate is true, so we don't have to handle
ProfileSampleAccurate in multiple places.

Differential Revision: https://reviews.llvm.org/D55660

llvm-svn: 349088
This commit is contained in:
Wei Mi 2018-12-13 21:51:42 +00:00
parent ee2b4d8ed1
commit a323ae1555
6 changed files with 103 additions and 98 deletions

View File

@ -39,11 +39,6 @@ static cl::opt<int> ProfileSummaryCutoffCold(
cl::desc("A count is cold if it is below the minimum count"
" to reach this percentile of total counts."));
static cl::opt<bool> ProfileSampleAccurate(
"profile-sample-accurate", cl::Hidden, cl::init(false),
cl::desc("If the sample profile is accurate, we will mark all un-sampled "
"callsite as cold. Otherwise, treat un-sampled callsites as if "
"we have no profile."));
static cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold(
"profile-summary-huge-working-set-size-threshold", cl::Hidden,
cl::init(15000), cl::ZeroOrMore,
@ -261,14 +256,7 @@ bool ProfileSummaryInfo::isHotBlock(const BasicBlock *BB, BlockFrequencyInfo *BF
bool ProfileSummaryInfo::isColdBlock(const BasicBlock *BB,
BlockFrequencyInfo *BFI) {
auto Count = BFI->getBlockProfileCount(BB);
if (Count)
return isColdCount(*Count);
if (!hasSampleProfile())
return false;
const Function *F = BB->getParent();
return ProfileSampleAccurate ||
(F && F->hasFnAttribute("profile-sample-accurate"));
return Count && isColdCount(*Count);
}
bool ProfileSummaryInfo::isHotCallSite(const CallSite &CS,
@ -285,11 +273,7 @@ bool ProfileSummaryInfo::isColdCallSite(const CallSite &CS,
// In SamplePGO, if the caller has been sampled, and there is no profile
// annotated on the callsite, we consider the callsite as cold.
// If there is no profile for the caller, and we know the profile is
// accurate, we consider the callsite as cold.
return (hasSampleProfile() &&
(CS.getCaller()->hasProfileData() || ProfileSampleAccurate ||
CS.getCaller()->hasFnAttribute("profile-sample-accurate")));
return hasSampleProfile() && CS.getCaller()->hasProfileData();
}
INITIALIZE_PASS(ProfileSummaryInfoWrapperPass, "profile-summary-info",

View File

@ -123,6 +123,12 @@ static cl::opt<bool> NoWarnSampleUnused(
cl::desc("Use this option to turn off/on warnings about function with "
"samples but without debug information to use those samples. "));
static cl::opt<bool> ProfileSampleAccurate(
"profile-sample-accurate", cl::Hidden, cl::init(false),
cl::desc("If the sample profile is accurate, we will mark all un-sampled "
"callsite and function as having 0 samples. Otherwise, treat "
"un-sampled callsites and functions conservatively as unknown. "));
namespace {
using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
@ -1604,10 +1610,18 @@ bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
}
bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) {
// Initialize the entry count to -1, which will be treated conservatively
// by getEntryCount as the same as unknown (None). If we have samples this
// will be overwritten in emitAnnotations.
F.setEntryCount(ProfileCount(-1, Function::PCT_Real));
// By default the entry count is initialized to -1, which will be treated
// conservatively by getEntryCount as the same as unknown (None). This is
// to avoid newly added code to be treated as cold. If we have samples
// this will be overwritten in emitAnnotations.
// If ProfileSampleAccurate is true or F has profile-sample-accurate
// attribute, initialize the entry count to 0 so callsites or functions
// unsampled will be treated as cold.
uint64_t initialEntryCount =
(ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate"))
? 0
: -1;
F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
if (AM) {
auto &FAM =

View File

@ -1,19 +1,16 @@
; RUN: opt < %s -codegenprepare -S | FileCheck %s
; RUN: opt < %s -codegenprepare -profile-sample-accurate -S | FileCheck %s --check-prefix ACCURATE
target triple = "x86_64-pc-linux-gnu"
; This tests that hot/cold functions get correct section prefix assigned
; CHECK: hot_func{{.*}}!section_prefix ![[HOT_ID:[0-9]+]]
; ACCURATE: hot_func{{.*}}!section_prefix ![[HOT_ID:[0-9]+]]
; The entry is hot
define void @hot_func() !prof !15 {
ret void
}
; CHECK: hot_call_func{{.*}}!section_prefix ![[HOT_ID]]
; ACCURATE: hot_call_func{{.*}}!section_prefix ![[HOT_ID]]
; The sum of the 2 callsites is hot
define void @hot_call_func() !prof !16 {
call void @hot_func(), !prof !17
@ -22,7 +19,6 @@ define void @hot_call_func() !prof !16 {
}
; CHECK-NOT: normal_func{{.*}}!section_prefix
; ACCURATE-NOT: normal_func{{.*}}!section_prefix
; The sum of all callsites is neither hot nor cold
define void @normal_func() !prof !16 {
call void @hot_func(), !prof !17
@ -32,36 +28,12 @@ define void @normal_func() !prof !16 {
}
; CHECK: cold_func{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
; ACCURATE: cold_func{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
; The entry and the callsite are both cold
define void @cold_func() !prof !16 {
call void @hot_func(), !prof !18
ret void
}
; CHECK-NOT: foo_not_in_profile{{.*}}!section_prefix
; The function not appearing in profile is neither hot nor cold
;
; ACCURATE: foo_not_in_profile{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
; The function not appearing in profile is cold when -profile-sample-accurate
; is on
define void @foo_not_in_profile() !prof !19 {
call void @hot_func()
ret void
}
; CHECK: bar_not_in_profile{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
; ACCURATE: bar_not_in_profile{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
; The function not appearing in profile is cold when the func has
; profile-sample-accurate attribute
define void @bar_not_in_profile() #0 !prof !19 {
call void @hot_func()
ret void
}
attributes #0 = { "profile-sample-accurate" }
; CHECK: ![[HOT_ID]] = !{!"function_section_prefix", !".hot"}
; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"}
!llvm.module.flags = !{!1}
@ -83,4 +55,3 @@ attributes #0 = { "profile-sample-accurate" }
!16 = !{!"function_entry_count", i64 1}
!17 = !{!"branch_weights", i32 80}
!18 = !{!"branch_weights", i32 1}
!19 = !{!"function_entry_count", i64 -1}

View File

@ -1,47 +0,0 @@
; For SamplePGO, if -profile-sample-accurate is specified, cold callsite
; heuristics should be honored if the caller has no profile.
; RUN: opt < %s -inline -S -inline-cold-callsite-threshold=0 | FileCheck %s
define i32 @callee(i32 %x) {
%x1 = add i32 %x, 1
%x2 = add i32 %x1, 1
%x3 = add i32 %x2, 1
call void @extern()
call void @extern()
ret i32 %x3
}
define i32 @caller(i32 %y1) {
; CHECK-LABEL: @caller
; CHECK-NOT: call i32 @callee
%y2 = call i32 @callee(i32 %y1)
ret i32 %y2
}
define i32 @caller_accurate(i32 %y1) #0 {
; CHECK-LABEL: @caller_accurate
; CHECK: call i32 @callee
%y2 = call i32 @callee(i32 %y1)
ret i32 %y2
}
declare void @extern()
attributes #0 = { "profile-sample-accurate" }
!llvm.module.flags = !{!1}
!1 = !{i32 1, !"ProfileSummary", !2}
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
!3 = !{!"ProfileFormat", !"SampleProfile"}
!4 = !{!"TotalCount", i64 10000}
!5 = !{!"MaxCount", i64 1000}
!6 = !{!"MaxInternalCount", i64 1}
!7 = !{!"MaxFunctionCount", i64 1000}
!8 = !{!"NumCounts", i64 3}
!9 = !{!"NumFunctions", i64 3}
!10 = !{!"DetailedSummary", !11}
!11 = !{!12, !13, !14}
!12 = !{i32 10000, i64 100, i32 1}
!13 = !{i32 999000, i64 100, i32 1}
!14 = !{i32 999999, i64 1, i32 2}

View File

@ -0,0 +1,31 @@
; For SamplePGO, if -profile-sample-accurate is specified, cold callsite
; heuristics should be honored if the caller has no profile.
; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -inline -S -inline-cold-callsite-threshold=0 | FileCheck %s
; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -profile-sample-accurate -inline -S -inline-cold-callsite-threshold=0 | FileCheck %s --check-prefix ACCURATE
declare void @extern()
define void @callee() {
call void @extern()
ret void
}
define void @caller(i32 %y1) {
; CHECK-LABEL: @caller
; CHECK-NOT: call void @callee
; ACCURATE-LABEL: @caller
; ACCURATE: call void @callee
call void @callee()
ret void
}
define void @caller_accurate(i32 %y1) #0 {
; CHECK-LABEL: @caller_accurate
; CHECK: call void @callee
; ACCURATE-LABEL: @caller_accurate
; ACCURATE: call void @callee
call void @callee()
ret void
}
attributes #0 = { "profile-sample-accurate" }

View File

@ -0,0 +1,52 @@
; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -codegenprepare -S | FileCheck %s
; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -codegenprepare -profile-sample-accurate -S | FileCheck %s --check-prefix ACCURATE
target triple = "x86_64-pc-linux-gnu"
; The test checks that function without profile gets unlikely section prefix
; if -profile-sample-accurate is specified or the function has the
; profile-sample-accurate attribute.
declare void @hot_func()
; CHECK-NOT: foo_not_in_profile{{.*}}!section_prefix
; CHECK: foo_not_in_profile{{.*}}!prof ![[UNKNOWN_ID:[0-9]+]]
; ACCURATE: foo_not_in_profile{{.*}}!prof ![[ZERO_ID:[0-9]+]] !section_prefix ![[COLD_ID:[0-9]+]]
; The function not appearing in profile is cold when -profile-sample-accurate
; is on.
define void @foo_not_in_profile() {
call void @hot_func()
ret void
}
; CHECK: bar_not_in_profile{{.*}}!prof ![[ZERO_ID:[0-9]+]] !section_prefix ![[COLD_ID:[0-9]+]]
; ACCURATE: bar_not_in_profile{{.*}}!prof ![[ZERO_ID:[0-9]+]] !section_prefix ![[COLD_ID:[0-9]+]]
; The function not appearing in profile is cold when the func has
; profile-sample-accurate attribute.
define void @bar_not_in_profile() #0 {
call void @hot_func()
ret void
}
attributes #0 = { "profile-sample-accurate" }
; CHECK: ![[UNKNOWN_ID]] = !{!"function_entry_count", i64 -1}
; CHECK: ![[ZERO_ID]] = !{!"function_entry_count", i64 0}
; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"}
; ACCURATE: ![[ZERO_ID]] = !{!"function_entry_count", i64 0}
; ACCURATE: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"}
!llvm.module.flags = !{!1}
!1 = !{i32 1, !"ProfileSummary", !2}
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
!3 = !{!"ProfileFormat", !"SampleProfile"}
!4 = !{!"TotalCount", i64 10000}
!5 = !{!"MaxCount", i64 1000}
!6 = !{!"MaxInternalCount", i64 1}
!7 = !{!"MaxFunctionCount", i64 1000}
!8 = !{!"NumCounts", i64 3}
!9 = !{!"NumFunctions", i64 3}
!10 = !{!"DetailedSummary", !11}
!11 = !{!12, !13, !14}
!12 = !{i32 10000, i64 100, i32 1}
!13 = !{i32 999000, i64 100, i32 1}
!14 = !{i32 999999, i64 1, i32 2}