mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 12:12:47 +01:00
[SampleFDO] handle ProfileSampleAccurate when initializing function entry count
ProfileSampleAccurate is used to indicate that the profile exactly matches the code to be optimized. Previously, ProfileSampleAccurate was handled in ProfileSummaryInfo::isColdCallSite and ProfileSummaryInfo::isColdBlock. A better solution is to initialize the function entry count to 0 when ProfileSampleAccurate is true, so we don't have to handle ProfileSampleAccurate in multiple places. Differential Revision: https://reviews.llvm.org/D55660 llvm-svn: 349088
This commit is contained in:
parent
ee2b4d8ed1
commit
a323ae1555
@ -39,11 +39,6 @@ static cl::opt<int> ProfileSummaryCutoffCold(
|
||||
cl::desc("A count is cold if it is below the minimum count"
|
||||
" to reach this percentile of total counts."));
|
||||
|
||||
static cl::opt<bool> ProfileSampleAccurate(
|
||||
"profile-sample-accurate", cl::Hidden, cl::init(false),
|
||||
cl::desc("If the sample profile is accurate, we will mark all un-sampled "
|
||||
"callsite as cold. Otherwise, treat un-sampled callsites as if "
|
||||
"we have no profile."));
|
||||
static cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold(
|
||||
"profile-summary-huge-working-set-size-threshold", cl::Hidden,
|
||||
cl::init(15000), cl::ZeroOrMore,
|
||||
@ -261,14 +256,7 @@ bool ProfileSummaryInfo::isHotBlock(const BasicBlock *BB, BlockFrequencyInfo *BF
|
||||
bool ProfileSummaryInfo::isColdBlock(const BasicBlock *BB,
|
||||
BlockFrequencyInfo *BFI) {
|
||||
auto Count = BFI->getBlockProfileCount(BB);
|
||||
if (Count)
|
||||
return isColdCount(*Count);
|
||||
if (!hasSampleProfile())
|
||||
return false;
|
||||
|
||||
const Function *F = BB->getParent();
|
||||
return ProfileSampleAccurate ||
|
||||
(F && F->hasFnAttribute("profile-sample-accurate"));
|
||||
return Count && isColdCount(*Count);
|
||||
}
|
||||
|
||||
bool ProfileSummaryInfo::isHotCallSite(const CallSite &CS,
|
||||
@ -285,11 +273,7 @@ bool ProfileSummaryInfo::isColdCallSite(const CallSite &CS,
|
||||
|
||||
// In SamplePGO, if the caller has been sampled, and there is no profile
|
||||
// annotated on the callsite, we consider the callsite as cold.
|
||||
// If there is no profile for the caller, and we know the profile is
|
||||
// accurate, we consider the callsite as cold.
|
||||
return (hasSampleProfile() &&
|
||||
(CS.getCaller()->hasProfileData() || ProfileSampleAccurate ||
|
||||
CS.getCaller()->hasFnAttribute("profile-sample-accurate")));
|
||||
return hasSampleProfile() && CS.getCaller()->hasProfileData();
|
||||
}
|
||||
|
||||
INITIALIZE_PASS(ProfileSummaryInfoWrapperPass, "profile-summary-info",
|
||||
|
@ -123,6 +123,12 @@ static cl::opt<bool> NoWarnSampleUnused(
|
||||
cl::desc("Use this option to turn off/on warnings about function with "
|
||||
"samples but without debug information to use those samples. "));
|
||||
|
||||
static cl::opt<bool> ProfileSampleAccurate(
|
||||
"profile-sample-accurate", cl::Hidden, cl::init(false),
|
||||
cl::desc("If the sample profile is accurate, we will mark all un-sampled "
|
||||
"callsite and function as having 0 samples. Otherwise, treat "
|
||||
"un-sampled callsites and functions conservatively as unknown. "));
|
||||
|
||||
namespace {
|
||||
|
||||
using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
|
||||
@ -1604,10 +1610,18 @@ bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
|
||||
}
|
||||
|
||||
bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) {
|
||||
// Initialize the entry count to -1, which will be treated conservatively
|
||||
// by getEntryCount as the same as unknown (None). If we have samples this
|
||||
// will be overwritten in emitAnnotations.
|
||||
F.setEntryCount(ProfileCount(-1, Function::PCT_Real));
|
||||
// By default the entry count is initialized to -1, which will be treated
|
||||
// conservatively by getEntryCount as the same as unknown (None). This is
|
||||
// to avoid newly added code being treated as cold. If we have samples
|
||||
// this will be overwritten in emitAnnotations.
|
||||
// If ProfileSampleAccurate is true or F has profile-sample-accurate
|
||||
// attribute, initialize the entry count to 0 so callsites or functions
|
||||
// unsampled will be treated as cold.
|
||||
uint64_t initialEntryCount =
|
||||
(ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate"))
|
||||
? 0
|
||||
: -1;
|
||||
F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
|
||||
std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
|
||||
if (AM) {
|
||||
auto &FAM =
|
||||
|
@ -1,19 +1,16 @@
|
||||
; RUN: opt < %s -codegenprepare -S | FileCheck %s
|
||||
; RUN: opt < %s -codegenprepare -profile-sample-accurate -S | FileCheck %s --check-prefix ACCURATE
|
||||
|
||||
target triple = "x86_64-pc-linux-gnu"
|
||||
|
||||
; This tests that hot/cold functions get correct section prefix assigned
|
||||
|
||||
; CHECK: hot_func{{.*}}!section_prefix ![[HOT_ID:[0-9]+]]
|
||||
; ACCURATE: hot_func{{.*}}!section_prefix ![[HOT_ID:[0-9]+]]
|
||||
; The entry is hot
|
||||
define void @hot_func() !prof !15 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: hot_call_func{{.*}}!section_prefix ![[HOT_ID]]
|
||||
; ACCURATE: hot_call_func{{.*}}!section_prefix ![[HOT_ID]]
|
||||
; The sum of the 2 callsites is hot
|
||||
define void @hot_call_func() !prof !16 {
|
||||
call void @hot_func(), !prof !17
|
||||
@ -22,7 +19,6 @@ define void @hot_call_func() !prof !16 {
|
||||
}
|
||||
|
||||
; CHECK-NOT: normal_func{{.*}}!section_prefix
|
||||
; ACCURATE-NOT: normal_func{{.*}}!section_prefix
|
||||
; The sum of all callsites is neither hot nor cold
|
||||
define void @normal_func() !prof !16 {
|
||||
call void @hot_func(), !prof !17
|
||||
@ -32,36 +28,12 @@ define void @normal_func() !prof !16 {
|
||||
}
|
||||
|
||||
; CHECK: cold_func{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
|
||||
; ACCURATE: cold_func{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
|
||||
; The entry and the callsite are both cold
|
||||
define void @cold_func() !prof !16 {
|
||||
call void @hot_func(), !prof !18
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
; CHECK-NOT: foo_not_in_profile{{.*}}!section_prefix
|
||||
; The function not appearing in profile is neither hot nor cold
|
||||
;
|
||||
; ACCURATE: foo_not_in_profile{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
|
||||
; The function not appearing in profile is cold when -profile-sample-accurate
|
||||
; is on
|
||||
define void @foo_not_in_profile() !prof !19 {
|
||||
call void @hot_func()
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: bar_not_in_profile{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
|
||||
; ACCURATE: bar_not_in_profile{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
|
||||
; The function not appearing in profile is cold when the func has
|
||||
; profile-sample-accurate attribute
|
||||
define void @bar_not_in_profile() #0 !prof !19 {
|
||||
call void @hot_func()
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "profile-sample-accurate" }
|
||||
|
||||
; CHECK: ![[HOT_ID]] = !{!"function_section_prefix", !".hot"}
|
||||
; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"}
|
||||
!llvm.module.flags = !{!1}
|
||||
@ -83,4 +55,3 @@ attributes #0 = { "profile-sample-accurate" }
|
||||
!16 = !{!"function_entry_count", i64 1}
|
||||
!17 = !{!"branch_weights", i32 80}
|
||||
!18 = !{!"branch_weights", i32 1}
|
||||
!19 = !{!"function_entry_count", i64 -1}
|
||||
|
@ -1,47 +0,0 @@
|
||||
; For SamplePGO, if -profile-sample-accurate is specified, cold callsite
|
||||
; heuristics should be honored if the caller has no profile.
|
||||
|
||||
; RUN: opt < %s -inline -S -inline-cold-callsite-threshold=0 | FileCheck %s
|
||||
|
||||
define i32 @callee(i32 %x) {
|
||||
%x1 = add i32 %x, 1
|
||||
%x2 = add i32 %x1, 1
|
||||
%x3 = add i32 %x2, 1
|
||||
call void @extern()
|
||||
call void @extern()
|
||||
ret i32 %x3
|
||||
}
|
||||
|
||||
define i32 @caller(i32 %y1) {
|
||||
; CHECK-LABEL: @caller
|
||||
; CHECK-NOT: call i32 @callee
|
||||
%y2 = call i32 @callee(i32 %y1)
|
||||
ret i32 %y2
|
||||
}
|
||||
|
||||
define i32 @caller_accurate(i32 %y1) #0 {
|
||||
; CHECK-LABEL: @caller_accurate
|
||||
; CHECK: call i32 @callee
|
||||
%y2 = call i32 @callee(i32 %y1)
|
||||
ret i32 %y2
|
||||
}
|
||||
|
||||
declare void @extern()
|
||||
|
||||
attributes #0 = { "profile-sample-accurate" }
|
||||
|
||||
!llvm.module.flags = !{!1}
|
||||
!1 = !{i32 1, !"ProfileSummary", !2}
|
||||
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
|
||||
!3 = !{!"ProfileFormat", !"SampleProfile"}
|
||||
!4 = !{!"TotalCount", i64 10000}
|
||||
!5 = !{!"MaxCount", i64 1000}
|
||||
!6 = !{!"MaxInternalCount", i64 1}
|
||||
!7 = !{!"MaxFunctionCount", i64 1000}
|
||||
!8 = !{!"NumCounts", i64 3}
|
||||
!9 = !{!"NumFunctions", i64 3}
|
||||
!10 = !{!"DetailedSummary", !11}
|
||||
!11 = !{!12, !13, !14}
|
||||
!12 = !{i32 10000, i64 100, i32 1}
|
||||
!13 = !{i32 999000, i64 100, i32 1}
|
||||
!14 = !{i32 999999, i64 1, i32 2}
|
@ -0,0 +1,31 @@
|
||||
; For SamplePGO, if -profile-sample-accurate is specified, cold callsite
|
||||
; heuristics should be honored if the caller has no profile.
|
||||
|
||||
; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -inline -S -inline-cold-callsite-threshold=0 | FileCheck %s
|
||||
; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -profile-sample-accurate -inline -S -inline-cold-callsite-threshold=0 | FileCheck %s --check-prefix ACCURATE
|
||||
|
||||
declare void @extern()
|
||||
define void @callee() {
|
||||
call void @extern()
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @caller(i32 %y1) {
|
||||
; CHECK-LABEL: @caller
|
||||
; CHECK-NOT: call void @callee
|
||||
; ACCURATE-LABEL: @caller
|
||||
; ACCURATE: call void @callee
|
||||
call void @callee()
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @caller_accurate(i32 %y1) #0 {
|
||||
; CHECK-LABEL: @caller_accurate
|
||||
; CHECK: call void @callee
|
||||
; ACCURATE-LABEL: @caller_accurate
|
||||
; ACCURATE: call void @callee
|
||||
call void @callee()
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "profile-sample-accurate" }
|
52
test/Transforms/SampleProfile/section-accurate-samplepgo.ll
Normal file
52
test/Transforms/SampleProfile/section-accurate-samplepgo.ll
Normal file
@ -0,0 +1,52 @@
|
||||
; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -codegenprepare -S | FileCheck %s
|
||||
; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -codegenprepare -profile-sample-accurate -S | FileCheck %s --check-prefix ACCURATE
|
||||
|
||||
target triple = "x86_64-pc-linux-gnu"
|
||||
|
||||
; This test checks that a function without a profile gets the unlikely section prefix
|
||||
; if -profile-sample-accurate is specified or the function has the
|
||||
; profile-sample-accurate attribute.
|
||||
|
||||
declare void @hot_func()
|
||||
|
||||
; CHECK-NOT: foo_not_in_profile{{.*}}!section_prefix
|
||||
; CHECK: foo_not_in_profile{{.*}}!prof ![[UNKNOWN_ID:[0-9]+]]
|
||||
; ACCURATE: foo_not_in_profile{{.*}}!prof ![[ZERO_ID:[0-9]+]] !section_prefix ![[COLD_ID:[0-9]+]]
|
||||
; The function not appearing in profile is cold when -profile-sample-accurate
|
||||
; is on.
|
||||
define void @foo_not_in_profile() {
|
||||
call void @hot_func()
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: bar_not_in_profile{{.*}}!prof ![[ZERO_ID:[0-9]+]] !section_prefix ![[COLD_ID:[0-9]+]]
|
||||
; ACCURATE: bar_not_in_profile{{.*}}!prof ![[ZERO_ID:[0-9]+]] !section_prefix ![[COLD_ID:[0-9]+]]
|
||||
; The function not appearing in profile is cold when the func has
|
||||
; profile-sample-accurate attribute.
|
||||
define void @bar_not_in_profile() #0 {
|
||||
call void @hot_func()
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "profile-sample-accurate" }
|
||||
|
||||
; CHECK: ![[UNKNOWN_ID]] = !{!"function_entry_count", i64 -1}
|
||||
; CHECK: ![[ZERO_ID]] = !{!"function_entry_count", i64 0}
|
||||
; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"}
|
||||
; ACCURATE: ![[ZERO_ID]] = !{!"function_entry_count", i64 0}
|
||||
; ACCURATE: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"}
|
||||
!llvm.module.flags = !{!1}
|
||||
!1 = !{i32 1, !"ProfileSummary", !2}
|
||||
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
|
||||
!3 = !{!"ProfileFormat", !"SampleProfile"}
|
||||
!4 = !{!"TotalCount", i64 10000}
|
||||
!5 = !{!"MaxCount", i64 1000}
|
||||
!6 = !{!"MaxInternalCount", i64 1}
|
||||
!7 = !{!"MaxFunctionCount", i64 1000}
|
||||
!8 = !{!"NumCounts", i64 3}
|
||||
!9 = !{!"NumFunctions", i64 3}
|
||||
!10 = !{!"DetailedSummary", !11}
|
||||
!11 = !{!12, !13, !14}
|
||||
!12 = !{i32 10000, i64 100, i32 1}
|
||||
!13 = !{i32 999000, i64 100, i32 1}
|
||||
!14 = !{i32 999999, i64 1, i32 2}
|
Loading…
Reference in New Issue
Block a user