From eca1a76d00e328d590b784703e7d1e4717f08bde Mon Sep 17 00:00:00 2001 From: wlei Date: Thu, 22 Jul 2021 12:53:42 -0700 Subject: [PATCH] [CSSPGO] Tweak ICP threshold in top-down inliner This change slightly relaxes the current ICP threshold in the top-down inliner; specifically, the top-down inliner is now always allowed one ICP. It shows some perf improvements on SPEC and our internal benchmarks. It also renames the previous flag. We can also try to turn off PGO ICP in the future. Reviewed By: wenlei, hoy, wmi Differential Revision: https://reviews.llvm.org/D106588 --- lib/Transforms/IPO/SampleProfile.cpp | 16 ++++++++++++---- .../SampleProfile/csspgo-inline-icall.ll | 8 ++++---- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp index 90999c1d6b3..8e9c79fc7bb 100644 --- a/lib/Transforms/IPO/SampleProfile.cpp +++ b/lib/Transforms/IPO/SampleProfile.cpp @@ -197,12 +197,17 @@ cl::opt SampleColdCallSiteThreshold( "sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining cold callsites")); -static cl::opt ProfileICPThreshold( - "sample-profile-icp-threshold", cl::Hidden, cl::init(5), +static cl::opt ProfileICPRelativeHotness( + "sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25), cl::desc( - "Relative hotness threshold for indirect " + "Relative hotness percentage threshold for indirect " "call promotion in proirity-based sample profile loader inlining.")); +static cl::opt ProfileICPRelativeHotnessSkip( + "sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1), + cl::desc( + "Skip relative hotness check for ICP up to given number of targets.")); + static cl::opt CallsitePrioritizedInline( "sample-profile-prioritized-inline", cl::Hidden, cl::ZeroOrMore, cl::init(false), @@ -1348,6 +1353,7 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority( auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum); uint64_t SumOrigin = Sum; Sum *= 
Candidate.CallsiteDistribution; + unsigned ICPCount = 0; for (const auto *FS : CalleeSamples) { // TODO: Consider disable pre-lTO ICP for MonoLTO as well if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { @@ -1361,7 +1367,8 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority( // ICP isn't introducing excessive speculative checks even if individual // target looks beneficial to promote and inline. That means we should // only do ICP when there's a small number dominant targets. - if (EntryCountDistributed < SumOrigin / ProfileICPThreshold) + if (ICPCount >= ProfileICPRelativeHotnessSkip && + EntryCountDistributed * 100 < SumOrigin * ProfileICPRelativeHotness) break; // TODO: Fix CallAnalyzer to handle all indirect calls. // For indirect call, we don't run CallAnalyzer to get InlineCost @@ -1383,6 +1390,7 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority( if (getInlineCandidate(&NewCandidate, CB)) CQueue.emplace(NewCandidate); } + ICPCount++; Changed = true; } } diff --git a/test/Transforms/SampleProfile/csspgo-inline-icall.ll b/test/Transforms/SampleProfile/csspgo-inline-icall.ll index 3ec64326da2..7b6cd545c73 100644 --- a/test/Transforms/SampleProfile/csspgo-inline-icall.ll +++ b/test/Transforms/SampleProfile/csspgo-inline-icall.ll @@ -1,7 +1,7 @@ -; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/indirect-call-csspgo.prof -sample-profile-icp-threshold=100 -pass-remarks=sample-profile -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-ALL %s -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/indirect-call-csspgo.prof -sample-profile-icp-threshold=100 -pass-remarks=sample-profile -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-ALL %s -; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/indirect-call-csspgo.prof -sample-profile-icp-threshold=100 -pass-remarks=sample-profile -sample-profile-inline-size=0 -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-HOT %s -; RUN: opt < %s -passes=sample-profile 
-sample-profile-file=%S/Inputs/indirect-call-csspgo.prof -sample-profile-icp-threshold=100 -pass-remarks=sample-profile -sample-profile-inline-size=0 -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-HOT %s +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/indirect-call-csspgo.prof -sample-profile-icp-relative-hotness=1 -pass-remarks=sample-profile -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-ALL %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/indirect-call-csspgo.prof -sample-profile-icp-relative-hotness=1 -pass-remarks=sample-profile -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-ALL %s +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/indirect-call-csspgo.prof -sample-profile-icp-relative-hotness=1 -pass-remarks=sample-profile -sample-profile-inline-size=0 -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-HOT %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/indirect-call-csspgo.prof -sample-profile-icp-relative-hotness=1 -pass-remarks=sample-profile -sample-profile-inline-size=0 -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-HOT %s define void @test(void ()*) #0 !dbg !3 { ;; Add two direct call to force top-down order for sample profile loader