diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp
index 69194eac078..5cb71f71e70 100644
--- a/lib/Transforms/IPO/SampleProfile.cpp
+++ b/lib/Transforms/IPO/SampleProfile.cpp
@@ -75,6 +75,16 @@ static cl::opt<double> SampleProfileHotThreshold(
     "sample-profile-inline-hot-threshold", cl::init(0.1), cl::value_desc("N"),
     cl::desc("Inlined functions that account for more than N% of all samples "
              "collected in the parent function, will be inlined again."));
+static cl::opt<double> SampleProfileGlobalHotThreshold(
+    "sample-profile-global-hot-threshold", cl::init(30), cl::value_desc("N"),
+    cl::desc("Top-level functions that account for more than N% of all samples "
+             "collected in the profile, will be marked as hot for the inliner "
+             "to consider."));
+static cl::opt<double> SampleProfileGlobalColdThreshold(
+    "sample-profile-global-cold-threshold", cl::init(0.5), cl::value_desc("N"),
+    cl::desc("Top-level functions that account for less than N% of all samples "
+             "collected in the profile, will be marked as cold for the inliner "
+             "to consider."));
 
 namespace {
 typedef DenseMap<const BasicBlock *, uint64_t> BlockWeightMap;
@@ -96,7 +106,8 @@ public:
 
   SampleProfileLoader(StringRef Name = SampleProfileFile)
       : ModulePass(ID), DT(nullptr), PDT(nullptr), LI(nullptr), Reader(),
-        Samples(nullptr), Filename(Name), ProfileIsValid(false) {
+        Samples(nullptr), Filename(Name), ProfileIsValid(false),
+        TotalCollectedSamples(0) {
     initializeSampleProfileLoaderPass(*PassRegistry::getPassRegistry());
   }
 
@@ -121,6 +132,7 @@ protected:
   const FunctionSamples *findCalleeFunctionSamples(const CallInst &I) const;
   const FunctionSamples *findFunctionSamples(const Instruction &I) const;
   bool inlineHotFunctions(Function &F);
+  bool emitInlineHints(Function &F);
   void printEdgeWeight(raw_ostream &OS, Edge E);
   void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const;
   void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB);
@@ -185,6 +197,12 @@ protected:
 
   /// \brief Flag indicating whether the profile input loaded successfully.
   bool ProfileIsValid;
+
+  /// \brief Total number of samples collected in this profile.
+  ///
+  /// This is the sum of all the samples collected in all the functions executed
+  /// at runtime.
+  uint64_t TotalCollectedSamples;
 };
 
 class SampleCoverageTracker {
@@ -582,6 +600,60 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
   return FS;
 }
 
+/// \brief Emit an inline hint if \p F is globally hot or cold.
+///
+/// If \p F consumes a significant fraction of samples (indicated by
+/// SampleProfileGlobalHotThreshold), apply the InlineHint attribute for the
+/// inliner to consider the function hot.
+///
+/// If \p F consumes a small fraction of samples (indicated by
+/// SampleProfileGlobalColdThreshold), apply the Cold attribute for the inliner
+/// to consider the function cold.
+///
+/// FIXME - This setting of inline hints is sub-optimal. Instead of marking a
+/// function globally hot or cold, we should be annotating individual callsites.
+/// This is not currently possible, but work on the inliner will eventually
+/// provide this ability. See http://reviews.llvm.org/D15003 for details and
+/// discussion.
+///
+/// \returns True if either attribute was applied to \p F.
+bool SampleProfileLoader::emitInlineHints(Function &F) {
+  if (TotalCollectedSamples == 0)
+    return false;
+
+  uint64_t FnSamples = Samples->getTotalSamples();
+  double SamplesPercent =
+      (double)FnSamples / (double)TotalCollectedSamples * 100.0;
+
+  // If the function's share of samples is at or above the hot threshold,
+  // mark it globally hot.
+  if (SamplesPercent >= SampleProfileGlobalHotThreshold) {
+    F.addFnAttr(llvm::Attribute::InlineHint);
+    emitOptimizationRemark(
+        F.getContext(), DEBUG_TYPE, F, DebugLoc(),
+        Twine("Applied inline hint to globally hot function '" + F.getName() +
+              "' with " + Twine(std::to_string(SamplesPercent)) +
+              "% of samples (threshold: " +
+              Twine(std::to_string(SampleProfileGlobalHotThreshold)) + "%)"));
+    return true;
+  }
+
+  // If the function's share of samples is at or below the cold threshold,
+  // mark it globally cold.
+  if (SamplesPercent <= SampleProfileGlobalColdThreshold) {
+    F.addFnAttr(llvm::Attribute::Cold);
+    emitOptimizationRemark(
+        F.getContext(), DEBUG_TYPE, F, DebugLoc(),
+        Twine("Applied cold hint to globally cold function '" + F.getName() +
+              "' with " + Twine(std::to_string(SamplesPercent)) +
+              "% of samples (threshold: " +
+              Twine(std::to_string(SampleProfileGlobalColdThreshold)) + "%)"));
+    return true;
+  }
+
+  return false;
+}
+
 /// \brief Iteratively inline hot callsites of a function.
 ///
 /// Iteratively traverse all callsites of the function \p F, and find if
@@ -1088,6 +1160,8 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
   DEBUG(dbgs() << "Line number for the first instruction in " << F.getName()
                << ": " << getFunctionLoc(F) << "\n");
 
+  Changed |= emitInlineHints(F);
+
   Changed |= inlineHotFunctions(F);
 
   // Compute basic block weights.
@@ -1165,6 +1239,13 @@ bool SampleProfileLoader::runOnModule(Module &M) {
   if (!ProfileIsValid)
     return false;
 
+  // Compute the total number of samples collected in this profile. Reset the
+  // accumulator first so that, if runOnModule is invoked more than once on
+  // this pass object, samples are not counted twice.
+  TotalCollectedSamples = 0;
+  for (const auto &I : Reader->getProfiles())
+    TotalCollectedSamples += I.second.getTotalSamples();
+
   bool retval = false;
   for (auto &F : M)
     if (!F.isDeclaration()) {
diff --git a/test/Transforms/SampleProfile/Inputs/inline-hint.prof b/test/Transforms/SampleProfile/Inputs/inline-hint.prof
new file mode 100644
index 00000000000..a6840346eb4
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/inline-hint.prof
@@ -0,0 +1,3 @@
+_Z6hot_fnRxi:700:0
+_Z7cold_fnRxi:1:0
+other:299:0
diff --git a/test/Transforms/SampleProfile/inline-hint.ll b/test/Transforms/SampleProfile/inline-hint.ll
new file mode 100644
index 00000000000..16c4e64ec5b
--- /dev/null
+++ b/test/Transforms/SampleProfile/inline-hint.ll
@@ -0,0 +1,38 @@
+; RUN: opt %s -sample-profile -sample-profile-file=%S/Inputs/inline-hint.prof -pass-remarks=sample-profile -o /dev/null 2>&1 | FileCheck %s
+;
+; CHECK: Applied cold hint to globally cold function '_Z7cold_fnRxi' with 0.1
+define void @_Z7cold_fnRxi() !dbg !4 {
+entry:
+  ret void, !dbg !29
+}
+
+; CHECK: Applied inline hint to globally hot function '_Z6hot_fnRxi' with 70.0
+define void @_Z6hot_fnRxi() #0 !dbg !10 {
+entry:
+  ret void, !dbg !38
+}
+
+!llvm.module.flags = !{!17, !18}
+!llvm.ident = !{!19}
+
+!1 = !DIFile(filename: "inline-hint.cc", directory: ".")
+!2 = !{}
+!3 = !{!4, !10, !11, !14}
+!4 = distinct !DISubprogram(name: "cold_fn", linkageName: "_Z7cold_fnRxi", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null, !7, !9}
+!7 = !DIDerivedType(tag: DW_TAG_reference_type, baseType: !8, size: 64, align: 64)
+!8 = !DIBasicType(name: "long long int", size: 64, align: 64, encoding: DW_ATE_signed)
+!9 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!10 = distinct !DISubprogram(name: "hot_fn", linkageName: "_Z6hot_fnRxi", scope: !1, file: !1, line: 7, type: !5, isLocal: false, isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!11 = distinct !DISubprogram(name: "compute", linkageName: "_Z7computex", scope: !1, file: !1, line: 11, type: !12, isLocal: false, isDefinition: true, scopeLine: 11, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!12 = !DISubroutineType(types: !13)
+!13 = !{!8, !8}
+!14 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 21, type: !15, isLocal: false, isDefinition: true, scopeLine: 21, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!15 = !DISubroutineType(types: !16)
+!16 = !{!9}
+!17 = !{i32 2, !"Dwarf Version", i32 4}
+!18 = !{i32 2, !"Debug Info Version", i32 3}
+!19 = !{!"clang version 3.8.0 (trunk 254067) (llvm/trunk 254079)"}
+!29 = !DILocation(line: 5, column: 1, scope: !4)
+!38 = !DILocation(line: 9, column: 1, scope: !10)