1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-24 19:52:54 +01:00

[InlineAdvisor] Allow replay of inline decisions for the CGSCC inliner from optimization remarks

This change leverages the replay support added for the SampleProfile inliner in D83743 so that it can also be used in the CGSCC inliner. NOTE: currently restricted to non-ML advisors only.

The added switch `-cgscc-inline-replay=<remarks file>` will replay the inlining decisions in that file, where the remarks file is generated via `-Rpass=inline`. The aim here is to make it easier to analyze changes that would otherwise perturb inlining heuristics, by separating the change itself from its effect on inlining. Doing so allows easier examination of assembly and runtime behavior compared to the baseline, rather than trying to dig through the large churn caused by inlining.

In LTO compilation, since inlining is done twice, you can specify replay separately by passing the flag to the FE (`-cgscc-inline-replay=`) and to the linker (`-Wl,cgscc-inline-replay=`), each with the remarks generated from its respective stage.

Testing on mysqld by comparing the inline decisions between base (generates remarks.txt) and diff (replay using identical input/tools with remarks.txt) and examining the inlining sites with `diff` shows 14,000 mismatches out of 247,341, for a ~94% replay accuracy. I believe this gap can be narrowed further, though in the general case we may never achieve full accuracy. For my personal use, this is close enough to be representative: I set the baseline as the one generated by the replay on identical input/toolset and compare that to my modified input/toolset using the same replay.

Testing:
ninja check-llvm
newly added test correctly replays CGSCC inlining decisions

Reviewed By: mtrofin, wenlei

Differential Revision: https://reviews.llvm.org/D94334
This commit is contained in:
modimo 2021-01-25 15:25:39 -08:00
parent d8f7c22241
commit 46d7a90a64
9 changed files with 167 additions and 17 deletions

View File

@ -228,7 +228,8 @@ public:
// InlineAdvisor must be preserved across analysis invalidations.
return false;
}
bool tryCreate(InlineParams Params, InliningAdvisorMode Mode);
bool tryCreate(InlineParams Params, InliningAdvisorMode Mode,
StringRef ReplayFile);
InlineAdvisor *getAdvisor() const { return Advisor.get(); }
void clear() { Advisor.reset(); }

View File

@ -25,13 +25,15 @@ class OptimizationRemarkEmitter;
class ReplayInlineAdvisor : public InlineAdvisor {
public:
ReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM,
LLVMContext &Context, StringRef RemarksFile,
bool EmitRemarks);
LLVMContext &Context,
std::unique_ptr<InlineAdvisor> OriginalAdvisor,
StringRef RemarksFile, bool EmitRemarks);
std::unique_ptr<InlineAdvice> getAdviceImpl(CallBase &CB) override;
bool areReplayRemarksLoaded() const { return HasReplayRemarks; }
private:
StringSet<> InlineSitesFromRemarks;
std::unique_ptr<InlineAdvisor> OriginalAdvisor;
bool HasReplayRemarks = false;
bool EmitRemarks = false;
};

View File

@ -14,6 +14,7 @@
#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/ReplayInlineAdvisor.h"
#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h"
#include "llvm/IR/PassManager.h"
#include <utility>
@ -105,7 +106,7 @@ public:
private:
InlineAdvisor &getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
FunctionAnalysisManager &FAM, Module &M);
std::unique_ptr<DefaultInlineAdvisor> OwnedDefaultAdvisor;
std::unique_ptr<InlineAdvisor> OwnedAdvisor;
const bool OnlyMandatory;
};

View File

@ -16,6 +16,7 @@
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ReplayInlineAdvisor.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
@ -153,11 +154,19 @@ void InlineAdvice::recordInliningWithCalleeDeleted() {
AnalysisKey InlineAdvisorAnalysis::Key;
bool InlineAdvisorAnalysis::Result::tryCreate(InlineParams Params,
InliningAdvisorMode Mode) {
InliningAdvisorMode Mode,
StringRef ReplayFile) {
auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
switch (Mode) {
case InliningAdvisorMode::Default:
Advisor.reset(new DefaultInlineAdvisor(M, FAM, Params));
// Restrict replay to default advisor, ML advisors are stateful so
// replay will need augmentations to interleave with them correctly.
if (!ReplayFile.empty()) {
Advisor = std::make_unique<ReplayInlineAdvisor>(
M, FAM, M.getContext(), std::move(Advisor), ReplayFile,
/* EmitRemarks =*/true);
}
break;
case InliningAdvisorMode::Development:
#ifdef LLVM_HAVE_TF_API
@ -174,6 +183,7 @@ bool InlineAdvisorAnalysis::Result::tryCreate(InlineParams Params,
#endif
break;
}
return !!Advisor;
}

View File

@ -22,12 +22,12 @@ using namespace llvm;
#define DEBUG_TYPE "inline-replay"
ReplayInlineAdvisor::ReplayInlineAdvisor(Module &M,
FunctionAnalysisManager &FAM,
LLVMContext &Context,
StringRef RemarksFile,
bool EmitRemarks)
: InlineAdvisor(M, FAM), HasReplayRemarks(false), EmitRemarks(EmitRemarks) {
ReplayInlineAdvisor::ReplayInlineAdvisor(
Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context,
std::unique_ptr<InlineAdvisor> OriginalAdvisor, StringRef RemarksFile,
bool EmitRemarks)
: InlineAdvisor(M, FAM), OriginalAdvisor(std::move(OriginalAdvisor)),
HasReplayRemarks(false), EmitRemarks(EmitRemarks) {
auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(RemarksFile);
std::error_code EC = BufferOrErr.getError();
if (EC) {

View File

@ -92,6 +92,13 @@ static cl::opt<bool>
extern cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats;
static cl::opt<std::string> CGSCCInlineReplayFile(
"cgscc-inline-replay", cl::init(""), cl::value_desc("filename"),
cl::desc(
"Optimization remarks file containing inline remarks to be replayed "
"by inlining from cgscc inline remarks."),
cl::Hidden);
LegacyInlinerBase::LegacyInlinerBase(char &ID) : CallGraphSCCPass(ID) {}
LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime)
@ -633,8 +640,8 @@ bool LegacyInlinerBase::removeDeadFunctions(CallGraph &CG,
InlineAdvisor &
InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
FunctionAnalysisManager &FAM, Module &M) {
if (OwnedDefaultAdvisor)
return *OwnedDefaultAdvisor;
if (OwnedAdvisor)
return *OwnedAdvisor;
auto *IAA = MAM.getCachedResult<InlineAdvisorAnalysis>(M);
if (!IAA) {
@ -646,9 +653,16 @@ InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
// duration of the inliner pass, and thus the lifetime of the owned advisor.
// The one we would get from the MAM can be invalidated as a result of the
// inliner's activity.
OwnedDefaultAdvisor =
OwnedAdvisor =
std::make_unique<DefaultInlineAdvisor>(M, FAM, getInlineParams());
return *OwnedDefaultAdvisor;
if (!CGSCCInlineReplayFile.empty())
OwnedAdvisor = std::make_unique<ReplayInlineAdvisor>(
M, FAM, M.getContext(), std::move(OwnedAdvisor),
CGSCCInlineReplayFile,
/*EmitRemarks=*/true);
return *OwnedAdvisor;
}
assert(IAA->getAdvisor() &&
"Expected a present InlineAdvisorAnalysis also have an "
@ -998,7 +1012,7 @@ ModuleInlinerWrapperPass::ModuleInlinerWrapperPass(InlineParams Params,
PreservedAnalyses ModuleInlinerWrapperPass::run(Module &M,
ModuleAnalysisManager &MAM) {
auto &IAA = MAM.getResult<InlineAdvisorAnalysis>(M);
if (!IAA.tryCreate(Params, Mode)) {
if (!IAA.tryCreate(Params, Mode, CGSCCInlineReplayFile)) {
M.getContext().emitError(
"Could not setup Inlining Advisor for the requested "
"mode and/or options");

View File

@ -1967,7 +1967,8 @@ bool SampleProfileLoader::doInitialization(Module &M,
if (FAM && !ProfileInlineReplayFile.empty()) {
ExternalInlineAdvisor = std::make_unique<ReplayInlineAdvisor>(
M, *FAM, Ctx, ProfileInlineReplayFile, /*EmitRemarks=*/false);
M, *FAM, Ctx, /*OriginalAdvisor=*/nullptr, ProfileInlineReplayFile,
/*EmitRemarks=*/false);
if (!ExternalInlineAdvisor->areReplayRemarksLoaded())
ExternalInlineAdvisor.reset();
}

View File

@ -0,0 +1,2 @@
remark: calls.cc:10:0: _Z3sumii inlined into main with (cost=45, threshold=337) at callsite main:3:0.1;
remark: calls.cc:4:0: _Z3subii inlined into main with (cost=-5, threshold=337) at callsite _Z3sumii:1:0 @ main:3:0.1;

View File

@ -0,0 +1,119 @@
;; Note that this requires the new pass manager for now; passing `-cgscc-inline-replay` to the legacy pass manager is a no-op.
;; Check replay inline decisions
; RUN: opt < %s -passes=inline -pass-remarks=inline -S 2>&1 | FileCheck -check-prefix=DEFAULT %s
; RUN: opt < %s -passes=inline -cgscc-inline-replay=%S/Inputs/cgscc-inline-replay.txt -pass-remarks=inline -S 2>&1 | FileCheck -check-prefix=REPLAY %s
@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1
; Test callee "sum" (subprogram !6): spills both arguments to stack slots,
; adds them, and additionally calls @_Z3subii on the same two values.
; Every instruction that carries a !dbg attachment here uses !8.
define i32 @_Z3sumii(i32 %x, i32 %y) #0 !dbg !6 {
entry:
%x.addr = alloca i32, align 4
%y.addr = alloca i32, align 4
store i32 %x, i32* %x.addr, align 4
store i32 %y, i32* %y.addr, align 4
%tmp = load i32, i32* %x.addr, align 4, !dbg !8
%tmp1 = load i32, i32* %y.addr, align 4, !dbg !8
%add = add nsw i32 %tmp, %tmp1, !dbg !8
%tmp2 = load i32, i32* %x.addr, align 4, !dbg !8
%tmp3 = load i32, i32* %y.addr, align 4, !dbg !8
; Nested call site into @_Z3subii; its result (%call) is not used below.
%call = call i32 @_Z3subii(i32 %tmp2, i32 %tmp3), !dbg !8
; Only the sum is returned.
ret i32 %add, !dbg !8
}
; Test callee "sub" (subprogram !9): spills both arguments to stack slots and
; returns %x - %y. It makes no calls of its own.
define i32 @_Z3subii(i32 %x, i32 %y) #0 !dbg !9 {
entry:
%x.addr = alloca i32, align 4
%y.addr = alloca i32, align 4
store i32 %x, i32* %x.addr, align 4
store i32 %y, i32* %y.addr, align 4
%tmp = load i32, i32* %x.addr, align 4, !dbg !10
%tmp1 = load i32, i32* %y.addr, align 4, !dbg !10
; Note: the value is named %add, but the operation is a subtraction.
%add = sub nsw i32 %tmp, %tmp1, !dbg !10
ret i32 %add, !dbg !11
}
; Test driver (subprogram !12): loops while the pre-increment value of %i is
; below 400000000. On each iteration it either calls @_Z3sumii(%i, %s) and
; stores the result to %s, or (when %i == 100) stores 30 to %s. After the loop
; it prints %s via @printf and returns 0.
define i32 @main() #0 !dbg !12 {
entry:
%retval = alloca i32, align 4
%s = alloca i32, align 4
%i = alloca i32, align 4
; Only %retval and %i are initialized here; %s has no store in this block.
store i32 0, i32* %retval
store i32 0, i32* %i, align 4, !dbg !13
br label %while.cond, !dbg !14
while.cond: ; preds = %if.end, %entry
; Increment %i in memory, but compare the value loaded before the increment.
%tmp = load i32, i32* %i, align 4, !dbg !15
%inc = add nsw i32 %tmp, 1, !dbg !15
store i32 %inc, i32* %i, align 4, !dbg !15
%cmp = icmp slt i32 %tmp, 400000000, !dbg !15
br i1 %cmp, label %while.body, label %while.end, !dbg !15
while.body: ; preds = %while.cond
%tmp1 = load i32, i32* %i, align 4, !dbg !17
%cmp1 = icmp ne i32 %tmp1, 100, !dbg !17
br i1 %cmp1, label %if.then, label %if.else, !dbg !17
if.then: ; preds = %while.body
%tmp2 = load i32, i32* %i, align 4, !dbg !19
%tmp3 = load i32, i32* %s, align 4, !dbg !19
; The only call to @_Z3sumii in this function, attached to !19.
%call = call i32 @_Z3sumii(i32 %tmp2, i32 %tmp3), !dbg !19
store i32 %call, i32* %s, align 4, !dbg !19
br label %if.end, !dbg !19
if.else: ; preds = %while.body
store i32 30, i32* %s, align 4, !dbg !21
br label %if.end
if.end: ; preds = %if.else, %if.then
br label %while.cond, !dbg !23
while.end: ; preds = %while.cond
%tmp4 = load i32, i32* %s, align 4, !dbg !25
; Prints %s using the "sum is %d\n" format string in @.str.
%call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %tmp4), !dbg !25
ret i32 0, !dbg !26
}
declare i32 @printf(i8*, ...)
attributes #0 = { "use-sample-profile" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4}
!llvm.ident = !{!5}
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.5 ", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "calls.cc", directory: ".")
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 4}
!4 = !{i32 1, !"Debug Info Version", i32 3}
!5 = !{!"clang version 3.5 "}
!6 = distinct !DISubprogram(name: "sum", linkageName: "_Z3sumii", scope: !1, file: !1, line: 3, type: !7, scopeLine: 3, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
!7 = !DISubroutineType(types: !2)
!8 = !DILocation(line: 4, scope: !6)
!9 = distinct !DISubprogram(name: "sub", linkageName: "_Z3subii", scope: !1, file: !1, line: 20, type: !7, scopeLine: 20, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
!10 = !DILocation(line: 20, scope: !9)
!11 = !DILocation(line: 21, scope: !9)
!12 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !7, scopeLine: 7, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
!13 = !DILocation(line: 8, scope: !12)
!14 = !DILocation(line: 9, scope: !12)
!15 = !DILocation(line: 9, scope: !16)
!16 = !DILexicalBlockFile(scope: !12, file: !1, discriminator: 2)
!17 = !DILocation(line: 10, scope: !18)
!18 = distinct !DILexicalBlock(scope: !12, file: !1, line: 10)
!19 = !DILocation(line: 10, scope: !20)
!20 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 2)
!21 = !DILocation(line: 10, scope: !22)
!22 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 4)
!23 = !DILocation(line: 10, scope: !24)
!24 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 6)
!25 = !DILocation(line: 11, scope: !12)
!26 = !DILocation(line: 12, scope: !12)
; DEFAULT: _Z3subii inlined into _Z3sumii
; DEFAULT: _Z3sumii inlined into main
; DEFAULT-NOT: _Z3subii inlined into main
; REPLAY: _Z3sumii inlined into main
; REPLAY: _Z3subii inlined into main
; REPLAY-NOT: _Z3subii inlined into _Z3sumii