diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp index 96fdd7632dc..e48ed0b060e 100644 --- a/lib/Transforms/IPO/SampleProfile.cpp +++ b/lib/Transforms/IPO/SampleProfile.cpp @@ -208,7 +208,6 @@ static cl::opt CallsitePrioritizedInline( cl::desc("Use call site prioritized inlining for sample profile loader." "Currently only CSSPGO is supported.")); - static cl::opt ProfileInlineReplayFile( "sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"), cl::desc( @@ -222,6 +221,10 @@ static cl::opt cl::desc("Max number of promotions for a single indirect " "call callsite in sample profile loader")); +static cl::opt OverwriteExistingWeights( + "overwrite-existing-weights", cl::Hidden, cl::init(false), + cl::desc("Ignore existing branch weights on IR and always overwrite.")); + namespace { using BlockWeightMap = DenseMap; @@ -1453,9 +1456,10 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) { auto T = FS->findCallTargetMapAt(CallSite); if (!T || T.get().empty()) continue; - // Prorate the callsite counts to reflect what is already done to the - // callsite, such as ICP or calliste cloning. if (FunctionSamples::ProfileIsProbeBased) { + // Prorate the callsite counts based on the pre-ICP distribution + // factor to reflect what is already done to the callsite before + // ICP, such as callsite cloning. 
if (Optional Probe = extractProbe(I)) { if (Probe->Factor < 1) T = SampleRecord::adjustCallTargets(T.get(), Probe->Factor); @@ -1476,16 +1480,29 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) { Sum += NameFS.second.getEntrySamples(); } } - if (!Sum) - continue; - updateIDTMetaData(I, SortedCallTargets, Sum); + if (Sum) + updateIDTMetaData(I, SortedCallTargets, Sum); + else if (OverwriteExistingWeights) + I.setMetadata(LLVMContext::MD_prof, nullptr); } else if (!isa(&I)) { I.setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights( {static_cast(BlockWeights[BB])})); } } + } else if (OverwriteExistingWeights) { + // Set profile metadata (possibly annotated by LTO prelink) to zero or + // clear it for cold code. + for (auto &I : BB->getInstList()) { + if (isa(I) || isa(I)) { + if (cast(I).isIndirectCall()) + I.setMetadata(LLVMContext::MD_prof, nullptr); + else + I.setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(0)); + } + } } + Instruction *TI = BB->getTerminator(); if (TI->getNumSuccessors() == 1) continue; @@ -1527,20 +1544,28 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) { uint64_t TempWeight; // Only set weights if there is at least one non-zero weight. // In any other case, let the analyzer set weights. - // Do not set weights if the weights are present. In ThinLTO, the profile - // annotation is done twice. If the first annotation already set the - // weights, the second pass does not need to set it. - if (MaxWeight > 0 && !TI->extractProfTotalWeight(TempWeight)) { + // Do not set weights if the weights are present unless under + // OverwriteExistingWeights. In ThinLTO, the profile annotation is done + // twice. If the first annotation already set the weights, the second pass + // does not need to set it. With OverwriteExistingWeights, Blocks with zero + // weight should have their existing metadata (possibly annotated by LTO + // prelink) cleared. 
+ if (MaxWeight > 0 && + (!TI->extractProfTotalWeight(TempWeight) || OverwriteExistingWeights)) { LLVM_DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n"); - TI->setMetadata(LLVMContext::MD_prof, - MDB.createBranchWeights(Weights)); + TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); ORE->emit([&]() { return OptimizationRemark(DEBUG_TYPE, "PopularDest", MaxDestInst) << "most popular destination for conditional branches at " << ore::NV("CondBranchesLoc", BranchLoc); }); } else { - LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n"); + if (OverwriteExistingWeights) { + TI->setMetadata(LLVMContext::MD_prof, nullptr); + LLVM_DEBUG(dbgs() << "CLEARED. All branch weights are zero.\n"); + } else { + LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n"); + } } } } diff --git a/test/Transforms/SampleProfile/branch.ll b/test/Transforms/SampleProfile/branch.ll index 32c857c56d6..dcf61588d54 100644 --- a/test/Transforms/SampleProfile/branch.ll +++ b/test/Transforms/SampleProfile/branch.ll @@ -1,5 +1,6 @@ ; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/branch.prof | opt -analyze -branch-prob -enable-new-pm=0 | FileCheck %s ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/branch.prof | opt -passes='print' -disable-output 2>&1 | FileCheck %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/branch.prof -overwrite-existing-weights=1 | opt -passes='print' -disable-output 2>&1 | FileCheck %s --check-prefix=OVW ; Original C++ code for this test case: ; @@ -90,6 +91,8 @@ for.cond: ; preds = %for.inc, %if.then.2 br i1 %cmp5, label %for.body, label %for.end, !dbg !50, !prof !80 ; CHECK: edge for.cond -> for.body probability is 0x73333333 / 0x80000000 = 90.00% ; CHECK: edge for.cond -> for.end probability is 0x0ccccccd / 0x80000000 = 10.00% +; OVW: edge for.cond -> for.body probability is 0x76b3f3be / 0x80000000 = 92.74% +; OVW: edge for.cond -> for.end probability is 0x094c0c42 / 
0x80000000 = 7.26% for.body: ; preds = %for.cond call void @llvm.dbg.declare(metadata double* %x, metadata !51, metadata !17), !dbg !53 diff --git a/test/Transforms/SampleProfile/pseudo-probe-profile-metadata-2.ll b/test/Transforms/SampleProfile/pseudo-probe-profile-metadata-2.ll new file mode 100644 index 00000000000..1b4e5db669f --- /dev/null +++ b/test/Transforms/SampleProfile/pseudo-probe-profile-metadata-2.ll @@ -0,0 +1,67 @@ +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-profile.prof -pass-remarks=sample-profile -S | FileCheck %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-profile.prof -pass-remarks=sample-profile -overwrite-existing-weights=1 -S | FileCheck %s -check-prefix=OVW + +define dso_local i32 @foo(i32 %x, void (i32)* %f) #0 !dbg !4 !prof !10 { +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + %0 = load i32, i32* %x.addr, align 4 + %cmp = icmp eq i32 %0, 0 + call void @llvm.pseudoprobe(i64 6699318081062747564, i64 1, i32 0, i64 -1) + br i1 %cmp, label %if.then, label %if.else, !prof !11 + +if.then: + call void @llvm.pseudoprobe(i64 6699318081062747564, i64 2, i32 0, i64 -1) + ; CHECK: call {{.*}}, !dbg ![[#]], !prof ![[#PROF:]] + ; OVW: call {{.*}}, !dbg ![[#]], !prof ![[#PROF:]] + call void %f(i32 1), !dbg !13, !prof !16 + store i32 1, i32* %retval, align 4 + br label %return + +if.else: + call void @llvm.pseudoprobe(i64 6699318081062747564, i64 2, i32 0, i64 0) + ; CHECK: call {{.*}}, !dbg ![[#]], !prof ![[#PROF]] + ;; The block should have a 0 weight. Check the profile metadata is dropped. 
+ ; OVW-NOT: call {{.*}}, !dbg ![[#]], !prof + call void %f(i32 2), !dbg !15, !prof !16 + store i32 2, i32* %retval, align 4 + br label %return + +return: + call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 -1) + %1 = load i32, i32* %retval, align 4 + ret i32 %1 +} + +; CHECK: ![[#PROF]] = !{!"VP", i32 0, i64 7, i64 9191153033785521275, i64 5, i64 -1069303473483922844, i64 2} +; OVW: ![[#PROF]] = !{!"VP", i32 0, i64 7, i64 9191153033785521275, i64 5, i64 -1069303473483922844, i64 2} + +declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0 + +attributes #0 = {"use-sample-profile"} + +!llvm.module.flags = !{!0, !1} +!llvm.pseudo_probe_desc = !{!2} + +!0 = !{i32 7, !"Dwarf Version", i32 4} +!1 = !{i32 2, !"Debug Info Version", i32 3} +!2 = !{i64 6699318081062747564, i64 563022570642068, !"foo", null} +!4 = distinct !DISubprogram(name: "foo", scope: !5, file: !5, line: 9, type: !6, scopeLine: 9, spFlags: DISPFlagDefinition, unit: !9) +!5 = !DIFile(filename: "test.cpp", directory: "test") +!6 = !DISubroutineType(types: !7) +!7 = !{!8, !8} +!8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!9 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !5, isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug) +!10 = !{!"function_entry_count", i64 14} +!11 = !{!"branch_weights", i32 100, i32 0} +;; A discriminator of 186646575, which is 0xb20002f in hexadecimal, stands for an indirect call probe +;; with an index of 5 and probe factor of 1.0. +!12 = !DILexicalBlockFile(scope: !4, file: !5, discriminator: 186646575) +!13 = distinct !DILocation(line: 10, column: 11, scope: !12) +;; A discriminator of 134217775, which is 0x800002f in hexadecimal, stands for an indirect call probe +;; with an index of 5 and probe factor of 0. 
+!14 = !DILexicalBlockFile(scope: !4, file: !5, discriminator: 134217775) +!15 = distinct !DILocation(line: 10, column: 11, scope: !14) +!16 = !{!"VP", i32 0, i64 7, i64 9191153033785521275, i64 5, i64 -1069303473483922844, i64 2} +