mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
[PartialInliner]: Handle code regions in switch statement cases
This patch enhances computeOutliningColdRegionsInfo() to allow it to consider regions containing a single basic block and a single predecessor as candidates for partial inlining.
Reviewed By: fhann
Differential Revision: https://reviews.llvm.org/D89911
This commit is contained in:
parent
3fae7c2ab5
commit
157bbdf8a4
@ -414,11 +414,6 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(
|
||||
std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
|
||||
std::make_unique<FunctionOutliningMultiRegionInfo>();
|
||||
|
||||
auto IsSingleEntry = [](SmallVectorImpl<BasicBlock *> &BlockList) {
|
||||
BasicBlock *Dom = BlockList.front();
|
||||
return BlockList.size() > 1 && Dom->hasNPredecessors(1);
|
||||
};
|
||||
|
||||
auto IsSingleExit =
|
||||
[&ORE](SmallVectorImpl<BasicBlock *> &BlockList) -> BasicBlock * {
|
||||
BasicBlock *ExitBlock = nullptr;
|
||||
@ -502,15 +497,24 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(
|
||||
|
||||
SmallVector<BasicBlock *, 8> DominateVector;
|
||||
DT.getDescendants(*SI, DominateVector);
|
||||
assert(!DominateVector.empty() &&
|
||||
"SI should be reachable and have at least itself as descendant");
|
||||
|
||||
// We can only outline single entry regions (for now).
|
||||
if (!IsSingleEntry(DominateVector))
|
||||
if (!DominateVector.front()->hasNPredecessors(1)) {
|
||||
LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
|
||||
<< " doesn't have a single predecessor in the "
|
||||
"dominator tree\n";);
|
||||
continue;
|
||||
}
|
||||
|
||||
BasicBlock *ExitBlock = nullptr;
|
||||
// We can only outline single exit regions (for now).
|
||||
if (!(ExitBlock = IsSingleExit(DominateVector)))
|
||||
if (!(ExitBlock = IsSingleExit(DominateVector))) {
|
||||
LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
|
||||
<< " doesn't have a unique successor\n";);
|
||||
continue;
|
||||
}
|
||||
|
||||
int OutlineRegionCost = 0;
|
||||
for (auto *BB : DominateVector)
|
||||
@ -519,7 +523,7 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(
|
||||
LLVM_DEBUG(dbgs() << "OutlineRegionCost = " << OutlineRegionCost
|
||||
<< "\n";);
|
||||
|
||||
if (OutlineRegionCost < MinOutlineRegionCost) {
|
||||
if (!SkipCostAnalysis && OutlineRegionCost < MinOutlineRegionCost) {
|
||||
ORE.emit([&]() {
|
||||
return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly",
|
||||
&SI->front())
|
||||
@ -527,8 +531,12 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(
|
||||
<< " inline cost-savings smaller than "
|
||||
<< ore::NV("Cost", MinOutlineRegionCost);
|
||||
});
|
||||
|
||||
LLVM_DEBUG(dbgs() << "ABORT: Outline region cost is smaller than "
|
||||
<< MinOutlineRegionCost << "\n";);
|
||||
continue;
|
||||
}
|
||||
|
||||
// For now, ignore blocks that belong to a SISE region that is a
|
||||
// candidate for outlining. In the future, we may want to look
|
||||
// at inner regions because the outer region may have live-exit
|
||||
|
128
test/Transforms/PartialInlining/switch_stmt.ll
Normal file
128
test/Transforms/PartialInlining/switch_stmt.ll
Normal file
@ -0,0 +1,128 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -passes="partial-inliner" -skip-partial-inlining-cost-analysis -S < %s | FileCheck %s
|
||||
; RUN: opt -partial-inliner -skip-partial-inlining-cost-analysis -S < %s | FileCheck %s
|
||||
|
||||
define dso_local signext i32 @callee(i32 signext %c1, i32 signext %c2) !prof !30 {
|
||||
; CHECK-LABEL: @callee(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[RC:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: store i32 0, i32* [[RC]], align 4
|
||||
; CHECK-NEXT: switch i32 [[C1:%.*]], label [[SW_DEFAULT:%.*]] [
|
||||
; CHECK-NEXT: i32 0, label [[SW_BB:%.*]]
|
||||
; CHECK-NEXT: i32 1, label [[SW_BB1:%.*]]
|
||||
; CHECK-NEXT: i32 2, label [[SW_BB2:%.*]]
|
||||
; CHECK-NEXT: ], !prof !31
|
||||
; CHECK: sw.bb:
|
||||
; CHECK-NEXT: store i32 1, i32* [[RC]], align 4
|
||||
; CHECK-NEXT: br label [[SW_EPILOG:%.*]]
|
||||
; CHECK: sw.bb1:
|
||||
; CHECK-NEXT: store i32 2, i32* [[RC]], align 4
|
||||
; CHECK-NEXT: br label [[SW_EPILOG]]
|
||||
; CHECK: sw.bb2:
|
||||
; CHECK-NEXT: store i32 4, i32* [[RC]], align 4
|
||||
; CHECK-NEXT: br label [[SW_EPILOG]]
|
||||
; CHECK: sw.default:
|
||||
; CHECK-NEXT: store i32 [[C2:%.*]], i32* [[RC]], align 4
|
||||
; CHECK-NEXT: br label [[SW_EPILOG]]
|
||||
; CHECK: sw.epilog:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[RC]], align 4
|
||||
; CHECK-NEXT: ret i32 [[TMP0]]
|
||||
;
|
||||
entry:
|
||||
%rc = alloca i32, align 4
|
||||
store i32 0, i32* %rc, align 4
|
||||
switch i32 %c1, label %sw.default [
|
||||
i32 0, label %sw.bb
|
||||
i32 1, label %sw.bb1
|
||||
i32 2, label %sw.bb2
|
||||
], !prof !31
|
||||
|
||||
sw.bb: ;; cold
|
||||
store i32 1, i32* %rc, align 4
|
||||
br label %sw.epilog
|
||||
|
||||
sw.bb1:
|
||||
store i32 2, i32* %rc, align 4
|
||||
br label %sw.epilog
|
||||
|
||||
sw.bb2: ;; cold
|
||||
store i32 4, i32* %rc, align 4
|
||||
br label %sw.epilog
|
||||
|
||||
sw.default:
|
||||
store i32 %c2, i32* %rc, align 4
|
||||
br label %sw.epilog
|
||||
|
||||
sw.epilog:
|
||||
%0 = load i32, i32* %rc, align 4
|
||||
ret i32 %0
|
||||
}
|
||||
|
||||
define dso_local signext i32 @caller(i32 signext %c) !prof !30 {
|
||||
; CHECK-LABEL: @caller(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[RC_I:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[RC_I]] to i8*
|
||||
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]])
|
||||
; CHECK-NEXT: store i32 0, i32* [[RC_I]], align 4
|
||||
; CHECK-NEXT: switch i32 [[C:%.*]], label [[SW_DEFAULT_I:%.*]] [
|
||||
; CHECK-NEXT: i32 0, label [[CODEREPL_I:%.*]]
|
||||
; CHECK-NEXT: i32 1, label [[SW_BB1_I:%.*]]
|
||||
; CHECK-NEXT: i32 2, label [[CODEREPL1_I:%.*]]
|
||||
; CHECK-NEXT: ], !prof !31
|
||||
; CHECK: codeRepl.i:
|
||||
; CHECK-NEXT: call void @callee.1.sw.bb(i32* [[RC_I]])
|
||||
; CHECK-NEXT: br label [[CALLEE_1_EXIT:%.*]]
|
||||
; CHECK: sw.bb1.i:
|
||||
; CHECK-NEXT: store i32 2, i32* [[RC_I]], align 4
|
||||
; CHECK-NEXT: br label [[CALLEE_1_EXIT]]
|
||||
; CHECK: codeRepl1.i:
|
||||
; CHECK-NEXT: call void @callee.1.sw.bb2(i32* [[RC_I]])
|
||||
; CHECK-NEXT: br label [[CALLEE_1_EXIT]]
|
||||
; CHECK: sw.default.i:
|
||||
; CHECK-NEXT: store i32 [[C]], i32* [[RC_I]], align 4
|
||||
; CHECK-NEXT: br label [[CALLEE_1_EXIT]]
|
||||
; CHECK: callee.1.exit:
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[RC_I]], align 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[RC_I]] to i8*
|
||||
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP2]])
|
||||
;
|
||||
entry:
|
||||
%0 = call signext i32 @callee(i32 signext %c, i32 signext %c)
|
||||
ret i32 %0
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0, !1}
|
||||
|
||||
!0 = !{i32 1, !"wchar_size", i32 4}
|
||||
!1 = !{i32 1, !"ProfileSummary", !2}
|
||||
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10, !11, !12}
|
||||
!3 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!4 = !{!"TotalCount", i64 2}
|
||||
!5 = !{!"MaxCount", i64 1000}
|
||||
!6 = !{!"MaxInternalCount", i64 1000}
|
||||
!7 = !{!"MaxFunctionCount", i64 1000}
|
||||
!8 = !{!"NumCounts", i64 4}
|
||||
!9 = !{!"NumFunctions", i64 2}
|
||||
!10 = !{!"IsPartialProfile", i64 0}
|
||||
!11 = !{!"PartialProfileRatio", double 0.000000e+00}
|
||||
!12 = !{!"DetailedSummary", !13}
|
||||
!13 = !{!14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29}
|
||||
!14 = !{i32 10000, i64 0, i32 0}
|
||||
!15 = !{i32 100000, i64 0, i32 0}
|
||||
!16 = !{i32 200000, i64 0, i32 0}
|
||||
!17 = !{i32 300000, i64 0, i32 0}
|
||||
!18 = !{i32 400000, i64 0, i32 0}
|
||||
!19 = !{i32 500000, i64 1, i32 2}
|
||||
!20 = !{i32 600000, i64 1, i32 2}
|
||||
!21 = !{i32 700000, i64 1, i32 2}
|
||||
!22 = !{i32 800000, i64 1, i32 2}
|
||||
!23 = !{i32 900000, i64 1, i32 2}
|
||||
!24 = !{i32 950000, i64 1, i32 2}
|
||||
!25 = !{i32 990000, i64 1, i32 2}
|
||||
!26 = !{i32 999000, i64 1, i32 2}
|
||||
!27 = !{i32 999900, i64 1, i32 2}
|
||||
!28 = !{i32 999990, i64 1, i32 2}
|
||||
!29 = !{i32 999999, i64 1, i32 2}
|
||||
!30 = !{!"function_entry_count", i64 1000}
|
||||
!31 = !{!"branch_weights", i32 500, i32 10, i32 150, i32 40}
|
Loading…
Reference in New Issue
Block a user