From 7a8a4c3cae33ec04073b933467323b6d2627bef3 Mon Sep 17 00:00:00 2001 From: Kyle Butt Date: Tue, 29 Mar 2016 00:23:41 +0000 Subject: [PATCH] [Codegen] Decrease minimum jump table density. The minimum density for both optsize and non-optsize functions is now controlled by options: -jump-table-density (default 10) for non-optsize functions and -optsize-jump-table-density (default 40) for optsize functions, which matches the previous hard-coded minimum. This improves several benchmarks at Google at the cost of a small code size increase. For code compiled with -Os, the old behavior continues. llvm-svn: 264689 --- .../SelectionDAG/SelectionDAGBuilder.cpp | 26 +++++- .../SelectionDAG/SelectionDAGBuilder.h | 5 +- test/CodeGen/ARM/2011-08-25-ldmia_ret.ll | 2 +- test/CodeGen/Generic/MachineBranchProb.ll | 14 ++-- test/CodeGen/PowerPC/pr26690.ll | 6 +- test/CodeGen/Thumb2/ldr-str-imm12.ll | 20 ++--- test/CodeGen/X86/switch-bt.ll | 2 +- test/CodeGen/X86/switch-density.ll | 81 +++++++++++++++++++ test/CodeGen/X86/switch-edge-weight.ll | 12 +-- test/CodeGen/X86/switch.ll | 4 +- 10 files changed, 134 insertions(+), 38 deletions(-) create mode 100644 test/CodeGen/X86/switch-density.ll diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 5126bdea724..7ed4706b6da 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -86,6 +86,19 @@ static cl::opt EnableFMFInDAG("enable-fmf-dag", cl::init(true), cl::Hidden, cl::desc("Enable fast-math-flags for DAG nodes")); +/// Minimum jump table density for normal functions. +static cl::opt +JumpTableDensity("jump-table-density", cl::init(10), cl::Hidden, + cl::desc("Minimum density for building a jump table in " + "a normal function")); + +/// Minimum jump table density for -Os or -Oz functions. 
+static cl::opt +OptsizeJumpTableDensity("optsize-jump-table-density", cl::init(40), cl::Hidden, + cl::desc("Minimum density for building a jump table in " + "an optsize function")); + + // Limit the width of DAG chains. This is important in general to prevent // DAG-based analysis from blowing up. For example, alias analysis and // load clustering may not complete in reasonable time. It is difficult to @@ -7918,7 +7931,8 @@ void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) { bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters, unsigned *TotalCases, unsigned First, - unsigned Last) { + unsigned Last, + unsigned Density) { assert(Last >= First); assert(TotalCases[Last] >= TotalCases[First]); @@ -7939,7 +7953,7 @@ bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters, assert(NumCases < UINT64_MAX / 100); assert(Range >= NumCases); - return NumCases * 100 >= Range * MinJumpTableDensity; + return NumCases * 100 >= Range * Density; } static inline bool areJTsAllowed(const TargetLowering &TLI) { @@ -8053,7 +8067,11 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, TotalCases[i] += TotalCases[i - 1]; } - if (N >= MinJumpTableSize && isDense(Clusters, &TotalCases[0], 0, N - 1)) { + unsigned MinDensity = JumpTableDensity; + if (DefaultMBB->getParent()->getFunction()->optForSize()) + MinDensity = OptsizeJumpTableDensity; + if (N >= MinJumpTableSize + && isDense(Clusters, &TotalCases[0], 0, N - 1, MinDensity)) { // Cheap case: the whole range might be suitable for jump table. CaseCluster JTCluster; if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) { @@ -8098,7 +8116,7 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, // Search for a solution that results in fewer partitions. for (int64_t j = N - 1; j > i; j--) { // Try building a partition from Clusters[i..j]. 
- if (isDense(Clusters, &TotalCases[0], i, j)) { + if (isDense(Clusters, &TotalCases[0], i, j, MinDensity)) { unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]); bool IsTable = j - i + 1 >= MinJumpTableSize; unsigned Tables = IsTable + (j == N - 1 ? 0 : NumTables[j + 1]); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index f9c9909ad3e..edd1be7fa06 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -303,12 +303,9 @@ private: BranchProbability DefaultProb; }; - /// Minimum jump table density, in percent. - enum { MinJumpTableDensity = 40 }; - /// Check whether a range of clusters is dense enough for a jump table. bool isDense(const CaseClusterVector &Clusters, unsigned *TotalCases, - unsigned First, unsigned Last); + unsigned First, unsigned Last, unsigned MinDensity); /// Build a jump table cluster from Clusters[First..Last]. Returns false if it /// decides it's not a good idea. diff --git a/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll b/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll index 9163166177c..e70f973d6a7 100644 --- a/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll +++ b/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a9 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a9 -jump-table-density=40 | FileCheck %s ; Test that ldmia_ret preserves implicit operands for return values. ; ; This CFG is reduced from a benchmark miscompile. 
With current diff --git a/test/CodeGen/Generic/MachineBranchProb.ll b/test/CodeGen/Generic/MachineBranchProb.ll index 9c672c1f283..e7021504999 100644 --- a/test/CodeGen/Generic/MachineBranchProb.ll +++ b/test/CodeGen/Generic/MachineBranchProb.ll @@ -41,11 +41,11 @@ define void @left_leaning_weight_balanced_tree(i32 %x) { entry: switch i32 %x, label %return [ i32 0, label %bb0 - i32 10, label %bb1 - i32 20, label %bb2 - i32 30, label %bb3 - i32 40, label %bb4 - i32 50, label %bb5 + i32 100, label %bb1 + i32 200, label %bb2 + i32 300, label %bb3 + i32 400, label %bb4 + i32 500, label %bb5 ], !prof !1 bb0: tail call void @g(i32 0) br label %return bb1: tail call void @g(i32 1) br label %return @@ -68,7 +68,7 @@ return: ret void !1 = !{!"branch_weights", ; Default: i32 1, - ; Case 0, 10, 20: + ; Case 0, 100, 200: i32 10, i32 1, i32 1, - ; Case 30, 40, 50: + ; Case 300, 400, 500: i32 1, i32 10, i32 10} diff --git a/test/CodeGen/PowerPC/pr26690.ll b/test/CodeGen/PowerPC/pr26690.ll index 3e7662409d5..524e8b524bf 100644 --- a/test/CodeGen/PowerPC/pr26690.ll +++ b/test/CodeGen/PowerPC/pr26690.ll @@ -35,9 +35,9 @@ while.body.lr.ph: ; preds = %while.cond.preheade while.body: ; preds = %while.body.backedge, %while.body.lr.ph switch i32 %.pre, label %while.body.backedge [ i32 0, label %sw.bb1 - i32 8, label %sw.bb1 - i32 6, label %sw.bb1 - i32 24, label %while.cond.backedge + i32 80, label %sw.bb1 + i32 60, label %sw.bb1 + i32 240, label %while.cond.backedge ] while.body.backedge: ; preds = %while.body, %while.cond.backedge diff --git a/test/CodeGen/Thumb2/ldr-str-imm12.ll b/test/CodeGen/Thumb2/ldr-str-imm12.ll index d20eef0c8bb..3e4bd02097a 100644 --- a/test/CodeGen/Thumb2/ldr-str-imm12.ll +++ b/test/CodeGen/Thumb2/ldr-str-imm12.ll @@ -29,16 +29,16 @@ entry: bb20: ; preds = %entry switch i32 undef, label %bb1287 [ - i32 11, label %bb119 - i32 12, label %bb119 - i32 21, label %bb420 - i32 23, label %bb420 - i32 45, label %bb438 - i32 46, label %bb438 - i32 55, label %bb533 - 
i32 56, label %bb569 - i32 64, label %bb745 - i32 78, label %bb1098 + i32 110, label %bb119 + i32 120, label %bb119 + i32 210, label %bb420 + i32 230, label %bb420 + i32 450, label %bb438 + i32 460, label %bb438 + i32 550, label %bb533 + i32 560, label %bb569 + i32 640, label %bb745 + i32 780, label %bb1098 ] bb119: ; preds = %bb20, %bb20 diff --git a/test/CodeGen/X86/switch-bt.ll b/test/CodeGen/X86/switch-bt.ll index 6a2cbe1ec6c..e4fbbeb26c3 100644 --- a/test/CodeGen/X86/switch-bt.ll +++ b/test/CodeGen/X86/switch-bt.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=x86-64 -asm-verbose=false < %s | FileCheck %s +; RUN: llc -march=x86-64 -asm-verbose=false < %s -jump-table-density=40 | FileCheck %s ; This switch should use bit tests, and the third bit test case is just ; testing for one possible value, so it doesn't need a bt. diff --git a/test/CodeGen/X86/switch-density.ll b/test/CodeGen/X86/switch-density.ll new file mode 100644 index 00000000000..52216fb4d7c --- /dev/null +++ b/test/CodeGen/X86/switch-density.ll @@ -0,0 +1,81 @@ +; RUN: llc -mtriple=x86_64-linux-gnu %s -o - -jump-table-density=25 | FileCheck %s --check-prefix=DENSE --check-prefix=CHECK +; RUN: llc -mtriple=x86_64-linux-gnu %s -o - -jump-table-density=10 | FileCheck %s --check-prefix=SPARSE --check-prefix=CHECK + +declare void @g(i32) + +define void @sparse(i32 %x) { +entry: + switch i32 %x, label %return [ + i32 300, label %bb0 + i32 100, label %bb1 + i32 400, label %bb1 + i32 500, label %bb2 + ] +bb0: tail call void @g(i32 0) br label %return +bb1: tail call void @g(i32 1) br label %return +bb2: tail call void @g(i32 1) br label %return +return: ret void + +; Should pivot around 400 for two subtrees with two cases each. 
+; CHECK-LABEL: sparse +; CHECK-NOT: cmpl +; CHECK: cmpl $399 +; CHECK: cmpl $100 +; CHECK: cmpl $300 +; CHECK: cmpl $400 +; CHECK: cmpl $500 +} + +define void @med(i32 %x) { +entry: + switch i32 %x, label %return [ + i32 30, label %bb0 + i32 10, label %bb1 + i32 40, label %bb1 + i32 50, label %bb2 + i32 20, label %bb3 + ] +bb0: tail call void @g(i32 0) br label %return +bb1: tail call void @g(i32 1) br label %return +bb2: tail call void @g(i32 1) br label %return +bb3: tail call void @g(i32 2) br label %return +return: ret void + +; Lowered as a jump table when sparse, and branches when dense. +; CHECK-LABEL: med +; SPARSE: addl $-10 +; SPARSE: cmpl $40 +; SPARSE: ja +; SPARSE: jmpq *.LJTI +; DENSE-NOT: cmpl +; DENSE: cmpl $29 +; DENSE-DAG: cmpl $10 +; DENSE-DAG: cmpl $20 +; DENSE-DAG: cmpl $30 +; DENSE-DAG: cmpl $40 +; DENSE-DAG: cmpl $50 +; DENSE: retq +} + +define void @dense(i32 %x) { +entry: + switch i32 %x, label %return [ + i32 12, label %bb0 + i32 4, label %bb1 + i32 16, label %bb1 + i32 20, label %bb2 + i32 8, label %bb3 + ] +bb0: tail call void @g(i32 0) br label %return +bb1: tail call void @g(i32 1) br label %return +bb2: tail call void @g(i32 1) br label %return +bb3: tail call void @g(i32 2) br label %return +return: ret void + +; Lowered as a jump table with both the sparse and dense settings. +; CHECK-LABEL: dense +; CHECK: addl $-4 +; CHECK: cmpl $16 +; CHECK: ja +; CHECK: jmpq *.LJTI +} diff --git a/test/CodeGen/X86/switch-edge-weight.ll b/test/CodeGen/X86/switch-edge-weight.ll index b8cb7b1280a..3679433c372 100644 --- a/test/CodeGen/X86/switch-edge-weight.ll +++ b/test/CodeGen/X86/switch-edge-weight.ll @@ -233,11 +233,11 @@ entry: ; block. 
switch i32 %x, label %sw.default [ - i32 1, label %sw.bb - i32 5, label %sw.bb2 - i32 7, label %sw.bb3 - i32 9, label %sw.bb4 - i32 31, label %sw.bb5 + i32 4, label %sw.bb + i32 20, label %sw.bb2 + i32 28, label %sw.bb3 + i32 36, label %sw.bb4 + i32 124, label %sw.bb5 ], !prof !2 sw.bb: @@ -272,7 +272,7 @@ sw.epilog: ; ; CHECK: BB#0: ; BB#0 to BB#6: [10, UINT32_MAX] (15) -; BB#0 to BB#8: [1, 5, 7, 9] (jump table) (45) +; BB#0 to BB#8: [4, 20, 28, 36] (jump table) (45) ; CHECK: Successors according to CFG: BB#8({{[0-9a-fx/= ]+}}25.00%) BB#9({{[0-9a-fx/= ]+}}75.00%) } diff --git a/test/CodeGen/X86/switch.ll b/test/CodeGen/X86/switch.ll index 46587341ea7..70da4a2abb7 100644 --- a/test/CodeGen/X86/switch.ll +++ b/test/CodeGen/X86/switch.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s -; RUN: llc -mtriple=x86_64-linux-gnu %s -o - -O0 | FileCheck --check-prefix=NOOPT %s +; RUN: llc -mtriple=x86_64-linux-gnu %s -o - -jump-table-density=40 | FileCheck %s +; RUN: llc -mtriple=x86_64-linux-gnu %s -o - -O0 -jump-table-density=40 | FileCheck --check-prefix=NOOPT %s declare void @g(i32)