mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-25 05:52:53 +02:00
d4bc2d86b6
This is a re-commit of r235101, which also fixes the problems with the previous patch: - Switches with only a default case and non-fallthrough were handled incorrectly - The previous patch tickled a bug in PowerPC Early-Return Creation which is fixed here. > This is a major rewrite of the SelectionDAG switch lowering. The previous code > would lower switches as a binary tree, discovering clusters of cases > suitable for lowering by jump tables or bit tests as it went along. To increase > the likelihood of finding jump tables, the binary tree pivot was selected to > maximize case density on both sides of the pivot. > > By not selecting the pivot in the middle, the binary trees would not always > be balanced, leading to performance problems in the generated code. > > This patch rewrites the lowering to search for clusters of cases > suitable for jump tables or bit tests first, and then builds the binary > tree around those clusters. This way, the binary tree will always be balanced. > > This has the added benefit of decoupling the different aspects of the lowering: > tree building and jump table or bit tests finding are now easier to tweak > separately. > > For example, this will enable us to balance the tree based on profile info > in the future. > > The algorithm for finding jump tables is quadratic, whereas the previous algorithm > was O(n log n) for common cases, and quadratic only in the worst case. This > doesn't seem to be a major problem in practice, e.g. compiling a file consisting > of a 10k-case switch was only 30% slower, and such large switches should be rare > in practice. Compiling e.g. gcc.c showed no compile-time difference. If this > does turn out to be a problem, we could limit the search space of the algorithm. > > This commit also disables all optimizations during switch lowering in -O0. > > Differential Revision: http://reviews.llvm.org/D8649 llvm-svn: 235560
33 lines
733 B
LLVM
33 lines
733 B
LLVM
; Test driver: this file is compiled three times for the arm-eabi triple and
; each output is matched against a different FileCheck prefix.
;   1. -mcpu=cortex-a8 : default prefix (the checks placed inside @t1).
;   2. -mattr=+v4t     : the V4-CMP prefix (checks after the function body).
;   3. -mattr=+v4t     : the V4-BX prefix (checks after the function body).
; Runs 2 and 3 use identical llc options; only the checked prefix differs.
; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s
|
|
; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - | FileCheck %s -check-prefix CHECK-V4-CMP
|
|
; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - | FileCheck %s -check-prefix CHECK-V4-BX
|
|
|
|
; @t1: returns (%a + 1) + %b when %c is 1 or 7, otherwise returns %b + %a.
; %d is accepted as a parameter but is never read in the body.
;
; The default-prefix checks interleaved below expect the two switch cases to
; be tested with "cmp r2, #7" followed by a conditional "cmpne r2, #1", and
; the cond_true path to appear as conditional "addne" / "bxne" instructions.
; NOTE(review): r2 presumably holds %c (the third integer argument) - confirm
; against the calling convention used for this triple.
define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
|
|
; CHECK-LABEL: t1:
|
|
; CHECK: cmp r2, #7
|
|
; CHECK: cmpne r2, #1
|
|
; Two-case switch on %c: both listed values share the cond_true target, and
; every other value goes to the default target cond_next.
switch i32 %c, label %cond_next [
|
|
i32 1, label %cond_true
|
|
i32 7, label %cond_true
|
|
]
|
|
|
|
; Reached when %c is 1 or 7: computes (%a + 1) + %b and returns it.
cond_true:
|
|
; CHECK: addne r0
|
|
; CHECK: bxne
|
|
%tmp12 = add i32 %a, 1
|
|
%tmp1518 = add i32 %tmp12, %b
|
|
ret i32 %tmp1518
|
|
|
|
; Default target of the switch: returns %b + %a without the extra increment.
cond_next:
|
|
%tmp15 = add i32 %b, %a
|
|
ret i32 %tmp15
|
|
}
|
|
|
|
; Expectations for the second RUN configuration (-mattr=+v4t, V4-CMP prefix):
; one "cmpne" must be found in the output, and no further "cmpne" may appear
; after that match.
; CHECK-V4-CMP: cmpne
|
|
; CHECK-V4-CMP-NOT: cmpne
|
|
|
|
; Expectations for the third RUN configuration (-mattr=+v4t, V4-BX prefix):
; two successive "bx" matches must be found, and no further "bx" may appear
; after the second one.
; NOTE(review): these patterns are substring matches, so a predicated form
; such as "bxne" would presumably also satisfy a "bx" check - confirm whether
; that is intended.
; CHECK-V4-BX: bx
|
|
; CHECK-V4-BX: bx
|
|
; CHECK-V4-BX-NOT: bx
|
|
|