1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00
llvm-mirror/test/CodeGen/Thumb2/thumb2-tbb.ll
James Molloy 5538ea11d7 [Thumb-1] Synthesize TBB/TBH instructions to make use of compressed jump tables
[Reapplying r284580 and r285917 with fix and testing to ensure emitted jump tables for Thumb-1 have 4-byte alignment]

The TBB and TBH instructions in Thumb-2 allow jump tables to be compressed into sequences of bytes or shorts respectively. These instructions do not exist in Thumb-1, however it is possible to synthesize them out of a sequence of other instructions.

It turns out this sequence is so short that it's almost never a loss for performance and is ALWAYS a significant win for code size.

TBB example:
Before: lsls r0, r0, #2    After: add  r0, pc
        adr  r1, .LJTI0_0         ldrb r0, [r0, #6]
        ldr  r0, [r0, r1]         lsls r0, r0, #1
        mov  pc, r0               add  pc, r0
  => No change in prologue code size or dynamic instruction count. Jump table shrunk by a factor of 4.

The only case that can increase dynamic instruction count is the TBH case:

Before: lsls r0, r4, #2    After: lsls r4, r4, #1
        adr  r1, .LJTI0_0         add  r4, pc
        ldr  r0, [r0, r1]         ldrh r4, [r4, #6]
        mov  pc, r0               lsls r4, r4, #1
                                  add  pc, r4
  => 1 more instruction in prologue. Jump table shrunk by a factor of 2.

So there is an argument that this should be disabled when optimizing for performance (and a TBH needs to be generated). I'm not so sure about that in practice, because on small cores with Thumb-1 performance is often tied to code size. But I'm willing to turn it off when optimizing for performance if people want (also note that TBHs are fairly rare in practice!)

llvm-svn: 285690
2016-11-01 13:37:41 +00:00

69 lines
1.7 KiB
LLVM

; Test driver: compile this module with llc in four configurations and verify
; the emitted assembly with FileCheck.
;   * thumbv7-apple-darwin, default and PIC relocation models, verified with
;     the default check prefix.
;   * thumbv6m-apple-darwin, static and PIC relocation models, verified with
;     the separate Thumb-1 check prefix.
; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=pic | FileCheck %s
; RUN: llc < %s -mtriple=thumbv6m-apple-darwin -relocation-model=static | FileCheck %s --check-prefix=THUMB1
; RUN: llc < %s -mtriple=thumbv6m-apple-darwin -relocation-model=pic | FileCheck %s --check-prefix=THUMB1
; bar(n.u): dispatch on %n.u through a 12-way switch (cases 1, 2 and 4..13;
; any other value, including 3, goes to the default block bb12). Every
; destination block tail-calls one of the external variadic fooN functions
; and returns.
;
; NOTE(review): the order of the basic blocks below is significant. The
; Thumb-1 expectations name a specific machine basic-block label inside the
; jump table, so reordering blocks may change the label that appears there.
define void @bar(i32 %n.u) {
entry:
; Thumb-2 expectations: a tbb instruction is emitted, and the jump table is
; wrapped in a "jt8" data region whose end marker is immediately followed by
; a .p2align 1 directive.
; CHECK-LABEL: bar:
; CHECK: tbb
; CHECK: .data_region jt8
; CHECK: .end_data_region
; CHECK-NEXT: .p2align 1
; Thumb-1 expectations: the dispatch ends in "add pc, r0", the table is again
; a "jt8" data region, and one of its entries is a .byte holding the halved
; distance from (LCPI0_0+4) to LBB0_3. The region end must be immediately
; followed by a .p2align 1 directive.
; THUMB1-LABEL: bar:
; THUMB1: add pc, r0
; THUMB1: .data_region jt8
; THUMB1: .byte (LBB0_3-(LCPI0_0+4))/2
; THUMB1: .end_data_region
; THUMB1-NEXT: .p2align 1
; The case values are listed in a different order from the blocks that follow.
switch i32 %n.u, label %bb12 [i32 1, label %bb i32 2, label %bb6 i32 4, label %bb7 i32 5, label %bb8 i32 6, label %bb10 i32 7, label %bb1 i32 8, label %bb3 i32 9, label %bb4 i32 10, label %bb9 i32 11, label %bb2 i32 12, label %bb5 i32 13, label %bb11 ]
; case 1
bb:
tail call void(...) @foo1()
ret void
; case 7
bb1:
tail call void(...) @foo2()
ret void
; case 11
bb2:
tail call void(...) @foo6()
ret void
; case 8
bb3:
tail call void(...) @foo3()
ret void
; case 9
bb4:
tail call void(...) @foo4()
ret void
; case 12
bb5:
tail call void(...) @foo5()
ret void
; case 2
bb6:
tail call void(...) @foo1()
ret void
; case 4
bb7:
tail call void(...) @foo2()
ret void
; case 5
bb8:
tail call void(...) @foo6()
ret void
; case 10
bb9:
tail call void(...) @foo3()
ret void
; case 6
bb10:
tail call void(...) @foo4()
ret void
; case 13
bb11:
tail call void(...) @foo5()
ret void
; default destination (every value not listed in the switch)
bb12:
tail call void(...) @foo6()
ret void
}
; External variadic callees used as the tail-call targets of @bar's switch
; destinations. They are only declared here; no definitions are provided in
; this file.
declare void @foo1(...)
declare void @foo2(...)
declare void @foo6(...)
declare void @foo3(...)
declare void @foo4(...)
declare void @foo5(...)