mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 12:12:47 +01:00
b06062f9f0
Second land attempt. MachineVerifier DefRegState expensive check errors fixed. Prologs and epilogs handle callee-save registers and tend to be irregular with different immediate offsets that are not often handled by the MachineOutliner. Commit D18619/a5335647d5e8 (combining stack operations) stretched irregularity further. This patch tries to emit homogeneous stores and loads with the same offset for prologs and epilogs respectively. We have observed that this canonicalizes (homogenizes) prologs and epilogs significantly and results in a greatly increased chance of outlining, resulting in a code size reduction. Despite the above results, there are still size wins to be had that the MachineOutliner does not provide due to the special handling of X30/LR. To handle the LR case, this patch custom-outlines prologs and epilogs in place. It does this by doing the following: * Injects HOM_Prolog and HOM_Epilog pseudo instructions during a Prolog and Epilog Injection Pass. * Lowers and optimizes said pseudos in an AArch64LowerHomogeneousPrologEpilog pass. * Outlined helpers are created on demand. Identical helpers are merged by the linker. * An opt-in flag is introduced to enable this feature. Another threshold flag is also introduced to control the aggressiveness of outlining to suit an application's needs. This reduced code size by an average of 4% on LLVM-TestSuite/CTMark targeting arm64/-Oz. Differential Revision: https://reviews.llvm.org/D76570
41 lines
1.5 KiB
YAML
41 lines
1.5 KiB
YAML
# RUN: llc -mtriple=arm64-apple-ios7.0 -start-before=aarch64-lower-homogeneous-prolog-epilog -homogeneous-prolog-epilog %s -o - | FileCheck %s
|
|
#
|
|
# This test ensures no outlined epilog is formed when X16 is live across the helper.
# In the function below, X16 is loaded from @FuncPtr before HOM_Epilog and is
# consumed by the indirect tail call in the successor block, so it is live
# across the epilog.
|
|
# Embedded LLVM IR module backing the machine function in this file.
#  - @FuncPtr:  global pointer to an i32 (i32) function, initialised to null.
#  - @_Z3fooi:  minsize function with "frame-pointer"="all"; the IR body only
#               returns 0. The MIR document in this file uses the same name.
#  - @_Z3gooii: external declaration taking two i32 arguments.
--- |
|
|
@FuncPtr = local_unnamed_addr global i32 (i32)* null, align 8
|
|
|
|
define i32 @_Z3fooi(i32) minsize "frame-pointer"="all" {
|
|
ret i32 0
|
|
}
|
|
|
|
declare i32 @_Z3gooii(i32, i32)
|
|
...
|
|
# Machine function _Z3fooi, fed to llc starting just before the
# aarch64-lower-homogeneous-prolog-epilog pass (see the RUN line).
#
# bb.0: opens with the HOM_Prolog pseudo over $lr, $fp, $x19, $x20 (immediate
#       16) and its CFI directives, calls @_Z3gooii with BL, loads $x16 from
#       @FuncPtr via ADRP + LDRXui, then executes the HOM_Epilog pseudo and
#       branches to bb.1.
# bb.1: lists $x16 as live-in and tail-calls through it with TCRETURNri.
#
# $x16 is therefore defined before HOM_Epilog and used after it.
# NOTE(review): presumably an outlined epilog helper would clobber X16, which
# is why the pass must not outline here - confirm against the pass source.
---
|
|
name: _Z3fooi
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $w0, $lr, $x19, $x20
|
|
successors: %bb.1
|
|
frame-setup HOM_Prolog $lr, $fp, $x19, $x20, 16
|
|
frame-setup CFI_INSTRUCTION def_cfa $w29, 16
|
|
frame-setup CFI_INSTRUCTION offset $w30, -8
|
|
frame-setup CFI_INSTRUCTION offset $w29, -16
|
|
frame-setup CFI_INSTRUCTION offset $w19, -24
|
|
frame-setup CFI_INSTRUCTION offset $w20, -32
|
|
$w19 = nsw ADDWri $w0, 1, 0
|
|
$w1 = ORRWrr $wzr, $w0
|
|
$w0 = ORRWrr $wzr, $w19
|
|
BL @_Z3gooii, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit $w1, implicit-def $sp, implicit-def $w0
|
|
$x8 = ADRP target-flags(aarch64-page) @FuncPtr
|
|
$x16 = LDRXui killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @FuncPtr
|
|
$w0 = nsw ADDWrr renamable $w0, killed renamable $w19
|
|
$lr, $fp, $x19, $x20 = frame-destroy HOM_Epilog
|
|
B %bb.1
|
|
|
|
bb.1:
|
|
liveins: $w0, $x16
|
|
TCRETURNri killed renamable $x16, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0
|
|
# Expected output: the outlined prolog helper label is emitted, while the
# outlined epilog helper label must not appear anywhere after it.
# CHECK: _OUTLINED_FUNCTION_PROLOG_FRAME16_x30x29x19x20:
|
|
# CHECK-NOT: _OUTLINED_FUNCTION_EPILOG_x30x29x19x20:
|