mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
300cd7b42b
In some cases, we can rename a store operand in order to enable pairing of stores. For store pairs that cannot be merged because the first stored register is defined between the first and the second store, we try to find a suitable rename register. First, we check if we can rename the given register: 1. The first store register must be killed at the store, which means we do not have to rename instructions after the first store. 2. We scan backwards from the first store, to find the definition of the stored register and check all uses in between are renamable. Along the way, we collect the minimal register classes of the uses for overlapping (sub/super)registers. Second, we try to find an available register from the minimal physical register class of the original register. A suitable register must not be 1. defined before FirstMI 2. between the previous definition of the register to rename 3. a callee saved register. We use KILL flags to clear defined registers while scanning from the beginning to the end of the block. 
This triggers quite often, here are the top changes for MultiSource, SPEC2000, SPEC2006 compiled with -O3 for iOS: Metric: aarch64-ldst-opt.NumPairCreated Program base patch diff test-suite...nch/fourinarow/fourinarow.test 2.00 39.00 1850.0% test-suite...s/ASC_Sequoia/IRSmk/IRSmk.test 46.00 80.00 73.9% test-suite...chmarks/Olden/power/power.test 70.00 96.00 37.1% test-suite...cations/hexxagon/hexxagon.test 29.00 39.00 34.5% test-suite...nchmarks/McCat/05-eks/eks.test 100.00 132.00 32.0% test-suite.../Trimaran/enc-rc4/enc-rc4.test 46.00 59.00 28.3% test-suite...T2006/473.astar/473.astar.test 160.00 200.00 25.0% test-suite.../Trimaran/enc-md5/enc-md5.test 8.00 10.00 25.0% test-suite...telecomm-gsm/telecomm-gsm.test 113.00 139.00 23.0% test-suite...ediabench/gsm/toast/toast.test 113.00 139.00 23.0% test-suite...Source/Benchmarks/sim/sim.test 91.00 111.00 22.0% test-suite...C/CFP2000/179.art/179.art.test 41.00 49.00 19.5% test-suite...peg2/mpeg2dec/mpeg2decode.test 245.00 279.00 13.9% test-suite...marks/Olden/health/health.test 16.00 18.00 12.5% test-suite...ks/Prolangs-C/cdecl/cdecl.test 90.00 101.00 12.2% test-suite...fice-ispell/office-ispell.test 91.00 100.00 9.9% test-suite...oxyApps-C/miniGMG/miniGMG.test 430.00 465.00 8.1% test-suite...lowfish/security-blowfish.test 39.00 42.00 7.7% test-suite.../Applications/spiff/spiff.test 42.00 45.00 7.1% test-suite...arks/mafft/pairlocalalign.test 2473.00 2646.00 7.0% test-suite.../VersaBench/ecbdes/ecbdes.test 29.00 31.00 6.9% test-suite...nch/beamformer/beamformer.test 220.00 235.00 6.8% test-suite...CFP2000/177.mesa/177.mesa.test 2110.00 2252.00 6.7% test-suite...ve-susan/automotive-susan.test 109.00 116.00 6.4% test-suite...s-C/unix-smail/unix-smail.test 65.00 69.00 6.2% test-suite...CI_Purple/SMG2000/smg2000.test 1194.00 1265.00 5.9% test-suite.../Benchmarks/nbench/nbench.test 472.00 500.00 5.9% test-suite...oxyApps-C/miniAMR/miniAMR.test 248.00 262.00 5.6% test-suite...quoia/CrystalMk/CrystalMk.test 18.00 19.00 5.6% 
test-suite...rks/tramp3d-v4/tramp3d-v4.test 7331.00 7710.00 5.2% test-suite.../Benchmarks/Bullet/bullet.test 5651.00 5938.00 5.1% test-suite...ternal/HMMER/hmmcalibrate.test 750.00 788.00 5.1% test-suite...T2006/456.hmmer/456.hmmer.test 764.00 802.00 5.0% test-suite...ications/JM/ldecod/ldecod.test 1028.00 1079.00 5.0% test-suite...CFP2006/444.namd/444.namd.test 1368.00 1434.00 4.8% test-suite...marks/7zip/7zip-benchmark.test 4471.00 4685.00 4.8% test-suite...6/464.h264ref/464.h264ref.test 3122.00 3271.00 4.8% test-suite...pplications/oggenc/oggenc.test 1497.00 1565.00 4.5% test-suite...T2000/300.twolf/300.twolf.test 742.00 774.00 4.3% test-suite.../Prolangs-C/loader/loader.test 24.00 25.00 4.2% test-suite...0.perlbench/400.perlbench.test 1983.00 2058.00 3.8% test-suite...ications/JM/lencod/lencod.test 4612.00 4785.00 3.8% test-suite...yApps-C++/PENNANT/PENNANT.test 995.00 1032.00 3.7% test-suite...arks/VersaBench/dbms/dbms.test 54.00 56.00 3.7% Reviewers: efriedma, thegameg, samparker, dmgreen, paquette, evandro Reviewed By: paquette Differential Revision: https://reviews.llvm.org/D70450
107 lines
3.3 KiB
LLVM
107 lines
3.3 KiB
LLVM
; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=aarch64-apple-darwin < %s | FileCheck %s
|
|
; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=aarch64-apple-darwin -mcpu=cortex-a53 -enable-misched=false < %s | FileCheck %s
|
|
; RUN: llc -verify-machineinstrs -enable-machine-outliner -enable-linkonceodr-outlining -mtriple=aarch64-apple-darwin < %s | FileCheck %s -check-prefix=ODR
|
|
; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=aarch64-apple-darwin -stop-after=machine-outliner < %s | FileCheck %s -check-prefix=TARGET_FEATURES
|
|
|
|
; Make sure that we inherit target features from functions and make sure we have
|
|
; the right function attributes.
|
|
; TARGET_FEATURES: define internal void @OUTLINED_FUNCTION_{{[0-9]+}}()
|
|
; TARGET_FEATURES-SAME: #[[ATTR_NUM:[0-9]+]]
|
|
; TARGET_FEATURES-DAG: attributes #[[ATTR_NUM]] = {
|
|
; TARGET_FEATURES-SAME: minsize
|
|
; TARGET_FEATURES-SAME: optsize
|
|
; TARGET_FEATURES-SAME: "target-features"="+sse"
|
|
|
|
; @fish: linkonce_odr function that carries attribute group #0. It allocates
; six i32 stack slots and stores the constants 1..6 into them, the same
; instruction sequence used by the other functions in this file.
; Expectations written below: the default-prefix run must not find the text
; "OUTLINED" after the _fish label, while the run that uses the ODR prefix
; (the RUN line adding -enable-linkonceodr-outlining) must find an
; OUTLINED_FUNCTION_<n> name and captures it in the OUTLINED variable.
define linkonce_odr void @fish() #0 {
|
|
; CHECK-LABEL: _fish:
|
|
; CHECK-NOT: OUTLINED
|
|
; ODR: [[OUTLINED:OUTLINED_FUNCTION_[0-9]+]]
|
|
; Six 4-byte-aligned i32 stack slots.
%1 = alloca i32, align 4
|
|
%2 = alloca i32, align 4
|
|
%3 = alloca i32, align 4
|
|
%4 = alloca i32, align 4
|
|
%5 = alloca i32, align 4
|
|
%6 = alloca i32, align 4
|
|
; Store the constants 1..6 into slots %1..%6 in order.
store i32 1, i32* %1, align 4
|
|
store i32 2, i32* %2, align 4
|
|
store i32 3, i32* %3, align 4
|
|
store i32 4, i32* %4, align 4
|
|
store i32 5, i32* %5, align 4
|
|
store i32 6, i32* %6, align 4
|
|
ret void
|
|
}
|
|
|
|
; @turtle: same alloca/store body as the other functions, but with default
; linkage, no attribute group, and an explicit section "TURTLE,turtle".
; The default-prefix run must not find the text "OUTLINED" after the _turtle
; label. NOTE(review): presumably the explicit section is what keeps this
; function from being outlined -- confirm against the outliner's rules.
define void @turtle() section "TURTLE,turtle" {
|
|
; CHECK-LABEL: _turtle:
|
|
; ODR-LABEL: _turtle:
|
|
; CHECK-NOT: OUTLINED
|
|
; Six 4-byte-aligned i32 stack slots.
%1 = alloca i32, align 4
|
|
%2 = alloca i32, align 4
|
|
%3 = alloca i32, align 4
|
|
%4 = alloca i32, align 4
|
|
%5 = alloca i32, align 4
|
|
%6 = alloca i32, align 4
|
|
; Store the constants 1..6 into slots %1..%6 in order.
store i32 1, i32* %1, align 4
|
|
store i32 2, i32* %2, align 4
|
|
store i32 3, i32* %3, align 4
|
|
store i32 4, i32* %4, align 4
|
|
store i32 5, i32* %5, align 4
|
|
store i32 6, i32* %6, align 4
|
|
ret void
|
|
}
|
|
|
|
; @cat: default-linkage function carrying attribute group #0, with the same
; alloca/store body as the other functions in this file.
; The default-prefix run must find an OUTLINED_FUNCTION_<n> name after the
; _cat label and captures it in the OUTLINED variable; the run that uses the
; ODR prefix must find the name it captured earlier in @fish.
define void @cat() #0 {
|
|
; CHECK-LABEL: _cat:
|
|
; CHECK: [[OUTLINED:OUTLINED_FUNCTION_[0-9]+]]
|
|
; ODR: [[OUTLINED]]
|
|
; Six 4-byte-aligned i32 stack slots.
%1 = alloca i32, align 4
|
|
%2 = alloca i32, align 4
|
|
%3 = alloca i32, align 4
|
|
%4 = alloca i32, align 4
|
|
%5 = alloca i32, align 4
|
|
%6 = alloca i32, align 4
|
|
; Store the constants 1..6 into slots %1..%6 in order.
store i32 1, i32* %1, align 4
|
|
store i32 2, i32* %2, align 4
|
|
store i32 3, i32* %3, align 4
|
|
store i32 4, i32* %4, align 4
|
|
store i32 5, i32* %5, align 4
|
|
store i32 6, i32* %6, align 4
|
|
ret void
|
|
}
|
|
|
|
; @dog: default-linkage function carrying attribute group #0, with a body
; identical to @cat. Both runs must find, after the _dog label (default
; prefix) or after the previous match (ODR prefix), the same outlined
; function name that was captured in the OUTLINED variable earlier.
define void @dog() #0 {
|
|
; CHECK-LABEL: _dog:
|
|
; CHECK: [[OUTLINED]]
|
|
; ODR: [[OUTLINED]]
|
|
; Six 4-byte-aligned i32 stack slots.
%1 = alloca i32, align 4
|
|
%2 = alloca i32, align 4
|
|
%3 = alloca i32, align 4
|
|
%4 = alloca i32, align 4
|
|
%5 = alloca i32, align 4
|
|
%6 = alloca i32, align 4
|
|
; Store the constants 1..6 into slots %1..%6 in order.
store i32 1, i32* %1, align 4
|
|
store i32 2, i32* %2, align 4
|
|
store i32 3, i32* %3, align 4
|
|
store i32 4, i32* %4, align 4
|
|
store i32 5, i32* %5, align 4
|
|
store i32 6, i32* %6, align 4
|
|
ret void
|
|
}
|
|
|
|
; Expected definition of the outlined function itself. The run that uses the
; ODR prefix only requires the captured name to appear as a label. The
; default-prefix run requires a ".p2align 2" line immediately followed by the
; label, then "mov w9, #1"; the remaining mov/stp pairs (constants 2..6 stored
; at sp offsets 24, 16 and 8), the "add sp, sp, #32" and the "ret" are matched
; with order-independent DAG directives.
; ODR: [[OUTLINED]]:
|
|
; CHECK: .p2align 2
|
|
; CHECK-NEXT: [[OUTLINED]]:
|
|
; CHECK: mov w9, #1
|
|
; CHECK-DAG: mov w8, #2
|
|
; CHECK-DAG: stp w8, w9, [sp, #24]
|
|
; CHECK-DAG: mov w9, #3
|
|
; CHECK-DAG: mov w8, #4
|
|
; CHECK-DAG: stp w8, w9, [sp, #16]
|
|
; CHECK-DAG: mov w9, #5
|
|
; CHECK-DAG: mov w8, #6
|
|
; CHECK-DAG: stp w8, w9, [sp, #8]
|
|
; CHECK-DAG: add sp, sp, #32
|
|
; CHECK-DAG: ret
|
|
|
|
; Attribute group referenced by @fish, @cat and @dog (not by @turtle). The run
; that stops after machine-outliner expects the "target-features"="+sse"
; string below to appear on the outlined function's attribute set.
attributes #0 = { noredzone "target-cpu"="cyclone" "target-features"="+sse" }
|