2020-01-27 15:11:45 -08:00
|
|
|
; RUN: llc -verify-machineinstrs -enable-machine-outliner -aarch64-load-store-renaming=true -mtriple=aarch64-apple-darwin < %s | FileCheck %s
|
|
|
|
; RUN: llc -verify-machineinstrs -enable-machine-outliner -aarch64-load-store-renaming=true -mtriple=aarch64-apple-darwin -mcpu=cortex-a53 -enable-misched=false < %s | FileCheck %s
|
2018-04-20 18:03:21 +00:00
|
|
|
; RUN: llc -verify-machineinstrs -enable-machine-outliner -enable-linkonceodr-outlining -mtriple=aarch64-apple-darwin < %s | FileCheck %s -check-prefix=ODR
|
2018-10-29 20:27:07 +00:00
|
|
|
; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=aarch64-apple-darwin -stop-after=machine-outliner < %s | FileCheck %s -check-prefix=TARGET_FEATURES
|
|
|
|
|
|
|
|
; Make sure that we inherit target features from functions and make sure we have
|
|
|
|
; the right function attributes.
|
|
|
|
; TARGET_FEATURES: define internal void @OUTLINED_FUNCTION_{{[0-9]+}}()
|
|
|
|
; TARGET_FEATURES-SAME: #[[ATTR_NUM:[0-9]+]]
|
|
|
|
; TARGET_FEATURES-DAG: attributes #[[ATTR_NUM]] = {
|
|
|
|
; TARGET_FEATURES-SAME: minsize
|
2020-07-01 15:28:44 +01:00
|
|
|
; TARGET_FEATURES-SAME: nounwind
|
2018-10-29 20:27:07 +00:00
|
|
|
; TARGET_FEATURES-SAME: optsize
|
|
|
|
; TARGET_FEATURES-SAME: "target-features"="+sse"
|
[MachineOutliner] Disable outlining from LinkOnceODRs by default
Say you have two identical linkonceodr functions, one in M1 and one in M2.
Say that the outliner outlines A,B,C from one function, and D,E,F from another
function (where letters are instructions). Now those functions are not
identical, and cannot be deduped. Locally to M1 and M2, these outlining
choices would be good-- to the whole program, however, this might not be true!
To mitigate this, this commit makes it so that the outliner sees linkonceodr
functions as unsafe to outline from. It also adds a flag,
-enable-linkonceodr-outlining, which allows the user to specify that they
want to outline from such functions when they know what they're doing.
Changing this handles most code size regressions in the test suite caused by
competing with linker dedupe. It also doesn't have a huge impact on the code
size improvements from the outliner. There are 6 tests that regress > 5% from
outlining WITH linkonceodrs to outlining WITHOUT linkonceodrs. Overall, most
tests either improve or are not impacted.
Not outlined vs outlined without linkonceodrs:
https://hastebin.com/raw/qeguxavuda
Not outlined vs outlined with linkonceodrs:
https://hastebin.com/raw/edepoqoqic
Outlined with linkonceodrs vs outlined without linkonceodrs:
https://hastebin.com/raw/awiqifiheb
Numbers generated using compare.py with -m size.__text. Tests run for AArch64
with -Oz -mllvm -enable-machine-outliner -mno-red-zone.
llvm-svn: 315136
2017-10-07 00:16:34 +00:00
|
|
|
|
|
|
|
define linkonce_odr void @fish() #0 {
; linkonce_odr function whose body is the same six allocas and six stores as
; @cat and @dog. The default-prefix directives below require that no OUTLINED
; symbol appears under _fish, while the run that passes
; -enable-linkonceodr-outlining captures the first OUTLINED_FUNCTION_<n> name
; it sees here and reuses that name in the later functions.
|
|
|
|
; CHECK-LABEL: _fish:
|
2018-04-03 21:36:00 +00:00
|
|
|
; CHECK-NOT: OUTLINED
|
|
|
|
; ODR: [[OUTLINED:OUTLINED_FUNCTION_[0-9]+]]
|
[MachineOutliner] Disable outlining from LinkOnceODRs by default
Say you have two identical linkonceodr functions, one in M1 and one in M2.
Say that the outliner outlines A,B,C from one function, and D,E,F from another
function (where letters are instructions). Now those functions are not
identical, and cannot be deduped. Locally to M1 and M2, these outlining
choices would be good-- to the whole program, however, this might not be true!
To mitigate this, this commit makes it so that the outliner sees linkonceodr
functions as unsafe to outline from. It also adds a flag,
-enable-linkonceodr-outlining, which allows the user to specify that they
want to outline from such functions when they know what they're doing.
Changing this handles most code size regressions in the test suite caused by
competing with linker dedupe. It also doesn't have a huge impact on the code
size improvements from the outliner. There are 6 tests that regress > 5% from
outlining WITH linkonceodrs to outlining WITHOUT linkonceodrs. Overall, most
tests either improve or are not impacted.
Not outlined vs outlined without linkonceodrs:
https://hastebin.com/raw/qeguxavuda
Not outlined vs outlined with linkonceodrs:
https://hastebin.com/raw/edepoqoqic
Outlined with linkonceodrs vs outlined without linkonceodrs:
https://hastebin.com/raw/awiqifiheb
Numbers generated using compare.py with -m size.__text. Tests run for AArch64
with -Oz -mllvm -enable-machine-outliner -mno-red-zone.
llvm-svn: 315136
2017-10-07 00:16:34 +00:00
|
|
|
; Six i32 stack slots, one per constant stored below.
%1 = alloca i32, align 4
|
|
|
|
%2 = alloca i32, align 4
|
|
|
|
%3 = alloca i32, align 4
|
|
|
|
%4 = alloca i32, align 4
|
2018-05-16 21:20:16 +00:00
|
|
|
%5 = alloca i32, align 4
|
|
|
|
%6 = alloca i32, align 4
|
|
|
|
; Store the constants 1..6 into the six slots, in order.
store i32 1, i32* %1, align 4
|
|
|
|
store i32 2, i32* %2, align 4
|
|
|
|
store i32 3, i32* %3, align 4
|
|
|
|
store i32 4, i32* %4, align 4
|
|
|
|
store i32 5, i32* %5, align 4
|
|
|
|
store i32 6, i32* %6, align 4
|
[MachineOutliner] Disable outlining from LinkOnceODRs by default
Say you have two identical linkonceodr functions, one in M1 and one in M2.
Say that the outliner outlines A,B,C from one function, and D,E,F from another
function (where letters are instructions). Now those functions are not
identical, and cannot be deduped. Locally to M1 and M2, these outlining
choices would be good-- to the whole program, however, this might not be true!
To mitigate this, this commit makes it so that the outliner sees linkonceodr
functions as unsafe to outline from. It also adds a flag,
-enable-linkonceodr-outlining, which allows the user to specify that they
want to outline from such functions when they know what they're doing.
Changing this handles most code size regressions in the test suite caused by
competing with linker dedupe. It also doesn't have a huge impact on the code
size improvements from the outliner. There are 6 tests that regress > 5% from
outlining WITH linkonceodrs to outlining WITHOUT linkonceodrs. Overall, most
tests either improve or are not impacted.
Not outlined vs outlined without linkonceodrs:
https://hastebin.com/raw/qeguxavuda
Not outlined vs outlined with linkonceodrs:
https://hastebin.com/raw/edepoqoqic
Outlined with linkonceodrs vs outlined without linkonceodrs:
https://hastebin.com/raw/awiqifiheb
Numbers generated using compare.py with -m size.__text. Tests run for AArch64
with -Oz -mllvm -enable-machine-outliner -mno-red-zone.
llvm-svn: 315136
2017-10-07 00:16:34 +00:00
|
|
|
ret void
|
|
|
|
}
|
2017-03-17 22:26:55 +00:00
|
|
|
|
2018-04-27 00:21:34 +00:00
|
|
|
define void @turtle() section "TURTLE,turtle" {
; Same six allocas and six stores as the other functions in this file, but
; placed in the explicit section "TURTLE,turtle" and without attribute group
; #0. The default-prefix directives below require that no OUTLINED symbol
; appears under _turtle.
; NOTE(review): presumably the explicit section is what keeps the outliner
; away from this function -- confirm against the outliner's section handling.
|
|
|
|
; CHECK-LABEL: _turtle:
|
|
|
|
; ODR-LABEL: _turtle:
|
|
|
|
; CHECK-NOT: OUTLINED
|
|
|
|
; Six i32 stack slots, one per constant stored below.
%1 = alloca i32, align 4
|
|
|
|
%2 = alloca i32, align 4
|
|
|
|
%3 = alloca i32, align 4
|
|
|
|
%4 = alloca i32, align 4
|
2018-05-16 21:20:16 +00:00
|
|
|
%5 = alloca i32, align 4
|
|
|
|
%6 = alloca i32, align 4
|
|
|
|
; Store the constants 1..6 into the six slots, in order.
store i32 1, i32* %1, align 4
|
|
|
|
store i32 2, i32* %2, align 4
|
|
|
|
store i32 3, i32* %3, align 4
|
|
|
|
store i32 4, i32* %4, align 4
|
|
|
|
store i32 5, i32* %5, align 4
|
|
|
|
store i32 6, i32* %6, align 4
|
2018-04-27 00:21:34 +00:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2017-03-17 22:26:55 +00:00
|
|
|
define void @cat() #0 {
; Ordinary (default-linkage) function carrying attribute group #0, with the
; same six allocas and six stores as @dog. Under the default prefix this is
; where the OUTLINED_FUNCTION_<n> name is first captured; the run that uses
; -enable-linkonceodr-outlining expects the name it already captured in @fish.
|
[MachineOutliner] Disable outlining from LinkOnceODRs by default
Say you have two identical linkonceodr functions, one in M1 and one in M2.
Say that the outliner outlines A,B,C from one function, and D,E,F from another
function (where letters are instructions). Now those functions are not
identical, and cannot be deduped. Locally to M1 and M2, these outlining
choices would be good-- to the whole program, however, this might not be true!
To mitigate this, this commit makes it so that the outliner sees linkonceodr
functions as unsafe to outline from. It also adds a flag,
-enable-linkonceodr-outlining, which allows the user to specify that they
want to outline from such functions when they know what they're doing.
Changing this handles most code size regressions in the test suite caused by
competing with linker dedupe. It also doesn't have a huge impact on the code
size improvements from the outliner. There are 6 tests that regress > 5% from
outlining WITH linkonceodrs to outlining WITHOUT linkonceodrs. Overall, most
tests either improve or are not impacted.
Not outlined vs outlined without linkonceodrs:
https://hastebin.com/raw/qeguxavuda
Not outlined vs outlined with linkonceodrs:
https://hastebin.com/raw/edepoqoqic
Outlined with linkonceodrs vs outlined without linkonceodrs:
https://hastebin.com/raw/awiqifiheb
Numbers generated using compare.py with -m size.__text. Tests run for AArch64
with -Oz -mllvm -enable-machine-outliner -mno-red-zone.
llvm-svn: 315136
2017-10-07 00:16:34 +00:00
|
|
|
; CHECK-LABEL: _cat:
|
2018-04-03 21:36:00 +00:00
|
|
|
; CHECK: [[OUTLINED:OUTLINED_FUNCTION_[0-9]+]]
|
|
|
|
; ODR: [[OUTLINED]]
|
2017-03-17 22:26:55 +00:00
|
|
|
; Six i32 stack slots, one per constant stored below.
%1 = alloca i32, align 4
|
|
|
|
%2 = alloca i32, align 4
|
|
|
|
%3 = alloca i32, align 4
|
|
|
|
%4 = alloca i32, align 4
|
2018-05-16 21:20:16 +00:00
|
|
|
%5 = alloca i32, align 4
|
|
|
|
%6 = alloca i32, align 4
|
|
|
|
; Store the constants 1..6 into the six slots, in order.
store i32 1, i32* %1, align 4
|
|
|
|
store i32 2, i32* %2, align 4
|
|
|
|
store i32 3, i32* %3, align 4
|
|
|
|
store i32 4, i32* %4, align 4
|
|
|
|
store i32 5, i32* %5, align 4
|
|
|
|
store i32 6, i32* %6, align 4
|
2017-03-17 22:26:55 +00:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
define void @dog() #0 {
; Ordinary (default-linkage) function carrying attribute group #0, with the
; same six allocas and six stores as @cat. Both the default-prefix directives
; and the ones for the -enable-linkonceodr-outlining run expect a reference to
; the previously captured OUTLINED_FUNCTION_<n> name here, i.e. the same
; outlined function that @cat refers to.
|
[MachineOutliner] Disable outlining from LinkOnceODRs by default
Say you have two identical linkonceodr functions, one in M1 and one in M2.
Say that the outliner outlines A,B,C from one function, and D,E,F from another
function (where letters are instructions). Now those functions are not
identical, and cannot be deduped. Locally to M1 and M2, these outlining
choices would be good-- to the whole program, however, this might not be true!
To mitigate this, this commit makes it so that the outliner sees linkonceodr
functions as unsafe to outline from. It also adds a flag,
-enable-linkonceodr-outlining, which allows the user to specify that they
want to outline from such functions when they know what they're doing.
Changing this handles most code size regressions in the test suite caused by
competing with linker dedupe. It also doesn't have a huge impact on the code
size improvements from the outliner. There are 6 tests that regress > 5% from
outlining WITH linkonceodrs to outlining WITHOUT linkonceodrs. Overall, most
tests either improve or are not impacted.
Not outlined vs outlined without linkonceodrs:
https://hastebin.com/raw/qeguxavuda
Not outlined vs outlined with linkonceodrs:
https://hastebin.com/raw/edepoqoqic
Outlined with linkonceodrs vs outlined without linkonceodrs:
https://hastebin.com/raw/awiqifiheb
Numbers generated using compare.py with -m size.__text. Tests run for AArch64
with -Oz -mllvm -enable-machine-outliner -mno-red-zone.
llvm-svn: 315136
2017-10-07 00:16:34 +00:00
|
|
|
; CHECK-LABEL: _dog:
|
2018-04-03 21:36:00 +00:00
|
|
|
; CHECK: [[OUTLINED]]
|
|
|
|
; ODR: [[OUTLINED]]
|
2017-03-17 22:26:55 +00:00
|
|
|
; Six i32 stack slots, one per constant stored below.
%1 = alloca i32, align 4
|
|
|
|
%2 = alloca i32, align 4
|
|
|
|
%3 = alloca i32, align 4
|
|
|
|
%4 = alloca i32, align 4
|
2018-05-16 21:20:16 +00:00
|
|
|
%5 = alloca i32, align 4
|
|
|
|
%6 = alloca i32, align 4
|
|
|
|
; Store the constants 1..6 into the six slots, in order.
store i32 1, i32* %1, align 4
|
|
|
|
store i32 2, i32* %2, align 4
|
|
|
|
store i32 3, i32* %3, align 4
|
|
|
|
store i32 4, i32* %4, align 4
|
|
|
|
store i32 5, i32* %5, align 4
|
|
|
|
store i32 6, i32* %6, align 4
|
2017-03-17 22:26:55 +00:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2018-04-03 21:36:00 +00:00
|
|
|
; ODR: [[OUTLINED]]:
|
2018-05-15 23:36:46 +00:00
|
|
|
; CHECK: .p2align 2
|
|
|
|
; CHECK-NEXT: [[OUTLINED]]:
|
[AArch64] Teach Load/Store optimizer to rename store operands for pairing.
In some cases, we can rename a store operand, in order to enable pairing
of stores. For store pairs, that cannot be merged because the first
stored register is defined in between the second store, we try to find a
suitable rename register.
First, we check if we can rename the given register:
1. The first store register must be killed at the store, which means we
do not have to rename instructions after the first store.
2. We scan backwards from the first store, to find the definition of the
stored register and check all uses in between are renamable. Along
the way, we collect the minimal register classes of the uses for
overlapping (sub/super)registers.
Second, we try to find an available register from the minimal physical
register class of the original register. A suitable register must not be
1. defined before FirstMI
2. between the previous definition of the register to rename
3. a callee saved register.
We use KILL flags to clear defined registers while scanning from the
beginning to the end of the block.
This triggers quite often, here are the top changes for MultiSource,
SPEC2000, SPEC2006 compiled with -O3 for iOS:
Metric: aarch64-ldst-opt.NumPairCreated
Program base patch diff
test-suite...nch/fourinarow/fourinarow.test 2.00 39.00 1850.0%
test-suite...s/ASC_Sequoia/IRSmk/IRSmk.test 46.00 80.00 73.9%
test-suite...chmarks/Olden/power/power.test 70.00 96.00 37.1%
test-suite...cations/hexxagon/hexxagon.test 29.00 39.00 34.5%
test-suite...nchmarks/McCat/05-eks/eks.test 100.00 132.00 32.0%
test-suite.../Trimaran/enc-rc4/enc-rc4.test 46.00 59.00 28.3%
test-suite...T2006/473.astar/473.astar.test 160.00 200.00 25.0%
test-suite.../Trimaran/enc-md5/enc-md5.test 8.00 10.00 25.0%
test-suite...telecomm-gsm/telecomm-gsm.test 113.00 139.00 23.0%
test-suite...ediabench/gsm/toast/toast.test 113.00 139.00 23.0%
test-suite...Source/Benchmarks/sim/sim.test 91.00 111.00 22.0%
test-suite...C/CFP2000/179.art/179.art.test 41.00 49.00 19.5%
test-suite...peg2/mpeg2dec/mpeg2decode.test 245.00 279.00 13.9%
test-suite...marks/Olden/health/health.test 16.00 18.00 12.5%
test-suite...ks/Prolangs-C/cdecl/cdecl.test 90.00 101.00 12.2%
test-suite...fice-ispell/office-ispell.test 91.00 100.00 9.9%
test-suite...oxyApps-C/miniGMG/miniGMG.test 430.00 465.00 8.1%
test-suite...lowfish/security-blowfish.test 39.00 42.00 7.7%
test-suite.../Applications/spiff/spiff.test 42.00 45.00 7.1%
test-suite...arks/mafft/pairlocalalign.test 2473.00 2646.00 7.0%
test-suite.../VersaBench/ecbdes/ecbdes.test 29.00 31.00 6.9%
test-suite...nch/beamformer/beamformer.test 220.00 235.00 6.8%
test-suite...CFP2000/177.mesa/177.mesa.test 2110.00 2252.00 6.7%
test-suite...ve-susan/automotive-susan.test 109.00 116.00 6.4%
test-suite...s-C/unix-smail/unix-smail.test 65.00 69.00 6.2%
test-suite...CI_Purple/SMG2000/smg2000.test 1194.00 1265.00 5.9%
test-suite.../Benchmarks/nbench/nbench.test 472.00 500.00 5.9%
test-suite...oxyApps-C/miniAMR/miniAMR.test 248.00 262.00 5.6%
test-suite...quoia/CrystalMk/CrystalMk.test 18.00 19.00 5.6%
test-suite...rks/tramp3d-v4/tramp3d-v4.test 7331.00 7710.00 5.2%
test-suite.../Benchmarks/Bullet/bullet.test 5651.00 5938.00 5.1%
test-suite...ternal/HMMER/hmmcalibrate.test 750.00 788.00 5.1%
test-suite...T2006/456.hmmer/456.hmmer.test 764.00 802.00 5.0%
test-suite...ications/JM/ldecod/ldecod.test 1028.00 1079.00 5.0%
test-suite...CFP2006/444.namd/444.namd.test 1368.00 1434.00 4.8%
test-suite...marks/7zip/7zip-benchmark.test 4471.00 4685.00 4.8%
test-suite...6/464.h264ref/464.h264ref.test 3122.00 3271.00 4.8%
test-suite...pplications/oggenc/oggenc.test 1497.00 1565.00 4.5%
test-suite...T2000/300.twolf/300.twolf.test 742.00 774.00 4.3%
test-suite.../Prolangs-C/loader/loader.test 24.00 25.00 4.2%
test-suite...0.perlbench/400.perlbench.test 1983.00 2058.00 3.8%
test-suite...ications/JM/lencod/lencod.test 4612.00 4785.00 3.8%
test-suite...yApps-C++/PENNANT/PENNANT.test 995.00 1032.00 3.7%
test-suite...arks/VersaBench/dbms/dbms.test 54.00 56.00 3.7%
Reviewers: efriedma, thegameg, samparker, dmgreen, paquette, evandro
Reviewed By: paquette
Differential Revision: https://reviews.llvm.org/D70450
2019-12-11 09:59:18 +00:00
|
|
|
; CHECK: mov w9, #1
|
|
|
|
; CHECK-DAG: mov w8, #2
|
|
|
|
; CHECK-DAG: stp w8, w9, [sp, #24]
|
|
|
|
; CHECK-DAG: mov w9, #3
|
|
|
|
; CHECK-DAG: mov w8, #4
|
|
|
|
; CHECK-DAG: stp w8, w9, [sp, #16]
|
|
|
|
; CHECK-DAG: mov w9, #5
|
|
|
|
; CHECK-DAG: mov w8, #6
|
|
|
|
; CHECK-DAG: stp w8, w9, [sp, #8]
|
|
|
|
; CHECK-DAG: add sp, sp, #32
|
|
|
|
; CHECK-DAG: ret
|
2017-03-17 22:26:55 +00:00
|
|
|
|
2020-07-01 15:28:44 +01:00
|
|
|
; Attribute group #0, referenced by @fish, @cat and @dog. The "target-features"
; string here is the value that the -stop-after=machine-outliner run expects to
; find on the attributes of the outlined function (see the directives at the
; top of the file).
; NOTE(review): "+sse" does not look like an AArch64 feature name; presumably
; it serves only as a marker string for the inheritance check -- confirm.
attributes #0 = { nounwind noredzone "target-cpu"="cyclone" "target-features"="+sse" }
|