mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
AArch64: Cyclone: Remove SlowMisaligned128Store tuning flag
Remove FeatureSlowMisaligned128Store from cyclone flags. This flag causes splitting of 16-byte-wide stores into two stores of 8 bytes. This was useful on older Apple CPUs, which were slow for 16-byte stores that were not aligned on a 16-byte boundary. As the compiler often cannot predict the actual alignment, splitting was chosen. This has been a topic of much debate, as the splitting also decreases performance for some benchmarks. Measuring the effects on newer Apple chips (rdar://35525421) shows that it harms more cases than it helps. So it is time to retire this workaround. llvm-svn: 323289
This commit is contained in:
parent
c6aca8a06d
commit
81d794ab6d
@ -294,7 +294,6 @@ def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
|
||||
FeatureFuseAES,
|
||||
FeatureNEON,
|
||||
FeaturePerfMon,
|
||||
FeatureSlowMisaligned128Store,
|
||||
FeatureZCRegMove,
|
||||
FeatureZCZeroing,
|
||||
FeatureZCZeroingFPWorkaround
|
||||
|
@ -105,8 +105,8 @@ define void @splat_v4i32(i32 %v, i32 *%p) {
|
||||
entry:
|
||||
|
||||
; CHECK-LABEL: splat_v4i32
|
||||
; CHECK-DAG: stp w0, w0, [x1]
|
||||
; CHECK-DAG: stp w0, w0, [x1, #8]
|
||||
; CHECK-DAG: dup v0.4s, w0
|
||||
; CHECK-DAG: str q0, [x1]
|
||||
; CHECK: ret
|
||||
|
||||
%p17 = insertelement <4 x i32> undef, i32 %v, i32 0
|
||||
@ -129,8 +129,7 @@ entry:
|
||||
; CHECK-DAG: mov v[[REG1]].s[1], w0
|
||||
; CHECK-DAG: mov v[[REG1]].s[2], w0
|
||||
; CHECK-DAG: mov v[[REG1]].s[3], w0
|
||||
; CHECK: ext v[[REG2:[0-9]+]].16b, v[[REG1]].16b, v[[REG1]].16b, #8
|
||||
; CHECK: stp d[[REG1]], d[[REG2]], [x1]
|
||||
; CHECK: str q[[REG1]], [x1]
|
||||
; CHECK: ret
|
||||
|
||||
%p17 = insertelement <4 x i32> undef, i32 %v, i32 %v
|
||||
@ -151,8 +150,7 @@ entry:
|
||||
; CHECK: mov v[[REG1]].s[1], w0
|
||||
; CHECK-DAG: mov v[[REG1]].s[2], w0
|
||||
; CHECK-DAG: mov v[[REG1]].s[3], w0
|
||||
; CHECK: ext v[[REG2:[0-9]+]].16b, v[[REG1]].16b, v[[REG1]].16b, #8
|
||||
; CHECK: stp d[[REG1]], d[[REG2]], [x1]
|
||||
; CHECK: str q[[REG1]], [x1]
|
||||
; CHECK: ret
|
||||
|
||||
%p18 = insertelement <4 x i32> %vin, i32 %v, i32 1
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mcpu=cyclone | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mcpu=cyclone -mattr=+slow-misaligned-128store | FileCheck %s
|
||||
%struct.X = type <{ i32, i64, i64 }>
|
||||
|
||||
define void @foo1(i32* %p, i64 %val) nounwind {
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mcpu=cyclone | FileCheck %s --check-prefix=CYCLONE --check-prefix=CHECK
|
||||
; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mcpu=cyclone -mattr=+slow-misaligned-128store | FileCheck %s --check-prefix=SPLITTING --check-prefix=CHECK
|
||||
; RUN: llc < %s -mtriple=aarch64-eabi -mattr=-slow-misaligned-128store | FileCheck %s --check-prefix=MISALIGNED --check-prefix=CHECK
|
||||
|
||||
@g0 = external global <3 x float>, align 16
|
||||
@ -44,9 +44,9 @@ define void @merge_vec_extract_stores(<4 x float> %v1, <2 x float>* %ptr) {
|
||||
; FIXME: Ideally we would like to use a generic target for this test, but this relies
|
||||
; on suppressing store pairs.
|
||||
|
||||
; CYCLONE-LABEL: merge_vec_extract_stores
|
||||
; CYCLONE: ext v1.16b, v0.16b, v0.16b, #8
|
||||
; CYCLONE-NEXT: str d0, [x0, #24]
|
||||
; CYCLONE-NEXT: str d1, [x0, #32]
|
||||
; CYCLONE-NEXT: ret
|
||||
; SPLITTING-LABEL: merge_vec_extract_stores
|
||||
; SPLITTING: ext v1.16b, v0.16b, v0.16b, #8
|
||||
; SPLITTING-NEXT: str d0, [x0, #24]
|
||||
; SPLITTING-NEXT: str d1, [x0, #32]
|
||||
; SPLITTING-NEXT: ret
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
; REQUIRES: asserts
|
||||
; RUN: llc < %s -mtriple=aarch64 -mcpu=cyclone -mattr=+use-aa -enable-misched -verify-misched -o - | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=aarch64 -mcpu=cyclone -mattr=+use-aa,+slow-misaligned-128store -enable-misched -verify-misched -o - | FileCheck %s
|
||||
|
||||
; Tests to check that the scheduler dependencies derived from alias analysis are
|
||||
; correct when we have loads that have been split up so that they can later be
|
||||
|
Loading…
x
Reference in New Issue
Block a user