mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 11:42:57 +01:00
[AArch64] Prefer DUP/MOV ("CPY") to INS for vector_extract.
This avoids a partial false dependency on the previous content of the upper lanes of the destination vector register.

Differential Revision: http://reviews.llvm.org/D7307
llvm-svn: 227820
This commit is contained in:
parent
e7361de869
commit
53ed9373bc
@ -3716,29 +3716,21 @@ defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, INSvi32lane>;
|
||||
|
||||
|
||||
// Floating point vector extractions are codegen'd as either a sequence of
|
||||
// subregister extractions, possibly fed by an INS if the lane number is
|
||||
// anything other than zero.
|
||||
// subregister extractions, or a MOV (aka CPY here, alias for DUP) if
|
||||
// the lane number is anything other than zero.
|
||||
def : Pat<(vector_extract (v2f64 V128:$Rn), 0),
|
||||
(f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
|
||||
def : Pat<(vector_extract (v4f32 V128:$Rn), 0),
|
||||
(f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
|
||||
def : Pat<(vector_extract (v8f16 V128:$Rn), 0),
|
||||
(f16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
|
||||
|
||||
def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx),
|
||||
(f64 (EXTRACT_SUBREG
|
||||
(INSvi64lane (v2f64 (IMPLICIT_DEF)), 0,
|
||||
V128:$Rn, VectorIndexD:$idx),
|
||||
dsub))>;
|
||||
(f64 (CPYi64 V128:$Rn, VectorIndexD:$idx))>;
|
||||
def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx),
|
||||
(f32 (EXTRACT_SUBREG
|
||||
(INSvi32lane (v4f32 (IMPLICIT_DEF)), 0,
|
||||
V128:$Rn, VectorIndexS:$idx),
|
||||
ssub))>;
|
||||
(f32 (CPYi32 V128:$Rn, VectorIndexS:$idx))>;
|
||||
def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx),
|
||||
(f16 (EXTRACT_SUBREG
|
||||
(INSvi16lane (v8f16 (IMPLICIT_DEF)), 0,
|
||||
V128:$Rn, VectorIndexH:$idx),
|
||||
hsub))>;
|
||||
(f16 (CPYi16 V128:$Rn, VectorIndexH:$idx))>;
|
||||
|
||||
// All concat_vectors operations are canonicalised to act on i64 vectors for
|
||||
// AArch64. In the general case we need an instruction, which had just as well be
|
||||
|
@ -6,7 +6,7 @@
|
||||
; rdar://11855286
|
||||
define double @foo0(<2 x i64> %a) nounwind {
|
||||
; CHECK: scvtf.2d [[REG:v[0-9]+]], v0, #9
|
||||
; CHECK-NEXT: ins.d v0[0], [[REG]][1]
|
||||
; CHECK-NEXT: mov d0, [[REG]][1]
|
||||
%vecext = extractelement <2 x i64> %a, i32 1
|
||||
%fcvt_n = tail call double @llvm.aarch64.neon.vcvtfxs2fp.f64.i64(i64 %vecext, i32 9)
|
||||
ret double %fcvt_n
|
||||
|
@ -188,7 +188,7 @@ define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
|
||||
|
||||
define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
|
||||
; CHECK-LABEL: ins2f1:
|
||||
; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
|
||||
; CHECK: mov {{d[0-9]+}}, {{v[0-9]+}}.d[1]
|
||||
%tmp3 = extractelement <2 x double> %tmp1, i32 1
|
||||
%tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
|
||||
ret <1 x double> %tmp4
|
||||
|
@ -188,10 +188,10 @@ define <8 x half> @s_to_h(<8 x float> %a) {
|
||||
|
||||
define <8 x half> @d_to_h(<8 x double> %a) {
|
||||
; CHECK-LABEL: d_to_h:
|
||||
; CHECK-DAG: ins v{{[0-9]+}}.d
|
||||
; CHECK-DAG: ins v{{[0-9]+}}.d
|
||||
; CHECK-DAG: ins v{{[0-9]+}}.d
|
||||
; CHECK-DAG: ins v{{[0-9]+}}.d
|
||||
; CHECK-DAG: mov d{{[0-9]+}}, v{{[0-9]+}}.d[1]
|
||||
; CHECK-DAG: mov d{{[0-9]+}}, v{{[0-9]+}}.d[1]
|
||||
; CHECK-DAG: mov d{{[0-9]+}}, v{{[0-9]+}}.d[1]
|
||||
; CHECK-DAG: mov d{{[0-9]+}}, v{{[0-9]+}}.d[1]
|
||||
; CHECK-DAG: fcvt h
|
||||
; CHECK-DAG: fcvt h
|
||||
; CHECK-DAG: fcvt h
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
define float @test_dup_sv2S(<2 x float> %v) #0 {
|
||||
; CHECK-LABEL: test_dup_sv2S:
|
||||
; CHECK-NEXT: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
|
||||
; CHECK-NEXT: mov s{{[0-9]+}}, {{v[0-9]+}}.s[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = extractelement <2 x float> %v, i32 1
|
||||
ret float %tmp1
|
||||
@ -19,7 +19,7 @@ define float @test_dup_sv2S_0(<2 x float> %v) #0 {
|
||||
|
||||
define float @test_dup_sv4S(<4 x float> %v) #0 {
|
||||
; CHECK-LABEL: test_dup_sv4S:
|
||||
; CHECK-NEXT: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
|
||||
; CHECK-NEXT: mov s{{[0-9]+}}, {{v[0-9]+}}.s[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = extractelement <4 x float> %v, i32 1
|
||||
ret float %tmp1
|
||||
@ -45,7 +45,7 @@ define double @test_dup_dvD(<1 x double> %v) #0 {
|
||||
|
||||
define double @test_dup_dv2D(<2 x double> %v) #0 {
|
||||
; CHECK-LABEL: test_dup_dv2D:
|
||||
; CHECK-NEXT: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
|
||||
; CHECK-NEXT: mov d{{[0-9]+}}, {{v[0-9]+}}.d[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = extractelement <2 x double> %v, i32 1
|
||||
ret double %tmp1
|
||||
@ -62,7 +62,7 @@ define double @test_dup_dv2D_0(<2 x double> %v) #0 {
|
||||
|
||||
define half @test_dup_hv8H(<8 x half> %v) #0 {
|
||||
; CHECK-LABEL: test_dup_hv8H:
|
||||
; CHECK-NEXT: ins {{v[0-9]+}}.h[0], {{v[0-9]+}}.h[1]
|
||||
; CHECK-NEXT: mov h{{[0-9]+}}, {{v[0-9]+}}.h[1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = extractelement <8 x half> %v, i32 1
|
||||
ret half %tmp1
|
||||
|
Loading…
Reference in New Issue
Block a user