mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 19:52:54 +01:00
[AArch64] Improve codegen of store lane 0 instructions by directly storing the subregister.
For 0-lane stores, we used to generate code similar to:

    fmov w8, s0
    str w8, [x0, x1, lsl #2]

instead of:

    str s0, [x0, x1, lsl #2]

To correct that: for store lane 0 patterns, directly match to STR <subreg>0.

Byte-sized instructions don't have the special case for a 0 index, because FPR8s are defined to have untyped content.

rdar://16372710
Differential Revision: http://reviews.llvm.org/D6772
llvm-svn: 225181
This commit is contained in:
parent
535beaf90d
commit
3f0fc5a029
@ -1889,6 +1889,33 @@ let Predicates = [IsLE] in {
|
||||
}
|
||||
} // AddedComplexity = 10
|
||||
|
||||
// Match stores from lane 0 to the appropriate subreg's store.
|
||||
// VecROStoreLane0Pat - Selection patterns for storing element 0 of a vector
// through a register-offset address.
//
// Each instantiation emits two anonymous patterns that differ only in the
// index register class: one with a GPR32 index (ro.Wpat / ro.Wext) selecting
// STRW, and one with a GPR64 index (ro.Xpat / ro.Xext) selecting STRX.
// Both match `storeop` applied to (vector_extract $Vt, 0) and store the
// SubRegIdx sub-register of $Vt directly.
//
// Parameters:
//   ro        - register-offset addressing mode providing Wpat/Xpat/Wext/Xext.
//   storeop   - store node to match (plain or truncating store).
//   VecTy     - vector type of the source operand $Vt.
//   STy       - scalar type produced by the vector_extract.
//   SubRegIdx - sub-register index passed to EXTRACT_SUBREG.
//   STRW/STRX - store instructions for the 32-bit / 64-bit index forms.
multiclass VecROStoreLane0Pat<ROAddrMode ro, SDPatternOperator storeop,
|
||||
ValueType VecTy, ValueType STy,
|
||||
SubRegIndex SubRegIdx,
|
||||
Instruction STRW, Instruction STRX> {
|
||||
|
||||
// 32-bit index register form: base GPR64sp $Rn plus extended GPR32 $Rm.
def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
|
||||
(ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
|
||||
(STRW (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
|
||||
GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
|
||||
|
||||
// 64-bit index register form: base GPR64sp $Rn plus GPR64 $Rm.
def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
|
||||
(ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
|
||||
(STRX (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
|
||||
GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
|
||||
}
|
||||
|
||||
// Instantiate the lane-0 store patterns for halfword (hsub), word (ssub) and
// doubleword (dsub) elements. No byte-sized instantiation is listed here.
// NOTE(review): AddedComplexity = 19 is presumably chosen so these patterns
// outrank the AddedComplexity = 10 patterns that precede them - confirm.
let AddedComplexity = 19 in {
|
||||
// 16-bit elements: truncating store of the i32-typed extract, and plain i16 store.
defm : VecROStoreLane0Pat<ro16, truncstorei16, v8i16, i32, hsub, STRHroW, STRHroX>;
|
||||
defm : VecROStoreLane0Pat<ro16, store , v8i16, i16, hsub, STRHroW, STRHroX>;
|
||||
// 32-bit elements: integer (truncstorei32 and store) and float.
defm : VecROStoreLane0Pat<ro32, truncstorei32, v4i32, i32, ssub, STRSroW, STRSroX>;
|
||||
defm : VecROStoreLane0Pat<ro32, store , v4i32, i32, ssub, STRSroW, STRSroX>;
|
||||
defm : VecROStoreLane0Pat<ro32, store , v4f32, f32, ssub, STRSroW, STRSroX>;
|
||||
// 64-bit elements: integer and double.
defm : VecROStoreLane0Pat<ro64, store , v2i64, i64, dsub, STRDroW, STRDroX>;
|
||||
defm : VecROStoreLane0Pat<ro64, store , v2f64, f64, dsub, STRDroW, STRDroX>;
|
||||
}
|
||||
|
||||
//---
|
||||
// (unsigned immediate)
|
||||
defm STRX : StoreUI<0b11, 0, 0b00, GPR64, uimm12s8, "str",
|
||||
|
@ -8,6 +8,16 @@ define void @st1lane_16b(<16 x i8> %A, i8* %D) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; Stores element 0 of a <16 x i8> to %D + %offset (register-offset address).
; Byte elements are expected to still be moved to a GPR (umov.b) and then
; stored with strb, rather than stored directly from the vector register.
define void @st1lane0_ro_16b(<16 x i8> %A, i8* %D, i64 %offset) {
|
||||
; CHECK-LABEL: st1lane0_ro_16b
|
||||
; CHECK: umov.b w[[WREG:[0-9]+]], v0[0]
|
||||
; CHECK: strb w[[WREG]], [x0, x1]
|
||||
%ptr = getelementptr i8* %D, i64 %offset
|
||||
%tmp = extractelement <16 x i8> %A, i32 0
|
||||
store i8 %tmp, i8* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st1lane_8h(<8 x i16> %A, i16* %D) {
|
||||
; CHECK-LABEL: st1lane_8h
|
||||
; CHECK: st1.h
|
||||
@ -16,6 +26,15 @@ define void @st1lane_8h(<8 x i16> %A, i16* %D) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; Stores element 0 of a <8 x i16> to %D + %offset (register-offset address).
; Expected to select a direct "str h0" with the index scaled by lsl #1.
define void @st1lane0_ro_8h(<8 x i16> %A, i16* %D, i64 %offset) {
|
||||
; CHECK-LABEL: st1lane0_ro_8h
|
||||
; CHECK: str h0, [x0, x1, lsl #1]
|
||||
%ptr = getelementptr i16* %D, i64 %offset
|
||||
%tmp = extractelement <8 x i16> %A, i32 0
|
||||
store i16 %tmp, i16* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st1lane_4s(<4 x i32> %A, i32* %D) {
|
||||
; CHECK-LABEL: st1lane_4s
|
||||
; CHECK: st1.s
|
||||
@ -24,6 +43,15 @@ define void @st1lane_4s(<4 x i32> %A, i32* %D) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; Stores element 0 of a <4 x i32> to %D + %offset (register-offset address).
; Expected to select a direct "str s0" with the index scaled by lsl #2.
define void @st1lane0_ro_4s(<4 x i32> %A, i32* %D, i64 %offset) {
|
||||
; CHECK-LABEL: st1lane0_ro_4s
|
||||
; CHECK: str s0, [x0, x1, lsl #2]
|
||||
%ptr = getelementptr i32* %D, i64 %offset
|
||||
%tmp = extractelement <4 x i32> %A, i32 0
|
||||
store i32 %tmp, i32* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st1lane_4s_float(<4 x float> %A, float* %D) {
|
||||
; CHECK-LABEL: st1lane_4s_float
|
||||
; CHECK: st1.s
|
||||
@ -32,6 +60,15 @@ define void @st1lane_4s_float(<4 x float> %A, float* %D) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; Stores element 0 of a <4 x float> to %D + %offset (register-offset address).
; Expected to select a direct "str s0" with the index scaled by lsl #2.
define void @st1lane0_ro_4s_float(<4 x float> %A, float* %D, i64 %offset) {
|
||||
; CHECK-LABEL: st1lane0_ro_4s_float
|
||||
; CHECK: str s0, [x0, x1, lsl #2]
|
||||
%ptr = getelementptr float* %D, i64 %offset
|
||||
%tmp = extractelement <4 x float> %A, i32 0
|
||||
store float %tmp, float* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st1lane_2d(<2 x i64> %A, i64* %D) {
|
||||
; CHECK-LABEL: st1lane_2d
|
||||
; CHECK: st1.d
|
||||
@ -40,6 +77,15 @@ define void @st1lane_2d(<2 x i64> %A, i64* %D) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; Stores element 0 of a <2 x i64> to %D + %offset (register-offset address).
; Expected to select a direct "str d0" with the index scaled by lsl #3.
define void @st1lane0_ro_2d(<2 x i64> %A, i64* %D, i64 %offset) {
|
||||
; CHECK-LABEL: st1lane0_ro_2d
|
||||
; CHECK: str d0, [x0, x1, lsl #3]
|
||||
%ptr = getelementptr i64* %D, i64 %offset
|
||||
%tmp = extractelement <2 x i64> %A, i32 0
|
||||
store i64 %tmp, i64* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st1lane_2d_double(<2 x double> %A, double* %D) {
|
||||
; CHECK-LABEL: st1lane_2d_double
|
||||
; CHECK: st1.d
|
||||
@ -48,6 +94,15 @@ define void @st1lane_2d_double(<2 x double> %A, double* %D) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; Stores element 0 of a <2 x double> to %D + %offset (register-offset address).
; Expected to select a direct "str d0" with the index scaled by lsl #3.
define void @st1lane0_ro_2d_double(<2 x double> %A, double* %D, i64 %offset) {
|
||||
; CHECK-LABEL: st1lane0_ro_2d_double
|
||||
; CHECK: str d0, [x0, x1, lsl #3]
|
||||
%ptr = getelementptr double* %D, i64 %offset
|
||||
%tmp = extractelement <2 x double> %A, i32 0
|
||||
store double %tmp, double* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st1lane_8b(<8 x i8> %A, i8* %D) {
|
||||
; CHECK-LABEL: st1lane_8b
|
||||
; CHECK: st1.b
|
||||
@ -56,6 +111,16 @@ define void @st1lane_8b(<8 x i8> %A, i8* %D) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; Stores element 0 of a <8 x i8> to %D + %offset (register-offset address).
; Byte elements are expected to still be moved to a GPR (umov.b) and then
; stored with strb, rather than stored directly from the vector register.
define void @st1lane0_ro_8b(<8 x i8> %A, i8* %D, i64 %offset) {
|
||||
; CHECK-LABEL: st1lane0_ro_8b
|
||||
; CHECK: umov.b w[[WREG:[0-9]+]], v0[0]
|
||||
; CHECK: strb w[[WREG]], [x0, x1]
|
||||
%ptr = getelementptr i8* %D, i64 %offset
|
||||
%tmp = extractelement <8 x i8> %A, i32 0
|
||||
store i8 %tmp, i8* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st1lane_4h(<4 x i16> %A, i16* %D) {
|
||||
; CHECK-LABEL: st1lane_4h
|
||||
; CHECK: st1.h
|
||||
@ -64,6 +129,15 @@ define void @st1lane_4h(<4 x i16> %A, i16* %D) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; Stores element 0 of a <4 x i16> to %D + %offset (register-offset address).
; Expected to select a direct "str h0" with the index scaled by lsl #1.
define void @st1lane0_ro_4h(<4 x i16> %A, i16* %D, i64 %offset) {
|
||||
; CHECK-LABEL: st1lane0_ro_4h
|
||||
; CHECK: str h0, [x0, x1, lsl #1]
|
||||
%ptr = getelementptr i16* %D, i64 %offset
|
||||
%tmp = extractelement <4 x i16> %A, i32 0
|
||||
store i16 %tmp, i16* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st1lane_2s(<2 x i32> %A, i32* %D) {
|
||||
; CHECK-LABEL: st1lane_2s
|
||||
; CHECK: st1.s
|
||||
@ -72,6 +146,15 @@ define void @st1lane_2s(<2 x i32> %A, i32* %D) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; Stores element 0 of a <2 x i32> to %D + %offset (register-offset address).
; Expected to select a direct "str s0" with the index scaled by lsl #2.
define void @st1lane0_ro_2s(<2 x i32> %A, i32* %D, i64 %offset) {
|
||||
; CHECK-LABEL: st1lane0_ro_2s
|
||||
; CHECK: str s0, [x0, x1, lsl #2]
|
||||
%ptr = getelementptr i32* %D, i64 %offset
|
||||
%tmp = extractelement <2 x i32> %A, i32 0
|
||||
store i32 %tmp, i32* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st1lane_2s_float(<2 x float> %A, float* %D) {
|
||||
; CHECK-LABEL: st1lane_2s_float
|
||||
; CHECK: st1.s
|
||||
@ -80,6 +163,15 @@ define void @st1lane_2s_float(<2 x float> %A, float* %D) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; Stores element 0 of a <2 x float> to %D + %offset (register-offset address).
; Expected to select a direct "str s0" with the index scaled by lsl #2.
define void @st1lane0_ro_2s_float(<2 x float> %A, float* %D, i64 %offset) {
|
||||
; CHECK-LABEL: st1lane0_ro_2s_float
|
||||
; CHECK: str s0, [x0, x1, lsl #2]
|
||||
%ptr = getelementptr float* %D, i64 %offset
|
||||
%tmp = extractelement <2 x float> %A, i32 0
|
||||
store float %tmp, float* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st2lane_16b(<16 x i8> %A, <16 x i8> %B, i8* %D) {
|
||||
; CHECK-LABEL: st2lane_16b
|
||||
; CHECK: st2.b
|
||||
|
Loading…
Reference in New Issue
Block a user