mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-19 02:52:53 +02:00
[AArch64] Add patterns matching (fabs (fsub x y)) to (fabd x y)
Differential Revision: https://reviews.llvm.org/D44573
llvm-svn: 329163
This commit is contained in:
parent
09b5caa8ce
commit
b6e03097ce
@ -3087,6 +3087,14 @@ defm CMHI : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>;
|
||||
defm CMHS : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>;
|
||||
defm CMTST : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>;
|
||||
defm FABD : SIMDThreeSameVectorFP<1,1,0b010,"fabd", int_aarch64_neon_fabd>;
|
||||
// Select the vector FABD instruction for (fabs (fsub x, y)), written out one
// pattern per vector type. Single- and double-precision forms are gated on
// NEON alone.
let Predicates = [HasNEON] in {
def : Pat<(fabs (fsub v2f32:$Rn, v2f32:$Rm)), (FABDv2f32 v2f32:$Rn, v2f32:$Rm)>;
def : Pat<(fabs (fsub v4f32:$Rn, v4f32:$Rm)), (FABDv4f32 v4f32:$Rn, v4f32:$Rm)>;
def : Pat<(fabs (fsub v2f64:$Rn, v2f64:$Rm)), (FABDv2f64 v2f64:$Rn, v2f64:$Rm)>;
}
// Half-precision vector forms are additionally gated on full FP16 support.
let Predicates = [HasNEON, HasFullFP16] in {
def : Pat<(fabs (fsub v4f16:$Rn, v4f16:$Rm)), (FABDv4f16 v4f16:$Rn, v4f16:$Rm)>;
def : Pat<(fabs (fsub v8f16:$Rn, v8f16:$Rm)), (FABDv8f16 v8f16:$Rn, v8f16:$Rm)>;
}
|
||||
defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",int_aarch64_neon_facge>;
|
||||
defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",int_aarch64_neon_facgt>;
|
||||
defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp",int_aarch64_neon_addp>;
|
||||
@ -3396,6 +3404,11 @@ defm CMTST : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>;
|
||||
defm FABD : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>;
|
||||
def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
|
||||
(FABD64 FPR64:$Rn, FPR64:$Rm)>;
|
||||
// Select the scalar FABD instruction for (fabs (fsub x, y)).
//
// Scalar FABD is an Advanced SIMD instruction, so every pattern here must be
// gated on HasNEON. Instruction selection checks the predicates attached to
// the Pat itself, not those on the selected instruction, so leaving the
// predicate off would allow FABD to be selected when NEON is disabled.
// The half-precision form additionally needs full FP16 support.
let Predicates = [HasNEON, HasFullFP16] in {
def : Pat<(fabs (fsub f16:$Rn, f16:$Rm)), (FABD16 f16:$Rn, f16:$Rm)>;
}
let Predicates = [HasNEON] in {
def : Pat<(fabs (fsub f32:$Rn, f32:$Rm)), (FABD32 f32:$Rn, f32:$Rm)>;
def : Pat<(fabs (fsub f64:$Rn, f64:$Rm)), (FABD64 f64:$Rn, f64:$Rm)>;
}
|
||||
defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge",
|
||||
int_aarch64_neon_facge>;
|
||||
defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt",
|
||||
|
@ -219,6 +219,40 @@ declare <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float>, <2 x float>) noun
|
||||
declare <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float>, <4 x float>) nounwind readnone
|
||||
declare <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
; fabs(fsub(a, b)) on <2 x float> operands loaded from memory; the checks
; below expect a fabd.2s instruction in the output.
define <2 x float> @fabd_2s_from_fsub_fabs(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: fabd_2s_from_fsub_fabs:
;CHECK: fabd.2s
  %lhs = load <2 x float>, <2 x float>* %A
  %rhs = load <2 x float>, <2 x float>* %B
  %diff = fsub <2 x float> %lhs, %rhs
  %absdiff = call <2 x float> @llvm.fabs.v2f32(<2 x float> %diff)
  ret <2 x float> %absdiff
}
|
||||
|
||||
; fabs(fsub(a, b)) on <4 x float> operands loaded from memory; the checks
; below expect a fabd.4s instruction in the output.
define <4 x float> @fabd_4s_from_fsub_fabs(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: fabd_4s_from_fsub_fabs:
;CHECK: fabd.4s
  %lhs = load <4 x float>, <4 x float>* %A
  %rhs = load <4 x float>, <4 x float>* %B
  %diff = fsub <4 x float> %lhs, %rhs
  %absdiff = call <4 x float> @llvm.fabs.v4f32(<4 x float> %diff)
  ret <4 x float> %absdiff
}
|
||||
|
||||
; fabs(fsub(a, b)) on <2 x double> operands loaded from memory; the checks
; below expect a fabd.2d instruction in the output.
define <2 x double> @fabd_2d_from_fsub_fabs(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: fabd_2d_from_fsub_fabs:
;CHECK: fabd.2d
  %lhs = load <2 x double>, <2 x double>* %A
  %rhs = load <2 x double>, <2 x double>* %B
  %diff = fsub <2 x double> %lhs, %rhs
  %absdiff = call <2 x double> @llvm.fabs.v2f64(<2 x double> %diff)
  ret <2 x double> %absdiff
}
|
||||
|
||||
declare <2 x float> @llvm.fabs.v2f32(<2 x float>) nounwind readnone
|
||||
declare <4 x float> @llvm.fabs.v4f32(<4 x float>) nounwind readnone
|
||||
declare <2 x double> @llvm.fabs.v2f64(<2 x double>) nounwind readnone
|
||||
|
||||
define <8 x i8> @sabd_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
|
||||
;CHECK-LABEL: sabd_8b:
|
||||
;CHECK: sabd.8b
|
||||
@ -829,6 +863,25 @@ define double @fabdd(double %a, double %b) nounwind {
|
||||
declare double @llvm.aarch64.sisd.fabd.f64(double, double) nounwind readnone
|
||||
declare float @llvm.aarch64.sisd.fabd.f32(float, float) nounwind readnone
|
||||
|
||||
; Scalar single-precision fabs(fsub(a, b)); the checks below expect the
; scalar fabd on s-registers.
define float @fabds_from_fsub_fabs(float %a, float %b) nounwind {
; CHECK-LABEL: fabds_from_fsub_fabs:
; CHECK: fabd s0, s0, s1
  %diff = fsub float %a, %b
  %absdiff = tail call float @llvm.fabs.f32(float %diff)
  ret float %absdiff
}
|
||||
|
||||
; Scalar double-precision fabs(fsub(a, b)); the checks below expect the
; scalar fabd on d-registers.
define double @fabdd_from_fsub_fabs(double %a, double %b) nounwind {
; CHECK-LABEL: fabdd_from_fsub_fabs:
; CHECK: fabd d0, d0, d1
  %diff = fsub double %a, %b
  %absdiff = tail call double @llvm.fabs.f64(double %diff)
  ret double %absdiff
}
|
||||
|
||||
declare float @llvm.fabs.f32(float) nounwind readnone
|
||||
declare double @llvm.fabs.f64(double) nounwind readnone
|
||||
|
||||
define <2 x i64> @uabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
|
||||
; CHECK-LABEL: uabdl_from_extract_dup:
|
||||
; CHECK-NOT: ext.16b
|
||||
|
@ -6,6 +6,7 @@ declare half @llvm.aarch64.neon.fmin.f16(half, half)
|
||||
declare half @llvm.aarch64.neon.frsqrts.f16(half, half)
|
||||
declare half @llvm.aarch64.neon.frecps.f16(half, half)
|
||||
declare half @llvm.aarch64.neon.fmulx.f16(half, half)
|
||||
declare half @llvm.fabs.f16(half)
|
||||
|
||||
define dso_local half @t_vabdh_f16(half %a, half %b) {
|
||||
; CHECK-LABEL: t_vabdh_f16:
|
||||
@ -16,6 +17,16 @@ entry:
|
||||
ret half %vabdh_f16
|
||||
}
|
||||
|
||||
; Scalar half-precision fabs(fsub(a, b)); the checks below expect a scalar
; fabd on h-registers immediately followed by the return.
define dso_local half @t_vabdh_f16_from_fsub_fabs(half %a, half %b) {
; CHECK-LABEL: t_vabdh_f16_from_fsub_fabs:
; CHECK: fabd h0, h0, h1
; CHECK-NEXT: ret
entry:
  %diff = fsub half %a, %b
  %absdiff = tail call half @llvm.fabs.f16(half %diff)
  ret half %absdiff
}
|
||||
|
||||
define dso_local i16 @t_vceqh_f16(half %a, half %b) {
|
||||
; CHECK-LABEL: t_vceqh_f16:
|
||||
; CHECK: fcmp h0, h1
|
||||
|
@ -6,6 +6,10 @@ declare <4 x half> @llvm.aarch64.neon.fminnmp.v4f16(<4 x half>, <4 x half>)
|
||||
declare <8 x half> @llvm.aarch64.neon.fminnmp.v8f16(<8 x half>, <8 x half>)
|
||||
declare <4 x half> @llvm.aarch64.neon.fmaxnmp.v4f16(<4 x half>, <4 x half>)
|
||||
declare <8 x half> @llvm.aarch64.neon.fmaxnmp.v8f16(<8 x half>, <8 x half>)
|
||||
declare <4 x half> @llvm.aarch64.neon.fabd.v4f16(<4 x half>, <4 x half>)
|
||||
declare <8 x half> @llvm.aarch64.neon.fabd.v8f16(<8 x half>, <8 x half>)
|
||||
declare <4 x half> @llvm.fabs.v4f16(<4 x half>)
|
||||
declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
|
||||
|
||||
define dso_local <4 x half> @t_vdiv_f16(<4 x half> %a, <4 x half> %b) {
|
||||
; CHECK-LABEL: t_vdiv_f16:
|
||||
@ -78,3 +82,41 @@ entry:
|
||||
%vpmaxnm2.i = tail call <8 x half> @llvm.aarch64.neon.fmaxnmp.v8f16(<8 x half> %a, <8 x half> %b)
|
||||
ret <8 x half> %vpmaxnm2.i
|
||||
}
|
||||
|
||||
; Direct call to the <4 x half> NEON fabd intrinsic; the checks below expect
; a 4h fabd immediately followed by the return.
define dso_local <4 x half> @t_vabd_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: t_vabd_f16:
; CHECK: fabd v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
entry:
  %absdiff = tail call <4 x half> @llvm.aarch64.neon.fabd.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %absdiff
}
|
||||
|
||||
; Direct call to the <8 x half> NEON fabd intrinsic; the checks below expect
; an 8h fabd immediately followed by the return.
define dso_local <8 x half> @t_vabdq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: t_vabdq_f16:
; CHECK: fabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
entry:
  %absdiff = tail call <8 x half> @llvm.aarch64.neon.fabd.v8f16(<8 x half> %a, <8 x half> %b)
  ret <8 x half> %absdiff
}
|
||||
|
||||
; fabs(fsub(a, b)) on <4 x half>; the checks below expect a 4h fabd
; immediately followed by the return.
define dso_local <4 x half> @t_vabd_f16_from_fsub_fabs(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: t_vabd_f16_from_fsub_fabs:
; CHECK: fabd v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
entry:
  %diff = fsub <4 x half> %a, %b
  %absdiff = tail call <4 x half> @llvm.fabs.v4f16(<4 x half> %diff)
  ret <4 x half> %absdiff
}
|
||||
|
||||
; fabs(fsub(a, b)) on <8 x half>; the checks below expect an 8h fabd
; immediately followed by the return.
define dso_local <8 x half> @t_vabdq_f16_from_fsub_fabs(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: t_vabdq_f16_from_fsub_fabs:
; CHECK: fabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
entry:
  %diff = fsub <8 x half> %a, %b
  %absdiff = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %diff)
  ret <8 x half> %absdiff
}
|
||||
|
Loading…
Reference in New Issue
Block a user