[ARM][AArch64] Complex addition Neon intrinsics for Armv8.3-A

Summary:
Add support for the vcadd_* family of intrinsics. This set of intrinsics is
available in Armv8.3-A.

The fp16 versions require the FP16 extension, which has been available
(opt-in) since Armv8.2-A.
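
For illustration, a minimal IR sketch of the expected use and lowering
(function names are invented for this example; the expected instructions
mirror the codegen tests added in this patch):

; Expected to lower to: fcadd v0.4s, v0.4s, v1.4s, #90 (with -mattr=+v8.3a)
define <4 x float> @example_rot90_f32(<4 x float> %a, <4 x float> %b) {
  %r = call <4 x float> @llvm.aarch64.neon.vcadd.rot90.v4f32(<4 x float> %a, <4 x float> %b)
  ret <4 x float> %r
}

; The half-precision form additionally needs +fullfp16:
; fcadd v0.4h, v0.4h, v1.4h, #270
define <4 x half> @example_rot270_f16(<4 x half> %a, <4 x half> %b) {
  %r = call <4 x half> @llvm.aarch64.neon.vcadd.rot270.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %r
}

declare <4 x float> @llvm.aarch64.neon.vcadd.rot90.v4f32(<4 x float>, <4 x float>)
declare <4 x half> @llvm.aarch64.neon.vcadd.rot270.v4f16(<4 x half>, <4 x half>)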

Reviewers: t.p.northover

Reviewed By: t.p.northover

Subscribers: t.p.northover, kristof.beyls, hiraditya, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D70862
Commit 770dba83e1 (parent 0c097b7a4f)
Author: Victor Campos
Date: 2019-12-02 12:13:04 +00:00
6 changed files with 173 additions and 0 deletions

@@ -446,6 +446,10 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in {
def int_aarch64_neon_fmlsl : AdvSIMD_FP16FML_Intrinsic;
def int_aarch64_neon_fmlal2 : AdvSIMD_FP16FML_Intrinsic;
def int_aarch64_neon_fmlsl2 : AdvSIMD_FP16FML_Intrinsic;
// v8.3-A Floating-point complex add
def int_aarch64_neon_vcadd_rot90 : AdvSIMD_2VectorArg_Intrinsic;
def int_aarch64_neon_vcadd_rot270 : AdvSIMD_2VectorArg_Intrinsic;
}
let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".

@@ -778,6 +778,10 @@ def int_arm_vctp16 : Intrinsic<[llvm_v8i1_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_arm_vctp32 : Intrinsic<[llvm_v4i1_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_arm_vctp64 : Intrinsic<[llvm_v2i1_ty], [llvm_i32_ty], [IntrNoMem]>;
// v8.3-A Floating-point complex add
def int_arm_neon_vcadd_rot90 : Neon_2Arg_Intrinsic;
def int_arm_neon_vcadd_rot270 : Neon_2Arg_Intrinsic;
// GNU eabi mcount
def int_arm_gnu_eabi_mcount : Intrinsic<[],
                                        [],

@@ -757,6 +757,29 @@ defm FCADD : SIMDThreeSameVectorComplexHSD<1, 0b111, complexrotateopodd,
defm FCMLA : SIMDIndexedTiedComplexHSD<1, 0, 1, complexrotateop, "fcmla",
                                       null_frag>;
let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
  def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot90 (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
            (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 0))>;
  def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot270 (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
            (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 1))>;
  def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot90 (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
            (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 0))>;
  def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot270 (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
            (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 1))>;
}
let Predicates = [HasComplxNum, HasNEON] in {
  def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot90 (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
            (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 0))>;
  def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot270 (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
            (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 1))>;
  foreach Ty = [v4f32, v2f64] in {
    def : Pat<(Ty (int_aarch64_neon_vcadd_rot90 (Ty V128:$Rn), (Ty V128:$Rm))),
              (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 0))>;
    def : Pat<(Ty (int_aarch64_neon_vcadd_rot270 (Ty V128:$Rn), (Ty V128:$Rm))),
              (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 1))>;
  }
}
// v8.3a Pointer Authentication
// These instructions inhabit part of the hint space and so can be used for
// armv8 targets

@@ -5012,6 +5012,27 @@ defm VCMLA : N3VCP8ComplexTied<1, 0, "vcmla", null_frag>;
defm VCADD : N3VCP8ComplexOdd<1, 0, 0, "vcadd", null_frag>;
defm VCMLA : N3VCP8ComplexTiedLane<0, "vcmla", null_frag>;
let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
  def : Pat<(v4f16 (int_arm_neon_vcadd_rot90 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm))),
            (VCADDv4f16 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm), (i32 0))>;
  def : Pat<(v4f16 (int_arm_neon_vcadd_rot270 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm))),
            (VCADDv4f16 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm), (i32 1))>;
  def : Pat<(v8f16 (int_arm_neon_vcadd_rot90 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm))),
            (VCADDv8f16 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm), (i32 0))>;
  def : Pat<(v8f16 (int_arm_neon_vcadd_rot270 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm))),
            (VCADDv8f16 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm), (i32 1))>;
}
let Predicates = [HasNEON,HasV8_3a] in {
  def : Pat<(v2f32 (int_arm_neon_vcadd_rot90 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm))),
            (VCADDv2f32 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm), (i32 0))>;
  def : Pat<(v2f32 (int_arm_neon_vcadd_rot270 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm))),
            (VCADDv2f32 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm), (i32 1))>;
  def : Pat<(v4f32 (int_arm_neon_vcadd_rot90 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm))),
            (VCADDv4f32 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm), (i32 0))>;
  def : Pat<(v4f32 (int_arm_neon_vcadd_rot270 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm))),
            (VCADDv4f32 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm), (i32 1))>;
}
// Vector Subtract Operations.
// VSUB : Vector Subtract (integer and floating-point)

@@ -0,0 +1,67 @@
; RUN: llc %s -mtriple=aarch64 -mattr=+v8.3a,+fullfp16 -o - | FileCheck %s
define <4 x half> @foo16x4_rot(<4 x half> %a, <4 x half> %b) {
entry:
; CHECK-LABEL: foo16x4_rot
; CHECK-DAG: fcadd v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, #90
; CHECK-DAG: fcadd v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, #270
%vcadd_rot90_v2.i = tail call <4 x half> @llvm.aarch64.neon.vcadd.rot90.v4f16(<4 x half> %a, <4 x half> %b)
%vcadd_rot270_v2.i = tail call <4 x half> @llvm.aarch64.neon.vcadd.rot270.v4f16(<4 x half> %a, <4 x half> %b)
%add = fadd <4 x half> %vcadd_rot90_v2.i, %vcadd_rot270_v2.i
ret <4 x half> %add
}
define <2 x float> @foo32x2_rot(<2 x float> %a, <2 x float> %b) {
entry:
; CHECK-LABEL: foo32x2_rot
; CHECK-DAG: fcadd v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #90
; CHECK-DAG: fcadd v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #270
%vcadd_rot90_v2.i = tail call <2 x float> @llvm.aarch64.neon.vcadd.rot90.v2f32(<2 x float> %a, <2 x float> %b)
%vcadd_rot270_v2.i = tail call <2 x float> @llvm.aarch64.neon.vcadd.rot270.v2f32(<2 x float> %a, <2 x float> %b)
%add = fadd <2 x float> %vcadd_rot90_v2.i, %vcadd_rot270_v2.i
ret <2 x float> %add
}
define <8 x half> @foo16x8_rot(<8 x half> %a, <8 x half> %b) {
entry:
; CHECK-LABEL: foo16x8_rot
; CHECK-DAG: fcadd v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, #90
; CHECK-DAG: fcadd v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, #270
%vcaddq_rot90_v2.i = tail call <8 x half> @llvm.aarch64.neon.vcadd.rot90.v8f16(<8 x half> %a, <8 x half> %b)
%vcaddq_rot270_v2.i = tail call <8 x half> @llvm.aarch64.neon.vcadd.rot270.v8f16(<8 x half> %a, <8 x half> %b)
%add = fadd <8 x half> %vcaddq_rot90_v2.i, %vcaddq_rot270_v2.i
ret <8 x half> %add
}
define <4 x float> @foo32x4_rot(<4 x float> %a, <4 x float> %b) {
entry:
; CHECK-LABEL: foo32x4_rot
; CHECK-DAG: fcadd v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, #90
; CHECK-DAG: fcadd v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, #270
%vcaddq_rot90_v2.i = tail call <4 x float> @llvm.aarch64.neon.vcadd.rot90.v4f32(<4 x float> %a, <4 x float> %b)
%vcaddq_rot270_v2.i = tail call <4 x float> @llvm.aarch64.neon.vcadd.rot270.v4f32(<4 x float> %a, <4 x float> %b)
%add = fadd <4 x float> %vcaddq_rot90_v2.i, %vcaddq_rot270_v2.i
ret <4 x float> %add
}
define <2 x double> @foo64x2_rot(<2 x double> %a, <2 x double> %b) {
entry:
; CHECK-LABEL: foo64x2_rot
; CHECK-DAG: fcadd v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, #90
; CHECK-DAG: fcadd v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, #270
%vcaddq_rot90_v2.i = tail call <2 x double> @llvm.aarch64.neon.vcadd.rot90.v2f64(<2 x double> %a, <2 x double> %b)
%vcaddq_rot270_v2.i = tail call <2 x double> @llvm.aarch64.neon.vcadd.rot270.v2f64(<2 x double> %a, <2 x double> %b)
%add = fadd <2 x double> %vcaddq_rot90_v2.i, %vcaddq_rot270_v2.i
ret <2 x double> %add
}
declare <4 x half> @llvm.aarch64.neon.vcadd.rot90.v4f16(<4 x half>, <4 x half>)
declare <4 x half> @llvm.aarch64.neon.vcadd.rot270.v4f16(<4 x half>, <4 x half>)
declare <2 x float> @llvm.aarch64.neon.vcadd.rot90.v2f32(<2 x float>, <2 x float>)
declare <2 x float> @llvm.aarch64.neon.vcadd.rot270.v2f32(<2 x float>, <2 x float>)
declare <8 x half> @llvm.aarch64.neon.vcadd.rot90.v8f16(<8 x half>, <8 x half>)
declare <8 x half> @llvm.aarch64.neon.vcadd.rot270.v8f16(<8 x half>, <8 x half>)
declare <4 x float> @llvm.aarch64.neon.vcadd.rot90.v4f32(<4 x float>, <4 x float>)
declare <4 x float> @llvm.aarch64.neon.vcadd.rot270.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.aarch64.neon.vcadd.rot90.v2f64(<2 x double>, <2 x double>)
declare <2 x double> @llvm.aarch64.neon.vcadd.rot270.v2f64(<2 x double>, <2 x double>)

@@ -0,0 +1,54 @@
; RUN: llc %s -mtriple=arm -mattr=+armv8.3-a,+fullfp16 -o - | FileCheck %s
define <4 x half> @foo16x4_rot(<4 x half> %a, <4 x half> %b) {
entry:
; CHECK-LABEL: foo16x4_rot
; CHECK-DAG: vcadd.f16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, #90
; CHECK-DAG: vcadd.f16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, #270
%vcadd_rot90_v2.i = tail call <4 x half> @llvm.arm.neon.vcadd.rot90.v4f16(<4 x half> %a, <4 x half> %b)
%vcadd_rot270_v2.i = tail call <4 x half> @llvm.arm.neon.vcadd.rot270.v4f16(<4 x half> %a, <4 x half> %b)
%add = fadd <4 x half> %vcadd_rot90_v2.i, %vcadd_rot270_v2.i
ret <4 x half> %add
}
define <2 x float> @foo32x2_rot(<2 x float> %a, <2 x float> %b) {
entry:
; CHECK-LABEL: foo32x2_rot
; CHECK-DAG: vcadd.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, #90
; CHECK-DAG: vcadd.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, #270
%vcadd_rot90_v2.i = tail call <2 x float> @llvm.arm.neon.vcadd.rot90.v2f32(<2 x float> %a, <2 x float> %b)
%vcadd_rot270_v2.i = tail call <2 x float> @llvm.arm.neon.vcadd.rot270.v2f32(<2 x float> %a, <2 x float> %b)
%add = fadd <2 x float> %vcadd_rot90_v2.i, %vcadd_rot270_v2.i
ret <2 x float> %add
}
define <8 x half> @foo16x8_rot(<8 x half> %a, <8 x half> %b) {
entry:
; CHECK-LABEL: foo16x8_rot
; CHECK-DAG: vcadd.f16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #90
; CHECK-DAG: vcadd.f16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #270
%vcaddq_rot90_v2.i = tail call <8 x half> @llvm.arm.neon.vcadd.rot90.v8f16(<8 x half> %a, <8 x half> %b)
%vcaddq_rot270_v2.i = tail call <8 x half> @llvm.arm.neon.vcadd.rot270.v8f16(<8 x half> %a, <8 x half> %b)
%add = fadd <8 x half> %vcaddq_rot90_v2.i, %vcaddq_rot270_v2.i
ret <8 x half> %add
}
define <4 x float> @foo32x4_rot(<4 x float> %a, <4 x float> %b) {
entry:
; CHECK-LABEL: foo32x4_rot
; CHECK-DAG: vcadd.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #90
; CHECK-DAG: vcadd.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #270
%vcaddq_rot90_v2.i = tail call <4 x float> @llvm.arm.neon.vcadd.rot90.v4f32(<4 x float> %a, <4 x float> %b)
%vcaddq_rot270_v2.i = tail call <4 x float> @llvm.arm.neon.vcadd.rot270.v4f32(<4 x float> %a, <4 x float> %b)
%add = fadd <4 x float> %vcaddq_rot90_v2.i, %vcaddq_rot270_v2.i
ret <4 x float> %add
}
declare <4 x half> @llvm.arm.neon.vcadd.rot90.v4f16(<4 x half>, <4 x half>)
declare <4 x half> @llvm.arm.neon.vcadd.rot270.v4f16(<4 x half>, <4 x half>)
declare <2 x float> @llvm.arm.neon.vcadd.rot90.v2f32(<2 x float>, <2 x float>)
declare <2 x float> @llvm.arm.neon.vcadd.rot270.v2f32(<2 x float>, <2 x float>)
declare <8 x half> @llvm.arm.neon.vcadd.rot90.v8f16(<8 x half>, <8 x half>)
declare <8 x half> @llvm.arm.neon.vcadd.rot270.v8f16(<8 x half>, <8 x half>)
declare <4 x float> @llvm.arm.neon.vcadd.rot90.v4f32(<4 x float>, <4 x float>)
declare <4 x float> @llvm.arm.neon.vcadd.rot270.v4f32(<4 x float>, <4 x float>)