[AArch64][SVE] Implement floating-point comparison & reduction intrinsics
Summary:
Adds intrinsics for the following:

- fadda & faddv
- fminv, fmaxv, fminnmv & fmaxnmv
- facge & facgt
- fcmp[eq|ge|gt|ne|uo]

Reviewers: sdesmalen, huntergr, dancgr, mgudim

Reviewed By: sdesmalen

Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, cameron.mcinally, cfe-commits, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D69858
This commit is contained in:
parent a2b449b8f5
commit de674e0de7
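For orientation before the diff, here is a minimal LLVM IR sketch of how the new intrinsics are meant to be used, distilled from the test cases added below. The function name @example and the zero initial value are illustrative only, not part of the commit.

; A predicated compare yields a scalable predicate vector, and fadda
; performs a strictly-ordered accumulation over the active lanes; with
; this patch these are expected to select FCMGE and FADDA respectively.
declare <vscale x 4 x i1> @llvm.aarch64.sve.fcmpge.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare float @llvm.aarch64.sve.fadda.nxv4f32(<vscale x 4 x i1>, float, <vscale x 4 x float>)

define float @example(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
  ; compute p = (a >= b) under the governing predicate %pg
  %p = call <vscale x 4 x i1> @llvm.aarch64.sve.fcmpge.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ; sum the lanes of %a where the compare held, starting from 0.0
  %sum = call float @llvm.aarch64.sve.fadda.nxv4f32(<vscale x 4 x i1> %p, float 0.0, <vscale x 4 x float> %a)
  ret float %sum
}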
include/llvm/IR/IntrinsicsAArch64.td:

@@ -786,6 +786,13 @@ let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".
                 llvm_i32_ty],
                [IntrNoMem]>;

+  class AdvSIMD_SVE_Compare_Intrinsic
+    : Intrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+                [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                 llvm_anyvector_ty,
+                 LLVMMatchType<0>],
+                [IntrNoMem]>;
+
   class AdvSIMD_SVE_CNT_Intrinsic
     : Intrinsic<[LLVMVectorOfBitcastsToInt<0>],
                 [LLVMVectorOfBitcastsToInt<0>,

@@ -793,8 +800,21 @@ let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".
                 llvm_anyvector_ty],
                [IntrNoMem]>;

+  class AdvSIMD_SVE_Reduce_Intrinsic
+    : Intrinsic<[llvm_anyfloat_ty],
+                [LLVMScalarOrSameVectorWidth<1, llvm_i1_ty>,
+                 llvm_anyvector_ty],
+                [IntrNoMem]>;
+
+  class AdvSIMD_SVE_ReduceWithInit_Intrinsic
+    : Intrinsic<[llvm_anyfloat_ty],
+                [LLVMScalarOrSameVectorWidth<1, llvm_i1_ty>,
+                 LLVMMatchType<0>,
+                 llvm_anyvector_ty],
+                [IntrNoMem]>;
+
   class AdvSIMD_SVE_Unpack_Intrinsic
     : Intrinsic<[llvm_anyvector_ty],
                 [LLVMSubdivide2VectorType<0>],
                [IntrNoMem]>;

@@ -1017,10 +1037,30 @@ def int_aarch64_sve_ftmad_x : AdvSIMD_2VectorArgIndexed_Intrinsic;
def int_aarch64_sve_ftsmul_x : AdvSIMD_SVE_TSMUL_Intrinsic;
def int_aarch64_sve_ftssel_x : AdvSIMD_SVE_TSMUL_Intrinsic;

+//
+// Floating-point reductions
+//
+
+def int_aarch64_sve_fadda : AdvSIMD_SVE_ReduceWithInit_Intrinsic;
+def int_aarch64_sve_faddv : AdvSIMD_SVE_Reduce_Intrinsic;
+def int_aarch64_sve_fmaxv : AdvSIMD_SVE_Reduce_Intrinsic;
+def int_aarch64_sve_fmaxnmv : AdvSIMD_SVE_Reduce_Intrinsic;
+def int_aarch64_sve_fminv : AdvSIMD_SVE_Reduce_Intrinsic;
+def int_aarch64_sve_fminnmv : AdvSIMD_SVE_Reduce_Intrinsic;
+
+//
+// Floating-point comparisons
+//
+
+def int_aarch64_sve_facge : AdvSIMD_SVE_Compare_Intrinsic;
+def int_aarch64_sve_facgt : AdvSIMD_SVE_Compare_Intrinsic;
+
+def int_aarch64_sve_fcmpeq : AdvSIMD_SVE_Compare_Intrinsic;
+def int_aarch64_sve_fcmpge : AdvSIMD_SVE_Compare_Intrinsic;
+def int_aarch64_sve_fcmpgt : AdvSIMD_SVE_Compare_Intrinsic;
+def int_aarch64_sve_fcmpne : AdvSIMD_SVE_Compare_Intrinsic;
+def int_aarch64_sve_fcmpuo : AdvSIMD_SVE_Compare_Intrinsic;
+
def int_aarch64_sve_fcvtzs_i32f16 : Builtin_SVCVT<"svcvt_s32_f16_m", llvm_nxv4i32_ty, llvm_nxv8f16_ty>;

//

lib/Target/AArch64/AArch64SVEInstrInfo.td:

@@ -169,12 +169,12 @@ let Predicates = [HasSVE] in {
  defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul", int_aarch64_sve_fmul_lane>;

  // SVE floating point reductions.
-  defm FADDA_VPZ : sve_fp_2op_p_vd<0b000, "fadda">;
-  defm FADDV_VPZ : sve_fp_fast_red<0b000, "faddv">;
-  defm FMAXNMV_VPZ : sve_fp_fast_red<0b100, "fmaxnmv">;
-  defm FMINNMV_VPZ : sve_fp_fast_red<0b101, "fminnmv">;
-  defm FMAXV_VPZ : sve_fp_fast_red<0b110, "fmaxv">;
-  defm FMINV_VPZ : sve_fp_fast_red<0b111, "fminv">;
+  defm FADDA_VPZ : sve_fp_2op_p_vd<0b000, "fadda", int_aarch64_sve_fadda>;
+  defm FADDV_VPZ : sve_fp_fast_red<0b000, "faddv", int_aarch64_sve_faddv>;
+  defm FMAXNMV_VPZ : sve_fp_fast_red<0b100, "fmaxnmv", int_aarch64_sve_fmaxnmv>;
+  defm FMINNMV_VPZ : sve_fp_fast_red<0b101, "fminnmv", int_aarch64_sve_fminnmv>;
+  defm FMAXV_VPZ : sve_fp_fast_red<0b110, "fmaxv", int_aarch64_sve_fmaxv>;
+  defm FMINV_VPZ : sve_fp_fast_red<0b111, "fminv", int_aarch64_sve_fminv>;

  // Splat immediate (unpredicated)
  defm DUP_ZI : sve_int_dup_imm<"dup">;

@@ -736,13 +736,13 @@ let Predicates = [HasSVE] in {
  defm CMPLO_PPzZI : sve_int_ucmp_vi<0b10, "cmplo">;
  defm CMPLS_PPzZI : sve_int_ucmp_vi<0b11, "cmpls">;

-  defm FCMGE_PPzZZ : sve_fp_3op_p_pd<0b000, "fcmge">;
-  defm FCMGT_PPzZZ : sve_fp_3op_p_pd<0b001, "fcmgt">;
-  defm FCMEQ_PPzZZ : sve_fp_3op_p_pd<0b010, "fcmeq">;
-  defm FCMNE_PPzZZ : sve_fp_3op_p_pd<0b011, "fcmne">;
-  defm FCMUO_PPzZZ : sve_fp_3op_p_pd<0b100, "fcmuo">;
-  defm FACGE_PPzZZ : sve_fp_3op_p_pd<0b101, "facge">;
-  defm FACGT_PPzZZ : sve_fp_3op_p_pd<0b111, "facgt">;
+  defm FCMGE_PPzZZ : sve_fp_3op_p_pd<0b000, "fcmge", int_aarch64_sve_fcmpge>;
+  defm FCMGT_PPzZZ : sve_fp_3op_p_pd<0b001, "fcmgt", int_aarch64_sve_fcmpgt>;
+  defm FCMEQ_PPzZZ : sve_fp_3op_p_pd<0b010, "fcmeq", int_aarch64_sve_fcmpeq>;
+  defm FCMNE_PPzZZ : sve_fp_3op_p_pd<0b011, "fcmne", int_aarch64_sve_fcmpne>;
+  defm FCMUO_PPzZZ : sve_fp_3op_p_pd<0b100, "fcmuo", int_aarch64_sve_fcmpuo>;
+  defm FACGE_PPzZZ : sve_fp_3op_p_pd<0b101, "facge", int_aarch64_sve_facge>;
+  defm FACGT_PPzZZ : sve_fp_3op_p_pd<0b111, "facgt", int_aarch64_sve_facgt>;

  defm FCMGE_PPzZ0 : sve_fp_2op_p_pd<0b000, "fcmge">;
  defm FCMGT_PPzZ0 : sve_fp_2op_p_pd<0b001, "fcmgt">;

lib/Target/AArch64/SVEInstrFormats.td:

@@ -3644,10 +3644,14 @@ class sve_fp_fast_red<bits<2> sz, bits<3> opc, string asm,
  let Inst{4-0} = Vd;
}

-multiclass sve_fp_fast_red<bits<3> opc, string asm> {
+multiclass sve_fp_fast_red<bits<3> opc, string asm, SDPatternOperator op> {
  def _H : sve_fp_fast_red<0b01, opc, asm, ZPR16, FPR16>;
  def _S : sve_fp_fast_red<0b10, opc, asm, ZPR32, FPR32>;
  def _D : sve_fp_fast_red<0b11, opc, asm, ZPR64, FPR64>;
+
+  def : SVE_2_Op_Pat<f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Pat<f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
}

@@ -3677,10 +3681,14 @@ class sve_fp_2op_p_vd<bits<2> sz, bits<3> opc, string asm,
  let Constraints = "$Vdn = $_Vdn";
}

-multiclass sve_fp_2op_p_vd<bits<3> opc, string asm> {
+multiclass sve_fp_2op_p_vd<bits<3> opc, string asm, SDPatternOperator op> {
  def _H : sve_fp_2op_p_vd<0b01, opc, asm, ZPR16, FPR16>;
  def _S : sve_fp_2op_p_vd<0b10, opc, asm, ZPR32, FPR32>;
  def _D : sve_fp_2op_p_vd<0b11, opc, asm, ZPR64, FPR64>;
+
+  def : SVE_3_Op_Pat<f16, op, nxv8i1, f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_3_Op_Pat<f32, op, nxv4i1, f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_3_Op_Pat<f64, op, nxv2i1, f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
}

//===----------------------------------------------------------------------===//

@@ -3710,10 +3718,14 @@ class sve_fp_3op_p_pd<bits<2> sz, bits<3> opc, string asm, PPRRegOp pprty,
  let Inst{3-0} = Pd;
}

-multiclass sve_fp_3op_p_pd<bits<3> opc, string asm> {
+multiclass sve_fp_3op_p_pd<bits<3> opc, string asm, SDPatternOperator op> {
  def _H : sve_fp_3op_p_pd<0b01, opc, asm, PPR16, ZPR16>;
  def _S : sve_fp_3op_p_pd<0b10, opc, asm, PPR32, ZPR32>;
  def _D : sve_fp_3op_p_pd<0b11, opc, asm, PPR64, ZPR64>;
+
+  def : SVE_3_Op_Pat<nxv8i1, op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_3_Op_Pat<nxv4i1, op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_3_Op_Pat<nxv2i1, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
}

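For reference, the SVE_2_Op_Pat and SVE_3_Op_Pat helpers used by the new multiclasses are defined elsewhere in SVEInstrFormats.td and are not part of this diff; reproduced from memory as a sketch, they expand to plain selection patterns of roughly this shape, so an intrinsic call with matching types is rewritten into the corresponding instruction:

class SVE_2_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
                   ValueType vt2, Instruction inst>
  : Pat<(vtd (op vt1:$Op1, vt2:$Op2)),
        (inst $Op1, $Op2)>;

class SVE_3_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
                   ValueType vt2, ValueType vt3, Instruction inst>
  : Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3)),
        (inst $Op1, $Op2, $Op3)>;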
test/CodeGen/AArch64/sve-intrinsics-fp-compares.ll (new file, 267 lines)

@@ -0,0 +1,267 @@
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

;
; FACGE
;

define <vscale x 8 x i1> @facge_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: facge_h:
; CHECK: facge p0.h, p0/z, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.facge.nxv8f16(<vscale x 8 x i1> %pg,
                                                                <vscale x 8 x half> %a,
                                                                <vscale x 8 x half> %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @facge_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: facge_s:
; CHECK: facge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.facge.nxv4f32(<vscale x 4 x i1> %pg,
                                                                <vscale x 4 x float> %a,
                                                                <vscale x 4 x float> %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @facge_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: facge_d:
; CHECK: facge p0.d, p0/z, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.facge.nxv2f64(<vscale x 2 x i1> %pg,
                                                                <vscale x 2 x double> %a,
                                                                <vscale x 2 x double> %b)
  ret <vscale x 2 x i1> %out
}

;
; FACGT
;

define <vscale x 8 x i1> @facgt_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: facgt_h:
; CHECK: facgt p0.h, p0/z, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.facgt.nxv8f16(<vscale x 8 x i1> %pg,
                                                                <vscale x 8 x half> %a,
                                                                <vscale x 8 x half> %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @facgt_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: facgt_s:
; CHECK: facgt p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.facgt.nxv4f32(<vscale x 4 x i1> %pg,
                                                                <vscale x 4 x float> %a,
                                                                <vscale x 4 x float> %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @facgt_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: facgt_d:
; CHECK: facgt p0.d, p0/z, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.facgt.nxv2f64(<vscale x 2 x i1> %pg,
                                                                <vscale x 2 x double> %a,
                                                                <vscale x 2 x double> %b)
  ret <vscale x 2 x i1> %out
}

;
; FCMEQ
;

define <vscale x 8 x i1> @fcmeq_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: fcmeq_h:
; CHECK: fcmeq p0.h, p0/z, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.fcmpeq.nxv8f16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x half> %a,
                                                                 <vscale x 8 x half> %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @fcmeq_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fcmeq_s:
; CHECK: fcmeq p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.fcmpeq.nxv4f32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x float> %a,
                                                                 <vscale x 4 x float> %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @fcmeq_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fcmeq_d:
; CHECK: fcmeq p0.d, p0/z, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.fcmpeq.nxv2f64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x double> %a,
                                                                 <vscale x 2 x double> %b)
  ret <vscale x 2 x i1> %out
}

;
; FCMGE
;

define <vscale x 8 x i1> @fcmge_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: fcmge_h:
; CHECK: fcmge p0.h, p0/z, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.fcmpge.nxv8f16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x half> %a,
                                                                 <vscale x 8 x half> %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @fcmge_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fcmge_s:
; CHECK: fcmge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.fcmpge.nxv4f32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x float> %a,
                                                                 <vscale x 4 x float> %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @fcmge_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fcmge_d:
; CHECK: fcmge p0.d, p0/z, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.fcmpge.nxv2f64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x double> %a,
                                                                 <vscale x 2 x double> %b)
  ret <vscale x 2 x i1> %out
}

;
; FCMGT
;

define <vscale x 8 x i1> @fcmgt_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: fcmgt_h:
; CHECK: fcmgt p0.h, p0/z, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.fcmpgt.nxv8f16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x half> %a,
                                                                 <vscale x 8 x half> %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @fcmgt_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fcmgt_s:
; CHECK: fcmgt p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.fcmpgt.nxv4f32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x float> %a,
                                                                 <vscale x 4 x float> %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @fcmgt_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fcmgt_d:
; CHECK: fcmgt p0.d, p0/z, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.fcmpgt.nxv2f64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x double> %a,
                                                                 <vscale x 2 x double> %b)
  ret <vscale x 2 x i1> %out
}

;
; FCMNE
;

define <vscale x 8 x i1> @fcmne_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: fcmne_h:
; CHECK: fcmne p0.h, p0/z, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.fcmpne.nxv8f16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x half> %a,
                                                                 <vscale x 8 x half> %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @fcmne_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fcmne_s:
; CHECK: fcmne p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.fcmpne.nxv4f32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x float> %a,
                                                                 <vscale x 4 x float> %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @fcmne_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fcmne_d:
; CHECK: fcmne p0.d, p0/z, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.fcmpne.nxv2f64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x double> %a,
                                                                 <vscale x 2 x double> %b)
  ret <vscale x 2 x i1> %out
}

;
; FCMPUO
;

define <vscale x 8 x i1> @fcmuo_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: fcmuo_h:
; CHECK: fcmuo p0.h, p0/z, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.fcmpuo.nxv8f16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x half> %a,
                                                                 <vscale x 8 x half> %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @fcmuo_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fcmuo_s:
; CHECK: fcmuo p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.fcmpuo.nxv4f32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x float> %a,
                                                                 <vscale x 4 x float> %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @fcmuo_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fcmuo_d:
; CHECK: fcmuo p0.d, p0/z, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.fcmpuo.nxv2f64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x double> %a,
                                                                 <vscale x 2 x double> %b)
  ret <vscale x 2 x i1> %out
}

declare <vscale x 8 x i1> @llvm.aarch64.sve.facge.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.facge.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.facge.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 8 x i1> @llvm.aarch64.sve.facgt.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.facgt.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.facgt.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 8 x i1> @llvm.aarch64.sve.fcmpeq.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.fcmpeq.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.fcmpeq.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 8 x i1> @llvm.aarch64.sve.fcmpge.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.fcmpge.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.fcmpge.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 8 x i1> @llvm.aarch64.sve.fcmpgt.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.fcmpgt.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.fcmpgt.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 8 x i1> @llvm.aarch64.sve.fcmpne.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.fcmpne.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.fcmpne.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 8 x i1> @llvm.aarch64.sve.fcmpuo.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.fcmpuo.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.fcmpuo.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll (new file, 214 lines)

@@ -0,0 +1,214 @@
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s

;
; FADDA
;

define half @fadda_f16(<vscale x 8 x i1> %pg, half %init, <vscale x 8 x half> %a) {
; CHECK-LABEL: fadda_f16:
; CHECK: fadda h0, p0, h0, z1.h
; CHECK-NEXT: ret
  %res = call half @llvm.aarch64.sve.fadda.nxv8f16(<vscale x 8 x i1> %pg,
                                                   half %init,
                                                   <vscale x 8 x half> %a)
  ret half %res
}

define float @fadda_f32(<vscale x 4 x i1> %pg, float %init, <vscale x 4 x float> %a) {
; CHECK-LABEL: fadda_f32:
; CHECK: fadda s0, p0, s0, z1.s
; CHECK-NEXT: ret
  %res = call float @llvm.aarch64.sve.fadda.nxv4f32(<vscale x 4 x i1> %pg,
                                                    float %init,
                                                    <vscale x 4 x float> %a)
  ret float %res
}

define double @fadda_f64(<vscale x 2 x i1> %pg, double %init, <vscale x 2 x double> %a) {
; CHECK-LABEL: fadda_f64:
; CHECK: fadda d0, p0, d0, z1.d
; CHECK-NEXT: ret
  %res = call double @llvm.aarch64.sve.fadda.nxv2f64(<vscale x 2 x i1> %pg,
                                                     double %init,
                                                     <vscale x 2 x double> %a)
  ret double %res
}

;
; FADDV
;

define half @faddv_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
; CHECK-LABEL: faddv_f16:
; CHECK: faddv h0, p0, z0.h
; CHECK-NEXT: ret
  %res = call half @llvm.aarch64.sve.faddv.nxv8f16(<vscale x 8 x i1> %pg,
                                                   <vscale x 8 x half> %a)
  ret half %res
}

define float @faddv_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
; CHECK-LABEL: faddv_f32:
; CHECK: faddv s0, p0, z0.s
; CHECK-NEXT: ret
  %res = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> %pg,
                                                    <vscale x 4 x float> %a)
  ret float %res
}

define double @faddv_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
; CHECK-LABEL: faddv_f64:
; CHECK: faddv d0, p0, z0.d
; CHECK-NEXT: ret
  %res = call double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1> %pg,
                                                     <vscale x 2 x double> %a)
  ret double %res
}

;
; FMAXNMV
;

define half @fmaxnmv_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
; CHECK-LABEL: fmaxnmv_f16:
; CHECK: fmaxnmv h0, p0, z0.h
; CHECK-NEXT: ret
  %res = call half @llvm.aarch64.sve.fmaxnmv.nxv8f16(<vscale x 8 x i1> %pg,
                                                     <vscale x 8 x half> %a)
  ret half %res
}

define float @fmaxnmv_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
; CHECK-LABEL: fmaxnmv_f32:
; CHECK: fmaxnmv s0, p0, z0.s
; CHECK-NEXT: ret
  %res = call float @llvm.aarch64.sve.fmaxnmv.nxv4f32(<vscale x 4 x i1> %pg,
                                                      <vscale x 4 x float> %a)
  ret float %res
}

define double @fmaxnmv_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
; CHECK-LABEL: fmaxnmv_f64:
; CHECK: fmaxnmv d0, p0, z0.d
; CHECK-NEXT: ret
  %res = call double @llvm.aarch64.sve.fmaxnmv.nxv2f64(<vscale x 2 x i1> %pg,
                                                       <vscale x 2 x double> %a)
  ret double %res
}

;
; FMAXV
;

define half @fmaxv_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
; CHECK-LABEL: fmaxv_f16:
; CHECK: fmaxv h0, p0, z0.h
; CHECK-NEXT: ret
  %res = call half @llvm.aarch64.sve.fmaxv.nxv8f16(<vscale x 8 x i1> %pg,
                                                   <vscale x 8 x half> %a)
  ret half %res
}

define float @fmaxv_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
; CHECK-LABEL: fmaxv_f32:
; CHECK: fmaxv s0, p0, z0.s
; CHECK-NEXT: ret
  %res = call float @llvm.aarch64.sve.fmaxv.nxv4f32(<vscale x 4 x i1> %pg,
                                                    <vscale x 4 x float> %a)
  ret float %res
}

define double @fmaxv_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
; CHECK-LABEL: fmaxv_f64:
; CHECK: fmaxv d0, p0, z0.d
; CHECK-NEXT: ret
  %res = call double @llvm.aarch64.sve.fmaxv.nxv2f64(<vscale x 2 x i1> %pg,
                                                     <vscale x 2 x double> %a)
  ret double %res
}

;
; FMINNMV
;

define half @fminnmv_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
; CHECK-LABEL: fminnmv_f16:
; CHECK: fminnmv h0, p0, z0.h
; CHECK-NEXT: ret
  %res = call half @llvm.aarch64.sve.fminnmv.nxv8f16(<vscale x 8 x i1> %pg,
                                                     <vscale x 8 x half> %a)
  ret half %res
}

define float @fminnmv_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
; CHECK-LABEL: fminnmv_f32:
; CHECK: fminnmv s0, p0, z0.s
; CHECK-NEXT: ret
  %res = call float @llvm.aarch64.sve.fminnmv.nxv4f32(<vscale x 4 x i1> %pg,
                                                      <vscale x 4 x float> %a)
  ret float %res
}

define double @fminnmv_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
; CHECK-LABEL: fminnmv_f64:
; CHECK: fminnmv d0, p0, z0.d
; CHECK-NEXT: ret
  %res = call double @llvm.aarch64.sve.fminnmv.nxv2f64(<vscale x 2 x i1> %pg,
                                                       <vscale x 2 x double> %a)
  ret double %res
}

;
; FMINV
;

define half @fminv_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
; CHECK-LABEL: fminv_f16:
; CHECK: fminv h0, p0, z0.h
; CHECK-NEXT: ret
  %res = call half @llvm.aarch64.sve.fminv.nxv8f16(<vscale x 8 x i1> %pg,
                                                   <vscale x 8 x half> %a)
  ret half %res
}

define float @fminv_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
; CHECK-LABEL: fminv_f32:
; CHECK: fminv s0, p0, z0.s
; CHECK-NEXT: ret
  %res = call float @llvm.aarch64.sve.fminv.nxv4f32(<vscale x 4 x i1> %pg,
                                                    <vscale x 4 x float> %a)
  ret float %res
}

define double @fminv_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
; CHECK-LABEL: fminv_f64:
; CHECK: fminv d0, p0, z0.d
; CHECK-NEXT: ret
  %res = call double @llvm.aarch64.sve.fminv.nxv2f64(<vscale x 2 x i1> %pg,
                                                     <vscale x 2 x double> %a)
  ret double %res
}

declare half @llvm.aarch64.sve.fadda.nxv8f16(<vscale x 8 x i1>, half, <vscale x 8 x half>)
declare float @llvm.aarch64.sve.fadda.nxv4f32(<vscale x 4 x i1>, float, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.fadda.nxv2f64(<vscale x 2 x i1>, double, <vscale x 2 x double>)

declare half @llvm.aarch64.sve.faddv.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
declare float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)

declare half @llvm.aarch64.sve.fmaxnmv.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
declare float @llvm.aarch64.sve.fmaxnmv.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.fmaxnmv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)

declare half @llvm.aarch64.sve.fmaxv.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
declare float @llvm.aarch64.sve.fmaxv.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.fmaxv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)

declare half @llvm.aarch64.sve.fminnmv.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
declare float @llvm.aarch64.sve.fminnmv.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.fminnmv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)

declare half @llvm.aarch64.sve.fminv.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
declare float @llvm.aarch64.sve.fminv.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.fminv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)