mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[AArch64][SVE2] Implement remaining SVE2 floating-point intrinsics
Summary: Adds the following intrinsics: - faddp - fmaxp, fminp, fmaxnmp & fminnmp - fmlalb, fmlalt, fmlslb & fmlslt - flogb Reviewers: huntergr, sdesmalen, dancgr, efriedma Reviewed By: sdesmalen Subscribers: efriedma, tschuett, kristof.beyls, hiraditya, cameron.mcinally, cfe-commits, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70253
This commit is contained in:
parent
a9d063eaa3
commit
75483b65cd
@ -951,6 +951,25 @@ class AdvSIMD_GatherLoad_64bitOffset_Intrinsic
|
||||
],
|
||||
[IntrReadMem, IntrArgMemOnly]>;
|
||||
|
||||
// Intrinsic signature with an overloaded vector result and three vector
// operands: the first operand has the result type, and the remaining two
// share the type LLVMSubdivide2VectorType<0> derived from that result type
// (the fmlalb/fmlalt/fmlslb/fmlslt tests pair an nxv4f32 result with nxv8f16
// operands). Declared IntrNoMem.
class SVE2_3VectorArg_Long_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMMatchType<0>,
|
||||
LLVMSubdivide2VectorType<0>,
|
||||
LLVMSubdivide2VectorType<0>],
|
||||
[IntrNoMem]>;
|
||||
|
||||
// Indexed variant of the three-vector "long" signature: same overloaded
// result, result-typed first operand and two LLVMSubdivide2VectorType<0>
// operands, followed by a trailing i32 operand. The *_lane intrinsics use
// that i32 as the element index printed as "$Zm[idx]" in the tests.
// Declared IntrNoMem.
// NOTE(review): the i32 operand is not flagged as an immediate argument
// here; presumably callers must still pass a constant index for instruction
// selection to succeed - confirm.
class SVE2_3VectorArgIndexed_Long_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMMatchType<0>,
|
||||
LLVMSubdivide2VectorType<0>,
|
||||
LLVMSubdivide2VectorType<0>,
|
||||
llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
// NOTE: There is no relationship between these intrinsics beyond an attempt
|
||||
// to reuse currently identical class definitions.
|
||||
class AdvSIMD_SVE_LOGB_Intrinsic : AdvSIMD_SVE_CNT_Intrinsic;
|
||||
|
||||
// This class of intrinsics is not intended to be useful within LLVM IR but
|
||||
// is instead here to support some of the more rigid parts of the ACLE.
|
||||
class Builtin_SVCVT<string name, LLVMType OUT, LLVMType IN>
|
||||
@ -1191,4 +1210,33 @@ def int_aarch64_sve_ld1_gather : AdvSIMD_GatherLoad_64bitOffset_Intrinsic;
|
||||
|
||||
// scalar + vector, 64 bit scaled offsets
|
||||
def int_aarch64_sve_ld1_gather_index : AdvSIMD_GatherLoad_64bitOffset_Intrinsic;
|
||||
|
||||
//
|
||||
// SVE2 - Non-widening pairwise arithmetic
|
||||
//
|
||||
|
||||
def int_aarch64_sve_faddp : AdvSIMD_Pred2VectorArg_Intrinsic;
|
||||
def int_aarch64_sve_fmaxp : AdvSIMD_Pred2VectorArg_Intrinsic;
|
||||
def int_aarch64_sve_fmaxnmp : AdvSIMD_Pred2VectorArg_Intrinsic;
|
||||
def int_aarch64_sve_fminp : AdvSIMD_Pred2VectorArg_Intrinsic;
|
||||
def int_aarch64_sve_fminnmp : AdvSIMD_Pred2VectorArg_Intrinsic;
|
||||
|
||||
//
|
||||
// SVE2 - Floating-point widening multiply-accumulate
|
||||
//
|
||||
|
||||
def int_aarch64_sve_fmlalb : SVE2_3VectorArg_Long_Intrinsic;
|
||||
def int_aarch64_sve_fmlalb_lane : SVE2_3VectorArgIndexed_Long_Intrinsic;
|
||||
def int_aarch64_sve_fmlalt : SVE2_3VectorArg_Long_Intrinsic;
|
||||
def int_aarch64_sve_fmlalt_lane : SVE2_3VectorArgIndexed_Long_Intrinsic;
|
||||
def int_aarch64_sve_fmlslb : SVE2_3VectorArg_Long_Intrinsic;
|
||||
def int_aarch64_sve_fmlslb_lane : SVE2_3VectorArgIndexed_Long_Intrinsic;
|
||||
def int_aarch64_sve_fmlslt : SVE2_3VectorArg_Long_Intrinsic;
|
||||
def int_aarch64_sve_fmlslt_lane : SVE2_3VectorArgIndexed_Long_Intrinsic;
|
||||
|
||||
//
|
||||
// SVE2 - Floating-point integer binary logarithm
|
||||
//
|
||||
|
||||
def int_aarch64_sve_flogb : AdvSIMD_SVE_LOGB_Intrinsic;
|
||||
}
|
||||
|
@ -1427,7 +1427,7 @@ let Predicates = [HasSVE2] in {
|
||||
defm HISTCNT_ZPzZZ : sve2_hist_gen_vector<"histcnt">;
|
||||
|
||||
// SVE2 floating-point base 2 logarithm as integer
|
||||
defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb">;
|
||||
defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb", int_aarch64_sve_flogb>;
|
||||
|
||||
// SVE2 floating-point convert precision
|
||||
defm FCVTXNT_ZPmZ : sve2_fp_convert_down_odd_rounding_top<"fcvtxnt", "int_aarch64_sve_fcvtxnt">;
|
||||
@ -1436,23 +1436,23 @@ let Predicates = [HasSVE2] in {
|
||||
defm FCVTLT_ZPmZ : sve2_fp_convert_up_long<"fcvtlt", "int_aarch64_sve_fcvtlt">;
|
||||
|
||||
// SVE2 floating-point pairwise operations
|
||||
defm FADDP_ZPmZZ : sve2_fp_pairwise_pred<0b000, "faddp">;
|
||||
defm FMAXNMP_ZPmZZ : sve2_fp_pairwise_pred<0b100, "fmaxnmp">;
|
||||
defm FMINNMP_ZPmZZ : sve2_fp_pairwise_pred<0b101, "fminnmp">;
|
||||
defm FMAXP_ZPmZZ : sve2_fp_pairwise_pred<0b110, "fmaxp">;
|
||||
defm FMINP_ZPmZZ : sve2_fp_pairwise_pred<0b111, "fminp">;
|
||||
defm FADDP_ZPmZZ : sve2_fp_pairwise_pred<0b000, "faddp", int_aarch64_sve_faddp>;
|
||||
defm FMAXNMP_ZPmZZ : sve2_fp_pairwise_pred<0b100, "fmaxnmp", int_aarch64_sve_fmaxnmp>;
|
||||
defm FMINNMP_ZPmZZ : sve2_fp_pairwise_pred<0b101, "fminnmp", int_aarch64_sve_fminnmp>;
|
||||
defm FMAXP_ZPmZZ : sve2_fp_pairwise_pred<0b110, "fmaxp", int_aarch64_sve_fmaxp>;
|
||||
defm FMINP_ZPmZZ : sve2_fp_pairwise_pred<0b111, "fminp", int_aarch64_sve_fminp>;
|
||||
|
||||
// SVE2 floating-point multiply-add long (indexed)
|
||||
def FMLALB_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b00, "fmlalb">;
|
||||
def FMLALT_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b01, "fmlalt">;
|
||||
def FMLSLB_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b10, "fmlslb">;
|
||||
def FMLSLT_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b11, "fmlslt">;
|
||||
defm FMLALB_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b00, "fmlalb", int_aarch64_sve_fmlalb_lane>;
|
||||
defm FMLALT_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b01, "fmlalt", int_aarch64_sve_fmlalt_lane>;
|
||||
defm FMLSLB_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b10, "fmlslb", int_aarch64_sve_fmlslb_lane>;
|
||||
defm FMLSLT_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b11, "fmlslt", int_aarch64_sve_fmlslt_lane>;
|
||||
|
||||
// SVE2 floating-point multiply-add long
|
||||
def FMLALB_ZZZ_SHH : sve2_fp_mla_long<0b00, "fmlalb">;
|
||||
def FMLALT_ZZZ_SHH : sve2_fp_mla_long<0b01, "fmlalt">;
|
||||
def FMLSLB_ZZZ_SHH : sve2_fp_mla_long<0b10, "fmlslb">;
|
||||
def FMLSLT_ZZZ_SHH : sve2_fp_mla_long<0b11, "fmlslt">;
|
||||
defm FMLALB_ZZZ_SHH : sve2_fp_mla_long<0b00, "fmlalb", int_aarch64_sve_fmlalb>;
|
||||
defm FMLALT_ZZZ_SHH : sve2_fp_mla_long<0b01, "fmlalt", int_aarch64_sve_fmlalt>;
|
||||
defm FMLSLB_ZZZ_SHH : sve2_fp_mla_long<0b10, "fmlslb", int_aarch64_sve_fmlslb>;
|
||||
defm FMLSLT_ZZZ_SHH : sve2_fp_mla_long<0b11, "fmlslt", int_aarch64_sve_fmlslt>;
|
||||
|
||||
// SVE2 bitwise ternary operations
|
||||
defm EOR3_ZZZZ_D : sve2_int_bitwise_ternary_op<0b000, "eor3">;
|
||||
|
@ -310,6 +310,12 @@ class SVE_3_Op_Imm_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
|
||||
: Pat<(vtd (op vt1:$Op1, vt2:$Op2, (vt3 ImmTy:$Op3))),
|
||||
(inst $Op1, $Op2, ImmTy:$Op3)>;
|
||||
|
||||
// Selection pattern for a four-operand node whose last operand is an
// immediate. Matches (op $Op1, $Op2, $Op3, $Op4) producing a value of type
// vtd, where the first three operands have types vt1..vt3 and $Op4 is a vt4
// value matched through the operand class ImmTy, and emits `inst` with the
// same four operands in the same order.
class SVE_4_Op_Imm_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
|
||||
ValueType vt2, ValueType vt3, ValueType vt4,
|
||||
Operand ImmTy, Instruction inst>
|
||||
: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3, (vt4 ImmTy:$Op4))),
|
||||
(inst $Op1, $Op2, $Op3, ImmTy:$Op4)>;
|
||||
|
||||
def SVEDup0Undef : ComplexPattern<i64, 0, "SelectDupZeroOrUndef", []>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -1695,10 +1701,14 @@ class sve2_fp_pairwise_pred<bits<2> sz, bits<3> opc, string asm,
|
||||
let ElementSize = zprty.ElementSize;
|
||||
}
|
||||
|
||||
multiclass sve2_fp_pairwise_pred<bits<3> opc, string asm> {
|
||||
multiclass sve2_fp_pairwise_pred<bits<3> opc, string asm, SDPatternOperator op> {
|
||||
def _H : sve2_fp_pairwise_pred<0b01, opc, asm, ZPR16>;
|
||||
def _S : sve2_fp_pairwise_pred<0b10, opc, asm, ZPR32>;
|
||||
def _D : sve2_fp_pairwise_pred<0b11, opc, asm, ZPR64>;
|
||||
|
||||
def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
|
||||
def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
|
||||
def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -1707,7 +1717,7 @@ multiclass sve2_fp_pairwise_pred<bits<3> opc, string asm> {
|
||||
|
||||
class sve2_fp_mla_long_by_indexed_elem<bits<2> opc, string asm>
|
||||
: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm,
|
||||
VectorIndexH:$iop),
|
||||
VectorIndexH32b:$iop),
|
||||
asm, "\t$Zda, $Zn, $Zm$iop",
|
||||
"",
|
||||
[]>, Sched<[]> {
|
||||
@ -1731,6 +1741,12 @@ class sve2_fp_mla_long_by_indexed_elem<bits<2> opc, string asm>
|
||||
let ElementSize = ElementSizeNone;
|
||||
}
|
||||
|
||||
// Wraps the instruction class of the same name: defines the instruction
// record NAME for encoding `opc` / mnemonic `asm`, plus an anonymous pattern
// that selects it for `op` applied to (nxv4f32, nxv8f16, nxv8f16, i32 index)
// yielding nxv4f32. The index is matched with the VectorIndexH32b operand.
multiclass sve2_fp_mla_long_by_indexed_elem<bits<2> opc, string asm,
|
||||
SDPatternOperator op> {
|
||||
def NAME : sve2_fp_mla_long_by_indexed_elem<opc, asm>;
|
||||
def : SVE_4_Op_Imm_Pat<nxv4f32, op, nxv4f32, nxv8f16, nxv8f16, i32, VectorIndexH32b, !cast<Instruction>(NAME)>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SVE2 Floating Point Widening Multiply-Add Group
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -1757,6 +1773,11 @@ class sve2_fp_mla_long<bits<2> opc, string asm>
|
||||
let ElementSize = ElementSizeNone;
|
||||
}
|
||||
|
||||
// Wraps the instruction class of the same name: defines the instruction
// record NAME for encoding `opc` / mnemonic `asm`, plus an anonymous pattern
// that selects it for `op` applied to (nxv4f32, nxv8f16, nxv8f16) yielding
// nxv4f32.
multiclass sve2_fp_mla_long<bits<2> opc, string asm, SDPatternOperator op> {
|
||||
def NAME : sve2_fp_mla_long<opc, asm>;
|
||||
def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, nxv8f16, nxv8f16, !cast<Instruction>(NAME)>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SVE Stack Allocation Group
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -1871,10 +1892,14 @@ multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm, SDPatternOperator op> {
|
||||
def : SVE_3_Op_Pat<nxv2f64, op, nxv2f64, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
|
||||
}
|
||||
|
||||
multiclass sve2_fp_flogb<string asm> {
|
||||
multiclass sve2_fp_flogb<string asm, SDPatternOperator op> {
|
||||
def _H : sve_fp_2op_p_zd<0b0011010, asm, ZPR16, ZPR16, ElementSizeH>;
|
||||
def _S : sve_fp_2op_p_zd<0b0011100, asm, ZPR32, ZPR32, ElementSizeS>;
|
||||
def _D : sve_fp_2op_p_zd<0b0011110, asm, ZPR64, ZPR64, ElementSizeD>;
|
||||
|
||||
def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
|
||||
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
|
||||
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
|
||||
}
|
||||
|
||||
multiclass sve2_fp_convert_down_odd_rounding<string asm, string op> {
|
||||
|
@ -0,0 +1,39 @@
|
||||
;RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 < %s | FileCheck %s
|
||||
|
||||
;
|
||||
; FLOGB
|
||||
;
|
||||
|
||||
define <vscale x 8 x i16> @flogb_f16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b) {
|
||||
; CHECK-LABEL: flogb_f16:
|
||||
; CHECK: flogb z0.h, p0/m, z1.h
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 8 x i16> @llvm.aarch64.sve.flogb.nxv8f16(<vscale x 8 x i16> %a,
|
||||
<vscale x 8 x i1> %pg,
|
||||
<vscale x 8 x half> %b)
|
||||
ret <vscale x 8 x i16> %out
|
||||
}
|
||||
|
||||
define <vscale x 4 x i32> @flogb_f32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
|
||||
; CHECK-LABEL: flogb_f32:
|
||||
; CHECK: flogb z0.s, p0/m, z1.s
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.flogb.nxv4f32(<vscale x 4 x i32> %a,
|
||||
<vscale x 4 x i1> %pg,
|
||||
<vscale x 4 x float> %b)
|
||||
ret <vscale x 4 x i32> %out
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @flogb_f64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
|
||||
; CHECK-LABEL: flogb_f64:
|
||||
; CHECK: flogb z0.d, p0/m, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.flogb.nxv2f64(<vscale x 2 x i64> %a,
|
||||
<vscale x 2 x i1> %pg,
|
||||
<vscale x 2 x double> %b)
|
||||
ret <vscale x 2 x i64> %out
|
||||
}
|
||||
|
||||
declare <vscale x 8 x i16> @llvm.aarch64.sve.flogb.nxv8f16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x half>)
|
||||
declare <vscale x 4 x i32> @llvm.aarch64.sve.flogb.nxv4f32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x float>)
|
||||
declare <vscale x 2 x i64> @llvm.aarch64.sve.flogb.nxv2f64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x double>)
|
127
test/CodeGen/AArch64/sve2-intrinsics-fp-widening-mul-acc.ll
Normal file
127
test/CodeGen/AArch64/sve2-intrinsics-fp-widening-mul-acc.ll
Normal file
@ -0,0 +1,127 @@
|
||||
;RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
|
||||
|
||||
;
|
||||
; FMLALB (Vectors)
|
||||
;
|
||||
|
||||
define <vscale x 4 x float> @fmlalb_h(<vscale x 4 x float> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
|
||||
; CHECK-LABEL: fmlalb_h:
|
||||
; CHECK: fmlalb z0.s, z1.h, z2.h
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fmlalb.nxv4f32(<vscale x 4 x float> %a,
|
||||
<vscale x 8 x half> %b,
|
||||
<vscale x 8 x half> %c)
|
||||
ret <vscale x 4 x float> %out
|
||||
}
|
||||
|
||||
;
|
||||
; FMLALB (Indexed)
|
||||
;
|
||||
|
||||
define <vscale x 4 x float> @fmlalb_lane_h(<vscale x 4 x float> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
|
||||
; CHECK-LABEL: fmlalb_lane_h:
|
||||
; CHECK: fmlalb z0.s, z1.h, z2.h[0]
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fmlalb.lane.nxv4f32(<vscale x 4 x float> %a,
|
||||
<vscale x 8 x half> %b,
|
||||
<vscale x 8 x half> %c,
|
||||
i32 0)
|
||||
ret <vscale x 4 x float> %out
|
||||
}
|
||||
|
||||
;
|
||||
; FMLALT (Vectors)
|
||||
;
|
||||
|
||||
define <vscale x 4 x float> @fmlalt_h(<vscale x 4 x float> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
|
||||
; CHECK-LABEL: fmlalt_h:
|
||||
; CHECK: fmlalt z0.s, z1.h, z2.h
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fmlalt.nxv4f32(<vscale x 4 x float> %a,
|
||||
<vscale x 8 x half> %b,
|
||||
<vscale x 8 x half> %c)
|
||||
ret <vscale x 4 x float> %out
|
||||
}
|
||||
|
||||
;
|
||||
; FMLALT (Indexed)
|
||||
;
|
||||
|
||||
define <vscale x 4 x float> @fmlalt_lane_h(<vscale x 4 x float> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
|
||||
; CHECK-LABEL: fmlalt_lane_h:
|
||||
; CHECK: fmlalt z0.s, z1.h, z2.h[1]
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fmlalt.lane.nxv4f32(<vscale x 4 x float> %a,
|
||||
<vscale x 8 x half> %b,
|
||||
<vscale x 8 x half> %c,
|
||||
i32 1)
|
||||
ret <vscale x 4 x float> %out
|
||||
}
|
||||
|
||||
;
|
||||
; FMLSLB (Vectors)
|
||||
;
|
||||
|
||||
define <vscale x 4 x float> @fmlslb_h(<vscale x 4 x float> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
|
||||
; CHECK-LABEL: fmlslb_h:
|
||||
; CHECK: fmlslb z0.s, z1.h, z2.h
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fmlslb.nxv4f32(<vscale x 4 x float> %a,
|
||||
<vscale x 8 x half> %b,
|
||||
<vscale x 8 x half> %c)
|
||||
ret <vscale x 4 x float> %out
|
||||
}
|
||||
|
||||
;
|
||||
; FMLSLB (Indexed)
|
||||
;
|
||||
|
||||
define <vscale x 4 x float> @fmlslb_lane_h(<vscale x 4 x float> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
|
||||
; CHECK-LABEL: fmlslb_lane_h:
|
||||
; CHECK: fmlslb z0.s, z1.h, z2.h[2]
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fmlslb.lane.nxv4f32(<vscale x 4 x float> %a,
|
||||
<vscale x 8 x half> %b,
|
||||
<vscale x 8 x half> %c,
|
||||
i32 2)
|
||||
ret <vscale x 4 x float> %out
|
||||
}
|
||||
|
||||
;
|
||||
; FMLSLT (Vectors)
|
||||
;
|
||||
|
||||
define <vscale x 4 x float> @fmlslt_h(<vscale x 4 x float> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
|
||||
; CHECK-LABEL: fmlslt_h:
|
||||
; CHECK: fmlslt z0.s, z1.h, z2.h
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fmlslt.nxv4f32(<vscale x 4 x float> %a,
|
||||
<vscale x 8 x half> %b,
|
||||
<vscale x 8 x half> %c)
|
||||
ret <vscale x 4 x float> %out
|
||||
}
|
||||
|
||||
;
|
||||
; FMLSLT (Indexed)
|
||||
;
|
||||
|
||||
define <vscale x 4 x float> @fmlslt_lane_h(<vscale x 4 x float> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
|
||||
; CHECK-LABEL: fmlslt_lane_h:
|
||||
; CHECK: fmlslt z0.s, z1.h, z2.h[3]
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fmlslt.lane.nxv4f32(<vscale x 4 x float> %a,
|
||||
<vscale x 8 x half> %b,
|
||||
<vscale x 8 x half> %c,
|
||||
i32 3)
|
||||
ret <vscale x 4 x float> %out
|
||||
}
|
||||
|
||||
declare <vscale x 4 x float> @llvm.aarch64.sve.fmlalb.nxv4f32(<vscale x 4 x float>, <vscale x 8 x half>, <vscale x 8 x half>)
|
||||
declare <vscale x 4 x float> @llvm.aarch64.sve.fmlalb.lane.nxv4f32(<vscale x 4 x float>, <vscale x 8 x half>, <vscale x 8 x half>, i32)
|
||||
declare <vscale x 4 x float> @llvm.aarch64.sve.fmlalt.nxv4f32(<vscale x 4 x float>, <vscale x 8 x half>, <vscale x 8 x half>)
|
||||
declare <vscale x 4 x float> @llvm.aarch64.sve.fmlalt.lane.nxv4f32(<vscale x 4 x float>, <vscale x 8 x half>, <vscale x 8 x half>, i32)
|
||||
|
||||
declare <vscale x 4 x float> @llvm.aarch64.sve.fmlslb.nxv4f32(<vscale x 4 x float>, <vscale x 8 x half>, <vscale x 8 x half>)
|
||||
declare <vscale x 4 x float> @llvm.aarch64.sve.fmlslb.lane.nxv4f32(<vscale x 4 x float>, <vscale x 8 x half>, <vscale x 8 x half>, i32)
|
||||
declare <vscale x 4 x float> @llvm.aarch64.sve.fmlslt.nxv4f32(<vscale x 4 x float>, <vscale x 8 x half>, <vscale x 8 x half>)
|
||||
declare <vscale x 4 x float> @llvm.aarch64.sve.fmlslt.lane.nxv4f32(<vscale x 4 x float>, <vscale x 8 x half>, <vscale x 8 x half>, i32)
|
@ -0,0 +1,191 @@
|
||||
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
|
||||
|
||||
;
|
||||
; FADDP
|
||||
;
|
||||
|
||||
define <vscale x 8 x half> @faddp_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
|
||||
; CHECK-LABEL: faddp_f16:
|
||||
; CHECK: faddp z0.h, p0/m, z0.h, z1.h
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 8 x half> @llvm.aarch64.sve.faddp.nxv8f16(<vscale x 8 x i1> %pg,
|
||||
<vscale x 8 x half> %a,
|
||||
<vscale x 8 x half> %b)
|
||||
ret <vscale x 8 x half> %out
|
||||
}
|
||||
|
||||
define <vscale x 4 x float> @faddp_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
|
||||
; CHECK-LABEL: faddp_f32:
|
||||
; CHECK: faddp z0.s, p0/m, z0.s, z1.s
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 4 x float> @llvm.aarch64.sve.faddp.nxv4f32(<vscale x 4 x i1> %pg,
|
||||
<vscale x 4 x float> %a,
|
||||
<vscale x 4 x float> %b)
|
||||
ret <vscale x 4 x float> %out
|
||||
}
|
||||
|
||||
define <vscale x 2 x double> @faddp_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
|
||||
; CHECK-LABEL: faddp_f64:
|
||||
; CHECK: faddp z0.d, p0/m, z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 2 x double> @llvm.aarch64.sve.faddp.nxv2f64(<vscale x 2 x i1> %pg,
|
||||
<vscale x 2 x double> %a,
|
||||
<vscale x 2 x double> %b)
|
||||
ret <vscale x 2 x double> %out
|
||||
}
|
||||
|
||||
;
|
||||
; FMAXP
|
||||
;
|
||||
|
||||
define <vscale x 8 x half> @fmaxp_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
|
||||
; CHECK-LABEL: fmaxp_f16:
|
||||
; CHECK: fmaxp z0.h, p0/m, z0.h, z1.h
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 8 x half> @llvm.aarch64.sve.fmaxp.nxv8f16(<vscale x 8 x i1> %pg,
|
||||
<vscale x 8 x half> %a,
|
||||
<vscale x 8 x half> %b)
|
||||
ret <vscale x 8 x half> %out
|
||||
}
|
||||
|
||||
define <vscale x 4 x float> @fmaxp_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
|
||||
; CHECK-LABEL: fmaxp_f32:
|
||||
; CHECK: fmaxp z0.s, p0/m, z0.s, z1.s
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fmaxp.nxv4f32(<vscale x 4 x i1> %pg,
|
||||
<vscale x 4 x float> %a,
|
||||
<vscale x 4 x float> %b)
|
||||
ret <vscale x 4 x float> %out
|
||||
}
|
||||
|
||||
define <vscale x 2 x double> @fmaxp_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
|
||||
; CHECK-LABEL: fmaxp_f64:
|
||||
; CHECK: fmaxp z0.d, p0/m, z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 2 x double> @llvm.aarch64.sve.fmaxp.nxv2f64(<vscale x 2 x i1> %pg,
|
||||
<vscale x 2 x double> %a,
|
||||
<vscale x 2 x double> %b)
|
||||
ret <vscale x 2 x double> %out
|
||||
}
|
||||
|
||||
;
|
||||
; FMAXNMP
|
||||
;
|
||||
|
||||
define <vscale x 8 x half> @fmaxnmp_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
|
||||
; CHECK-LABEL: fmaxnmp_f16:
|
||||
; CHECK: fmaxnmp z0.h, p0/m, z0.h, z1.h
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 8 x half> @llvm.aarch64.sve.fmaxnmp.nxv8f16(<vscale x 8 x i1> %pg,
|
||||
<vscale x 8 x half> %a,
|
||||
<vscale x 8 x half> %b)
|
||||
ret <vscale x 8 x half> %out
|
||||
}
|
||||
|
||||
define <vscale x 4 x float> @fmaxnmp_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
|
||||
; CHECK-LABEL: fmaxnmp_f32:
|
||||
; CHECK: fmaxnmp z0.s, p0/m, z0.s, z1.s
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fmaxnmp.nxv4f32(<vscale x 4 x i1> %pg,
|
||||
<vscale x 4 x float> %a,
|
||||
<vscale x 4 x float> %b)
|
||||
ret <vscale x 4 x float> %out
|
||||
}
|
||||
|
||||
define <vscale x 2 x double> @fmaxnmp_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
|
||||
; CHECK-LABEL: fmaxnmp_f64:
|
||||
; CHECK: fmaxnmp z0.d, p0/m, z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 2 x double> @llvm.aarch64.sve.fmaxnmp.nxv2f64(<vscale x 2 x i1> %pg,
|
||||
<vscale x 2 x double> %a,
|
||||
<vscale x 2 x double> %b)
|
||||
ret <vscale x 2 x double> %out
|
||||
}
|
||||
|
||||
;
|
||||
; FMINP
|
||||
;
|
||||
|
||||
define <vscale x 8 x half> @fminp_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
|
||||
; CHECK-LABEL: fminp_f16:
|
||||
; CHECK: fminp z0.h, p0/m, z0.h, z1.h
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 8 x half> @llvm.aarch64.sve.fminp.nxv8f16(<vscale x 8 x i1> %pg,
|
||||
<vscale x 8 x half> %a,
|
||||
<vscale x 8 x half> %b)
|
||||
ret <vscale x 8 x half> %out
|
||||
}
|
||||
|
||||
define <vscale x 4 x float> @fminp_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
|
||||
; CHECK-LABEL: fminp_f32:
|
||||
; CHECK: fminp z0.s, p0/m, z0.s, z1.s
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fminp.nxv4f32(<vscale x 4 x i1> %pg,
|
||||
<vscale x 4 x float> %a,
|
||||
<vscale x 4 x float> %b)
|
||||
ret <vscale x 4 x float> %out
|
||||
}
|
||||
|
||||
define <vscale x 2 x double> @fminp_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
|
||||
; CHECK-LABEL: fminp_f64:
|
||||
; CHECK: fminp z0.d, p0/m, z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 2 x double> @llvm.aarch64.sve.fminp.nxv2f64(<vscale x 2 x i1> %pg,
|
||||
<vscale x 2 x double> %a,
|
||||
<vscale x 2 x double> %b)
|
||||
ret <vscale x 2 x double> %out
|
||||
}
|
||||
|
||||
;
|
||||
; FMINNMP
|
||||
;
|
||||
|
||||
define <vscale x 8 x half> @fminnmp_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
|
||||
; CHECK-LABEL: fminnmp_f16:
|
||||
; CHECK: fminnmp z0.h, p0/m, z0.h, z1.h
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 8 x half> @llvm.aarch64.sve.fminnmp.nxv8f16(<vscale x 8 x i1> %pg,
|
||||
<vscale x 8 x half> %a,
|
||||
<vscale x 8 x half> %b)
|
||||
ret <vscale x 8 x half> %out
|
||||
}
|
||||
|
||||
define <vscale x 4 x float> @fminnmp_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
|
||||
; CHECK-LABEL: fminnmp_f32:
|
||||
; CHECK: fminnmp z0.s, p0/m, z0.s, z1.s
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fminnmp.nxv4f32(<vscale x 4 x i1> %pg,
|
||||
<vscale x 4 x float> %a,
|
||||
<vscale x 4 x float> %b)
|
||||
ret <vscale x 4 x float> %out
|
||||
}
|
||||
|
||||
define <vscale x 2 x double> @fminnmp_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
|
||||
; CHECK-LABEL: fminnmp_f64:
|
||||
; CHECK: fminnmp z0.d, p0/m, z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%out = call <vscale x 2 x double> @llvm.aarch64.sve.fminnmp.nxv2f64(<vscale x 2 x i1> %pg,
|
||||
<vscale x 2 x double> %a,
|
||||
<vscale x 2 x double> %b)
|
||||
ret <vscale x 2 x double> %out
|
||||
}
|
||||
|
||||
declare <vscale x 8 x half> @llvm.aarch64.sve.faddp.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
|
||||
declare <vscale x 4 x float> @llvm.aarch64.sve.faddp.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
|
||||
declare <vscale x 2 x double> @llvm.aarch64.sve.faddp.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
|
||||
|
||||
declare <vscale x 8 x half> @llvm.aarch64.sve.fmaxp.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
|
||||
declare <vscale x 4 x float> @llvm.aarch64.sve.fmaxp.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
|
||||
declare <vscale x 2 x double> @llvm.aarch64.sve.fmaxp.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
|
||||
|
||||
declare <vscale x 8 x half> @llvm.aarch64.sve.fmaxnmp.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
|
||||
declare <vscale x 4 x float> @llvm.aarch64.sve.fmaxnmp.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
|
||||
declare <vscale x 2 x double> @llvm.aarch64.sve.fmaxnmp.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
|
||||
|
||||
declare <vscale x 8 x half> @llvm.aarch64.sve.fminp.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
|
||||
declare <vscale x 4 x float> @llvm.aarch64.sve.fminp.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
|
||||
declare <vscale x 2 x double> @llvm.aarch64.sve.fminp.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
|
||||
|
||||
declare <vscale x 8 x half> @llvm.aarch64.sve.fminnmp.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
|
||||
declare <vscale x 4 x float> @llvm.aarch64.sve.fminnmp.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
|
||||
declare <vscale x 2 x double> @llvm.aarch64.sve.fminnmp.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
|
Loading…
x
Reference in New Issue
Block a user