1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 10:42:39 +01:00

[SVE] Add lowering for scalable vector fadd, fdiv, fmul and fsub operations.

Lower the operations to predicated variants.  This is prep work
required for fixed length code generation but also fixes a bug
whereby these operations fail selection when "unpacked" vector
types (e.g. MVT::nxv2f32) are used.

This patch also adds the missing "unpacked" patterns for FMA.

Differential Revision: https://reviews.llvm.org/D83765
This commit is contained in:
Paul Walker 2020-07-16 11:22:22 +00:00
parent 6ce66f09d3
commit bb56ec9a61
5 changed files with 267 additions and 35 deletions

View File

@ -948,7 +948,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::FADD, VT, Custom);
setOperationAction(ISD::FDIV, VT, Custom);
setOperationAction(ISD::FMA, VT, Custom);
setOperationAction(ISD::FMUL, VT, Custom);
setOperationAction(ISD::FSUB, VT, Custom);
}
}
@ -1483,11 +1487,14 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::FADD_PRED)
MAKE_CASE(AArch64ISD::FADDA_PRED)
MAKE_CASE(AArch64ISD::FADDV_PRED)
MAKE_CASE(AArch64ISD::FDIV_PRED)
MAKE_CASE(AArch64ISD::FMA_PRED)
MAKE_CASE(AArch64ISD::FMAXV_PRED)
MAKE_CASE(AArch64ISD::FMAXNMV_PRED)
MAKE_CASE(AArch64ISD::FMINV_PRED)
MAKE_CASE(AArch64ISD::FMINNMV_PRED)
MAKE_CASE(AArch64ISD::FMUL_PRED)
MAKE_CASE(AArch64ISD::FSUB_PRED)
MAKE_CASE(AArch64ISD::NOT)
MAKE_CASE(AArch64ISD::BIT)
MAKE_CASE(AArch64ISD::CBZ)
@ -3468,16 +3475,23 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::UMULO:
return LowerXALUO(Op, DAG);
case ISD::FADD:
if (useSVEForFixedLengthVectorVT(Op.getValueType()))
if (Op.getValueType().isScalableVector() ||
useSVEForFixedLengthVectorVT(Op.getValueType()))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED);
return LowerF128Call(Op, DAG, RTLIB::ADD_F128);
case ISD::FSUB:
if (Op.getValueType().isScalableVector())
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSUB_PRED);
return LowerF128Call(Op, DAG, RTLIB::SUB_F128);
case ISD::FMUL:
if (Op.getValueType().isScalableVector())
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMUL_PRED);
return LowerF128Call(Op, DAG, RTLIB::MUL_F128);
case ISD::FMA:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED);
case ISD::FDIV:
if (Op.getValueType().isScalableVector())
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED);
return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
case ISD::FP_ROUND:
case ISD::STRICT_FP_ROUND:

View File

@ -75,9 +75,12 @@ enum NodeType : unsigned {
// Arithmetic instructions
ADD_PRED,
FADD_PRED,
FDIV_PRED,
FMA_PRED,
FMUL_PRED,
FSUB_PRED,
SDIV_PRED,
UDIV_PRED,
FMA_PRED,
SMIN_MERGE_OP1,
UMIN_MERGE_OP1,
SMAX_MERGE_OP1,

View File

@ -175,7 +175,10 @@ def SDT_AArch64FMA : SDTypeProfile<1, 4, [
// Predicated operations with the result of inactive lanes being unspecified.
def AArch64add_p : SDNode<"AArch64ISD::ADD_PRED", SDT_AArch64Arith>;
def AArch64fadd_p : SDNode<"AArch64ISD::FADD_PRED", SDT_AArch64Arith>;
def AArch64fdiv_p : SDNode<"AArch64ISD::FDIV_PRED", SDT_AArch64Arith>;
def AArch64fma_p : SDNode<"AArch64ISD::FMA_PRED", SDT_AArch64FMA>;
def AArch64fmul_p : SDNode<"AArch64ISD::FMUL_PRED", SDT_AArch64Arith>;
def AArch64fsub_p : SDNode<"AArch64ISD::FSUB_PRED", SDT_AArch64Arith>;
def AArch64sdiv_p : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64Arith>;
def AArch64udiv_p : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>;
@ -361,6 +364,9 @@ let Predicates = [HasSVE] in {
defm FDIV_ZPmZ : sve_fp_2op_p_zds<0b1101, "fdiv", "FDIV_ZPZZ", int_aarch64_sve_fdiv, DestructiveBinaryCommWithRev, "FDIVR_ZPmZ">;
defm FADD_ZPZZ : sve_fp_bin_pred_hfd<AArch64fadd_p>;
defm FSUB_ZPZZ : sve_fp_bin_pred_hfd<AArch64fsub_p>;
defm FMUL_ZPZZ : sve_fp_bin_pred_hfd<AArch64fmul_p>;
defm FDIV_ZPZZ : sve_fp_bin_pred_hfd<AArch64fdiv_p>;
let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in {
defm FADD_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fadd>;
@ -377,10 +383,10 @@ let Predicates = [HasSVE] in {
defm FDIV_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fdiv>;
}
defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd>;
defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", fsub>;
defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul", fmul>;
defm FTSMUL_ZZZ : sve_fp_3op_u_zd_ftsmul<0b011, "ftsmul", int_aarch64_sve_ftsmul_x>;
defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd, AArch64fadd_p>;
defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", fsub, AArch64fsub_p>;
defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul", fmul, AArch64fmul_p>;
defm FTSMUL_ZZZ : sve_fp_3op_u_zd_ftsmul<0b011, "ftsmul", int_aarch64_sve_ftsmul_x>;
defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps", int_aarch64_sve_frecps_x>;
defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts", int_aarch64_sve_frsqrts_x>;
@ -404,8 +410,14 @@ let Predicates = [HasSVE] in {
// regalloc.
def : Pat<(nxv8f16 (AArch64fma_p nxv8i1:$P, nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3)),
(FMLA_ZPmZZ_H $P, $Op3, $Op1, $Op2)>;
def : Pat<(nxv4f16 (AArch64fma_p nxv4i1:$P, nxv4f16:$Op1, nxv4f16:$Op2, nxv4f16:$Op3)),
(FMLA_ZPmZZ_H $P, $Op3, $Op1, $Op2)>;
def : Pat<(nxv2f16 (AArch64fma_p nxv2i1:$P, nxv2f16:$Op1, nxv2f16:$Op2, nxv2f16:$Op3)),
(FMLA_ZPmZZ_H $P, $Op3, $Op1, $Op2)>;
def : Pat<(nxv4f32 (AArch64fma_p nxv4i1:$P, nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3)),
(FMLA_ZPmZZ_S $P, $Op3, $Op1, $Op2)>;
def : Pat<(nxv2f32 (AArch64fma_p nxv2i1:$P, nxv2f32:$Op1, nxv2f32:$Op2, nxv2f32:$Op3)),
(FMLA_ZPmZZ_S $P, $Op3, $Op1, $Op2)>;
def : Pat<(nxv2f64 (AArch64fma_p nxv2i1:$P, nxv2f64:$Op1, nxv2f64:$Op2, nxv2f64:$Op3)),
(FMLA_ZPmZZ_D $P, $Op3, $Op1, $Op2)>;

View File

@ -340,6 +340,12 @@ class SVE_2_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
: Pat<(vtd (op vt1:$Op1, vt2:$Op2)),
(inst $Op1, $Op2)>;
class SVE_2_Op_Pred_All_Active<ValueType vtd, SDPatternOperator op,
ValueType pt, ValueType vt1, ValueType vt2,
Instruction inst>
: Pat<(vtd (op (pt (AArch64ptrue 31)), vt1:$Op1, vt2:$Op2)),
(inst $Op1, $Op2)>;
class SVE_2_Op_Pat_Reduce_To_Neon<ValueType vtd, SDPatternOperator op, ValueType vt1,
ValueType vt2, Instruction inst, SubRegIndex sub>
: Pat<(vtd (op vt1:$Op1, vt2:$Op2)),
@ -1665,7 +1671,8 @@ class sve_fp_3op_u_zd<bits<2> sz, bits<3> opc, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zd;
}
multiclass sve_fp_3op_u_zd<bits<3> opc, string asm, SDPatternOperator op> {
multiclass sve_fp_3op_u_zd<bits<3> opc, string asm, SDPatternOperator op,
SDPatternOperator predicated_op = null_frag> {
def _H : sve_fp_3op_u_zd<0b01, opc, asm, ZPR16>;
def _S : sve_fp_3op_u_zd<0b10, opc, asm, ZPR32>;
def _D : sve_fp_3op_u_zd<0b11, opc, asm, ZPR64>;
@ -1674,6 +1681,9 @@ multiclass sve_fp_3op_u_zd<bits<3> opc, string asm, SDPatternOperator op> {
def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
def : SVE_2_Op_Pred_All_Active<nxv8f16, predicated_op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
def : SVE_2_Op_Pred_All_Active<nxv4f32, predicated_op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
def : SVE_2_Op_Pred_All_Active<nxv2f64, predicated_op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_fp_3op_u_zd_ftsmul<bits<3> opc, string asm, SDPatternOperator op> {
@ -7804,7 +7814,10 @@ multiclass sve_fp_bin_pred_hfd<SDPatternOperator op> {
def _UNDEF_D : PredTwoOpPseudo<NAME # _D, ZPR64, FalseLanesUndef>;
def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
def : SVE_3_Op_Pat<nxv4f16, op, nxv4i1, nxv4f16, nxv4f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
def : SVE_3_Op_Pat<nxv2f16, op, nxv2i1, nxv2f16, nxv2f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Pseudo>(NAME # _UNDEF_S)>;
def : SVE_3_Op_Pat<nxv2f32, op, nxv2i1, nxv2f32, nxv2f32, !cast<Pseudo>(NAME # _UNDEF_S)>;
def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Pseudo>(NAME # _UNDEF_D)>;
}

View File

@ -5,8 +5,8 @@
; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning
define <vscale x 8 x half> @fadd_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: fadd_h:
define <vscale x 8 x half> @fadd_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: fadd_nxv8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: fadd z0.h, z0.h, z1.h
; CHECK-NEXT: ret
@ -14,8 +14,28 @@ define <vscale x 8 x half> @fadd_h(<vscale x 8 x half> %a, <vscale x 8 x half> %
ret <vscale x 8 x half> %res
}
define <vscale x 4 x float> @fadd_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fadd_s:
define <vscale x 4 x half> @fadd_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: fadd_nxv4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
%res = fadd <vscale x 4 x half> %a, %b
ret <vscale x 4 x half> %res
}
define <vscale x 2 x half> @fadd_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: fadd_nxv2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
%res = fadd <vscale x 2 x half> %a, %b
ret <vscale x 2 x half> %res
}
define <vscale x 4 x float> @fadd_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fadd_nxv4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fadd z0.s, z0.s, z1.s
; CHECK-NEXT: ret
@ -23,8 +43,18 @@ define <vscale x 4 x float> @fadd_s(<vscale x 4 x float> %a, <vscale x 4 x float
ret <vscale x 4 x float> %res
}
define <vscale x 2 x double> @fadd_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fadd_d:
define <vscale x 2 x float> @fadd_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: fadd_nxv2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%res = fadd <vscale x 2 x float> %a, %b
ret <vscale x 2 x float> %res
}
define <vscale x 2 x double> @fadd_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fadd_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: fadd z0.d, z0.d, z1.d
; CHECK-NEXT: ret
@ -32,8 +62,68 @@ define <vscale x 2 x double> @fadd_d(<vscale x 2 x double> %a, <vscale x 2 x dou
ret <vscale x 2 x double> %res
}
define <vscale x 8 x half> @fsub_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: fsub_h:
define <vscale x 8 x half> @fdiv_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: fdiv_nxv8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
%res = fdiv <vscale x 8 x half> %a, %b
ret <vscale x 8 x half> %res
}
define <vscale x 4 x half> @fdiv_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: fdiv_nxv4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
%res = fdiv <vscale x 4 x half> %a, %b
ret <vscale x 4 x half> %res
}
define <vscale x 2 x half> @fdiv_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: fdiv_nxv2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
%res = fdiv <vscale x 2 x half> %a, %b
ret <vscale x 2 x half> %res
}
define <vscale x 4 x float> @fdiv_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fdiv_nxv4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fdiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%res = fdiv <vscale x 4 x float> %a, %b
ret <vscale x 4 x float> %res
}
define <vscale x 2 x float> @fdiv_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: fdiv_nxv2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fdiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%res = fdiv <vscale x 2 x float> %a, %b
ret <vscale x 2 x float> %res
}
define <vscale x 2 x double> @fdiv_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fdiv_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fdiv z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
%res = fdiv <vscale x 2 x double> %a, %b
ret <vscale x 2 x double> %res
}
define <vscale x 8 x half> @fsub_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: fsub_nxv8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: fsub z0.h, z0.h, z1.h
; CHECK-NEXT: ret
@ -41,8 +131,28 @@ define <vscale x 8 x half> @fsub_h(<vscale x 8 x half> %a, <vscale x 8 x half> %
ret <vscale x 8 x half> %res
}
define <vscale x 4 x float> @fsub_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fsub_s:
define <vscale x 4 x half> @fsub_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: fsub_nxv4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fsub z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
%res = fsub <vscale x 4 x half> %a, %b
ret <vscale x 4 x half> %res
}
define <vscale x 2 x half> @fsub_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: fsub_nxv2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fsub z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
%res = fsub <vscale x 2 x half> %a, %b
ret <vscale x 2 x half> %res
}
define <vscale x 4 x float> @fsub_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fsub_nxv4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fsub z0.s, z0.s, z1.s
; CHECK-NEXT: ret
@ -50,8 +160,18 @@ define <vscale x 4 x float> @fsub_s(<vscale x 4 x float> %a, <vscale x 4 x float
ret <vscale x 4 x float> %res
}
define <vscale x 2 x double> @fsub_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fsub_d:
define <vscale x 2 x float> @fsub_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: fsub_nxv2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fsub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%res = fsub <vscale x 2 x float> %a, %b
ret <vscale x 2 x float> %res
}
define <vscale x 2 x double> @fsub_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fsub_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: fsub z0.d, z0.d, z1.d
; CHECK-NEXT: ret
@ -59,8 +179,8 @@ define <vscale x 2 x double> @fsub_d(<vscale x 2 x double> %a, <vscale x 2 x dou
ret <vscale x 2 x double> %res
}
define <vscale x 8 x half> @fmul_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: fmul_h:
define <vscale x 8 x half> @fmul_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: fmul_nxv8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: fmul z0.h, z0.h, z1.h
; CHECK-NEXT: ret
@ -68,8 +188,28 @@ define <vscale x 8 x half> @fmul_h(<vscale x 8 x half> %a, <vscale x 8 x half> %
ret <vscale x 8 x half> %res
}
define <vscale x 4 x float> @fmul_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fmul_s:
define <vscale x 4 x half> @fmul_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: fmul_nxv4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
%res = fmul <vscale x 4 x half> %a, %b
ret <vscale x 4 x half> %res
}
define <vscale x 2 x half> @fmul_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: fmul_nxv2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
%res = fmul <vscale x 2 x half> %a, %b
ret <vscale x 2 x half> %res
}
define <vscale x 4 x float> @fmul_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fmul_nxv4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fmul z0.s, z0.s, z1.s
; CHECK-NEXT: ret
@ -77,8 +217,18 @@ define <vscale x 4 x float> @fmul_s(<vscale x 4 x float> %a, <vscale x 4 x float
ret <vscale x 4 x float> %res
}
define <vscale x 2 x double> @fmul_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fmul_d:
define <vscale x 2 x float> @fmul_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: fmul_nxv2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%res = fmul <vscale x 2 x float> %a, %b
ret <vscale x 2 x float> %res
}
define <vscale x 2 x double> @fmul_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fmul_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: fmul z0.d, z0.d, z1.d
; CHECK-NEXT: ret
@ -86,8 +236,8 @@ define <vscale x 2 x double> @fmul_d(<vscale x 2 x double> %a, <vscale x 2 x dou
ret <vscale x 2 x double> %res
}
define <vscale x 8 x half> @fma_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
; CHECK-LABEL: fma_half:
define <vscale x 8 x half> @fma_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
; CHECK-LABEL: fma_nxv8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: fmla z2.h, p0/m, z0.h, z1.h
@ -96,8 +246,31 @@ define <vscale x 8 x half> @fma_half(<vscale x 8 x half> %a, <vscale x 8 x half>
%r = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c)
ret <vscale x 8 x half> %r
}
define <vscale x 4 x float> @fma_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
; CHECK-LABEL: fma_float:
define <vscale x 4 x half> @fma_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x half> %c) {
; CHECK-LABEL: fma_nxv4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fmla z2.h, p0/m, z0.h, z1.h
; CHECK-NEXT: mov z0.d, z2.d
; CHECK-NEXT: ret
%r = call <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x half> %c)
ret <vscale x 4 x half> %r
}
define <vscale x 2 x half> @fma_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x half> %c) {
; CHECK-LABEL: fma_nxv2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fmla z2.h, p0/m, z0.h, z1.h
; CHECK-NEXT: mov z0.d, z2.d
; CHECK-NEXT: ret
%r = call <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x half> %c)
ret <vscale x 2 x half> %r
}
define <vscale x 4 x float> @fma_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
; CHECK-LABEL: fma_nxv4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fmla z2.s, p0/m, z0.s, z1.s
@ -106,8 +279,20 @@ define <vscale x 4 x float> @fma_float(<vscale x 4 x float> %a, <vscale x 4 x fl
%r = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
ret <vscale x 4 x float> %r
}
define <vscale x 2 x double> @fma_double_1(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
; CHECK-LABEL: fma_double_1:
define <vscale x 2 x float> @fma_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, <vscale x 2 x float> %c) {
; CHECK-LABEL: fma_nxv2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fmla z2.s, p0/m, z0.s, z1.s
; CHECK-NEXT: mov z0.d, z2.d
; CHECK-NEXT: ret
%r = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, <vscale x 2 x float> %c)
ret <vscale x 2 x float> %r
}
define <vscale x 2 x double> @fma_nxv2f64_1(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
; CHECK-LABEL: fma_nxv2f64_1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fmla z2.d, p0/m, z0.d, z1.d
@ -116,8 +301,9 @@ define <vscale x 2 x double> @fma_double_1(<vscale x 2 x double> %a, <vscale x 2
%r = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c)
ret <vscale x 2 x double> %r
}
define <vscale x 2 x double> @fma_double_2(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
; CHECK-LABEL: fma_double_2:
define <vscale x 2 x double> @fma_nxv2f64_2(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
; CHECK-LABEL: fma_nxv2f64_2:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fmla z2.d, p0/m, z1.d, z0.d
@ -126,8 +312,9 @@ define <vscale x 2 x double> @fma_double_2(<vscale x 2 x double> %a, <vscale x 2
%r = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> %b, <vscale x 2 x double> %a, <vscale x 2 x double> %c)
ret <vscale x 2 x double> %r
}
define <vscale x 2 x double> @fma_double_3(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
; CHECK-LABEL: fma_double_3:
define <vscale x 2 x double> @fma_nxv2f64_3(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
; CHECK-LABEL: fma_nxv2f64_3:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fmla z0.d, p0/m, z2.d, z1.d
@ -231,7 +418,10 @@ declare <vscale x 2 x double> @llvm.aarch64.sve.frsqrts.x.nxv2f64(<vscale x 2 x
declare <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>)
declare <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, <vscale x 4 x half>)
declare <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x half>)
; Function Attrs: nounwind readnone
declare double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>) #2