mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 10:42:39 +01:00
[SVE] Add lowering for scalable vector fadd, fdiv, fmul and fsub operations.
Lower the operations to predicated variants. This is prep work required for fixed length code generation but also fixes a bug whereby these operations fail selection when "unpacked" vector types (e.g. MVT::nxv2f32) are used. This patch also adds the missing "unpacked" patterns for FMA. Differential Revision: https://reviews.llvm.org/D83765
This commit is contained in:
parent
6ce66f09d3
commit
bb56ec9a61
@ -948,7 +948,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
|
||||
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
|
||||
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
|
||||
setOperationAction(ISD::SELECT, VT, Custom);
|
||||
setOperationAction(ISD::FADD, VT, Custom);
|
||||
setOperationAction(ISD::FDIV, VT, Custom);
|
||||
setOperationAction(ISD::FMA, VT, Custom);
|
||||
setOperationAction(ISD::FMUL, VT, Custom);
|
||||
setOperationAction(ISD::FSUB, VT, Custom);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1483,11 +1487,14 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
MAKE_CASE(AArch64ISD::FADD_PRED)
|
||||
MAKE_CASE(AArch64ISD::FADDA_PRED)
|
||||
MAKE_CASE(AArch64ISD::FADDV_PRED)
|
||||
MAKE_CASE(AArch64ISD::FDIV_PRED)
|
||||
MAKE_CASE(AArch64ISD::FMA_PRED)
|
||||
MAKE_CASE(AArch64ISD::FMAXV_PRED)
|
||||
MAKE_CASE(AArch64ISD::FMAXNMV_PRED)
|
||||
MAKE_CASE(AArch64ISD::FMINV_PRED)
|
||||
MAKE_CASE(AArch64ISD::FMINNMV_PRED)
|
||||
MAKE_CASE(AArch64ISD::FMUL_PRED)
|
||||
MAKE_CASE(AArch64ISD::FSUB_PRED)
|
||||
MAKE_CASE(AArch64ISD::NOT)
|
||||
MAKE_CASE(AArch64ISD::BIT)
|
||||
MAKE_CASE(AArch64ISD::CBZ)
|
||||
@ -3468,16 +3475,23 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
|
||||
case ISD::UMULO:
|
||||
return LowerXALUO(Op, DAG);
|
||||
case ISD::FADD:
|
||||
if (useSVEForFixedLengthVectorVT(Op.getValueType()))
|
||||
if (Op.getValueType().isScalableVector() ||
|
||||
useSVEForFixedLengthVectorVT(Op.getValueType()))
|
||||
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED);
|
||||
return LowerF128Call(Op, DAG, RTLIB::ADD_F128);
|
||||
case ISD::FSUB:
|
||||
if (Op.getValueType().isScalableVector())
|
||||
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSUB_PRED);
|
||||
return LowerF128Call(Op, DAG, RTLIB::SUB_F128);
|
||||
case ISD::FMUL:
|
||||
if (Op.getValueType().isScalableVector())
|
||||
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMUL_PRED);
|
||||
return LowerF128Call(Op, DAG, RTLIB::MUL_F128);
|
||||
case ISD::FMA:
|
||||
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED);
|
||||
case ISD::FDIV:
|
||||
if (Op.getValueType().isScalableVector())
|
||||
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED);
|
||||
return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
|
||||
case ISD::FP_ROUND:
|
||||
case ISD::STRICT_FP_ROUND:
|
||||
|
@ -75,9 +75,12 @@ enum NodeType : unsigned {
|
||||
// Arithmetic instructions
|
||||
ADD_PRED,
|
||||
FADD_PRED,
|
||||
FDIV_PRED,
|
||||
FMA_PRED,
|
||||
FMUL_PRED,
|
||||
FSUB_PRED,
|
||||
SDIV_PRED,
|
||||
UDIV_PRED,
|
||||
FMA_PRED,
|
||||
SMIN_MERGE_OP1,
|
||||
UMIN_MERGE_OP1,
|
||||
SMAX_MERGE_OP1,
|
||||
|
@ -175,7 +175,10 @@ def SDT_AArch64FMA : SDTypeProfile<1, 4, [
|
||||
// Predicated operations with the result of inactive lanes being unspecified.
|
||||
def AArch64add_p : SDNode<"AArch64ISD::ADD_PRED", SDT_AArch64Arith>;
|
||||
def AArch64fadd_p : SDNode<"AArch64ISD::FADD_PRED", SDT_AArch64Arith>;
|
||||
def AArch64fdiv_p : SDNode<"AArch64ISD::FDIV_PRED", SDT_AArch64Arith>;
|
||||
def AArch64fma_p : SDNode<"AArch64ISD::FMA_PRED", SDT_AArch64FMA>;
|
||||
def AArch64fmul_p : SDNode<"AArch64ISD::FMUL_PRED", SDT_AArch64Arith>;
|
||||
def AArch64fsub_p : SDNode<"AArch64ISD::FSUB_PRED", SDT_AArch64Arith>;
|
||||
def AArch64sdiv_p : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64Arith>;
|
||||
def AArch64udiv_p : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>;
|
||||
|
||||
@ -361,6 +364,9 @@ let Predicates = [HasSVE] in {
|
||||
defm FDIV_ZPmZ : sve_fp_2op_p_zds<0b1101, "fdiv", "FDIV_ZPZZ", int_aarch64_sve_fdiv, DestructiveBinaryCommWithRev, "FDIVR_ZPmZ">;
|
||||
|
||||
defm FADD_ZPZZ : sve_fp_bin_pred_hfd<AArch64fadd_p>;
|
||||
defm FSUB_ZPZZ : sve_fp_bin_pred_hfd<AArch64fsub_p>;
|
||||
defm FMUL_ZPZZ : sve_fp_bin_pred_hfd<AArch64fmul_p>;
|
||||
defm FDIV_ZPZZ : sve_fp_bin_pred_hfd<AArch64fdiv_p>;
|
||||
|
||||
let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in {
|
||||
defm FADD_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fadd>;
|
||||
@ -377,10 +383,10 @@ let Predicates = [HasSVE] in {
|
||||
defm FDIV_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fdiv>;
|
||||
}
|
||||
|
||||
defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd>;
|
||||
defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", fsub>;
|
||||
defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul", fmul>;
|
||||
defm FTSMUL_ZZZ : sve_fp_3op_u_zd_ftsmul<0b011, "ftsmul", int_aarch64_sve_ftsmul_x>;
|
||||
defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd, AArch64fadd_p>;
|
||||
defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", fsub, AArch64fsub_p>;
|
||||
defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul", fmul, AArch64fmul_p>;
|
||||
defm FTSMUL_ZZZ : sve_fp_3op_u_zd_ftsmul<0b011, "ftsmul", int_aarch64_sve_ftsmul_x>;
|
||||
defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps", int_aarch64_sve_frecps_x>;
|
||||
defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts", int_aarch64_sve_frsqrts_x>;
|
||||
|
||||
@ -404,8 +410,14 @@ let Predicates = [HasSVE] in {
|
||||
// regalloc.
|
||||
def : Pat<(nxv8f16 (AArch64fma_p nxv8i1:$P, nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3)),
|
||||
(FMLA_ZPmZZ_H $P, $Op3, $Op1, $Op2)>;
|
||||
def : Pat<(nxv4f16 (AArch64fma_p nxv4i1:$P, nxv4f16:$Op1, nxv4f16:$Op2, nxv4f16:$Op3)),
|
||||
(FMLA_ZPmZZ_H $P, $Op3, $Op1, $Op2)>;
|
||||
def : Pat<(nxv2f16 (AArch64fma_p nxv2i1:$P, nxv2f16:$Op1, nxv2f16:$Op2, nxv2f16:$Op3)),
|
||||
(FMLA_ZPmZZ_H $P, $Op3, $Op1, $Op2)>;
|
||||
def : Pat<(nxv4f32 (AArch64fma_p nxv4i1:$P, nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3)),
|
||||
(FMLA_ZPmZZ_S $P, $Op3, $Op1, $Op2)>;
|
||||
def : Pat<(nxv2f32 (AArch64fma_p nxv2i1:$P, nxv2f32:$Op1, nxv2f32:$Op2, nxv2f32:$Op3)),
|
||||
(FMLA_ZPmZZ_S $P, $Op3, $Op1, $Op2)>;
|
||||
def : Pat<(nxv2f64 (AArch64fma_p nxv2i1:$P, nxv2f64:$Op1, nxv2f64:$Op2, nxv2f64:$Op3)),
|
||||
(FMLA_ZPmZZ_D $P, $Op3, $Op1, $Op2)>;
|
||||
|
||||
|
@ -340,6 +340,12 @@ class SVE_2_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
|
||||
: Pat<(vtd (op vt1:$Op1, vt2:$Op2)),
|
||||
(inst $Op1, $Op2)>;
|
||||
|
||||
class SVE_2_Op_Pred_All_Active<ValueType vtd, SDPatternOperator op,
|
||||
ValueType pt, ValueType vt1, ValueType vt2,
|
||||
Instruction inst>
|
||||
: Pat<(vtd (op (pt (AArch64ptrue 31)), vt1:$Op1, vt2:$Op2)),
|
||||
(inst $Op1, $Op2)>;
|
||||
|
||||
class SVE_2_Op_Pat_Reduce_To_Neon<ValueType vtd, SDPatternOperator op, ValueType vt1,
|
||||
ValueType vt2, Instruction inst, SubRegIndex sub>
|
||||
: Pat<(vtd (op vt1:$Op1, vt2:$Op2)),
|
||||
@ -1665,7 +1671,8 @@ class sve_fp_3op_u_zd<bits<2> sz, bits<3> opc, string asm, ZPRRegOp zprty>
|
||||
let Inst{4-0} = Zd;
|
||||
}
|
||||
|
||||
multiclass sve_fp_3op_u_zd<bits<3> opc, string asm, SDPatternOperator op> {
|
||||
multiclass sve_fp_3op_u_zd<bits<3> opc, string asm, SDPatternOperator op,
|
||||
SDPatternOperator predicated_op = null_frag> {
|
||||
def _H : sve_fp_3op_u_zd<0b01, opc, asm, ZPR16>;
|
||||
def _S : sve_fp_3op_u_zd<0b10, opc, asm, ZPR32>;
|
||||
def _D : sve_fp_3op_u_zd<0b11, opc, asm, ZPR64>;
|
||||
@ -1674,6 +1681,9 @@ multiclass sve_fp_3op_u_zd<bits<3> opc, string asm, SDPatternOperator op> {
|
||||
def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
|
||||
def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
|
||||
|
||||
def : SVE_2_Op_Pred_All_Active<nxv8f16, predicated_op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
|
||||
def : SVE_2_Op_Pred_All_Active<nxv4f32, predicated_op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
|
||||
def : SVE_2_Op_Pred_All_Active<nxv2f64, predicated_op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
|
||||
}
|
||||
|
||||
multiclass sve_fp_3op_u_zd_ftsmul<bits<3> opc, string asm, SDPatternOperator op> {
|
||||
@ -7804,7 +7814,10 @@ multiclass sve_fp_bin_pred_hfd<SDPatternOperator op> {
|
||||
def _UNDEF_D : PredTwoOpPseudo<NAME # _D, ZPR64, FalseLanesUndef>;
|
||||
|
||||
def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
|
||||
def : SVE_3_Op_Pat<nxv4f16, op, nxv4i1, nxv4f16, nxv4f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
|
||||
def : SVE_3_Op_Pat<nxv2f16, op, nxv2i1, nxv2f16, nxv2f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
|
||||
def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Pseudo>(NAME # _UNDEF_S)>;
|
||||
def : SVE_3_Op_Pat<nxv2f32, op, nxv2i1, nxv2f32, nxv2f32, !cast<Pseudo>(NAME # _UNDEF_S)>;
|
||||
def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Pseudo>(NAME # _UNDEF_D)>;
|
||||
}
|
||||
|
||||
|
@ -5,8 +5,8 @@
|
||||
; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
|
||||
; WARN-NOT: warning
|
||||
|
||||
define <vscale x 8 x half> @fadd_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
|
||||
; CHECK-LABEL: fadd_h:
|
||||
define <vscale x 8 x half> @fadd_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
|
||||
; CHECK-LABEL: fadd_nxv8f16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fadd z0.h, z0.h, z1.h
|
||||
; CHECK-NEXT: ret
|
||||
@ -14,8 +14,28 @@ define <vscale x 8 x half> @fadd_h(<vscale x 8 x half> %a, <vscale x 8 x half> %
|
||||
ret <vscale x 8 x half> %res
|
||||
}
|
||||
|
||||
define <vscale x 4 x float> @fadd_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
|
||||
; CHECK-LABEL: fadd_s:
|
||||
define <vscale x 4 x half> @fadd_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
|
||||
; CHECK-LABEL: fadd_nxv4f16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h
|
||||
; CHECK-NEXT: ret
|
||||
%res = fadd <vscale x 4 x half> %a, %b
|
||||
ret <vscale x 4 x half> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x half> @fadd_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
|
||||
; CHECK-LABEL: fadd_nxv2f16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h
|
||||
; CHECK-NEXT: ret
|
||||
%res = fadd <vscale x 2 x half> %a, %b
|
||||
ret <vscale x 2 x half> %res
|
||||
}
|
||||
|
||||
define <vscale x 4 x float> @fadd_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
|
||||
; CHECK-LABEL: fadd_nxv4f32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fadd z0.s, z0.s, z1.s
|
||||
; CHECK-NEXT: ret
|
||||
@ -23,8 +43,18 @@ define <vscale x 4 x float> @fadd_s(<vscale x 4 x float> %a, <vscale x 4 x float
|
||||
ret <vscale x 4 x float> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x double> @fadd_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
|
||||
; CHECK-LABEL: fadd_d:
|
||||
define <vscale x 2 x float> @fadd_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
|
||||
; CHECK-LABEL: fadd_nxv2f32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s
|
||||
; CHECK-NEXT: ret
|
||||
%res = fadd <vscale x 2 x float> %a, %b
|
||||
ret <vscale x 2 x float> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x double> @fadd_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
|
||||
; CHECK-LABEL: fadd_nxv2f64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fadd z0.d, z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
@ -32,8 +62,68 @@ define <vscale x 2 x double> @fadd_d(<vscale x 2 x double> %a, <vscale x 2 x dou
|
||||
ret <vscale x 2 x double> %res
|
||||
}
|
||||
|
||||
define <vscale x 8 x half> @fsub_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
|
||||
; CHECK-LABEL: fsub_h:
|
||||
define <vscale x 8 x half> @fdiv_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
|
||||
; CHECK-LABEL: fdiv_nxv8f16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.h
|
||||
; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h
|
||||
; CHECK-NEXT: ret
|
||||
%res = fdiv <vscale x 8 x half> %a, %b
|
||||
ret <vscale x 8 x half> %res
|
||||
}
|
||||
|
||||
define <vscale x 4 x half> @fdiv_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
|
||||
; CHECK-LABEL: fdiv_nxv4f16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h
|
||||
; CHECK-NEXT: ret
|
||||
%res = fdiv <vscale x 4 x half> %a, %b
|
||||
ret <vscale x 4 x half> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x half> @fdiv_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
|
||||
; CHECK-LABEL: fdiv_nxv2f16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h
|
||||
; CHECK-NEXT: ret
|
||||
%res = fdiv <vscale x 2 x half> %a, %b
|
||||
ret <vscale x 2 x half> %res
|
||||
}
|
||||
|
||||
define <vscale x 4 x float> @fdiv_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
|
||||
; CHECK-LABEL: fdiv_nxv4f32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: fdiv z0.s, p0/m, z0.s, z1.s
|
||||
; CHECK-NEXT: ret
|
||||
%res = fdiv <vscale x 4 x float> %a, %b
|
||||
ret <vscale x 4 x float> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x float> @fdiv_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
|
||||
; CHECK-LABEL: fdiv_nxv2f32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fdiv z0.s, p0/m, z0.s, z1.s
|
||||
; CHECK-NEXT: ret
|
||||
%res = fdiv <vscale x 2 x float> %a, %b
|
||||
ret <vscale x 2 x float> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x double> @fdiv_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
|
||||
; CHECK-LABEL: fdiv_nxv2f64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fdiv z0.d, p0/m, z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%res = fdiv <vscale x 2 x double> %a, %b
|
||||
ret <vscale x 2 x double> %res
|
||||
}
|
||||
|
||||
define <vscale x 8 x half> @fsub_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
|
||||
; CHECK-LABEL: fsub_nxv8f16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fsub z0.h, z0.h, z1.h
|
||||
; CHECK-NEXT: ret
|
||||
@ -41,8 +131,28 @@ define <vscale x 8 x half> @fsub_h(<vscale x 8 x half> %a, <vscale x 8 x half> %
|
||||
ret <vscale x 8 x half> %res
|
||||
}
|
||||
|
||||
define <vscale x 4 x float> @fsub_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
|
||||
; CHECK-LABEL: fsub_s:
|
||||
define <vscale x 4 x half> @fsub_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
|
||||
; CHECK-LABEL: fsub_nxv4f16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: fsub z0.h, p0/m, z0.h, z1.h
|
||||
; CHECK-NEXT: ret
|
||||
%res = fsub <vscale x 4 x half> %a, %b
|
||||
ret <vscale x 4 x half> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x half> @fsub_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
|
||||
; CHECK-LABEL: fsub_nxv2f16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fsub z0.h, p0/m, z0.h, z1.h
|
||||
; CHECK-NEXT: ret
|
||||
%res = fsub <vscale x 2 x half> %a, %b
|
||||
ret <vscale x 2 x half> %res
|
||||
}
|
||||
|
||||
define <vscale x 4 x float> @fsub_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
|
||||
; CHECK-LABEL: fsub_nxv4f32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fsub z0.s, z0.s, z1.s
|
||||
; CHECK-NEXT: ret
|
||||
@ -50,8 +160,18 @@ define <vscale x 4 x float> @fsub_s(<vscale x 4 x float> %a, <vscale x 4 x float
|
||||
ret <vscale x 4 x float> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x double> @fsub_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
|
||||
; CHECK-LABEL: fsub_d:
|
||||
define <vscale x 2 x float> @fsub_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
|
||||
; CHECK-LABEL: fsub_nxv2f32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fsub z0.s, p0/m, z0.s, z1.s
|
||||
; CHECK-NEXT: ret
|
||||
%res = fsub <vscale x 2 x float> %a, %b
|
||||
ret <vscale x 2 x float> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x double> @fsub_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
|
||||
; CHECK-LABEL: fsub_nxv2f64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fsub z0.d, z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
@ -59,8 +179,8 @@ define <vscale x 2 x double> @fsub_d(<vscale x 2 x double> %a, <vscale x 2 x dou
|
||||
ret <vscale x 2 x double> %res
|
||||
}
|
||||
|
||||
define <vscale x 8 x half> @fmul_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
|
||||
; CHECK-LABEL: fmul_h:
|
||||
define <vscale x 8 x half> @fmul_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
|
||||
; CHECK-LABEL: fmul_nxv8f16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fmul z0.h, z0.h, z1.h
|
||||
; CHECK-NEXT: ret
|
||||
@ -68,8 +188,28 @@ define <vscale x 8 x half> @fmul_h(<vscale x 8 x half> %a, <vscale x 8 x half> %
|
||||
ret <vscale x 8 x half> %res
|
||||
}
|
||||
|
||||
define <vscale x 4 x float> @fmul_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
|
||||
; CHECK-LABEL: fmul_s:
|
||||
define <vscale x 4 x half> @fmul_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
|
||||
; CHECK-LABEL: fmul_nxv4f16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h
|
||||
; CHECK-NEXT: ret
|
||||
%res = fmul <vscale x 4 x half> %a, %b
|
||||
ret <vscale x 4 x half> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x half> @fmul_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
|
||||
; CHECK-LABEL: fmul_nxv2f16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h
|
||||
; CHECK-NEXT: ret
|
||||
%res = fmul <vscale x 2 x half> %a, %b
|
||||
ret <vscale x 2 x half> %res
|
||||
}
|
||||
|
||||
define <vscale x 4 x float> @fmul_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
|
||||
; CHECK-LABEL: fmul_nxv4f32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fmul z0.s, z0.s, z1.s
|
||||
; CHECK-NEXT: ret
|
||||
@ -77,8 +217,18 @@ define <vscale x 4 x float> @fmul_s(<vscale x 4 x float> %a, <vscale x 4 x float
|
||||
ret <vscale x 4 x float> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x double> @fmul_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
|
||||
; CHECK-LABEL: fmul_d:
|
||||
define <vscale x 2 x float> @fmul_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
|
||||
; CHECK-LABEL: fmul_nxv2f32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z1.s
|
||||
; CHECK-NEXT: ret
|
||||
%res = fmul <vscale x 2 x float> %a, %b
|
||||
ret <vscale x 2 x float> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x double> @fmul_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
|
||||
; CHECK-LABEL: fmul_nxv2f64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fmul z0.d, z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
@ -86,8 +236,8 @@ define <vscale x 2 x double> @fmul_d(<vscale x 2 x double> %a, <vscale x 2 x dou
|
||||
ret <vscale x 2 x double> %res
|
||||
}
|
||||
|
||||
define <vscale x 8 x half> @fma_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
|
||||
; CHECK-LABEL: fma_half:
|
||||
define <vscale x 8 x half> @fma_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
|
||||
; CHECK-LABEL: fma_nxv8f16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.h
|
||||
; CHECK-NEXT: fmla z2.h, p0/m, z0.h, z1.h
|
||||
@ -96,8 +246,31 @@ define <vscale x 8 x half> @fma_half(<vscale x 8 x half> %a, <vscale x 8 x half>
|
||||
%r = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c)
|
||||
ret <vscale x 8 x half> %r
|
||||
}
|
||||
define <vscale x 4 x float> @fma_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
|
||||
; CHECK-LABEL: fma_float:
|
||||
|
||||
define <vscale x 4 x half> @fma_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x half> %c) {
|
||||
; CHECK-LABEL: fma_nxv4f16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: fmla z2.h, p0/m, z0.h, z1.h
|
||||
; CHECK-NEXT: mov z0.d, z2.d
|
||||
; CHECK-NEXT: ret
|
||||
%r = call <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x half> %c)
|
||||
ret <vscale x 4 x half> %r
|
||||
}
|
||||
|
||||
define <vscale x 2 x half> @fma_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x half> %c) {
|
||||
; CHECK-LABEL: fma_nxv2f16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fmla z2.h, p0/m, z0.h, z1.h
|
||||
; CHECK-NEXT: mov z0.d, z2.d
|
||||
; CHECK-NEXT: ret
|
||||
%r = call <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x half> %c)
|
||||
ret <vscale x 2 x half> %r
|
||||
}
|
||||
|
||||
define <vscale x 4 x float> @fma_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
|
||||
; CHECK-LABEL: fma_nxv4f32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: fmla z2.s, p0/m, z0.s, z1.s
|
||||
@ -106,8 +279,20 @@ define <vscale x 4 x float> @fma_float(<vscale x 4 x float> %a, <vscale x 4 x fl
|
||||
%r = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
|
||||
ret <vscale x 4 x float> %r
|
||||
}
|
||||
define <vscale x 2 x double> @fma_double_1(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
|
||||
; CHECK-LABEL: fma_double_1:
|
||||
|
||||
define <vscale x 2 x float> @fma_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, <vscale x 2 x float> %c) {
|
||||
; CHECK-LABEL: fma_nxv2f32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fmla z2.s, p0/m, z0.s, z1.s
|
||||
; CHECK-NEXT: mov z0.d, z2.d
|
||||
; CHECK-NEXT: ret
|
||||
%r = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, <vscale x 2 x float> %c)
|
||||
ret <vscale x 2 x float> %r
|
||||
}
|
||||
|
||||
define <vscale x 2 x double> @fma_nxv2f64_1(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
|
||||
; CHECK-LABEL: fma_nxv2f64_1:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fmla z2.d, p0/m, z0.d, z1.d
|
||||
@ -116,8 +301,9 @@ define <vscale x 2 x double> @fma_double_1(<vscale x 2 x double> %a, <vscale x 2
|
||||
%r = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c)
|
||||
ret <vscale x 2 x double> %r
|
||||
}
|
||||
define <vscale x 2 x double> @fma_double_2(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
|
||||
; CHECK-LABEL: fma_double_2:
|
||||
|
||||
define <vscale x 2 x double> @fma_nxv2f64_2(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
|
||||
; CHECK-LABEL: fma_nxv2f64_2:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fmla z2.d, p0/m, z1.d, z0.d
|
||||
@ -126,8 +312,9 @@ define <vscale x 2 x double> @fma_double_2(<vscale x 2 x double> %a, <vscale x 2
|
||||
%r = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> %b, <vscale x 2 x double> %a, <vscale x 2 x double> %c)
|
||||
ret <vscale x 2 x double> %r
|
||||
}
|
||||
define <vscale x 2 x double> @fma_double_3(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
|
||||
; CHECK-LABEL: fma_double_3:
|
||||
|
||||
define <vscale x 2 x double> @fma_nxv2f64_3(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
|
||||
; CHECK-LABEL: fma_nxv2f64_3:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fmla z0.d, p0/m, z2.d, z1.d
|
||||
@ -231,7 +418,10 @@ declare <vscale x 2 x double> @llvm.aarch64.sve.frsqrts.x.nxv2f64(<vscale x 2 x
|
||||
|
||||
declare <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
|
||||
declare <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
|
||||
declare <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>)
|
||||
declare <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
|
||||
declare <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, <vscale x 4 x half>)
|
||||
declare <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x half>)
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>) #2
|
||||
|
Loading…
Reference in New Issue
Block a user