[AArch64][SVE] Break false dependencies for inactive lanes of unary operations
Differential Revision: https://reviews.llvm.org/D105889
parent 81afdbc83c
commit 5e51e7ed64
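
For context: SVE predicated unary instructions such as sqabs and sxtw are destructive, so the destination register also acts as an input that supplies the inactive lanes. When the passthru is undef, or an all-active predicate makes it dead, that input is still architecturally read, leaving a false dependency on whatever last wrote the register. This commit selects such operations to passthru pseudos whose passthru is IMPLICIT_DEF, letting the pseudo expander emit a movprfx that breaks the dependency. A minimal sketch of the effect, modelled on the sqabs tests added below; the function name @sqabs_undef_passthru and the exact assembly schedule are illustrative, not part of the commit:

; A unary op whose passthru (first intrinsic operand) is undef.
; %a arrives in z0 and %b in z1; the result is returned in z0.
define <vscale x 16 x i8> @sqabs_undef_passthru(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
  %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  ; The instruction is destructive: a bare "sqabs z0.b, p0/m, z1.b" still reads
  ; z0 for the inactive lanes even though they are undef here -- a false
  ; dependency on z0's last writer. With this patch the expander instead emits:
  ;   movprfx z0, z1
  ;   sqabs   z0.b, p0/m, z1.b
  ; as checked by the new tests.
  %ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %ret
}

declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
declare <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)

attributes #0 = { nounwind "target-features"="+sve2" }

movprfx is a prefix instruction the hardware can fuse with the following destructive operation, so initializing the destination this way is cheap while making the dependence on the source explicit.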
@@ -466,6 +466,9 @@ bool AArch64ExpandPseudo::expand_DestructiveOp(
   case AArch64::DestructiveBinaryImm:
     std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
     break;
+  case AArch64::DestructiveUnaryPassthru:
+    std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(2, 3, 3);
+    break;
   case AArch64::DestructiveTernaryCommWithRev:
     std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4);
     if (DstReg == MI.getOperand(3).getReg()) {
@@ -494,6 +497,7 @@ bool AArch64ExpandPseudo::expand_DestructiveOp(
         DstReg != MI.getOperand(DOPIdx).getReg() ||
         MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
     break;
+  case AArch64::DestructiveUnaryPassthru:
   case AArch64::DestructiveBinaryImm:
     DOPRegIsUnique = true;
     break;
@@ -578,6 +582,11 @@ bool AArch64ExpandPseudo::expand_DestructiveOp(
             .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));

   switch (DType) {
+  case AArch64::DestructiveUnaryPassthru:
+    DOP.addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
+        .add(MI.getOperand(PredIdx))
+        .add(MI.getOperand(SrcIdx));
+    break;
   case AArch64::DestructiveBinaryImm:
   case AArch64::DestructiveBinaryComm:
   case AArch64::DestructiveBinaryCommWithRev:
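
The three hunks above wire DestructiveUnaryPassthru into the existing movprfx expansion: the operand indices (2, 3, 3) make the single source double as the destructive operand, the register is always safe to prefix (DOPRegIsUnique = true), and the expansion kills the incoming copy of the destination. When the destination already equals the source, no prefix is needed at all. A sketch of that case, modelled on sqabs_i8_dupreg from the new tests; the function name @sqabs_same_reg is illustrative:

; Source %a and result share z0, so the expansion emits no movprfx:
;   ptrue p0.b
;   sqabs z0.b, p0/m, z0.b
define <vscale x 16 x i8> @sqabs_same_reg(<vscale x 16 x i8> %a) #0 {
  %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a)
  ret <vscale x 16 x i8> %ret
}

declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
declare <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)

attributes #0 = { nounwind "target-features"="+sve2" }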
@@ -36,6 +36,7 @@ def DestructiveBinary : DestructiveInstTypeEnum<5>;
 def DestructiveBinaryComm : DestructiveInstTypeEnum<6>;
 def DestructiveBinaryCommWithRev : DestructiveInstTypeEnum<7>;
 def DestructiveTernaryCommWithRev : DestructiveInstTypeEnum<8>;
+def DestructiveUnaryPassthru : DestructiveInstTypeEnum<9>;

 class FalseLanesEnum<bits<2> val> {
   bits<2> Value = val;
@@ -482,6 +482,7 @@ enum DestructiveInstType {
   DestructiveBinaryComm = TSFLAG_DESTRUCTIVE_INST_TYPE(0x6),
   DestructiveBinaryCommWithRev = TSFLAG_DESTRUCTIVE_INST_TYPE(0x7),
   DestructiveTernaryCommWithRev = TSFLAG_DESTRUCTIVE_INST_TYPE(0x8),
+  DestructiveUnaryPassthru = TSFLAG_DESTRUCTIVE_INST_TYPE(0x9),
};

enum FalseLaneType {
@@ -1715,12 +1715,12 @@ let Predicates = [HasSVE] in {
   def : Pat<(nxv2i64 (AArch64ld1rq_z PPR:$gp, (add GPR64:$base, (i64 simm4s16:$imm)))),
             (LD1RQ_D_IMM $gp, $base, simm4s16:$imm)>;

-  def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (SXTW_ZPmZ_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
-  def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i16), (SXTH_ZPmZ_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
-  def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i8),  (SXTB_ZPmZ_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
-  def : Pat<(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i16), (SXTH_ZPmZ_S (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$Zs)>;
-  def : Pat<(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i8),  (SXTB_ZPmZ_S (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$Zs)>;
-  def : Pat<(sext_inreg (nxv8i16 ZPR:$Zs), nxv8i8),  (SXTB_ZPmZ_H (IMPLICIT_DEF), (PTRUE_H 31), ZPR:$Zs)>;
+  def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (SXTW_ZPmZ_UNDEF_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
+  def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i16), (SXTH_ZPmZ_UNDEF_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
+  def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i8),  (SXTB_ZPmZ_UNDEF_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
+  def : Pat<(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i16), (SXTH_ZPmZ_UNDEF_S (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$Zs)>;
+  def : Pat<(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i8),  (SXTB_ZPmZ_UNDEF_S (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$Zs)>;
+  def : Pat<(sext_inreg (nxv8i16 ZPR:$Zs), nxv8i8),  (SXTB_ZPmZ_UNDEF_H (IMPLICIT_DEF), (PTRUE_H 31), ZPR:$Zs)>;

   // General case that we ideally never want to match.
   def : Pat<(vscale GPR64:$scale), (MADDXrrr (UBFMXri (RDVLI_XI 1), 4, 63), $scale, XZR)>;
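
These patterns previously put an IMPLICIT_DEF passthru directly on the real SXT instructions; they now go through the _UNDEF pseudos, so the expander is free to either reuse the source register or prefix a fresh one. A sketch of IR that exercises them, under the assumption that the plain sign extension legalizes to sext_inreg; the function name @sxtw_from_sext is illustrative:

; Legalizes to (sext_inreg nxv2i64, nxv2i32) and now selects SXTW_ZPmZ_UNDEF_D;
; whether a movprfx actually appears depends on the registers the allocator
; picks, as in the updated sti32ldi32ext test below.
define <vscale x 2 x i64> @sxtw_from_sext(<vscale x 2 x i32> %a) #0 {
  %ext = sext <vscale x 2 x i32> %a to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ext
}

attributes #0 = { nounwind "target-features"="+sve" }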
@@ -340,6 +340,15 @@ class SVE_1_Op_Passthru_Pat<ValueType vtd, SDPatternOperator op, ValueType pg,
   : Pat<(vtd (op pg:$Op1, vts:$Op2, vtd:$Op3)),
         (inst $Op3, $Op1, $Op2)>;

+multiclass SVE_1_Op_PassthruUndef_Pat<ValueType vtd, SDPatternOperator op, ValueType pg,
+                                      ValueType vts, Instruction inst> {
+  def : Pat<(vtd (op pg:$Op1, vts:$Op2, (vtd undef))),
+            (inst (IMPLICIT_DEF), $Op1, $Op2)>;
+  def : Pat<(vtd (op (pg (SVEAllActive:$Op1)), vts:$Op2, vtd:$Op3)),
+            (inst $Op3, $Op1, $Op2)>;
+}
+
 // Used to match FP_ROUND_MERGE_PASSTHRU, which has an additional flag for the
 // type of rounding. This is matched by timm0_1 in pattern below and ignored.
 class SVE_1_Op_Passthru_Round_Pat<ValueType vtd, SDPatternOperator op, ValueType pg,
@@ -389,6 +398,14 @@ class SVE_3_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
   : Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3)),
         (inst $Op1, $Op2, $Op3)>;

+multiclass SVE_3_Op_Undef_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
+                              ValueType vt2, ValueType vt3, Instruction inst> {
+  def : Pat<(vtd (op (vt1 undef), vt2:$Op1, vt3:$Op2)),
+            (inst (IMPLICIT_DEF), $Op1, $Op2)>;
+  def : Pat<(vtd (op vt1:$Op1, (vt2 (SVEAllActive:$Op2)), vt3:$Op3)),
+            (inst $Op1, $Op2, $Op3)>;
+}
+
 class SVE_4_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
                    ValueType vt2, ValueType vt3, ValueType vt4,
                    Instruction inst>
@@ -447,6 +464,14 @@ class SVE_InReg_Extend<ValueType vt, SDPatternOperator op, ValueType pt,
   : Pat<(vt (op pt:$Pg, vt:$Src, inreg_vt, vt:$PassThru)),
         (inst $PassThru, $Pg, $Src)>;

+multiclass SVE_InReg_Extend_PassthruUndef<ValueType vt, SDPatternOperator op, ValueType pt,
+                                          ValueType inreg_vt, Instruction inst> {
+  def : Pat<(vt (op pt:$Pg, vt:$Src, inreg_vt, (vt undef))),
+            (inst (IMPLICIT_DEF), $Pg, $Src)>;
+  def : Pat<(vt (op (pt (SVEAllActive:$Pg)), vt:$Src, inreg_vt, vt:$PassThru)),
+            (inst $PassThru, $Pg, $Src)>;
+}
+
 class SVE_Shift_DupImm_Pred_Pat<ValueType vt, SDPatternOperator op,
                                 ValueType pt, ValueType it,
                                 ComplexPattern cast, Instruction inst>
@@ -524,6 +549,15 @@ let hasNoSchedulingInfo = 1 in {
   }
 }

+//
+// Pseudos for passthru operands
+//
+let hasNoSchedulingInfo = 1 in {
+  class PredOneOpPassthruPseudo<string name, ZPRRegOp zprty>
+  : SVEPseudo2Instr<name, 0>,
+    Pseudo<(outs zprty:$Zd), (ins zprty:$Passthru, PPR3bAny:$Pg, zprty:$Zs), []>;
+}
+
 //===----------------------------------------------------------------------===//
 // SVE Predicate Misc Group
 //===----------------------------------------------------------------------===//
@@ -3252,26 +3286,46 @@ class sve2_int_un_pred_arit<bits<2> sz, bit Q, bits<2> opc,
   let Inst{4-0} = Zd;

   let Constraints = "$Zd = $_Zd";
-  let DestructiveInstType = DestructiveOther;
+  let DestructiveInstType = DestructiveUnaryPassthru;
   let ElementSize = zprty.ElementSize;
 }

 multiclass sve2_int_un_pred_arit_s<bits<3> opc, string asm,
                                    SDPatternOperator op> {
-  def _S : sve2_int_un_pred_arit<0b10, opc{2}, opc{1-0}, asm, ZPR32>;
+  def _S : sve2_int_un_pred_arit<0b10, opc{2}, opc{1-0}, asm, ZPR32>,
+           SVEPseudo2Instr<NAME # _S, 1>;

   def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+
+  def _UNDEF_S : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+
+  defm : SVE_3_Op_Undef_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Pseudo>(NAME # _UNDEF_S)>;
 }

 multiclass sve2_int_un_pred_arit<bits<3> opc, string asm, SDPatternOperator op> {
-  def _B : sve2_int_un_pred_arit<0b00, opc{2}, opc{1-0}, asm, ZPR8>;
-  def _H : sve2_int_un_pred_arit<0b01, opc{2}, opc{1-0}, asm, ZPR16>;
-  def _S : sve2_int_un_pred_arit<0b10, opc{2}, opc{1-0}, asm, ZPR32>;
-  def _D : sve2_int_un_pred_arit<0b11, opc{2}, opc{1-0}, asm, ZPR64>;
+  def _B : sve2_int_un_pred_arit<0b00, opc{2}, opc{1-0}, asm, ZPR8>,
+           SVEPseudo2Instr<NAME # _B, 1>;
+  def _H : sve2_int_un_pred_arit<0b01, opc{2}, opc{1-0}, asm, ZPR16>,
+           SVEPseudo2Instr<NAME # _H, 1>;
+  def _S : sve2_int_un_pred_arit<0b10, opc{2}, opc{1-0}, asm, ZPR32>,
+           SVEPseudo2Instr<NAME # _S, 1>;
+  def _D : sve2_int_un_pred_arit<0b11, opc{2}, opc{1-0}, asm, ZPR64>,
+           SVEPseudo2Instr<NAME # _D, 1>;

   def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
   def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
   def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
   def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+  def _UNDEF_B : PredOneOpPassthruPseudo<NAME # _B, ZPR8>;
+  def _UNDEF_H : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
+  def _UNDEF_S : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+  def _UNDEF_D : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+
+  defm : SVE_3_Op_Undef_Pat<nxv16i8, op, nxv16i8, nxv16i1, nxv16i8, !cast<Pseudo>(NAME # _UNDEF_B)>;
+  defm : SVE_3_Op_Undef_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8i16, !cast<Pseudo>(NAME # _UNDEF_H)>;
+  defm : SVE_3_Op_Undef_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Pseudo>(NAME # _UNDEF_S)>;
+  defm : SVE_3_Op_Undef_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Pseudo>(NAME # _UNDEF_D)>;
 }

 //===----------------------------------------------------------------------===//
@@ -3872,67 +3926,122 @@ class sve_int_un_pred_arit<bits<2> sz8_64, bits<4> opc,
   let Inst{4-0} = Zd;

   let Constraints = "$Zd = $_Zd";
-  let DestructiveInstType = DestructiveOther;
+  let DestructiveInstType = DestructiveUnaryPassthru;
   let ElementSize = zprty.ElementSize;
 }

 multiclass sve_int_un_pred_arit_0<bits<3> opc, string asm,
                                   SDPatternOperator op> {
-  def _B : sve_int_un_pred_arit<0b00, { opc, 0b0 }, asm, ZPR8>;
-  def _H : sve_int_un_pred_arit<0b01, { opc, 0b0 }, asm, ZPR16>;
-  def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>;
-  def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>;
+  def _B : sve_int_un_pred_arit<0b00, { opc, 0b0 }, asm, ZPR8>,
+           SVEPseudo2Instr<NAME # _B, 1>;
+  def _H : sve_int_un_pred_arit<0b01, { opc, 0b0 }, asm, ZPR16>,
+           SVEPseudo2Instr<NAME # _H, 1>;
+  def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>,
+           SVEPseudo2Instr<NAME # _S, 1>;
+  def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>,
+           SVEPseudo2Instr<NAME # _D, 1>;

   def : SVE_1_Op_Passthru_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
   def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
   def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
   def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+  def _UNDEF_B : PredOneOpPassthruPseudo<NAME # _B, ZPR8>;
+  def _UNDEF_H : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
+  def _UNDEF_S : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+  def _UNDEF_D : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+
+  defm : SVE_1_Op_PassthruUndef_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Pseudo>(NAME # _UNDEF_B)>;
+  defm : SVE_1_Op_PassthruUndef_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Pseudo>(NAME # _UNDEF_H)>;
+  defm : SVE_1_Op_PassthruUndef_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Pseudo>(NAME # _UNDEF_S)>;
+  defm : SVE_1_Op_PassthruUndef_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Pseudo>(NAME # _UNDEF_D)>;
 }

 multiclass sve_int_un_pred_arit_0_h<bits<3> opc, string asm,
                                     SDPatternOperator op> {
-  def _H : sve_int_un_pred_arit<0b01, { opc, 0b0 }, asm, ZPR16>;
-  def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>;
-  def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>;
+  def _H : sve_int_un_pred_arit<0b01, { opc, 0b0 }, asm, ZPR16>,
+           SVEPseudo2Instr<NAME # _H, 1>;
+  def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>,
+           SVEPseudo2Instr<NAME # _S, 1>;
+  def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>,
+           SVEPseudo2Instr<NAME # _D, 1>;

   def : SVE_InReg_Extend<nxv8i16, op, nxv8i1, nxv8i8, !cast<Instruction>(NAME # _H)>;
   def : SVE_InReg_Extend<nxv4i32, op, nxv4i1, nxv4i8, !cast<Instruction>(NAME # _S)>;
   def : SVE_InReg_Extend<nxv2i64, op, nxv2i1, nxv2i8, !cast<Instruction>(NAME # _D)>;
+
+  def _UNDEF_H : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
+  def _UNDEF_S : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+  def _UNDEF_D : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+
+  defm : SVE_InReg_Extend_PassthruUndef<nxv8i16, op, nxv8i1, nxv8i8, !cast<Pseudo>(NAME # _UNDEF_H)>;
+  defm : SVE_InReg_Extend_PassthruUndef<nxv4i32, op, nxv4i1, nxv4i8, !cast<Pseudo>(NAME # _UNDEF_S)>;
+  defm : SVE_InReg_Extend_PassthruUndef<nxv2i64, op, nxv2i1, nxv2i8, !cast<Pseudo>(NAME # _UNDEF_D)>;
 }

 multiclass sve_int_un_pred_arit_0_w<bits<3> opc, string asm,
                                     SDPatternOperator op> {
-  def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>;
-  def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>;
+  def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>,
+           SVEPseudo2Instr<NAME # _S, 1>;
+  def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>,
+           SVEPseudo2Instr<NAME # _D, 1>;

   def : SVE_InReg_Extend<nxv4i32, op, nxv4i1, nxv4i16, !cast<Instruction>(NAME # _S)>;
   def : SVE_InReg_Extend<nxv2i64, op, nxv2i1, nxv2i16, !cast<Instruction>(NAME # _D)>;
+
+  def _UNDEF_S : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+  def _UNDEF_D : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+
+  defm : SVE_InReg_Extend_PassthruUndef<nxv4i32, op, nxv4i1, nxv4i16, !cast<Pseudo>(NAME # _UNDEF_S)>;
+  defm : SVE_InReg_Extend_PassthruUndef<nxv2i64, op, nxv2i1, nxv2i16, !cast<Pseudo>(NAME # _UNDEF_D)>;
 }

 multiclass sve_int_un_pred_arit_0_d<bits<3> opc, string asm,
                                     SDPatternOperator op> {
-  def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>;
+  def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>,
+           SVEPseudo2Instr<NAME # _D, 1>;

   def : SVE_InReg_Extend<nxv2i64, op, nxv2i1, nxv2i32, !cast<Instruction>(NAME # _D)>;
+
+  def _UNDEF_D : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+
+  defm : SVE_InReg_Extend_PassthruUndef<nxv2i64, op, nxv2i1, nxv2i32, !cast<Pseudo>(NAME # _UNDEF_D)>;
 }

 multiclass sve_int_un_pred_arit_1<bits<3> opc, string asm,
                                   SDPatternOperator op> {
-  def _B : sve_int_un_pred_arit<0b00, { opc, 0b1 }, asm, ZPR8>;
-  def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>;
-  def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>;
-  def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>;
+  def _B : sve_int_un_pred_arit<0b00, { opc, 0b1 }, asm, ZPR8>,
+           SVEPseudo2Instr<NAME # _B, 1>;
+  def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>,
+           SVEPseudo2Instr<NAME # _H, 1>;
+  def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>,
+           SVEPseudo2Instr<NAME # _S, 1>;
+  def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>,
+           SVEPseudo2Instr<NAME # _D, 1>;

   def : SVE_1_Op_Passthru_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
   def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
   def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
   def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+  def _UNDEF_B : PredOneOpPassthruPseudo<NAME # _B, ZPR8>;
+  def _UNDEF_H : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
+  def _UNDEF_S : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+  def _UNDEF_D : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+
+  defm : SVE_1_Op_PassthruUndef_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Pseudo>(NAME # _UNDEF_B)>;
+  defm : SVE_1_Op_PassthruUndef_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Pseudo>(NAME # _UNDEF_H)>;
+  defm : SVE_1_Op_PassthruUndef_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Pseudo>(NAME # _UNDEF_S)>;
+  defm : SVE_1_Op_PassthruUndef_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Pseudo>(NAME # _UNDEF_D)>;
 }

 multiclass sve_int_un_pred_arit_1_fp<bits<3> opc, string asm, SDPatternOperator op> {
-  def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>;
-  def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>;
-  def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>;
+  def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>,
+           SVEPseudo2Instr<NAME # _H, 1>;
+  def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>,
+           SVEPseudo2Instr<NAME # _S, 1>;
+  def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>,
+           SVEPseudo2Instr<NAME # _D, 1>;

   def : SVE_1_Op_Passthru_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
   def : SVE_1_Op_Passthru_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
@@ -3940,6 +4049,17 @@ multiclass sve_int_un_pred_arit_1_fp<bits<3> opc, string asm, SDPatternOperator
   def : SVE_1_Op_Passthru_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
   def : SVE_1_Op_Passthru_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
   def : SVE_1_Op_Passthru_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
+
+  def _UNDEF_H : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
+  def _UNDEF_S : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+  def _UNDEF_D : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+
+  defm : SVE_1_Op_PassthruUndef_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
+  defm : SVE_1_Op_PassthruUndef_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
+  defm : SVE_1_Op_PassthruUndef_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
+  defm : SVE_1_Op_PassthruUndef_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Pseudo>(NAME # _UNDEF_S)>;
+  defm : SVE_1_Op_PassthruUndef_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Pseudo>(NAME # _UNDEF_S)>;
+  defm : SVE_1_Op_PassthruUndef_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Pseudo>(NAME # _UNDEF_D)>;
 }

 //===----------------------------------------------------------------------===//
@@ -33,6 +33,7 @@ define <vscale x 2 x i64> @sti32ldi32ext(<vscale x 2 x i32>* nocapture %P, <vsca
 ; CHECK-LABEL: sti32ldi32ext:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z1, z0
 ; CHECK-NEXT:    sxtw z1.d, p0/m, z0.d
 ; CHECK-NEXT:    st1w { z0.d }, p0, [x0]
 ; CHECK-NEXT:    mov z0.d, z1.d
@@ -32,6 +32,7 @@ define <vscale x 2 x i64> @no_dag_combine_sext(<vscale x 2 x i1> %pg,
 ; CHECK-LABEL: no_dag_combine_sext
 ; CHECK: ld1b { z1.d }, p0/z, [z0.d, #16]
 ; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
 ; CHECK-NEXT: sxtb z0.d, p0/m, z1.d
 ; CHECK-NEXT: st1b { z1.d }, p1, [x0]
 ; CHECK-NEXT: ret
@@ -9,6 +9,7 @@ define <vscale x 2 x i64> @masked_sgather_sext(i8* %base, <vscale x 2 x i64> %of
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x0, z0.d]
 ; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z2, z0
 ; CHECK-NEXT: sxtb z2.d, p0/m, z0.d
 ; CHECK-NEXT: add z0.d, z0.d, z1.d
 ; CHECK-NEXT: sxtb z0.d, p0/m, z0.d
test/CodeGen/AArch64/sve-unary-movprfx.ll (new file, 1000 lines)
File diff suppressed because it is too large.
test/CodeGen/AArch64/sve2-unary-movprfx.ll (new file, 273 lines)
@@ -0,0 +1,273 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+;
+; SQABS (sve2_int_un_pred_arit)
+;
+
+; Check movprfx is not inserted when dstReg == srcReg
+define <vscale x 16 x i8> @sqabs_i8_dupreg(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: sqabs_i8_dupreg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    sqabs z0.b, p0/m, z0.b
+; CHECK-NEXT:    ret
+  %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a)
+  ret <vscale x 16 x i8> %ret
+}
+
+; Check movprfx is inserted when passthru is undef
+define <vscale x 16 x i8> @sqabs_i8_undef(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: sqabs_i8_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    sqabs z0.b, p0/m, z1.b
+; CHECK-NEXT:    ret
+  %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %ret
+}
+
+; Check movprfx is inserted when predicate is all active, making the passthru dead
+define <vscale x 16 x i8> @sqabs_i8_active(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: sqabs_i8_active:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    sqabs z0.b, p0/m, z1.b
+; CHECK-NEXT:    ret
+  %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %ret
+}
+
+; Check movprfx is not inserted when predicate is not all active, making the passthru used
+define <vscale x 16 x i8> @sqabs_i8_not_active(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: sqabs_i8_not_active:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    sqabs z0.b, p0/m, z1.b
+; CHECK-NEXT:    ret
+  %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
+  %ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg.to, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %ret
+}
+
+define <vscale x 8 x i16> @sqabs_i16_dupreg(<vscale x 8 x i16> %a) #0 {
+; CHECK-LABEL: sqabs_i16_dupreg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    sqabs z0.h, p0/m, z0.h
+; CHECK-NEXT:    ret
+  %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
+  ret <vscale x 8 x i16> %ret
+}
+
+define <vscale x 8 x i16> @sqabs_i16_undef(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: sqabs_i16_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    sqabs z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+  %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %ret
+}
+
+define <vscale x 8 x i16> @sqabs_i16_active(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: sqabs_i16_active:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    sqabs z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+  %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %ret
+}
+
+define <vscale x 8 x i16> @sqabs_i16_not_active(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: sqabs_i16_not_active:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    sqabs z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+  %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
+  %pg.from = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg.to)
+  %ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg.from, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %ret
+}
+
+define <vscale x 4 x i32> @sqabs_i32_dupreg(<vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: sqabs_i32_dupreg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    sqabs z0.s, p0/m, z0.s
+; CHECK-NEXT:    ret
+  %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a)
+  ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 4 x i32> @sqabs_i32_undef(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: sqabs_i32_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    sqabs z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+  %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 4 x i32> @sqabs_i32_active(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: sqabs_i32_active:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    sqabs z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+  %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 4 x i32> @sqabs_i32_not_active(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: sqabs_i32_not_active:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    sqabs z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+  %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
+  %pg.from = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.to)
+  %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg.from, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 2 x i64> @sqabs_i64_dupreg(<vscale x 2 x i64> %a) #0 {
+; CHECK-LABEL: sqabs_i64_dupreg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    sqabs z0.d, p0/m, z0.d
+; CHECK-NEXT:    ret
+  %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a)
+  ret <vscale x 2 x i64> %ret
+}
+
+define <vscale x 2 x i64> @sqabs_i64_undef(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: sqabs_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    sqabs z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+  %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %ret
+}
+
+define <vscale x 2 x i64> @sqabs_i64_active(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: sqabs_i64_active:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    sqabs z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+  %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %ret
+}
+
+define <vscale x 2 x i64> @sqabs_i64_not_active(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: sqabs_i64_not_active:
+; CHECK:       // %bb.0:
+; CHECK:    sqabs z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+  %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %ret
+}
+
+;
+; URECPE (sve2_int_un_pred_arit_s)
+;
+
+define <vscale x 4 x i32> @urecpe_i32_dupreg(<vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: urecpe_i32_dupreg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    urecpe z0.s, p0/m, z0.s
+; CHECK-NEXT:    ret
+  %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a)
+  ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 4 x i32> @urecpe_i32_undef(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: urecpe_i32_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    urecpe z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+  %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 4 x i32> @urecpe_i32_active(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: urecpe_i32_active:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    urecpe z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+  %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 4 x i32> @urecpe_i32_not_active(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: urecpe_i32_not_active:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    urecpe z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+  %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
+  %pg.from = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.to)
+  %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg.from, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %ret
+}
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)

attributes #0 = { nounwind "target-features"="+sve2" }