
[AArch64][SVE] Break false dependencies for inactive lanes of unary operations

Differential Revision: https://reviews.llvm.org/D105889
Author: Bradley Smith
Date:   2021-07-08 16:12:54 +00:00
parent 81afdbc83c
commit 5e51e7ed64
10 changed files with 1437 additions and 30 deletions
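
In short: merging (/m) unary SVE instructions leave inactive lanes untouched, so the destination register is tied to the passthru operand. When the passthru is undef, or the governing predicate is all active, that tie only manufactures a dependency on whatever the destination register last held. This patch adds a DestructiveUnaryPassthru pseudo type plus per-instruction _UNDEF pseudos; instruction selection picks them in exactly those two cases, and pseudo expansion breaks the dependency with a MOVPRFX when the destination and source registers differ. A minimal C++ sketch of the emitted sequence, using a hypothetical helper and register-name strings (not LLVM's APIs), for a .b-sized sqabs:

#include <string>
#include <vector>

// Illustrative helper, not an LLVM API: the instruction sequence the
// DestructiveUnaryPassthru expansion produces for a predicated unary op.
std::vector<std::string> expandUnaryPassthru(const std::string &Zd,
                                             const std::string &Pg,
                                             const std::string &Zs) {
  std::vector<std::string> Seq;
  if (Zd != Zs)
    // Initialise the destination from the source so the merging unary op
    // never reads Zd's stale contents: the false dependency being broken.
    Seq.push_back("movprfx " + Zd + ", " + Zs);
  Seq.push_back("sqabs " + Zd + ".b, " + Pg + "/m, " + Zs + ".b");
  return Seq;
}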


@@ -466,6 +466,9 @@ bool AArch64ExpandPseudo::expand_DestructiveOp(
   case AArch64::DestructiveBinaryImm:
     std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
     break;
+  case AArch64::DestructiveUnaryPassthru:
+    std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(2, 3, 3);
+    break;
   case AArch64::DestructiveTernaryCommWithRev:
     std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4);
     if (DstReg == MI.getOperand(3).getReg()) {
@@ -494,6 +497,7 @@ bool AArch64ExpandPseudo::expand_DestructiveOp(
                      DstReg != MI.getOperand(DOPIdx).getReg() ||
                      MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
     break;
+  case AArch64::DestructiveUnaryPassthru:
   case AArch64::DestructiveBinaryImm:
     DOPRegIsUnique = true;
     break;
@@ -578,6 +582,11 @@ bool AArch64ExpandPseudo::expand_DestructiveOp(
           .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));
 
   switch (DType) {
+  case AArch64::DestructiveUnaryPassthru:
+    DOP.addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
+        .add(MI.getOperand(PredIdx))
+        .add(MI.getOperand(SrcIdx));
+    break;
   case AArch64::DestructiveBinaryImm:
   case AArch64::DestructiveBinaryComm:
   case AArch64::DestructiveBinaryCommWithRev:
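
For orientation, the (PredIdx, DOPIdx, SrcIdx) = (2, 3, 3) tuple above reflects the operand order of the new pseudos (see the PredOneOpPassthruPseudo class added in the TableGen changes below). A descriptive C++ sketch, with field names of my choosing rather than LLVM's:

// Operand layout of a DestructiveUnaryPassthru pseudo, such as the
// _UNDEF pseudos defined below. The source doubles as the destructive
// operand, hence DOPIdx == SrcIdx == 3.
struct UnaryPassthruOperands {
  unsigned Zd;       // operand 0: destination
  unsigned Passthru; // operand 1: passthru, dead whenever the pseudo is selected
  unsigned Pg;       // operand 2: governing predicate (PredIdx)
  unsigned Zs;       // operand 3: source (DOPIdx and SrcIdx)
};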


@@ -36,6 +36,7 @@ def DestructiveBinary : DestructiveInstTypeEnum<5>;
 def DestructiveBinaryComm : DestructiveInstTypeEnum<6>;
 def DestructiveBinaryCommWithRev : DestructiveInstTypeEnum<7>;
 def DestructiveTernaryCommWithRev : DestructiveInstTypeEnum<8>;
+def DestructiveUnaryPassthru : DestructiveInstTypeEnum<9>;
 
 class FalseLanesEnum<bits<2> val> {
   bits<2> Value = val;


@@ -482,6 +482,7 @@ enum DestructiveInstType {
   DestructiveBinaryComm = TSFLAG_DESTRUCTIVE_INST_TYPE(0x6),
   DestructiveBinaryCommWithRev = TSFLAG_DESTRUCTIVE_INST_TYPE(0x7),
   DestructiveTernaryCommWithRev = TSFLAG_DESTRUCTIVE_INST_TYPE(0x8),
+  DestructiveUnaryPassthru = TSFLAG_DESTRUCTIVE_INST_TYPE(0x9),
 };
 
 enum FalseLaneType {


@@ -1715,12 +1715,12 @@ let Predicates = [HasSVE] in {
   def : Pat<(nxv2i64 (AArch64ld1rq_z PPR:$gp, (add GPR64:$base, (i64 simm4s16:$imm)))),
             (LD1RQ_D_IMM $gp, $base, simm4s16:$imm)>;
 
-  def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (SXTW_ZPmZ_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
-  def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i16), (SXTH_ZPmZ_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
-  def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i8), (SXTB_ZPmZ_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
-  def : Pat<(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i16), (SXTH_ZPmZ_S (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$Zs)>;
-  def : Pat<(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i8), (SXTB_ZPmZ_S (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$Zs)>;
-  def : Pat<(sext_inreg (nxv8i16 ZPR:$Zs), nxv8i8), (SXTB_ZPmZ_H (IMPLICIT_DEF), (PTRUE_H 31), ZPR:$Zs)>;
+  def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (SXTW_ZPmZ_UNDEF_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
+  def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i16), (SXTH_ZPmZ_UNDEF_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
+  def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i8), (SXTB_ZPmZ_UNDEF_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
+  def : Pat<(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i16), (SXTH_ZPmZ_UNDEF_S (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$Zs)>;
+  def : Pat<(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i8), (SXTB_ZPmZ_UNDEF_S (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$Zs)>;
+  def : Pat<(sext_inreg (nxv8i16 ZPR:$Zs), nxv8i8), (SXTB_ZPmZ_UNDEF_H (IMPLICIT_DEF), (PTRUE_H 31), ZPR:$Zs)>;
 
   // General case that we ideally never want to match.
   def : Pat<(vscale GPR64:$scale), (MADDXrrr (UBFMXri (RDVLI_XI 1), 4, 63), $scale, XZR)>;


@@ -340,6 +340,15 @@ class SVE_1_Op_Passthru_Pat<ValueType vtd, SDPatternOperator op, ValueType pg,
   : Pat<(vtd (op pg:$Op1, vts:$Op2, vtd:$Op3)),
         (inst $Op3, $Op1, $Op2)>;
 
+multiclass SVE_1_Op_PassthruUndef_Pat<ValueType vtd, SDPatternOperator op, ValueType pg,
+                                      ValueType vts, Instruction inst> {
+  def : Pat<(vtd (op pg:$Op1, vts:$Op2, (vtd undef))),
+            (inst (IMPLICIT_DEF), $Op1, $Op2)>;
+  def : Pat<(vtd (op (pg (SVEAllActive:$Op1)), vts:$Op2, vtd:$Op3)),
+            (inst $Op3, $Op1, $Op2)>;
+}
+
 // Used to match FP_ROUND_MERGE_PASSTHRU, which has an additional flag for the
 // type of rounding. This is matched by timm0_1 in pattern below and ignored.
 class SVE_1_Op_Passthru_Round_Pat<ValueType vtd, SDPatternOperator op, ValueType pg,
@@ -389,6 +398,14 @@ class SVE_3_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
   : Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3)),
         (inst $Op1, $Op2, $Op3)>;
 
+multiclass SVE_3_Op_Undef_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
+                              ValueType vt2, ValueType vt3, Instruction inst> {
+  def : Pat<(vtd (op (vt1 undef), vt2:$Op1, vt3:$Op2)),
+            (inst (IMPLICIT_DEF), $Op1, $Op2)>;
+  def : Pat<(vtd (op vt1:$Op1, (vt2 (SVEAllActive:$Op2)), vt3:$Op3)),
+            (inst $Op1, $Op2, $Op3)>;
+}
+
 class SVE_4_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
                    ValueType vt2, ValueType vt3, ValueType vt4,
                    Instruction inst>
@@ -447,6 +464,14 @@ class SVE_InReg_Extend<ValueType vt, SDPatternOperator op, ValueType pt,
   : Pat<(vt (op pt:$Pg, vt:$Src, inreg_vt, vt:$PassThru)),
         (inst $PassThru, $Pg, $Src)>;
 
+multiclass SVE_InReg_Extend_PassthruUndef<ValueType vt, SDPatternOperator op, ValueType pt,
+                                          ValueType inreg_vt, Instruction inst> {
+  def : Pat<(vt (op pt:$Pg, vt:$Src, inreg_vt, (vt undef))),
+            (inst (IMPLICIT_DEF), $Pg, $Src)>;
+  def : Pat<(vt (op (pt (SVEAllActive:$Pg)), vt:$Src, inreg_vt, vt:$PassThru)),
+            (inst $PassThru, $Pg, $Src)>;
+}
+
 class SVE_Shift_DupImm_Pred_Pat<ValueType vt, SDPatternOperator op,
                                 ValueType pt, ValueType it,
                                 ComplexPattern cast, Instruction inst>
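
The three *Undef pattern multiclasses above (SVE_1_Op_PassthruUndef_Pat, SVE_3_Op_Undef_Pat, SVE_InReg_Extend_PassthruUndef) encode one selection rule; restated as a hedged C++ sketch, where the enum and function are illustrative only and not part of LLVM:

// When is the MOVPRFX-expanding _UNDEF pseudo selected instead of the
// plain merging instruction? In exactly the two cases the patterns match.
enum class PassthruKind { Undef, Live };

bool selectUndefPseudo(PassthruKind Passthru, bool PredIsAllActive) {
  // Case 1: the passthru is undef, so tying the destination to it would
  //         only manufacture a false dependency on a stale register.
  // Case 2: the predicate is all active, so every lane is written and the
  //         passthru value is dead anyway.
  return Passthru == PassthruKind::Undef || PredIsAllActive;
}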
@@ -524,6 +549,15 @@ let hasNoSchedulingInfo = 1 in {
   }
 }
 
+//
+// Pseudos for passthru operands
+//
+let hasNoSchedulingInfo = 1 in {
+  class PredOneOpPassthruPseudo<string name, ZPRRegOp zprty>
+  : SVEPseudo2Instr<name, 0>,
+    Pseudo<(outs zprty:$Zd), (ins zprty:$Passthru, PPR3bAny:$Pg, zprty:$Zs), []>;
+}
+
 //===----------------------------------------------------------------------===//
 // SVE Predicate Misc Group
 //===----------------------------------------------------------------------===//
@@ -3252,26 +3286,46 @@ class sve2_int_un_pred_arit<bits<2> sz, bit Q, bits<2> opc,
   let Inst{4-0} = Zd;
 
   let Constraints = "$Zd = $_Zd";
-  let DestructiveInstType = DestructiveOther;
+  let DestructiveInstType = DestructiveUnaryPassthru;
   let ElementSize = zprty.ElementSize;
 }
 
 multiclass sve2_int_un_pred_arit_s<bits<3> opc, string asm,
                                    SDPatternOperator op> {
-  def _S : sve2_int_un_pred_arit<0b10, opc{2}, opc{1-0}, asm, ZPR32>;
+  def _S : sve2_int_un_pred_arit<0b10, opc{2}, opc{1-0}, asm, ZPR32>,
+           SVEPseudo2Instr<NAME # _S, 1>;
+
   def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+
+  def _UNDEF_S : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+
+  defm : SVE_3_Op_Undef_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Pseudo>(NAME # _UNDEF_S)>;
 }
 
 multiclass sve2_int_un_pred_arit<bits<3> opc, string asm, SDPatternOperator op> {
-  def _B : sve2_int_un_pred_arit<0b00, opc{2}, opc{1-0}, asm, ZPR8>;
-  def _H : sve2_int_un_pred_arit<0b01, opc{2}, opc{1-0}, asm, ZPR16>;
-  def _S : sve2_int_un_pred_arit<0b10, opc{2}, opc{1-0}, asm, ZPR32>;
-  def _D : sve2_int_un_pred_arit<0b11, opc{2}, opc{1-0}, asm, ZPR64>;
+  def _B : sve2_int_un_pred_arit<0b00, opc{2}, opc{1-0}, asm, ZPR8>,
+           SVEPseudo2Instr<NAME # _B, 1>;
+  def _H : sve2_int_un_pred_arit<0b01, opc{2}, opc{1-0}, asm, ZPR16>,
+           SVEPseudo2Instr<NAME # _H, 1>;
+  def _S : sve2_int_un_pred_arit<0b10, opc{2}, opc{1-0}, asm, ZPR32>,
+           SVEPseudo2Instr<NAME # _S, 1>;
+  def _D : sve2_int_un_pred_arit<0b11, opc{2}, opc{1-0}, asm, ZPR64>,
+           SVEPseudo2Instr<NAME # _D, 1>;
 
   def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
   def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
   def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
   def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+  def _UNDEF_B : PredOneOpPassthruPseudo<NAME # _B, ZPR8>;
+  def _UNDEF_H : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
+  def _UNDEF_S : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+  def _UNDEF_D : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+
+  defm : SVE_3_Op_Undef_Pat<nxv16i8, op, nxv16i8, nxv16i1, nxv16i8, !cast<Pseudo>(NAME # _UNDEF_B)>;
+  defm : SVE_3_Op_Undef_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8i16, !cast<Pseudo>(NAME # _UNDEF_H)>;
+  defm : SVE_3_Op_Undef_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Pseudo>(NAME # _UNDEF_S)>;
+  defm : SVE_3_Op_Undef_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Pseudo>(NAME # _UNDEF_D)>;
 }
 
//===----------------------------------------------------------------------===//
@@ -3872,67 +3926,122 @@ class sve_int_un_pred_arit<bits<2> sz8_64, bits<4> opc,
   let Inst{4-0} = Zd;
 
   let Constraints = "$Zd = $_Zd";
-  let DestructiveInstType = DestructiveOther;
+  let DestructiveInstType = DestructiveUnaryPassthru;
   let ElementSize = zprty.ElementSize;
 }
 
 multiclass sve_int_un_pred_arit_0<bits<3> opc, string asm,
                                   SDPatternOperator op> {
-  def _B : sve_int_un_pred_arit<0b00, { opc, 0b0 }, asm, ZPR8>;
-  def _H : sve_int_un_pred_arit<0b01, { opc, 0b0 }, asm, ZPR16>;
-  def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>;
-  def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>;
+  def _B : sve_int_un_pred_arit<0b00, { opc, 0b0 }, asm, ZPR8>,
+           SVEPseudo2Instr<NAME # _B, 1>;
+  def _H : sve_int_un_pred_arit<0b01, { opc, 0b0 }, asm, ZPR16>,
+           SVEPseudo2Instr<NAME # _H, 1>;
+  def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>,
+           SVEPseudo2Instr<NAME # _S, 1>;
+  def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>,
+           SVEPseudo2Instr<NAME # _D, 1>;
 
   def : SVE_1_Op_Passthru_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
   def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
   def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
   def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+  def _UNDEF_B : PredOneOpPassthruPseudo<NAME # _B, ZPR8>;
+  def _UNDEF_H : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
+  def _UNDEF_S : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+  def _UNDEF_D : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+
+  defm : SVE_1_Op_PassthruUndef_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Pseudo>(NAME # _UNDEF_B)>;
+  defm : SVE_1_Op_PassthruUndef_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Pseudo>(NAME # _UNDEF_H)>;
+  defm : SVE_1_Op_PassthruUndef_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Pseudo>(NAME # _UNDEF_S)>;
+  defm : SVE_1_Op_PassthruUndef_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Pseudo>(NAME # _UNDEF_D)>;
 }
 
 multiclass sve_int_un_pred_arit_0_h<bits<3> opc, string asm,
                                     SDPatternOperator op> {
-  def _H : sve_int_un_pred_arit<0b01, { opc, 0b0 }, asm, ZPR16>;
-  def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>;
-  def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>;
+  def _H : sve_int_un_pred_arit<0b01, { opc, 0b0 }, asm, ZPR16>,
+           SVEPseudo2Instr<NAME # _H, 1>;
+  def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>,
+           SVEPseudo2Instr<NAME # _S, 1>;
+  def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>,
+           SVEPseudo2Instr<NAME # _D, 1>;
 
   def : SVE_InReg_Extend<nxv8i16, op, nxv8i1, nxv8i8, !cast<Instruction>(NAME # _H)>;
   def : SVE_InReg_Extend<nxv4i32, op, nxv4i1, nxv4i8, !cast<Instruction>(NAME # _S)>;
   def : SVE_InReg_Extend<nxv2i64, op, nxv2i1, nxv2i8, !cast<Instruction>(NAME # _D)>;
+
+  def _UNDEF_H : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
+  def _UNDEF_S : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+  def _UNDEF_D : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+
+  defm : SVE_InReg_Extend_PassthruUndef<nxv8i16, op, nxv8i1, nxv8i8, !cast<Pseudo>(NAME # _UNDEF_H)>;
+  defm : SVE_InReg_Extend_PassthruUndef<nxv4i32, op, nxv4i1, nxv4i8, !cast<Pseudo>(NAME # _UNDEF_S)>;
+  defm : SVE_InReg_Extend_PassthruUndef<nxv2i64, op, nxv2i1, nxv2i8, !cast<Pseudo>(NAME # _UNDEF_D)>;
 }
 
 multiclass sve_int_un_pred_arit_0_w<bits<3> opc, string asm,
                                     SDPatternOperator op> {
-  def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>;
-  def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>;
+  def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>,
+           SVEPseudo2Instr<NAME # _S, 1>;
+  def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>,
+           SVEPseudo2Instr<NAME # _D, 1>;
 
   def : SVE_InReg_Extend<nxv4i32, op, nxv4i1, nxv4i16, !cast<Instruction>(NAME # _S)>;
   def : SVE_InReg_Extend<nxv2i64, op, nxv2i1, nxv2i16, !cast<Instruction>(NAME # _D)>;
+
+  def _UNDEF_S : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+  def _UNDEF_D : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+
+  defm : SVE_InReg_Extend_PassthruUndef<nxv4i32, op, nxv4i1, nxv4i16, !cast<Pseudo>(NAME # _UNDEF_S)>;
+  defm : SVE_InReg_Extend_PassthruUndef<nxv2i64, op, nxv2i1, nxv2i16, !cast<Pseudo>(NAME # _UNDEF_D)>;
 }
 
 multiclass sve_int_un_pred_arit_0_d<bits<3> opc, string asm,
                                     SDPatternOperator op> {
-  def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>;
+  def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>,
+           SVEPseudo2Instr<NAME # _D, 1>;
 
   def : SVE_InReg_Extend<nxv2i64, op, nxv2i1, nxv2i32, !cast<Instruction>(NAME # _D)>;
+
+  def _UNDEF_D : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+
+  defm : SVE_InReg_Extend_PassthruUndef<nxv2i64, op, nxv2i1, nxv2i32, !cast<Pseudo>(NAME # _UNDEF_D)>;
 }
 
 multiclass sve_int_un_pred_arit_1<bits<3> opc, string asm,
                                   SDPatternOperator op> {
-  def _B : sve_int_un_pred_arit<0b00, { opc, 0b1 }, asm, ZPR8>;
-  def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>;
-  def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>;
-  def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>;
+  def _B : sve_int_un_pred_arit<0b00, { opc, 0b1 }, asm, ZPR8>,
+           SVEPseudo2Instr<NAME # _B, 1>;
+  def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>,
+           SVEPseudo2Instr<NAME # _H, 1>;
+  def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>,
+           SVEPseudo2Instr<NAME # _S, 1>;
+  def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>,
+           SVEPseudo2Instr<NAME # _D, 1>;
 
   def : SVE_1_Op_Passthru_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
   def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
   def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
   def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+  def _UNDEF_B : PredOneOpPassthruPseudo<NAME # _B, ZPR8>;
+  def _UNDEF_H : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
+  def _UNDEF_S : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+  def _UNDEF_D : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+
+  defm : SVE_1_Op_PassthruUndef_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Pseudo>(NAME # _UNDEF_B)>;
+  defm : SVE_1_Op_PassthruUndef_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Pseudo>(NAME # _UNDEF_H)>;
+  defm : SVE_1_Op_PassthruUndef_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Pseudo>(NAME # _UNDEF_S)>;
+  defm : SVE_1_Op_PassthruUndef_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Pseudo>(NAME # _UNDEF_D)>;
 }
 
 multiclass sve_int_un_pred_arit_1_fp<bits<3> opc, string asm, SDPatternOperator op> {
-  def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>;
-  def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>;
-  def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>;
+  def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>,
+           SVEPseudo2Instr<NAME # _H, 1>;
+  def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>,
+           SVEPseudo2Instr<NAME # _S, 1>;
+  def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>,
+           SVEPseudo2Instr<NAME # _D, 1>;
 
   def : SVE_1_Op_Passthru_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
   def : SVE_1_Op_Passthru_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
@@ -3940,6 +4049,17 @@ multiclass sve_int_un_pred_arit_1_fp<bits<3> opc, string asm, SDPatternOperator
   def : SVE_1_Op_Passthru_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
   def : SVE_1_Op_Passthru_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
   def : SVE_1_Op_Passthru_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
+
+  def _UNDEF_H : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
+  def _UNDEF_S : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+  def _UNDEF_D : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+
+  defm : SVE_1_Op_PassthruUndef_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
+  defm : SVE_1_Op_PassthruUndef_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
+  defm : SVE_1_Op_PassthruUndef_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
+  defm : SVE_1_Op_PassthruUndef_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Pseudo>(NAME # _UNDEF_S)>;
+  defm : SVE_1_Op_PassthruUndef_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Pseudo>(NAME # _UNDEF_S)>;
+  defm : SVE_1_Op_PassthruUndef_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Pseudo>(NAME # _UNDEF_D)>;
 }
 
//===----------------------------------------------------------------------===//


@@ -33,6 +33,7 @@ define <vscale x 2 x i64> @sti32ldi32ext(<vscale x 2 x i32>* nocapture %P, <vsca
 ; CHECK-LABEL: sti32ldi32ext:
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z1, z0
 ; CHECK-NEXT: sxtw z1.d, p0/m, z0.d
 ; CHECK-NEXT: st1w { z0.d }, p0, [x0]
 ; CHECK-NEXT: mov z0.d, z1.d


@@ -32,6 +32,7 @@ define <vscale x 2 x i64> @no_dag_combine_sext(<vscale x 2 x i1> %pg,
 ; CHECK-LABEL: no_dag_combine_sext
 ; CHECK: ld1b { z1.d }, p0/z, [z0.d, #16]
 ; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
 ; CHECK-NEXT: sxtb z0.d, p0/m, z1.d
 ; CHECK-NEXT: st1b { z1.d }, p1, [x0]
 ; CHECK-NEXT: ret


@@ -9,6 +9,7 @@ define <vscale x 2 x i64> @masked_sgather_sext(i8* %base, <vscale x 2 x i64> %of
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x0, z0.d]
 ; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z2, z0
 ; CHECK-NEXT: sxtb z2.d, p0/m, z0.d
 ; CHECK-NEXT: add z0.d, z0.d, z1.d
 ; CHECK-NEXT: sxtb z0.d, p0/m, z0.d

File diff suppressed because it is too large


@@ -0,0 +1,273 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s
target triple = "aarch64-unknown-linux-gnu"
;
; SQABS (sve2_int_un_pred_arit)
;
; Check movprfx is not inserted when dstReg == srcReg
define <vscale x 16 x i8> @sqabs_i8_dupreg(<vscale x 16 x i8> %a) #0 {
; CHECK-LABEL: sqabs_i8_dupreg:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: sqabs z0.b, p0/m, z0.b
; CHECK-NEXT: ret
%pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a)
ret <vscale x 16 x i8> %ret
}
; Check movprfx is inserted when passthru is undef
define <vscale x 16 x i8> @sqabs_i8_undef(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: sqabs_i8_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: movprfx z0, z1
; CHECK-NEXT: sqabs z0.b, p0/m, z1.b
; CHECK-NEXT: ret
%pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
ret <vscale x 16 x i8> %ret
}
; Check movprfx is inserted when predicate is all active, making the passthru dead
define <vscale x 16 x i8> @sqabs_i8_active(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: sqabs_i8_active:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: movprfx z0, z1
; CHECK-NEXT: sqabs z0.b, p0/m, z1.b
; CHECK-NEXT: ret
%pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
ret <vscale x 16 x i8> %ret
}
; Check movprfx is not inserted when predicate is not all active, making the passthru used
define <vscale x 16 x i8> @sqabs_i8_not_active(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: sqabs_i8_not_active:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: sqabs z0.b, p0/m, z1.b
; CHECK-NEXT: ret
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
%ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg.to, <vscale x 16 x i8> %b)
ret <vscale x 16 x i8> %ret
}
define <vscale x 8 x i16> @sqabs_i16_dupreg(<vscale x 8 x i16> %a) #0 {
; CHECK-LABEL: sqabs_i16_dupreg:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: sqabs z0.h, p0/m, z0.h
; CHECK-NEXT: ret
%pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
ret <vscale x 8 x i16> %ret
}
define <vscale x 8 x i16> @sqabs_i16_undef(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: sqabs_i16_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: movprfx z0, z1
; CHECK-NEXT: sqabs z0.h, p0/m, z1.h
; CHECK-NEXT: ret
%pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b)
ret <vscale x 8 x i16> %ret
}
define <vscale x 8 x i16> @sqabs_i16_active(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: sqabs_i16_active:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: movprfx z0, z1
; CHECK-NEXT: sqabs z0.h, p0/m, z1.h
; CHECK-NEXT: ret
%pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b)
ret <vscale x 8 x i16> %ret
}
define <vscale x 8 x i16> @sqabs_i16_not_active(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: sqabs_i16_not_active:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: sqabs z0.h, p0/m, z1.h
; CHECK-NEXT: ret
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
%pg.from = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg.to)
%ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg.from, <vscale x 8 x i16> %b)
ret <vscale x 8 x i16> %ret
}
define <vscale x 4 x i32> @sqabs_i32_dupreg(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: sqabs_i32_dupreg:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: sqabs z0.s, p0/m, z0.s
; CHECK-NEXT: ret
%pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a)
ret <vscale x 4 x i32> %ret
}
define <vscale x 4 x i32> @sqabs_i32_undef(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: sqabs_i32_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: movprfx z0, z1
; CHECK-NEXT: sqabs z0.s, p0/m, z1.s
; CHECK-NEXT: ret
%pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
ret <vscale x 4 x i32> %ret
}
define <vscale x 4 x i32> @sqabs_i32_active(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: sqabs_i32_active:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: movprfx z0, z1
; CHECK-NEXT: sqabs z0.s, p0/m, z1.s
; CHECK-NEXT: ret
%pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
ret <vscale x 4 x i32> %ret
}
define <vscale x 4 x i32> @sqabs_i32_not_active(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: sqabs_i32_not_active:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: sqabs z0.s, p0/m, z1.s
; CHECK-NEXT: ret
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
%pg.from = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.to)
%ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg.from, <vscale x 4 x i32> %b)
ret <vscale x 4 x i32> %ret
}
define <vscale x 2 x i64> @sqabs_i64_dupreg(<vscale x 2 x i64> %a) #0 {
; CHECK-LABEL: sqabs_i64_dupreg:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: sqabs z0.d, p0/m, z0.d
; CHECK-NEXT: ret
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a)
ret <vscale x 2 x i64> %ret
}
define <vscale x 2 x i64> @sqabs_i64_undef(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: sqabs_i64_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: movprfx z0, z1
; CHECK-NEXT: sqabs z0.d, p0/m, z1.d
; CHECK-NEXT: ret
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
ret <vscale x 2 x i64> %ret
}
define <vscale x 2 x i64> @sqabs_i64_active(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: sqabs_i64_active:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: movprfx z0, z1
; CHECK-NEXT: sqabs z0.d, p0/m, z1.d
; CHECK-NEXT: ret
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
ret <vscale x 2 x i64> %ret
}
define <vscale x 2 x i64> @sqabs_i64_not_active(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: sqabs_i64_not_active:
; CHECK: // %bb.0:
; CHECK: sqabs z0.d, p0/m, z1.d
; CHECK-NEXT: ret
%ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
ret <vscale x 2 x i64> %ret
}
;
; URECPE (sve2_int_un_pred_arit_s)
;
define <vscale x 4 x i32> @urecpe_i32_dupreg(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: urecpe_i32_dupreg:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: urecpe z0.s, p0/m, z0.s
; CHECK-NEXT: ret
%pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a)
ret <vscale x 4 x i32> %ret
}
define <vscale x 4 x i32> @urecpe_i32_undef(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: urecpe_i32_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: movprfx z0, z1
; CHECK-NEXT: urecpe z0.s, p0/m, z1.s
; CHECK-NEXT: ret
%pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
ret <vscale x 4 x i32> %ret
}
define <vscale x 4 x i32> @urecpe_i32_active(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: urecpe_i32_active:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: movprfx z0, z1
; CHECK-NEXT: urecpe z0.s, p0/m, z1.s
; CHECK-NEXT: ret
%pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
ret <vscale x 4 x i32> %ret
}
define <vscale x 4 x i32> @urecpe_i32_not_active(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: urecpe_i32_not_active:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: urecpe z0.s, p0/m, z1.s
; CHECK-NEXT: ret
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
%pg.from = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.to)
%ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg.from, <vscale x 4 x i32> %b)
ret <vscale x 4 x i32> %ret
}
declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
attributes #0 = { nounwind "target-features"="+sve2" }