Mirror of https://github.com/RPCS3/llvm-mirror.git (synced 2024-11-26 12:43:36 +01:00)
[SVE] Change definition of reduction ISD nodes to have an SVE vector result type.
The current nodes, AArch64ISD::SMAXV_PRED for example, are defined to return a NEON vector result. This is incorrect because the instructions they represent write a full SVE register, so the nodes are changed to reflect that.

This patch also adds nodes for UADDV_PRED and SADDV_PRED, which unifies the handling of all SVE reductions.

NOTE: Floating-point reductions are already implemented this way, so this patch is essentially making the integer reductions consistent with those.

Differential Revision: https://reviews.llvm.org/D87843
This commit is contained in:
parent 6eee9eb46a
commit 37b56bbc5e
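
To make the effect concrete, here is a small sketch distilled from the test changes below (illustration only, not part of the patch itself): an SVE integer reduction intrinsic such as

  define i8 @smaxv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
    %out = call i8 @llvm.aarch64.sve.smaxv.nxv16i8(<vscale x 16 x i1> %pg,
                                                   <vscale x 16 x i8> %a)
    ret i8 %out
  }

is now combined into an AArch64ISD::SMAXV_PRED node whose result type is the packed SVE type nxv16i8, followed by an extract of element 0, so it selects to

  smaxv b0, p0, z0.b
  fmov w0, s0
  ret

instead of first inserting the reduction into a NEON-typed subregister (the removed SVE_2_Op_Pat_Reduce_To_Neon pattern) and reading the lane back with umov.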
@@ -116,6 +116,27 @@ EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
 /// Value type used for condition codes.
 static const MVT MVT_CC = MVT::i32;

+static inline EVT getPackedSVEVectorVT(EVT VT) {
+  switch (VT.getSimpleVT().SimpleTy) {
+  default:
+    llvm_unreachable("unexpected element type for vector");
+  case MVT::i8:
+    return MVT::nxv16i8;
+  case MVT::i16:
+    return MVT::nxv8i16;
+  case MVT::i32:
+    return MVT::nxv4i32;
+  case MVT::i64:
+    return MVT::nxv2i64;
+  case MVT::f16:
+    return MVT::nxv8f16;
+  case MVT::f32:
+    return MVT::nxv4f32;
+  case MVT::f64:
+    return MVT::nxv2f64;
+  }
+}
+
 /// Returns true if VT's elements occupy the lowest bit positions of its
 /// associated register class without any intervening space.
 ///
@@ -1587,6 +1608,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
     MAKE_CASE(AArch64ISD::UMINV)
     MAKE_CASE(AArch64ISD::SMAXV)
     MAKE_CASE(AArch64ISD::UMAXV)
+    MAKE_CASE(AArch64ISD::SADDV_PRED)
+    MAKE_CASE(AArch64ISD::UADDV_PRED)
     MAKE_CASE(AArch64ISD::SMAXV_PRED)
     MAKE_CASE(AArch64ISD::UMAXV_PRED)
     MAKE_CASE(AArch64ISD::SMINV_PRED)
@@ -12221,34 +12244,6 @@ static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N,
                      DAG.getConstant(0, dl, MVT::i64));
 }

-static SDValue LowerSVEIntReduction(SDNode *N, unsigned Opc,
-                                    SelectionDAG &DAG) {
-  SDLoc dl(N);
-  LLVMContext &Ctx = *DAG.getContext();
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-
-  EVT VT = N->getValueType(0);
-  SDValue Pred = N->getOperand(1);
-  SDValue Data = N->getOperand(2);
-  EVT DataVT = Data.getValueType();
-
-  if (DataVT.getVectorElementType().isScalarInteger() &&
-      (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)) {
-    if (!TLI.isTypeLegal(DataVT))
-      return SDValue();
-
-    EVT OutputVT = EVT::getVectorVT(Ctx, VT,
-      AArch64::NeonBitsPerVector / VT.getSizeInBits());
-    SDValue Reduce = DAG.getNode(Opc, dl, OutputVT, Pred, Data);
-    SDValue Zero = DAG.getConstant(0, dl, MVT::i64);
-    SDValue Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Reduce, Zero);
-
-    return Result;
-  }
-
-  return SDValue();
-}
-
 static SDValue LowerSVEIntrinsicIndex(SDNode *N, SelectionDAG &DAG) {
   SDLoc DL(N);
   SDValue Op1 = N->getOperand(1);
@@ -12392,6 +12387,25 @@ static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op,
   return DAG.getZExtOrTrunc(Res, DL, VT);
 }

+static SDValue combineSVEReductionInt(SDNode *N, unsigned Opc,
+                                      SelectionDAG &DAG) {
+  SDLoc DL(N);
+
+  SDValue Pred = N->getOperand(1);
+  SDValue VecToReduce = N->getOperand(2);
+
+  // NOTE: The integer reduction's result type is not always linked to the
+  // operand's element type so we construct it from the intrinsic's result type.
+  EVT ReduceVT = getPackedSVEVectorVT(N->getValueType(0));
+  SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, VecToReduce);
+
+  // SVE reductions set the whole vector register with the first element
+  // containing the reduction result, which we'll now extract.
+  SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
+                     Zero);
+}
+
 static SDValue combineSVEReductionFP(SDNode *N, unsigned Opc,
                                      SelectionDAG &DAG) {
   SDLoc DL(N);
@@ -12505,20 +12519,28 @@ static SDValue performIntrinsicCombine(SDNode *N,
   case Intrinsic::aarch64_crc32h:
   case Intrinsic::aarch64_crc32ch:
     return tryCombineCRC32(0xffff, N, DAG);
+  case Intrinsic::aarch64_sve_saddv:
+    // There is no i64 version of SADDV because the sign is irrelevant.
+    if (N->getOperand(2)->getValueType(0).getVectorElementType() == MVT::i64)
+      return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG);
+    else
+      return combineSVEReductionInt(N, AArch64ISD::SADDV_PRED, DAG);
+  case Intrinsic::aarch64_sve_uaddv:
+    return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG);
   case Intrinsic::aarch64_sve_smaxv:
-    return LowerSVEIntReduction(N, AArch64ISD::SMAXV_PRED, DAG);
+    return combineSVEReductionInt(N, AArch64ISD::SMAXV_PRED, DAG);
   case Intrinsic::aarch64_sve_umaxv:
-    return LowerSVEIntReduction(N, AArch64ISD::UMAXV_PRED, DAG);
+    return combineSVEReductionInt(N, AArch64ISD::UMAXV_PRED, DAG);
   case Intrinsic::aarch64_sve_sminv:
-    return LowerSVEIntReduction(N, AArch64ISD::SMINV_PRED, DAG);
+    return combineSVEReductionInt(N, AArch64ISD::SMINV_PRED, DAG);
   case Intrinsic::aarch64_sve_uminv:
-    return LowerSVEIntReduction(N, AArch64ISD::UMINV_PRED, DAG);
+    return combineSVEReductionInt(N, AArch64ISD::UMINV_PRED, DAG);
   case Intrinsic::aarch64_sve_orv:
-    return LowerSVEIntReduction(N, AArch64ISD::ORV_PRED, DAG);
+    return combineSVEReductionInt(N, AArch64ISD::ORV_PRED, DAG);
   case Intrinsic::aarch64_sve_eorv:
-    return LowerSVEIntReduction(N, AArch64ISD::EORV_PRED, DAG);
+    return combineSVEReductionInt(N, AArch64ISD::EORV_PRED, DAG);
   case Intrinsic::aarch64_sve_andv:
-    return LowerSVEIntReduction(N, AArch64ISD::ANDV_PRED, DAG);
+    return combineSVEReductionInt(N, AArch64ISD::ANDV_PRED, DAG);
   case Intrinsic::aarch64_sve_index:
     return LowerSVEIntrinsicIndex(N, DAG);
   case Intrinsic::aarch64_sve_dup:
@@ -226,6 +226,8 @@ enum NodeType : unsigned {
   SMAXV,
   UMAXV,

+  SADDV_PRED,
+  UADDV_PRED,
   SMAXV_PRED,
   UMAXV_PRED,
   SMINV_PRED,
@@ -152,6 +152,8 @@ def AArch64fmaxv_p : SDNode<"AArch64ISD::FMAXV_PRED", SDT_AArch64Reduce>;
 def AArch64fmaxnmv_p : SDNode<"AArch64ISD::FMAXNMV_PRED", SDT_AArch64Reduce>;
 def AArch64fminv_p : SDNode<"AArch64ISD::FMINV_PRED", SDT_AArch64Reduce>;
 def AArch64fminnmv_p : SDNode<"AArch64ISD::FMINNMV_PRED", SDT_AArch64Reduce>;
+def AArch64saddv_p : SDNode<"AArch64ISD::SADDV_PRED", SDT_AArch64Reduce>;
+def AArch64uaddv_p : SDNode<"AArch64ISD::UADDV_PRED", SDT_AArch64Reduce>;
 def AArch64smaxv_p : SDNode<"AArch64ISD::SMAXV_PRED", SDT_AArch64Reduce>;
 def AArch64umaxv_p : SDNode<"AArch64ISD::UMAXV_PRED", SDT_AArch64Reduce>;
 def AArch64sminv_p : SDNode<"AArch64ISD::SMINV_PRED", SDT_AArch64Reduce>;
@@ -307,8 +309,8 @@ let Predicates = [HasSVE] in {
   defm MLS_ZPmZZ : sve_int_mlas_vvv_pred<0b1, "mls", int_aarch64_sve_mls, sub, AArch64mul_p_oneuse>;

   // SVE predicated integer reductions.
-  defm SADDV_VPZ : sve_int_reduce_0_saddv<0b000, "saddv", int_aarch64_sve_saddv>;
-  defm UADDV_VPZ : sve_int_reduce_0_uaddv<0b001, "uaddv", int_aarch64_sve_uaddv, int_aarch64_sve_saddv>;
+  defm SADDV_VPZ : sve_int_reduce_0_saddv<0b000, "saddv", AArch64saddv_p>;
+  defm UADDV_VPZ : sve_int_reduce_0_uaddv<0b001, "uaddv", AArch64uaddv_p>;
   defm SMAXV_VPZ : sve_int_reduce_1<0b000, "smaxv", AArch64smaxv_p>;
   defm UMAXV_VPZ : sve_int_reduce_1<0b001, "umaxv", AArch64umaxv_p>;
   defm SMINV_VPZ : sve_int_reduce_1<0b010, "sminv", AArch64sminv_p>;
@@ -348,11 +348,6 @@ class SVE_2_Op_Pred_All_Active<ValueType vtd, SDPatternOperator op,
   : Pat<(vtd (op (pt (AArch64ptrue 31)), vt1:$Op1, vt2:$Op2)),
         (inst $Op1, $Op2)>;

-class SVE_2_Op_Pat_Reduce_To_Neon<ValueType vtd, SDPatternOperator op, ValueType vt1,
-                                  ValueType vt2, Instruction inst, SubRegIndex sub>
-  : Pat<(vtd (op vt1:$Op1, vt2:$Op2)),
-        (INSERT_SUBREG (vtd (IMPLICIT_DEF)), (inst $Op1, $Op2), sub)>;
-
 class SVE_3_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
                    ValueType vt2, ValueType vt3, Instruction inst>
   : Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3)),
@@ -4535,7 +4530,6 @@ multiclass sve_fp_fast_red<bits<3> opc, string asm, SDPatternOperator op> {
   def : SVE_2_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
 }

-
 //===----------------------------------------------------------------------===//
 // SVE Floating Point Accumulating Reduction Group
 //===----------------------------------------------------------------------===//
@@ -7173,8 +7167,8 @@ multiclass sve_int_bin_cons_misc_0_c_fexpa<string asm, SDPatternOperator op> {
 //===----------------------------------------------------------------------===//

 class sve_int_reduce<bits<2> sz8_32, bits<2> fmt, bits<3> opc, string asm,
-                     ZPRRegOp zprty, RegisterClass regtype>
-: I<(outs regtype:$Vd), (ins PPR3bAny:$Pg, zprty:$Zn),
+                     ZPRRegOp zprty, FPRasZPROperand dstOpType>
+: I<(outs dstOpType:$Vd), (ins PPR3bAny:$Pg, zprty:$Zn),
   asm, "\t$Vd, $Pg, $Zn",
   "",
   []>, Sched<[]> {
@@ -7192,51 +7186,54 @@ class sve_int_reduce<bits<2> sz8_32, bits<2> fmt, bits<3> opc, string asm,
   let Inst{4-0} = Vd;
 }

-multiclass sve_int_reduce_0_saddv<bits<3> opc, string asm, SDPatternOperator op> {
-  def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64>;
-  def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64>;
-  def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64>;
+multiclass sve_int_reduce_0_saddv<bits<3> opc, string asm,
+                                  SDPatternOperator op> {
+  def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64asZPR>;
+  def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64asZPR>;
+  def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64asZPR>;

-  def : SVE_2_Op_Pat<i64, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
-  def : SVE_2_Op_Pat<i64, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
-  def : SVE_2_Op_Pat<i64, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Pat<nxv2i64, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+  def : SVE_2_Op_Pat<nxv2i64, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<nxv2i64, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
 }

-multiclass sve_int_reduce_0_uaddv<bits<3> opc, string asm, SDPatternOperator op, SDPatternOperator opSaddv> {
-  def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64>;
-  def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64>;
-  def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64>;
-  def _D : sve_int_reduce<0b11, 0b00, opc, asm, ZPR64, FPR64>;
+multiclass sve_int_reduce_0_uaddv<bits<3> opc, string asm,
+                                  SDPatternOperator op> {
+  def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64asZPR>;
+  def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64asZPR>;
+  def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64asZPR>;
+  def _D : sve_int_reduce<0b11, 0b00, opc, asm, ZPR64, FPR64asZPR>;

-  def : SVE_2_Op_Pat<i64, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
-  def : SVE_2_Op_Pat<i64, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
-  def : SVE_2_Op_Pat<i64, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
-  def : SVE_2_Op_Pat<i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
-  def : SVE_2_Op_Pat<i64, opSaddv, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+  def : SVE_2_Op_Pat<nxv2i64, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+  def : SVE_2_Op_Pat<nxv2i64, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<nxv2i64, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
 }

-multiclass sve_int_reduce_1<bits<3> opc, string asm, SDPatternOperator op> {
-  def _B : sve_int_reduce<0b00, 0b01, opc, asm, ZPR8, FPR8>;
-  def _H : sve_int_reduce<0b01, 0b01, opc, asm, ZPR16, FPR16>;
-  def _S : sve_int_reduce<0b10, 0b01, opc, asm, ZPR32, FPR32>;
-  def _D : sve_int_reduce<0b11, 0b01, opc, asm, ZPR64, FPR64>;
+multiclass sve_int_reduce_1<bits<3> opc, string asm,
+                            SDPatternOperator op> {
+  def _B : sve_int_reduce<0b00, 0b01, opc, asm, ZPR8, FPR8asZPR>;
+  def _H : sve_int_reduce<0b01, 0b01, opc, asm, ZPR16, FPR16asZPR>;
+  def _S : sve_int_reduce<0b10, 0b01, opc, asm, ZPR32, FPR32asZPR>;
+  def _D : sve_int_reduce<0b11, 0b01, opc, asm, ZPR64, FPR64asZPR>;

-  def : SVE_2_Op_Pat_Reduce_To_Neon<v16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B), bsub>;
-  def : SVE_2_Op_Pat_Reduce_To_Neon<v8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H), hsub>;
-  def : SVE_2_Op_Pat_Reduce_To_Neon<v4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S), ssub>;
-  def : SVE_2_Op_Pat_Reduce_To_Neon<v2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D), dsub>;
+  def : SVE_2_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+  def : SVE_2_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
 }

-multiclass sve_int_reduce_2<bits<3> opc, string asm, SDPatternOperator op> {
-  def _B : sve_int_reduce<0b00, 0b11, opc, asm, ZPR8, FPR8>;
-  def _H : sve_int_reduce<0b01, 0b11, opc, asm, ZPR16, FPR16>;
-  def _S : sve_int_reduce<0b10, 0b11, opc, asm, ZPR32, FPR32>;
-  def _D : sve_int_reduce<0b11, 0b11, opc, asm, ZPR64, FPR64>;
+multiclass sve_int_reduce_2<bits<3> opc, string asm,
+                            SDPatternOperator op> {
+  def _B : sve_int_reduce<0b00, 0b11, opc, asm, ZPR8, FPR8asZPR>;
+  def _H : sve_int_reduce<0b01, 0b11, opc, asm, ZPR16, FPR16asZPR>;
+  def _S : sve_int_reduce<0b10, 0b11, opc, asm, ZPR32, FPR32asZPR>;
+  def _D : sve_int_reduce<0b11, 0b11, opc, asm, ZPR64, FPR64asZPR>;

-  def : SVE_2_Op_Pat_Reduce_To_Neon<v16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B), bsub>;
-  def : SVE_2_Op_Pat_Reduce_To_Neon<v8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H), hsub>;
-  def : SVE_2_Op_Pat_Reduce_To_Neon<v4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S), ssub>;
-  def : SVE_2_Op_Pat_Reduce_To_Neon<v2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D), dsub>;
+  def : SVE_2_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+  def : SVE_2_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
 }

 class sve_int_movprfx_pred<bits<2> sz8_32, bits<3> opc, string asm,
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
 ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

@@ -6,9 +7,10 @@

 define i64 @saddv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: saddv_i8:
-; CHECK: saddv d[[REDUCE:[0-9]+]], p0, z0.b
-; CHECK: fmov x0, d[[REDUCE]]
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: saddv d0, p0, z0.b
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
   %out = call i64 @llvm.aarch64.sve.saddv.nxv16i8(<vscale x 16 x i1> %pg,
                                                   <vscale x 16 x i8> %a)
   ret i64 %out
@@ -16,9 +18,10 @@ define i64 @saddv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {

 define i64 @saddv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
 ; CHECK-LABEL: saddv_i16:
-; CHECK: saddv d[[REDUCE:[0-9]+]], p0, z0.h
-; CHECK: fmov x0, d[[REDUCE]]
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: saddv d0, p0, z0.h
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
   %out = call i64 @llvm.aarch64.sve.saddv.nxv8i16(<vscale x 8 x i1> %pg,
                                                   <vscale x 8 x i16> %a)
   ret i64 %out
@@ -27,19 +30,21 @@ define i64 @saddv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {

 define i64 @saddv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
 ; CHECK-LABEL: saddv_i32:
-; CHECK: saddv d[[REDUCE:[0-9]+]], p0, z0.s
-; CHECK: fmov x0, d[[REDUCE]]
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: saddv d0, p0, z0.s
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
   %out = call i64 @llvm.aarch64.sve.saddv.nxv4i32(<vscale x 4 x i1> %pg,
                                                   <vscale x 4 x i32> %a)
   ret i64 %out
 }

 define i64 @saddv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
-; CHECK-LABEL: saddv_i64
-; CHECK: uaddv d[[REDUCE:[0-9]+]], p0, z0.d
-; CHECK: fmov x0, d[[REDUCE]]
-; CHECK-NEXT: ret
+; CHECK-LABEL: saddv_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uaddv d0, p0, z0.d
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
   %out = call i64 @llvm.aarch64.sve.saddv.nxv2i64(<vscale x 2 x i1> %pg,
                                                   <vscale x 2 x i64> %a)
   ret i64 %out
@@ -47,9 +52,10 @@ define i64 @saddv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {

 define i64 @uaddv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: uaddv_i8:
-; CHECK: uaddv d[[REDUCE:[0-9]+]], p0, z0.b
-; CHECK: fmov x0, d[[REDUCE]]
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: uaddv d0, p0, z0.b
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
   %out = call i64 @llvm.aarch64.sve.uaddv.nxv16i8(<vscale x 16 x i1> %pg,
                                                   <vscale x 16 x i8> %a)
   ret i64 %out
@@ -57,9 +63,10 @@ define i64 @uaddv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {

 define i64 @uaddv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
 ; CHECK-LABEL: uaddv_i16:
-; CHECK: uaddv d[[REDUCE:[0-9]+]], p0, z0.h
-; CHECK: fmov x0, d[[REDUCE]]
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: uaddv d0, p0, z0.h
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
   %out = call i64 @llvm.aarch64.sve.uaddv.nxv8i16(<vscale x 8 x i1> %pg,
                                                   <vscale x 8 x i16> %a)
   ret i64 %out
@@ -68,9 +75,10 @@ define i64 @uaddv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {

 define i64 @uaddv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
 ; CHECK-LABEL: uaddv_i32:
-; CHECK: uaddv d[[REDUCE:[0-9]+]], p0, z0.s
-; CHECK: fmov x0, d[[REDUCE]]
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: uaddv d0, p0, z0.s
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
   %out = call i64 @llvm.aarch64.sve.uaddv.nxv4i32(<vscale x 4 x i1> %pg,
                                                   <vscale x 4 x i32> %a)
   ret i64 %out
@@ -78,9 +86,10 @@ define i64 @uaddv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {

 define i64 @uaddv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
 ; CHECK-LABEL: uaddv_i64:
-; CHECK: uaddv d[[REDUCE:[0-9]+]], p0, z0.d
-; CHECK: fmov x0, d[[REDUCE]]
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: uaddv d0, p0, z0.d
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
   %out = call i64 @llvm.aarch64.sve.uaddv.nxv2i64(<vscale x 2 x i1> %pg,
                                                   <vscale x 2 x i64> %a)
   ret i64 %out
@@ -88,9 +97,10 @@ define i64 @uaddv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {

 define i8 @smaxv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: smaxv_i8:
-; CHECK: smaxv b[[REDUCE:[0-9]+]], p0, z0.b
-; CHECK: umov w0, v[[REDUCE]].b[0]
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: smaxv b0, p0, z0.b
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
   %out = call i8 @llvm.aarch64.sve.smaxv.nxv16i8(<vscale x 16 x i1> %pg,
                                                  <vscale x 16 x i8> %a)
   ret i8 %out
@@ -98,9 +108,10 @@ define i8 @smaxv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {

 define i16 @smaxv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
 ; CHECK-LABEL: smaxv_i16:
-; CHECK: smaxv h[[REDUCE:[0-9]+]], p0, z0.h
-; CHECK: umov w0, v[[REDUCE]].h[0]
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: smaxv h0, p0, z0.h
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
   %out = call i16 @llvm.aarch64.sve.smaxv.nxv8i16(<vscale x 8 x i1> %pg,
                                                   <vscale x 8 x i16> %a)
   ret i16 %out
@@ -108,9 +119,10 @@ define i16 @smaxv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {

 define i32 @smaxv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
 ; CHECK-LABEL: smaxv_i32:
-; CHECK: smaxv s[[REDUCE:[0-9]+]], p0, z0.s
-; CHECK: fmov w0, s[[REDUCE]]
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: smaxv s0, p0, z0.s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
   %out = call i32 @llvm.aarch64.sve.smaxv.nxv4i32(<vscale x 4 x i1> %pg,
                                                   <vscale x 4 x i32> %a)
   ret i32 %out
@@ -118,9 +130,10 @@ define i32 @smaxv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {

 define i64 @smaxv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
 ; CHECK-LABEL: smaxv_i64:
-; CHECK: smaxv d[[REDUCE:[0-9]+]], p0, z0.d
-; CHECK: fmov x0, d[[REDUCE]]
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: smaxv d0, p0, z0.d
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
   %out = call i64 @llvm.aarch64.sve.smaxv.nxv2i64(<vscale x 2 x i1> %pg,
                                                   <vscale x 2 x i64> %a)
   ret i64 %out
@@ -128,9 +141,10 @@ define i64 @smaxv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {

 define i8 @umaxv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: umaxv_i8:
-; CHECK: umaxv b[[REDUCE:[0-9]+]], p0, z0.b
-; CHECK: umov w0, v[[REDUCE]].b[0]
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: umaxv b0, p0, z0.b
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
   %out = call i8 @llvm.aarch64.sve.umaxv.nxv16i8(<vscale x 16 x i1> %pg,
                                                  <vscale x 16 x i8> %a)
   ret i8 %out
@@ -138,9 +152,10 @@ define i8 @umaxv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {

 define i16 @umaxv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
 ; CHECK-LABEL: umaxv_i16:
-; CHECK: umaxv h[[REDUCE:[0-9]+]], p0, z0.h
-; CHECK: umov w0, v[[REDUCE]].h[0]
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: umaxv h0, p0, z0.h
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
   %out = call i16 @llvm.aarch64.sve.umaxv.nxv8i16(<vscale x 8 x i1> %pg,
                                                   <vscale x 8 x i16> %a)
   ret i16 %out
@@ -148,9 +163,10 @@ define i16 @umaxv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {

 define i32 @umaxv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
 ; CHECK-LABEL: umaxv_i32:
-; CHECK: umaxv s[[REDUCE:[0-9]+]], p0, z0.s
-; CHECK: fmov w0, s[[REDUCE]]
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: umaxv s0, p0, z0.s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
   %out = call i32 @llvm.aarch64.sve.umaxv.nxv4i32(<vscale x 4 x i1> %pg,
                                                   <vscale x 4 x i32> %a)
   ret i32 %out
@@ -158,9 +174,10 @@ define i32 @umaxv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {

 define i64 @umaxv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
 ; CHECK-LABEL: umaxv_i64:
-; CHECK: umaxv d[[REDUCE:[0-9]+]], p0, z0.d
-; CHECK: fmov x0, d[[REDUCE]]
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: umaxv d0, p0, z0.d
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
   %out = call i64 @llvm.aarch64.sve.umaxv.nxv2i64(<vscale x 2 x i1> %pg,
                                                   <vscale x 2 x i64> %a)
   ret i64 %out
@@ -168,9 +185,10 @@ define i64 @umaxv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {

 define i8 @sminv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: sminv_i8:
-; CHECK: sminv b[[REDUCE:[0-9]+]], p0, z0.b
-; CHECK: umov w0, v[[REDUCE]].b[0]
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: sminv b0, p0, z0.b
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
   %out = call i8 @llvm.aarch64.sve.sminv.nxv16i8(<vscale x 16 x i1> %pg,
                                                  <vscale x 16 x i8> %a)
   ret i8 %out
@@ -178,9 +196,10 @@ define i8 @sminv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {

 define i16 @sminv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
 ; CHECK-LABEL: sminv_i16:
-; CHECK: sminv h[[REDUCE:[0-9]+]], p0, z0.h
-; CHECK: umov w0, v[[REDUCE]].h[0]
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: sminv h0, p0, z0.h
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
   %out = call i16 @llvm.aarch64.sve.sminv.nxv8i16(<vscale x 8 x i1> %pg,
                                                   <vscale x 8 x i16> %a)
   ret i16 %out
@@ -188,9 +207,10 @@ define i16 @sminv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {

 define i32 @sminv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
 ; CHECK-LABEL: sminv_i32:
-; CHECK: sminv s[[REDUCE:[0-9]+]], p0, z0.s
-; CHECK: fmov w0, s[[REDUCE]]
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: sminv s0, p0, z0.s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
   %out = call i32 @llvm.aarch64.sve.sminv.nxv4i32(<vscale x 4 x i1> %pg,
                                                   <vscale x 4 x i32> %a)
   ret i32 %out
@@ -198,9 +218,10 @@ define i32 @sminv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {

 define i64 @sminv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
 ; CHECK-LABEL: sminv_i64:
-; CHECK: sminv d[[REDUCE:[0-9]+]], p0, z0.d
-; CHECK: fmov x0, d[[REDUCE]]
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: sminv d0, p0, z0.d
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
   %out = call i64 @llvm.aarch64.sve.sminv.nxv2i64(<vscale x 2 x i1> %pg,
                                                   <vscale x 2 x i64> %a)
   ret i64 %out
@@ -208,9 +229,10 @@ define i64 @sminv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {

 define i8 @uminv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: uminv_i8:
-; CHECK: uminv b[[REDUCE:[0-9]+]], p0, z0.b
-; CHECK: umov w0, v[[REDUCE]].b[0]
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: uminv b0, p0, z0.b
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
   %out = call i8 @llvm.aarch64.sve.uminv.nxv16i8(<vscale x 16 x i1> %pg,
                                                  <vscale x 16 x i8> %a)
   ret i8 %out
@@ -218,9 +240,10 @@ define i8 @uminv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {

 define i16 @uminv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
 ; CHECK-LABEL: uminv_i16:
-; CHECK: uminv h[[REDUCE:[0-9]+]], p0, z0.h
-; CHECK: umov w0, v[[REDUCE]].h[0]
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: uminv h0, p0, z0.h
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
   %out = call i16 @llvm.aarch64.sve.uminv.nxv8i16(<vscale x 8 x i1> %pg,
                                                   <vscale x 8 x i16> %a)
   ret i16 %out
@@ -228,9 +251,10 @@ define i16 @uminv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {

 define i32 @uminv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
 ; CHECK-LABEL: uminv_i32:
-; CHECK: uminv s[[REDUCE:[0-9]+]], p0, z0.s
-; CHECK: fmov w0, s[[REDUCE]]
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: uminv s0, p0, z0.s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
   %out = call i32 @llvm.aarch64.sve.uminv.nxv4i32(<vscale x 4 x i1> %pg,
                                                   <vscale x 4 x i32> %a)
   ret i32 %out
@@ -238,9 +262,10 @@ define i32 @uminv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {

 define i64 @uminv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
 ; CHECK-LABEL: uminv_i64:
-; CHECK: uminv d[[REDUCE:[0-9]+]], p0, z0.d
-; CHECK: fmov x0, d[[REDUCE]]
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: uminv d0, p0, z0.d
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
   %out = call i64 @llvm.aarch64.sve.uminv.nxv2i64(<vscale x 2 x i1> %pg,
                                                   <vscale x 2 x i64> %a)
   ret i64 %out
@@ -248,9 +273,10 @@ define i64 @uminv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {

 define i8 @orv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: orv_i8:
-; CHECK: orv b[[REDUCE:[0-9]+]], p0, z0.b
-; CHECK: umov w0, v[[REDUCE]].b[0]
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: orv b0, p0, z0.b
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
   %out = call i8 @llvm.aarch64.sve.orv.nxv16i8(<vscale x 16 x i1> %pg,
                                                <vscale x 16 x i8> %a)
   ret i8 %out
@@ -258,9 +284,10 @@ define i8 @orv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {

 define i16 @orv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
 ; CHECK-LABEL: orv_i16:
-; CHECK: orv h[[REDUCE:[0-9]+]], p0, z0.h
-; CHECK: umov w0, v[[REDUCE]].h[0]
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: orv h0, p0, z0.h
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
   %out = call i16 @llvm.aarch64.sve.orv.nxv8i16(<vscale x 8 x i1> %pg,
                                                 <vscale x 8 x i16> %a)
   ret i16 %out
@@ -268,9 +295,10 @@ define i16 @orv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {

 define i32 @orv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
 ; CHECK-LABEL: orv_i32:
-; CHECK: orv s[[REDUCE:[0-9]+]], p0, z0.s
-; CHECK: fmov w0, s[[REDUCE]]
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: orv s0, p0, z0.s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
   %out = call i32 @llvm.aarch64.sve.orv.nxv4i32(<vscale x 4 x i1> %pg,
                                                 <vscale x 4 x i32> %a)
   ret i32 %out
@@ -278,9 +306,10 @@ define i32 @orv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {

 define i64 @orv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
 ; CHECK-LABEL: orv_i64:
-; CHECK: orv d[[REDUCE:[0-9]+]], p0, z0.d
-; CHECK: fmov x0, d[[REDUCE]]
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: orv d0, p0, z0.d
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
   %out = call i64 @llvm.aarch64.sve.orv.nxv2i64(<vscale x 2 x i1> %pg,
                                                 <vscale x 2 x i64> %a)
   ret i64 %out
@@ -288,9 +317,10 @@ define i64 @orv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {

 define i8 @eorv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: eorv_i8:
-; CHECK: eorv b[[REDUCE:[0-9]+]], p0, z0.b
-; CHECK: umov w0, v[[REDUCE]].b[0]
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: eorv b0, p0, z0.b
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
   %out = call i8 @llvm.aarch64.sve.eorv.nxv16i8(<vscale x 16 x i1> %pg,
                                                 <vscale x 16 x i8> %a)
   ret i8 %out
@@ -298,9 +328,10 @@ define i8 @eorv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {

 define i16 @eorv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
 ; CHECK-LABEL: eorv_i16:
-; CHECK: eorv h[[REDUCE:[0-9]+]], p0, z0.h
-; CHECK: umov w0, v[[REDUCE]].h[0]
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: eorv h0, p0, z0.h
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
   %out = call i16 @llvm.aarch64.sve.eorv.nxv8i16(<vscale x 8 x i1> %pg,
                                                  <vscale x 8 x i16> %a)
   ret i16 %out
@@ -308,9 +339,10 @@ define i16 @eorv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {

 define i32 @eorv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
 ; CHECK-LABEL: eorv_i32:
-; CHECK: eorv s[[REDUCE:[0-9]+]], p0, z0.s
-; CHECK: fmov w0, s[[REDUCE]]
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: eorv s0, p0, z0.s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
   %out = call i32 @llvm.aarch64.sve.eorv.nxv4i32(<vscale x 4 x i1> %pg,
                                                  <vscale x 4 x i32> %a)
   ret i32 %out
@@ -318,9 +350,10 @@ define i32 @eorv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {

 define i64 @eorv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
 ; CHECK-LABEL: eorv_i64:
-; CHECK: eorv d[[REDUCE:[0-9]+]], p0, z0.d
-; CHECK: fmov x0, d[[REDUCE]]
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: eorv d0, p0, z0.d
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
   %out = call i64 @llvm.aarch64.sve.eorv.nxv2i64(<vscale x 2 x i1> %pg,
                                                  <vscale x 2 x i64> %a)
   ret i64 %out
@@ -328,9 +361,10 @@ define i64 @eorv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {

 define i8 @andv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: andv_i8:
-; CHECK: andv b[[REDUCE:[0-9]+]], p0, z0.b
-; CHECK: umov w0, v[[REDUCE]].b[0]
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: andv b0, p0, z0.b
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
   %out = call i8 @llvm.aarch64.sve.andv.nxv16i8(<vscale x 16 x i1> %pg,
                                                 <vscale x 16 x i8> %a)
   ret i8 %out
@@ -338,9 +372,10 @@ define i8 @andv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {

 define i16 @andv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
 ; CHECK-LABEL: andv_i16:
-; CHECK: andv h[[REDUCE:[0-9]+]], p0, z0.h
-; CHECK: umov w0, v[[REDUCE]].h[0]
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: andv h0, p0, z0.h
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
   %out = call i16 @llvm.aarch64.sve.andv.nxv8i16(<vscale x 8 x i1> %pg,
                                                  <vscale x 8 x i16> %a)
   ret i16 %out
@@ -348,9 +383,10 @@ define i16 @andv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {

 define i32 @andv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
 ; CHECK-LABEL: andv_i32:
-; CHECK: andv s[[REDUCE:[0-9]+]], p0, z0.s
-; CHECK: fmov w0, s[[REDUCE]]
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: andv s0, p0, z0.s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
   %out = call i32 @llvm.aarch64.sve.andv.nxv4i32(<vscale x 4 x i1> %pg,
                                                  <vscale x 4 x i32> %a)
   ret i32 %out
@@ -358,9 +394,10 @@ define i32 @andv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {

 define i64 @andv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
 ; CHECK-LABEL: andv_i64:
-; CHECK: andv d[[REDUCE:[0-9]+]], p0, z0.d
-; CHECK: fmov x0, d[[REDUCE]]
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: andv d0, p0, z0.d
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
   %out = call i64 @llvm.aarch64.sve.andv.nxv2i64(<vscale x 2 x i1> %pg,
                                                  <vscale x 2 x i64> %a)
   ret i64 %out