diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 222b5bca7a6..323e900a5f7 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -258,6 +258,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
     setOperationAction(ISD::UMIN, VT, Legal);
     setOperationAction(ISD::UMAX, VT, Legal);
     setOperationAction(ISD::ABS, VT, Legal);
+    setOperationAction(ISD::SETCC, VT, Custom);
 
     // No native support for these.
     setOperationAction(ISD::UDIV, VT, Expand);
@@ -334,6 +335,12 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
   setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
   setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
   setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
+
+  // Predicate types
+  const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1};
+  for (auto VT : pTypes) {
+    addRegisterClass(VT, &ARM::VCCRRegClass);
+  }
 }
 
 ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
@@ -1500,6 +1507,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
 
   case ARMISD::VCEQ:          return "ARMISD::VCEQ";
   case ARMISD::VCEQZ:         return "ARMISD::VCEQZ";
+  case ARMISD::VCNE:          return "ARMISD::VCNE";
+  case ARMISD::VCNEZ:         return "ARMISD::VCNEZ";
   case ARMISD::VCGE:          return "ARMISD::VCGE";
   case ARMISD::VCGEZ:         return "ARMISD::VCGEZ";
   case ARMISD::VCLEZ:         return "ARMISD::VCLEZ";
@@ -1601,6 +1610,11 @@ EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
                                           EVT VT) const {
   if (!VT.isVector())
     return getPointerTy(DL);
+
+  // MVE has a predicate register.
+  if (Subtarget->hasMVEIntegerOps() &&
+      (VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8))
+    return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
   return VT.changeVectorElementTypeToInteger();
 }
 
@@ -5849,7 +5863,8 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
   return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
 }
 
-static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG,
+                           const ARMSubtarget *ST) {
   SDValue TmpOp0, TmpOp1;
   bool Invert = false;
   bool Swap = false;
@@ -5858,11 +5873,23 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
   SDValue Op0 = Op.getOperand(0);
   SDValue Op1 = Op.getOperand(1);
   SDValue CC = Op.getOperand(2);
-  EVT CmpVT = Op0.getValueType().changeVectorElementTypeToInteger();
   EVT VT = Op.getValueType();
   ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
   SDLoc dl(Op);
 
+  EVT CmpVT;
+  if (ST->hasNEON())
+    CmpVT = Op0.getValueType().changeVectorElementTypeToInteger();
+  else {
+    assert(ST->hasMVEIntegerOps() &&
+           "No hardware support for integer vector comparison!");
+
+    if (Op.getValueType().getVectorElementType() != MVT::i1)
+      return SDValue();
+
+    CmpVT = VT;
+  }
+
   if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
       (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
     // Special-case integer 64-bit equality comparisons. They aren't legal,
@@ -5930,7 +5957,12 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
     // Integer comparisons.
     switch (SetCCOpcode) {
     default: llvm_unreachable("Illegal integer comparison");
-    case ISD::SETNE:  Invert = true; LLVM_FALLTHROUGH;
+    case ISD::SETNE:
+      if (ST->hasMVEIntegerOps()) {
+        Opc = ARMISD::VCNE; break;
+      } else {
+        Invert = true; LLVM_FALLTHROUGH;
+      }
     case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
     case ISD::SETLT:  Swap = true; LLVM_FALLTHROUGH;
     case ISD::SETGT:  Opc = ARMISD::VCGT; break;
@@ -5943,7 +5975,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
   }
 
   // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
-  if (Opc == ARMISD::VCEQ) {
+  if (ST->hasNEON() && Opc == ARMISD::VCEQ) {
     SDValue AndOp;
     if (ISD::isBuildVectorAllZeros(Op1.getNode()))
       AndOp = Op0;
@@ -5982,6 +6014,9 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
   SDValue Result;
   if (SingleOp.getNode()) {
     switch (Opc) {
+    case ARMISD::VCNE:
+      assert(ST->hasMVEIntegerOps() && "Unexpected DAG node");
+      Result = DAG.getNode(ARMISD::VCNEZ, dl, CmpVT, SingleOp); break;
     case ARMISD::VCEQ:
       Result = DAG.getNode(ARMISD::VCEQZ, dl, CmpVT, SingleOp); break;
     case ARMISD::VCGE:
@@ -8436,7 +8471,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::CTTZ:
   case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
   case ISD::CTPOP:      return LowerCTPOP(Op.getNode(), DAG, Subtarget);
-  case ISD::SETCC:      return LowerVSETCC(Op, DAG);
+  case ISD::SETCC:      return LowerVSETCC(Op, DAG, Subtarget);
   case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG);
   case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
   case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
@@ -13594,6 +13629,14 @@ bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
 
   if (!Subtarget->hasMVEIntegerOps())
     return false;
+
+  // These are for predicates
+  if ((Ty == MVT::v16i1 || Ty == MVT::v8i1 || Ty == MVT::v4i1)) {
+    if (Fast)
+      *Fast = true;
+    return true;
+  }
+
   if (Ty != MVT::v16i8 && Ty != MVT::v8i16 && Ty != MVT::v8f16 &&
       Ty != MVT::v4i32 && Ty != MVT::v4f32 && Ty != MVT::v2i64 &&
       Ty != MVT::v2f64 &&
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 84f2f7239fe..983387cee8e 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -131,6 +131,8 @@ class VectorType;
 
       VCEQ,         // Vector compare equal.
      VCEQZ,        // Vector compare equal to zero.
+      VCNE,         // Vector compare not equal (MVE)
+      VCNEZ,        // Vector compare not equal to zero (MVE)
       VCGE,         // Vector compare greater than or equal.
       VCGEZ,        // Vector compare greater than or equal to zero.
       VCLEZ,        // Vector compare less than or equal to zero.
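
A note on the lowering hunks above: with SETCC marked Custom and getSetCCResultType handing out an i1-element vector, MVE compares reach LowerVSETCC with a genuine predicate result type, and SETNE maps straight onto the new VCNE node (MVE's VCMP has a native ne condition) instead of NEON's invert-a-VCEQ trick. Below is a condensed sketch of just the MVE path, for readers who don't want to mentally splice the hunks together. The helper name is hypothetical and the NEON-only handling, VTST detection and compare-with-zero folding are elided; this is not a drop-in replacement for the patched function.

    // Sketch only: condensed from the LowerVSETCC changes above.
    static SDValue lowerMVESetCCSketch(SDValue Op, SelectionDAG &DAG) {
      SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
      ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
      EVT VT = Op.getValueType(); // v16i1/v8i1/v4i1, per getSetCCResultType
      if (VT.getVectorElementType() != MVT::i1)
        return SDValue(); // e.g. v2i64: leave it to the generic expansion
      bool Swap = false;
      unsigned Opc;
      switch (CC) {
      case ISD::SETEQ:  Opc = ARMISD::VCEQ;  break;
      case ISD::SETNE:  Opc = ARMISD::VCNE;  break; // no Invert needed on MVE
      case ISD::SETLT:  Swap = true; LLVM_FALLTHROUGH;
      case ISD::SETGT:  Opc = ARMISD::VCGT;  break;
      case ISD::SETLE:  Swap = true; LLVM_FALLTHROUGH;
      case ISD::SETGE:  Opc = ARMISD::VCGE;  break;
      case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH;
      case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
      case ISD::SETULE: Swap = true; LLVM_FALLTHROUGH;
      case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
      default: return SDValue();
      }
      if (Swap)
        std::swap(Op0, Op1);
      return DAG.getNode(Opc, SDLoc(Op), VT, Op0, Op1);
    }
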
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 74f0c3dd964..8290a858f96 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -265,9 +265,26 @@
 def ARMvshruImm : SDNode<"ARMISD::VSHRuIMM", SDTARMVSHIMM>;
 def ARMvshls : SDNode<"ARMISD::VSHLs", SDTARMVSH>;
 def ARMvshlu : SDNode<"ARMISD::VSHLu", SDTARMVSH>;
 
+def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
+def SDTARMVCMPZ : SDTypeProfile<1, 1, []>;
+
+def ARMvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
+def ARMvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
+def ARMvcne : SDNode<"ARMISD::VCNE", SDTARMVCMP>;
+def ARMvcnez : SDNode<"ARMISD::VCNEZ", SDTARMVCMPZ>;
+def ARMvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
+def ARMvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
+def ARMvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
+def ARMvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
+def ARMvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
+def ARMvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
+def ARMvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
+def ARMvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
+
 def ARMWLS : SDNode<"ARMISD::WLS", SDT_ARMLoLoop, [SDNPHasChain]>;
 def ARMLE : SDNode<"ARMISD::LE", SDT_ARMLoLoop, [SDNPHasChain]>;
 def ARMLoopDec : SDNode<"ARMISD::LOOP_DEC", SDTIntBinOp, [SDNPHasChain]>;
+
 
 //===----------------------------------------------------------------------===//
 // ARM Flag Definitions.
diff --git a/lib/Target/ARM/ARMInstrMVE.td b/lib/Target/ARM/ARMInstrMVE.td
index f96b5b91486..e9af0ed16b4 100644
--- a/lib/Target/ARM/ARMInstrMVE.td
+++ b/lib/Target/ARM/ARMInstrMVE.td
@@ -2982,6 +2982,40 @@
 def MVE_VCMPs8r  : MVE_VCMPqrs<"s8",  0b00>;
 def MVE_VCMPs16r : MVE_VCMPqrs<"s16", 0b01>;
 def MVE_VCMPs32r : MVE_VCMPqrs<"s32", 0b10>;
 
+multiclass unpred_vcmp_z<string suffix, SDPatternOperator opnode, int fc> {
+  def i8 : Pat<(v16i1 (opnode (v16i8 MQPR:$v1))),
+               (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc))>;
+  def i16 : Pat<(v8i1 (opnode (v8i16 MQPR:$v1))),
+               (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc))>;
+  def i32 : Pat<(v4i1 (opnode (v4i32 MQPR:$v1))),
+               (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc))>;
+}
+
+multiclass unpred_vcmp_r<string suffix, SDPatternOperator opnode, int fc> {
+  def i8 : Pat<(v16i1 (opnode (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))),
+              (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc))>;
+  def i16 : Pat<(v8i1 (opnode (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))),
+              (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc))>;
+  def i32 : Pat<(v4i1 (opnode (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))),
+              (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc))>;
+}
+
+let Predicates = [HasMVEInt] in {
+  defm MVE_VCEQZ : unpred_vcmp_z<"i", ARMvceqz, 0>;
+  defm MVE_VCNEZ : unpred_vcmp_z<"i", ARMvcnez, 1>;
+  defm MVE_VCLEZ : unpred_vcmp_z<"s", ARMvclez, 13>;
+  defm MVE_VCGTZ : unpred_vcmp_z<"s", ARMvcgtz, 12>;
+  defm MVE_VCLTZ : unpred_vcmp_z<"s", ARMvcltz, 11>;
+  defm MVE_VCGEZ : unpred_vcmp_z<"s", ARMvcgez, 10>;
+
+  defm MVE_VCEQ  : unpred_vcmp_r<"i", ARMvceq, 0>;
+  defm MVE_VCNE  : unpred_vcmp_r<"i", ARMvcne, 1>;
+  defm MVE_VCGT  : unpred_vcmp_r<"s", ARMvcgt, 12>;
+  defm MVE_VCGE  : unpred_vcmp_r<"s", ARMvcge, 10>;
+  defm MVE_VCGTU : unpred_vcmp_r<"u", ARMvcgtu, 8>;
+  defm MVE_VCGEU : unpred_vcmp_r<"u", ARMvcgeu, 2>;
+}
+
 // end of MVE compares
 
 // start of MVE_qDest_qSrc
@@ -4369,6 +4403,15 @@ foreach suffix = ["s8", "s16", "s32", "u8", "u16", "u32",
 def : MVEInstAlias<"vpsel${vp}." # suffix # "\t$Qd, $Qn, $Qm",
                    (MVE_VPSEL MQPR:$Qd, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
+let Predicates = [HasMVEInt] in {
+  def : Pat<(v16i8 (vselect (v16i1 VCCR:$pred), (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))),
+            (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>;
+  def : Pat<(v8i16 (vselect (v8i1 VCCR:$pred), (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))),
+            (v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>;
+  def : Pat<(v4i32 (vselect (v4i1 VCCR:$pred), (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))),
+            (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>;
+}
+
 
 def MVE_VPNOT : MVE_p<(outs), (ins), NoItinerary,
                 "vpnot", "", "", vpred_n, "", []> {
   let Inst{31-0} = 0b11111110001100010000111101001101;
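
The bare integers passed as `fc` in the new VCMP selection patterns above are ARM condition-code encodings, and they line up with the `eq`/`ne`/`cs`/`hi`/`ge`/`lt`/`gt`/`le` suffixes FileCheck matches in the tests later in this patch. For reference, the relevant values, abridged from `ARMCC::CondCodes` in lib/Target/ARM/Utils/ARMBaseInfo.h (the full enum also has LO, MI, PL, VS, VC, LS and AL):

    // Abridged ARMCC::CondCodes values, as used for the fc operands above.
    enum CondCodes {
      EQ = 0,  // vcmp.iN eq  (MVE_VCEQ / MVE_VCEQZ patterns)
      NE = 1,  // vcmp.iN ne  (MVE_VCNE / MVE_VCNEZ)
      HS = 2,  // vcmp.uN cs  (MVE_VCGEU, unsigned >=)
      HI = 8,  // vcmp.uN hi  (MVE_VCGTU, unsigned >)
      GE = 10, // vcmp.sN ge  (MVE_VCGE / MVE_VCGEZ)
      LT = 11, // vcmp.sN lt  (MVE_VCLTZ)
      GT = 12, // vcmp.sN gt  (MVE_VCGT / MVE_VCGTZ)
      LE = 13, // vcmp.sN le  (MVE_VCLEZ)
    };
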
The "SHX" version is for long and @@ -5027,66 +5014,66 @@ def : Pat<(v2i32 (trunc (ARMvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))), // VCEQ : Vector Compare Equal defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>; + IIC_VSUBi4Q, "vceq", "i", ARMvceq, 1>; def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, - NEONvceq, 1>; + ARMvceq, 1>; def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, - NEONvceq, 1>; + ARMvceq, 1>; def VCEQhd : N3VD<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16, - NEONvceq, 1>, + ARMvceq, 1>, Requires<[HasNEON, HasFullFP16]>; def VCEQhq : N3VQ<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16, - NEONvceq, 1>, + ARMvceq, 1>, Requires<[HasNEON, HasFullFP16]>; let TwoOperandAliasConstraint = "$Vm = $Vd" in defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", - "$Vd, $Vm, #0", NEONvceqz>; + "$Vd, $Vm, #0", ARMvceqz>; // VCGE : Vector Compare Greater Than or Equal defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>; + IIC_VSUBi4Q, "vcge", "s", ARMvcge, 0>; defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>; + IIC_VSUBi4Q, "vcge", "u", ARMvcgeu, 0>; def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, - NEONvcge, 0>; + ARMvcge, 0>; def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, - NEONvcge, 0>; + ARMvcge, 0>; def VCGEhd : N3VD<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16, - NEONvcge, 0>, + ARMvcge, 0>, Requires<[HasNEON, HasFullFP16]>; def VCGEhq : N3VQ<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16, - NEONvcge, 0>, + ARMvcge, 0>, Requires<[HasNEON, HasFullFP16]>; let TwoOperandAliasConstraint = "$Vm = $Vd" in { defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s", - "$Vd, $Vm, #0", NEONvcgez>; + "$Vd, $Vm, #0", ARMvcgez>; defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s", - "$Vd, $Vm, #0", NEONvclez>; + "$Vd, $Vm, #0", ARMvclez>; } // VCGT : Vector Compare Greater Than defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>; + IIC_VSUBi4Q, "vcgt", "s", ARMvcgt, 0>; defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>; + IIC_VSUBi4Q, "vcgt", "u", ARMvcgtu, 0>; def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, - NEONvcgt, 0>; + ARMvcgt, 0>; def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32, - NEONvcgt, 0>; + ARMvcgt, 0>; def VCGThd : N3VD<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16, - NEONvcgt, 0>, + ARMvcgt, 0>, Requires<[HasNEON, HasFullFP16]>; def VCGThq : N3VQ<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16, - NEONvcgt, 0>, + ARMvcgt, 0>, Requires<[HasNEON, HasFullFP16]>; let TwoOperandAliasConstraint = "$Vm = $Vd" in { defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s", - "$Vd, $Vm, #0", NEONvcgtz>; + "$Vd, $Vm, #0", ARMvcgtz>; defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s", - "$Vd, $Vm, #0", NEONvcltz>; + "$Vd, $Vm, #0", ARMvcltz>; } // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) diff --git a/test/CodeGen/Thumb2/mve-vcmp.ll b/test/CodeGen/Thumb2/mve-vcmp.ll new file mode 100644 index 00000000000..79535eaa1ea --- /dev/null +++ 
@@ -0,0 +1,449 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_eq_v4i32(<4 x i32> %src, <4 x i32> %srcb, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_eq_v4i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.i32 eq, q0, q1
+; CHECK-NEXT:    vpsel q0, q2, q3
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp eq <4 x i32> %src, %srcb
+  %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_ne_v4i32(<4 x i32> %src, <4 x i32> %srcb, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_ne_v4i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.i32 ne, q0, q1
+; CHECK-NEXT:    vpsel q0, q2, q3
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp ne <4 x i32> %src, %srcb
+  %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_sgt_v4i32(<4 x i32> %src, <4 x i32> %srcb, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_sgt_v4i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.s32 gt, q0, q1
+; CHECK-NEXT:    vpsel q0, q2, q3
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp sgt <4 x i32> %src, %srcb
+  %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_sge_v4i32(<4 x i32> %src, <4 x i32> %srcb, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_sge_v4i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.s32 ge, q0, q1
+; CHECK-NEXT:    vpsel q0, q2, q3
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp sge <4 x i32> %src, %srcb
+  %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_slt_v4i32(<4 x i32> %src, <4 x i32> %srcb, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_slt_v4i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.s32 gt, q1, q0
+; CHECK-NEXT:    vpsel q0, q2, q3
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp slt <4 x i32> %src, %srcb
+  %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_sle_v4i32(<4 x i32> %src, <4 x i32> %srcb, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_sle_v4i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.s32 ge, q1, q0
+; CHECK-NEXT:    vpsel q0, q2, q3
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp sle <4 x i32> %src, %srcb
+  %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_ugt_v4i32(<4 x i32> %src, <4 x i32> %srcb, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_ugt_v4i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.u32 hi, q0, q1
+; CHECK-NEXT:    vpsel q0, q2, q3
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp ugt <4 x i32> %src, %srcb
+  %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_uge_v4i32(<4 x i32> %src, <4 x i32> %srcb, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_uge_v4i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.u32 cs, q0, q1
+; CHECK-NEXT:    vpsel q0, q2, q3
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp uge <4 x i32> %src, %srcb
+  %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_ult_v4i32(<4 x i32> %src, <4 x i32> %srcb, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_ult_v4i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.u32 hi, q1, q0
+; CHECK-NEXT:    vpsel q0, q2, q3
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp ult <4 x i32> %src, %srcb
+  %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_ule_v4i32(<4 x i32> %src, <4 x i32> %srcb, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_ule_v4i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.u32 cs, q1, q0
+; CHECK-NEXT:    vpsel q0, q2, q3
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp ule <4 x i32> %src, %srcb
+  %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %s
+}
+
+
+define arm_aapcs_vfpcc <8 x i16> @vcmp_eq_v8i16(<8 x i16> %src, <8 x i16> %srcb, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_eq_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.i16 eq, q0, q1
+; CHECK-NEXT:    vpsel q0, q2, q3
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp eq <8 x i16> %src, %srcb
+  %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+  ret <8 x i16> %s
+}
+
+define arm_aapcs_vfpcc <8 x i16> @vcmp_ne_v8i16(<8 x i16> %src, <8 x i16> %srcb, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_ne_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.i16 ne, q0, q1
+; CHECK-NEXT:    vpsel q0, q2, q3
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp ne <8 x i16> %src, %srcb
+  %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+  ret <8 x i16> %s
+}
+
+define arm_aapcs_vfpcc <8 x i16> @vcmp_sgt_v8i16(<8 x i16> %src, <8 x i16> %srcb, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_sgt_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.s16 gt, q0, q1
+; CHECK-NEXT:    vpsel q0, q2, q3
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp sgt <8 x i16> %src, %srcb
+  %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+  ret <8 x i16> %s
+}
+
+define arm_aapcs_vfpcc <8 x i16> @vcmp_sge_v8i16(<8 x i16> %src, <8 x i16> %srcb, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_sge_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.s16 ge, q0, q1
+; CHECK-NEXT:    vpsel q0, q2, q3
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp sge <8 x i16> %src, %srcb
+  %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+  ret <8 x i16> %s
+}
+
+define arm_aapcs_vfpcc <8 x i16> @vcmp_slt_v8i16(<8 x i16> %src, <8 x i16> %srcb, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_slt_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.s16 gt, q1, q0
+; CHECK-NEXT:    vpsel q0, q2, q3
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp slt <8 x i16> %src, %srcb
+  %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+  ret <8 x i16> %s
+}
+
+define arm_aapcs_vfpcc <8 x i16> @vcmp_sle_v8i16(<8 x i16> %src, <8 x i16> %srcb, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_sle_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.s16 ge, q1, q0
+; CHECK-NEXT:    vpsel q0, q2, q3
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp sle <8 x i16> %src, %srcb
+  %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+  ret <8 x i16> %s
+}
+
+define arm_aapcs_vfpcc <8 x i16> @vcmp_ugt_v8i16(<8 x i16> %src, <8 x i16> %srcb, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_ugt_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.u16 hi, q0, q1
+; CHECK-NEXT:    vpsel q0, q2, q3
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp ugt <8 x i16> %src, %srcb
+  %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+  ret <8 x i16> %s
+}
+
+define arm_aapcs_vfpcc <8 x i16> @vcmp_uge_v8i16(<8 x i16> %src, <8 x i16> %srcb, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_uge_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.u16 cs, q0, q1
+; CHECK-NEXT:    vpsel q0, q2, q3
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp uge <8 x i16> %src, %srcb
+  %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+  ret <8 x i16> %s
+}
+
+define arm_aapcs_vfpcc <8 x i16> @vcmp_ult_v8i16(<8 x i16> %src, <8 x i16> %srcb, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_ult_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.u16 hi, q1, q0
+; CHECK-NEXT:    vpsel q0, q2, q3
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp ult <8 x i16> %src, %srcb
+  %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+  ret <8 x i16> %s
+}
+
+define arm_aapcs_vfpcc <8 x i16> @vcmp_ule_v8i16(<8 x i16> %src, <8 x i16> %srcb, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_ule_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.u16 cs, q1, q0
+; CHECK-NEXT:    vpsel q0, q2, q3
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp ule <8 x i16> %src, %srcb
+  %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+  ret <8 x i16> %s
+}
+
+
+define arm_aapcs_vfpcc <16 x i8> @vcmp_eq_v16i8(<16 x i8> %src, <16 x i8> %srcb, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_eq_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.i8 eq, q0, q1
+; CHECK-NEXT:    vpsel q0, q2, q3
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp eq <16 x i8> %src, %srcb
+  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %s
+}
+
+define arm_aapcs_vfpcc <16 x i8> @vcmp_ne_v16i8(<16 x i8> %src, <16 x i8> %srcb, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_ne_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.i8 ne, q0, q1
+; CHECK-NEXT:    vpsel q0, q2, q3
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp ne <16 x i8> %src, %srcb
+  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %s
+}
+
+define arm_aapcs_vfpcc <16 x i8> @vcmp_sgt_v16i8(<16 x i8> %src, <16 x i8> %srcb, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_sgt_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.s8 gt, q0, q1
+; CHECK-NEXT:    vpsel q0, q2, q3
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp sgt <16 x i8> %src, %srcb
+  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %s
+}
+
+define arm_aapcs_vfpcc <16 x i8> @vcmp_sge_v16i8(<16 x i8> %src, <16 x i8> %srcb, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_sge_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.s8 ge, q0, q1
+; CHECK-NEXT:    vpsel q0, q2, q3
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp sge <16 x i8> %src, %srcb
+  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %s
+}
+
+define arm_aapcs_vfpcc <16 x i8> @vcmp_slt_v16i8(<16 x i8> %src, <16 x i8> %srcb, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_slt_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.s8 gt, q1, q0
+; CHECK-NEXT:    vpsel q0, q2, q3
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp slt <16 x i8> %src, %srcb
+  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %s
+}
+
+define arm_aapcs_vfpcc <16 x i8> @vcmp_sle_v16i8(<16 x i8> %src, <16 x i8> %srcb, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_sle_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.s8 ge, q1, q0
+; CHECK-NEXT:    vpsel q0, q2, q3
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp sle <16 x i8> %src, %srcb
+  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %s
+}
+
+define arm_aapcs_vfpcc <16 x i8> @vcmp_ugt_v16i8(<16 x i8> %src, <16 x i8> %srcb, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_ugt_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.u8 hi, q0, q1
+; CHECK-NEXT:    vpsel q0, q2, q3
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp ugt <16 x i8> %src, %srcb
+  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %s
+}
+
+define arm_aapcs_vfpcc <16 x i8> @vcmp_uge_v16i8(<16 x i8> %src, <16 x i8> %srcb, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_uge_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.u8 cs, q0, q1
+; CHECK-NEXT:    vpsel q0, q2, q3
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp uge <16 x i8> %src, %srcb
+  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %s
+}
+
+define arm_aapcs_vfpcc <16 x i8> @vcmp_ult_v16i8(<16 x i8> %src, <16 x i8> %srcb, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_ult_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.u8 hi, q1, q0
+; CHECK-NEXT:    vpsel q0, q2, q3
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp ult <16 x i8> %src, %srcb
+  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %s
+}
+
+define arm_aapcs_vfpcc <16 x i8> @vcmp_ule_v16i8(<16 x i8> %src, <16 x i8> %srcb, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_ule_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.u8 cs, q1, q0
+; CHECK-NEXT:    vpsel q0, q2, q3
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp ule <16 x i8> %src, %srcb
+  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %s
+}
+
+
+define arm_aapcs_vfpcc <2 x i64> @vcmp_eq_v2i64(<2 x i64> %src, <2 x i64> %srcb, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: vcmp_eq_v2i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .vsave {d8, d9}
+; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    vmov r0, s5
+; CHECK-NEXT:    vmov r1, s1
+; CHECK-NEXT:    vmov r2, s0
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    vmov r1, s4
+; CHECK-NEXT:    eors r1, r2
+; CHECK-NEXT:    vmov r2, s2
+; CHECK-NEXT:    orrs r0, r1
+; CHECK-NEXT:    vmov r1, s3
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    lsrs r0, r0, #5
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    vmov.32 q4[0], r0
+; CHECK-NEXT:    vmov.32 q4[1], r0
+; CHECK-NEXT:    vmov r0, s7
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    vmov r1, s6
+; CHECK-NEXT:    eors r1, r2
+; CHECK-NEXT:    orrs r0, r1
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    lsrs r0, r0, #5
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    vmov.32 q4[2], r0
+; CHECK-NEXT:    vmov.32 q4[3], r0
+; CHECK-NEXT:    vbic q0, q3, q4
+; CHECK-NEXT:    vand q1, q2, q4
+; CHECK-NEXT:    vorr q0, q1, q0
+; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp eq <2 x i64> %src, %srcb
+  %s = select <2 x i1> %c, <2 x i64> %a, <2 x i64> %b
+  ret <2 x i64> %s
+}
+
+define arm_aapcs_vfpcc <2 x i32> @vcmp_eq_v2i32(<2 x i64> %src, <2 x i64> %srcb, <2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: vcmp_eq_v2i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .vsave {d8, d9}
+; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    vmov r0, s5
+; CHECK-NEXT:    vmov r1, s1
+; CHECK-NEXT:    vmov r2, s0
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    vmov r1, s4
+; CHECK-NEXT:    eors r1, r2
+; CHECK-NEXT:    vmov r2, s2
+; CHECK-NEXT:    orrs r0, r1
+; CHECK-NEXT:    vmov r1, s3
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    lsrs r0, r0, #5
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    vmov.32 q4[0], r0
+; CHECK-NEXT:    vmov.32 q4[1], r0
+; CHECK-NEXT:    vmov r0, s7
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    vmov r1, s6
+; CHECK-NEXT:    eors r1, r2
+; CHECK-NEXT:    orrs r0, r1
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    lsrs r0, r0, #5
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    vmov.32 q4[2], r0
+; CHECK-NEXT:    vmov.32 q4[3], r0
+; CHECK-NEXT:    vbic q0, q3, q4
+; CHECK-NEXT:    vand q1, q2, q4
+; CHECK-NEXT:    vorr q0, q1, q0
+; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp eq <2 x i64> %src, %srcb
+  %s = select <2 x i1> %c, <2 x i32> %a, <2 x i32> %b
+  ret <2 x i32> %s
+}
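
A note on the two 64-bit cases that close the file above: MVE's integer VCMP only covers 8-, 16- and 32-bit elements, and the getSetCCResultType change at the top of this patch only hands out a predicate type for the three 128-bit integer vector types, so v2i64 equality falls back to the generic scalarised expansion checked in those CHECK lines. The guard in question, repeated from the first file of the patch for emphasis:

    // From the getSetCCResultType hunk above: only these three types get an
    // i1-element setcc result; anything else (e.g. v2i64) is expanded.
    if (Subtarget->hasMVEIntegerOps() &&
        (VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8))
      return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
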
diff --git a/test/CodeGen/Thumb2/mve-vcmpz.ll b/test/CodeGen/Thumb2/mve-vcmpz.ll
new file mode 100644
index 00000000000..65c370348cb
--- /dev/null
+++ b/test/CodeGen/Thumb2/mve-vcmpz.ll
@@ -0,0 +1,424 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_eqz_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_eqz_v4i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.i32 eq, q0, zr
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp eq <4 x i32> %src, zeroinitializer
+  %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_nez_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_nez_v4i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.i32 ne, q0, zr
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp ne <4 x i32> %src, zeroinitializer
+  %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_sgtz_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_sgtz_v4i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.s32 gt, q0, zr
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp sgt <4 x i32> %src, zeroinitializer
+  %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_sgez_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_sgez_v4i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.s32 ge, q0, zr
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp sge <4 x i32> %src, zeroinitializer
+  %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_sltz_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_sltz_v4i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.s32 lt, q0, zr
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp slt <4 x i32> %src, zeroinitializer
+  %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_slez_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_slez_v4i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.s32 le, q0, zr
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp sle <4 x i32> %src, zeroinitializer
+  %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_ugtz_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_ugtz_v4i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.i32 ne, q0, zr
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp ugt <4 x i32> %src, zeroinitializer
+  %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_ugez_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_ugez_v4i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp uge <4 x i32> %src, zeroinitializer
+  %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_ultz_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_ultz_v4i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov q0, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp ult <4 x i32> %src, zeroinitializer
+  %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_ulez_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_ulez_v4i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov.i32 q3, #0x0
+; CHECK-NEXT:    vcmp.u32 cs, q3, q0
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp ule <4 x i32> %src, zeroinitializer
+  %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %s
+}
+
+
+define arm_aapcs_vfpcc <8 x i16> @vcmp_eqz_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_eqz_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.i16 eq, q0, zr
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp eq <8 x i16> %src, zeroinitializer
+  %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+  ret <8 x i16> %s
+}
+
+define arm_aapcs_vfpcc <8 x i16> @vcmp_nez_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_nez_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.i16 ne, q0, zr
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp ne <8 x i16> %src, zeroinitializer
+  %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+  ret <8 x i16> %s
+}
+
+define arm_aapcs_vfpcc <8 x i16> @vcmp_sgtz_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_sgtz_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.s16 gt, q0, zr
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp sgt <8 x i16> %src, zeroinitializer
+  %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+  ret <8 x i16> %s
+}
+
+define arm_aapcs_vfpcc <8 x i16> @vcmp_sgez_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_sgez_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.s16 ge, q0, zr
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp sge <8 x i16> %src, zeroinitializer
+  %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+  ret <8 x i16> %s
+}
+
+define arm_aapcs_vfpcc <8 x i16> @vcmp_sltz_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_sltz_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.s16 lt, q0, zr
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp slt <8 x i16> %src, zeroinitializer
+  %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+  ret <8 x i16> %s
+}
+
+define arm_aapcs_vfpcc <8 x i16> @vcmp_slez_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_slez_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.s16 le, q0, zr
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp sle <8 x i16> %src, zeroinitializer
+  %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+  ret <8 x i16> %s
+}
+
+define arm_aapcs_vfpcc <8 x i16> @vcmp_ugtz_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_ugtz_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.i16 ne, q0, zr
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp ugt <8 x i16> %src, zeroinitializer
+  %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+  ret <8 x i16> %s
+}
+
+define arm_aapcs_vfpcc <8 x i16> @vcmp_ugez_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_ugez_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp uge <8 x i16> %src, zeroinitializer
+  %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+  ret <8 x i16> %s
+}
+
+define arm_aapcs_vfpcc <8 x i16> @vcmp_ultz_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_ultz_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov q0, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp ult <8 x i16> %src, zeroinitializer
+  %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+  ret <8 x i16> %s
+}
+
+define arm_aapcs_vfpcc <8 x i16> @vcmp_ulez_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_ulez_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov.i32 q3, #0x0
+; CHECK-NEXT:    vcmp.u16 cs, q3, q0
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp ule <8 x i16> %src, zeroinitializer
+  %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+  ret <8 x i16> %s
+}
+
+
+define arm_aapcs_vfpcc <16 x i8> @vcmp_eqz_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_eqz_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.i8 eq, q0, zr
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp eq <16 x i8> %src, zeroinitializer
+  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %s
+}
+
+define arm_aapcs_vfpcc <16 x i8> @vcmp_nez_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_nez_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.i8 ne, q0, zr
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp ne <16 x i8> %src, zeroinitializer
+  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %s
+}
+
+define arm_aapcs_vfpcc <16 x i8> @vcmp_sgtz_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_sgtz_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.s8 gt, q0, zr
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp sgt <16 x i8> %src, zeroinitializer
+  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %s
+}
+
+define arm_aapcs_vfpcc <16 x i8> @vcmp_sgez_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_sgez_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.s8 ge, q0, zr
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp sge <16 x i8> %src, zeroinitializer
+  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %s
+}
+
+define arm_aapcs_vfpcc <16 x i8> @vcmp_sltz_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_sltz_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.s8 lt, q0, zr
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp slt <16 x i8> %src, zeroinitializer
+  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %s
+}
+
+define arm_aapcs_vfpcc <16 x i8> @vcmp_slez_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_slez_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.s8 le, q0, zr
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp sle <16 x i8> %src, zeroinitializer
+  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %s
+}
+
+define arm_aapcs_vfpcc <16 x i8> @vcmp_ugtz_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_ugtz_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.i8 ne, q0, zr
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp ugt <16 x i8> %src, zeroinitializer
+  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %s
+}
+
+define arm_aapcs_vfpcc <16 x i8> @vcmp_ugez_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_ugez_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp uge <16 x i8> %src, zeroinitializer
+  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %s
+}
+
+define arm_aapcs_vfpcc <16 x i8> @vcmp_ultz_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_ultz_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov q0, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp ult <16 x i8> %src, zeroinitializer
+  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %s
+}
+
+define arm_aapcs_vfpcc <16 x i8> @vcmp_ulez_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_ulez_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov.i32 q3, #0x0
+; CHECK-NEXT:    vcmp.u8 cs, q3, q0
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp ule <16 x i8> %src, zeroinitializer
+  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %s
+}
+
+
+define arm_aapcs_vfpcc <2 x i64> @vcmp_eqz_v2i64(<2 x i64> %src, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: vcmp_eqz_v2i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov r0, s1
+; CHECK-NEXT:    vmov r1, s0
+; CHECK-NEXT:    orrs r0, r1
+; CHECK-NEXT:    vmov r1, s2
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    lsrs r0, r0, #5
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    vmov.32 q3[0], r0
+; CHECK-NEXT:    vmov.32 q3[1], r0
+; CHECK-NEXT:    vmov r0, s3
+; CHECK-NEXT:    orrs r0, r1
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    lsrs r0, r0, #5
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    vmov.32 q3[2], r0
+; CHECK-NEXT:    vmov.32 q3[3], r0
+; CHECK-NEXT:    vbic q0, q2, q3
+; CHECK-NEXT:    vand q1, q1, q3
+; CHECK-NEXT:    vorr q0, q1, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp eq <2 x i64> %src, zeroinitializer
+  %s = select <2 x i1> %c, <2 x i64> %a, <2 x i64> %b
+  ret <2 x i64> %s
+}
+
+define arm_aapcs_vfpcc <2 x i32> @vcmp_eqz_v2i32(<2 x i64> %src, <2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: vcmp_eqz_v2i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov r0, s1
+; CHECK-NEXT:    vmov r1, s0
+; CHECK-NEXT:    orrs r0, r1
+; CHECK-NEXT:    vmov r1, s2
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    lsrs r0, r0, #5
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    vmov.32 q3[0], r0
+; CHECK-NEXT:    vmov.32 q3[1], r0
+; CHECK-NEXT:    vmov r0, s3
+; CHECK-NEXT:    orrs r0, r1
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    lsrs r0, r0, #5
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    vmov.32 q3[2], r0
+; CHECK-NEXT:    vmov.32 q3[3], r0
+; CHECK-NEXT:    vbic q0, q2, q3
+; CHECK-NEXT:    vand q1, q1, q3
+; CHECK-NEXT:    vorr q0, q1, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp eq <2 x i64> %src, zeroinitializer
+  %s = select <2 x i1> %c, <2 x i32> %a, <2 x i32> %b
+  ret <2 x i32> %s
+}
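
Not every unsigned zero-compare in the file above survives to a VCMP: the generic DAG combiner canonicalises setcc-against-zero first, which is why the four unsigned tests in each group produce different code. A summary of the folds at play (standard SelectionDAG behaviour, not something this patch adds), written against the CHECK lines above:

    // Generic setcc-against-zero folds visible in the vcmpz tests:
    //   icmp ugt x, 0  ->  setne x, 0    => vcmp.iN ne, q0, zr
    //   icmp uge x, 0  ->  always true   => whole select folds to vmov q0, q1
    //   icmp ult x, 0  ->  always false  => whole select folds to vmov q0, q2
    //   icmp ule x, 0  ->  swapped to (0 uge x), so the zero has to live in a
    //                      register: vmov.i32 q3, #0x0 + vcmp.uN cs, q3, q0
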
diff --git a/test/CodeGen/Thumb2/mve-vpsel.ll b/test/CodeGen/Thumb2/mve-vpsel.ll
new file mode 100644
index 00000000000..5c4055a5b26
--- /dev/null
+++ b/test/CodeGen/Thumb2/mve-vpsel.ll
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O3 -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s
+
+define arm_aapcs_vfpcc <16 x i8> @vpsel_i8(<16 x i1> *%mask, <16 x i8> %src1, <16 x i8> %src2) {
+; CHECK-LABEL: vpsel_i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldr p0, [r0]
+; CHECK-NEXT:    vpsel q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = load <16 x i1>, <16 x i1>* %mask, align 4
+  %1 = select <16 x i1> %0, <16 x i8> %src1, <16 x i8> %src2
+  ret <16 x i8> %1
+}
+
+define arm_aapcs_vfpcc <8 x i16> @vpsel_i16(<8 x i1> *%mask, <8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-LABEL: vpsel_i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldr p0, [r0]
+; CHECK-NEXT:    vpsel q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = load <8 x i1>, <8 x i1>* %mask, align 4
+  %1 = select <8 x i1> %0, <8 x i16> %src1, <8 x i16> %src2
+  ret <8 x i16> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vpsel_i32(<4 x i1> *%mask, <4 x i32> %src1, <4 x i32> %src2) {
+; CHECK-LABEL: vpsel_i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldr p0, [r0]
+; CHECK-NEXT:    vpsel q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = load <4 x i1>, <4 x i1>* %mask, align 4
+  %1 = select <4 x i1> %0, <4 x i32> %src1, <4 x i32> %src2
+  ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @foo(<4 x i32> %vec.ind) {
+; CHECK-LABEL: foo:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vmov.i32 q2, #0x1
+; CHECK-NEXT:    vmov.i32 q1, #0x0
+; CHECK-NEXT:    vand q2, q0, q2
+; CHECK-NEXT:    vcmp.i32 eq, q2, zr
+; CHECK-NEXT:    vpsel q0, q0, q1
+; CHECK-NEXT:    bx lr
+  %tmp = and <4 x i32> %vec.ind, <i32 1, i32 1, i32 1, i32 1>
+  %tmp1 = icmp eq <4 x i32> %tmp, zeroinitializer
+  %tmp2 = select <4 x i1> %tmp1, <4 x i32> %vec.ind, <4 x i32> zeroinitializer
+  ret <4 x i32> %tmp2
+}