From 2e989371420f9e5c9da05be9cfe6d5993f52cb69 Mon Sep 17 00:00:00 2001 From: Kit Barton Date: Tue, 3 Mar 2015 19:55:45 +0000 Subject: [PATCH] Add the following 64-bit vector integer arithmetic instructions added in POWER8: vaddudm vsubudm vmulesw vmulosw vmuleuw vmulouw vmuluwm vmaxsd vmaxud vminsd vminud vcmpequd vcmpequd. vcmpgtsd vcmpgtsd. vcmpgtud vcmpgtud. vrld vsld vsrd vsrad Phabricator review: http://reviews.llvm.org/D7959 llvm-svn: 231115 --- include/llvm/IR/IntrinsicsPowerPC.td | 53 +++- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 8 +- lib/Target/PowerPC/PPCISelLowering.cpp | 78 +++++- lib/Target/PowerPC/PPCInstrAltivec.td | 45 ++- lib/Target/PowerPC/PPCSchedule.td | 14 + lib/Target/PowerPC/README_ALTIVEC.txt | 26 ++ .../CodeGen/PowerPC/vec_add_sub_doubleword.ll | 62 +++++ test/CodeGen/PowerPC/vec_cmpd.ll | 258 ++++++++++++++++++ test/CodeGen/PowerPC/vec_minmax.ll | 34 +++ test/CodeGen/PowerPC/vec_mul_even_odd.ll | 41 +++ test/CodeGen/PowerPC/vec_rotate_shift.ll | 33 +++ .../PowerPC/ppc64-encoding-vmx.txt | 60 ++++ test/MC/PowerPC/ppc64-encoding-vmx.s | 74 ++++- 13 files changed, 768 insertions(+), 18 deletions(-) create mode 100644 test/CodeGen/PowerPC/vec_add_sub_doubleword.ll create mode 100644 test/CodeGen/PowerPC/vec_cmpd.ll create mode 100644 test/CodeGen/PowerPC/vec_minmax.ll create mode 100644 test/CodeGen/PowerPC/vec_mul_even_odd.ll create mode 100644 test/CodeGen/PowerPC/vec_rotate_shift.ll diff --git a/include/llvm/IR/IntrinsicsPowerPC.td b/include/llvm/IR/IntrinsicsPowerPC.td index 110d55d562a..36fb1e94ae2 100644 --- a/include/llvm/IR/IntrinsicsPowerPC.td +++ b/include/llvm/IR/IntrinsicsPowerPC.td @@ -94,6 +94,12 @@ class PowerPC_Vec_WWW_Intrinsic<string GCCIntSuffix> [llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +/// PowerPC_Vec_DDD_Intrinsic - A PowerPC intrinsic that takes two v2i64 +/// vectors and returns one. These intrinsics have no side effects. +class PowerPC_Vec_DDD_Intrinsic<string GCCIntSuffix> + : PowerPC_Vec_Intrinsic<GCCIntSuffix, + [llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; //===----------------------------------------------------------------------===// // PowerPC VSX Intrinsic Class Definitions. @@ -198,7 +204,17 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". def int_ppc_altivec_vcmpgtfp : GCCBuiltin<"__builtin_altivec_vcmpgtfp">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; - + + def int_ppc_altivec_vcmpequd : GCCBuiltin<"__builtin_altivec_vcmpequd">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_ppc_altivec_vcmpgtsd : GCCBuiltin<"__builtin_altivec_vcmpgtsd">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_ppc_altivec_vcmpgtud : GCCBuiltin<"__builtin_altivec_vcmpgtud">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_ppc_altivec_vcmpequw : GCCBuiltin<"__builtin_altivec_vcmpequw">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; @@ -242,7 +258,17 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
def int_ppc_altivec_vcmpgtfp_p : GCCBuiltin<"__builtin_altivec_vcmpgtfp_p">, Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty], [IntrNoMem]>; - + + def int_ppc_altivec_vcmpequd_p : GCCBuiltin<"__builtin_altivec_vcmpequd_p">, + Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v2i64_ty,llvm_v2i64_ty], + [IntrNoMem]>; + def int_ppc_altivec_vcmpgtsd_p : GCCBuiltin<"__builtin_altivec_vcmpgtsd_p">, + Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v2i64_ty,llvm_v2i64_ty], + [IntrNoMem]>; + def int_ppc_altivec_vcmpgtud_p : GCCBuiltin<"__builtin_altivec_vcmpgtud_p">, + Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v2i64_ty,llvm_v2i64_ty], + [IntrNoMem]>; + def int_ppc_altivec_vcmpequw_p : GCCBuiltin<"__builtin_altivec_vcmpequw_p">, Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4i32_ty,llvm_v4i32_ty], [IntrNoMem]>; @@ -287,18 +313,22 @@ def int_ppc_altivec_vmaxfp : PowerPC_Vec_FFF_Intrinsic<"vmaxfp">; def int_ppc_altivec_vmaxsb : PowerPC_Vec_BBB_Intrinsic<"vmaxsb">; def int_ppc_altivec_vmaxsh : PowerPC_Vec_HHH_Intrinsic<"vmaxsh">; def int_ppc_altivec_vmaxsw : PowerPC_Vec_WWW_Intrinsic<"vmaxsw">; +def int_ppc_altivec_vmaxsd : PowerPC_Vec_DDD_Intrinsic<"vmaxsd">; def int_ppc_altivec_vmaxub : PowerPC_Vec_BBB_Intrinsic<"vmaxub">; def int_ppc_altivec_vmaxuh : PowerPC_Vec_HHH_Intrinsic<"vmaxuh">; def int_ppc_altivec_vmaxuw : PowerPC_Vec_WWW_Intrinsic<"vmaxuw">; +def int_ppc_altivec_vmaxud : PowerPC_Vec_DDD_Intrinsic<"vmaxud">; // Vector minimum. def int_ppc_altivec_vminfp : PowerPC_Vec_FFF_Intrinsic<"vminfp">; def int_ppc_altivec_vminsb : PowerPC_Vec_BBB_Intrinsic<"vminsb">; def int_ppc_altivec_vminsh : PowerPC_Vec_HHH_Intrinsic<"vminsh">; def int_ppc_altivec_vminsw : PowerPC_Vec_WWW_Intrinsic<"vminsw">; +def int_ppc_altivec_vminsd : PowerPC_Vec_DDD_Intrinsic<"vminsd">; def int_ppc_altivec_vminub : PowerPC_Vec_BBB_Intrinsic<"vminub">; def int_ppc_altivec_vminuh : PowerPC_Vec_HHH_Intrinsic<"vminuh">; def int_ppc_altivec_vminuw : PowerPC_Vec_WWW_Intrinsic<"vminuw">; +def int_ppc_altivec_vminud : PowerPC_Vec_DDD_Intrinsic<"vminud">; // Saturating adds. def int_ppc_altivec_vaddubs : PowerPC_Vec_BBB_Intrinsic<"vaddubs">; @@ -361,12 +391,18 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". def int_ppc_altivec_vmulesh : GCCBuiltin<"__builtin_altivec_vmulesh">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; + def int_ppc_altivec_vmulesw : GCCBuiltin<"__builtin_altivec_vmulesw">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; def int_ppc_altivec_vmuleub : GCCBuiltin<"__builtin_altivec_vmuleub">, Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; def int_ppc_altivec_vmuleuh : GCCBuiltin<"__builtin_altivec_vmuleuh">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; + def int_ppc_altivec_vmuleuw : GCCBuiltin<"__builtin_altivec_vmuleuw">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; def int_ppc_altivec_vmulosb : GCCBuiltin<"__builtin_altivec_vmulosb">, Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], @@ -374,12 +410,21 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". 
def int_ppc_altivec_vmulosh : GCCBuiltin<"__builtin_altivec_vmulosh">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; + def int_ppc_altivec_vmulosw : GCCBuiltin<"__builtin_altivec_vmulosw">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; def int_ppc_altivec_vmuloub : GCCBuiltin<"__builtin_altivec_vmuloub">, Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; def int_ppc_altivec_vmulouh : GCCBuiltin<"__builtin_altivec_vmulouh">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; + def int_ppc_altivec_vmulouw : GCCBuiltin<"__builtin_altivec_vmulouw">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_ppc_altivec_vmuluwm : GCCBuiltin<"__builtin_altivec_vmuluwm">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; // Vector Sum Instructions. def int_ppc_altivec_vsumsws : GCCBuiltin<"__builtin_altivec_vsumsws">, @@ -473,6 +518,7 @@ def int_ppc_altivec_vslo : PowerPC_Vec_WWW_Intrinsic<"vslo">; def int_ppc_altivec_vslb : PowerPC_Vec_BBB_Intrinsic<"vslb">; def int_ppc_altivec_vslh : PowerPC_Vec_HHH_Intrinsic<"vslh">; def int_ppc_altivec_vslw : PowerPC_Vec_WWW_Intrinsic<"vslw">; +def int_ppc_altivec_vsld : PowerPC_Vec_DDD_Intrinsic<"vsld">; // Right Shifts. def int_ppc_altivec_vsr : PowerPC_Vec_WWW_Intrinsic<"vsr">; @@ -481,14 +527,17 @@ def int_ppc_altivec_vsro : PowerPC_Vec_WWW_Intrinsic<"vsro">; def int_ppc_altivec_vsrb : PowerPC_Vec_BBB_Intrinsic<"vsrb">; def int_ppc_altivec_vsrh : PowerPC_Vec_HHH_Intrinsic<"vsrh">; def int_ppc_altivec_vsrw : PowerPC_Vec_WWW_Intrinsic<"vsrw">; +def int_ppc_altivec_vsrd : PowerPC_Vec_DDD_Intrinsic<"vsrd">; def int_ppc_altivec_vsrab : PowerPC_Vec_BBB_Intrinsic<"vsrab">; def int_ppc_altivec_vsrah : PowerPC_Vec_HHH_Intrinsic<"vsrah">; def int_ppc_altivec_vsraw : PowerPC_Vec_WWW_Intrinsic<"vsraw">; +def int_ppc_altivec_vsrad : PowerPC_Vec_DDD_Intrinsic<"vsrad">; // Rotates. def int_ppc_altivec_vrlb : PowerPC_Vec_BBB_Intrinsic<"vrlb">; def int_ppc_altivec_vrlh : PowerPC_Vec_HHH_Intrinsic<"vrlh">; def int_ppc_altivec_vrlw : PowerPC_Vec_WWW_Intrinsic<"vrlw">; +def int_ppc_altivec_vrld : PowerPC_Vec_DDD_Intrinsic<"vrld">; let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". // Miscellaneous. diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index b10e85437ba..4bd303f211d 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -2105,7 +2105,7 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) { // getVCmpInst: return the vector compare instruction for the specified // vector type and condition code. Since this is for altivec specific code, -// only support the altivec types (v16i8, v8i16, v4i32, and v4f32). +// only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32).
static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, bool HasVSX, bool &Swap, bool &Negate) { Swap = false; @@ -2184,6 +2184,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, return PPC::VCMPEQUH; else if (VecVT == MVT::v4i32) return PPC::VCMPEQUW; + else if (VecVT == MVT::v2i64) + return PPC::VCMPEQUD; break; case ISD::SETGT: if (VecVT == MVT::v16i8) @@ -2192,6 +2194,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, return PPC::VCMPGTSH; else if (VecVT == MVT::v4i32) return PPC::VCMPGTSW; + else if (VecVT == MVT::v2i64) + return PPC::VCMPGTSD; break; case ISD::SETUGT: if (VecVT == MVT::v16i8) @@ -2200,6 +2204,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, return PPC::VCMPGTUH; else if (VecVT == MVT::v4i32) return PPC::VCMPGTUW; + else if (VecVT == MVT::v2i64) + return PPC::VCMPGTUD; break; default: break; diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index bc745447c2f..70770a902af 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -574,15 +574,20 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass); addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass); - // VSX v2i64 only supports non-arithmetic operations. - setOperationAction(ISD::ADD, MVT::v2i64, Expand); - setOperationAction(ISD::SUB, MVT::v2i64, Expand); - setOperationAction(ISD::SHL, MVT::v2i64, Expand); setOperationAction(ISD::SRA, MVT::v2i64, Expand); setOperationAction(ISD::SRL, MVT::v2i64, Expand); - setOperationAction(ISD::SETCC, MVT::v2i64, Custom); + if (Subtarget.hasP8Altivec()) { + setOperationAction(ISD::SETCC, MVT::v2i64, Legal); + } + else { + setOperationAction(ISD::SETCC, MVT::v2i64, Custom); + + // VSX v2i64 only supports non-arithmetic operations. + setOperationAction(ISD::ADD, MVT::v2i64, Expand); + setOperationAction(ISD::SUB, MVT::v2i64, Expand); + } setOperationAction(ISD::LOAD, MVT::v2i64, Promote); AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64); @@ -7027,7 +7032,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, /// altivec comparison. If it is, return true and fill in Opc/isDot with /// information about the intrinsic. 
static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, - bool &isDot) { + bool &isDot, const PPCSubtarget &Subtarget) { unsigned IntrinsicID = cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue(); CompareOpc = -1; @@ -7040,29 +7045,83 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break; + case Intrinsic::ppc_altivec_vcmpequd_p: + if (Subtarget.hasP8Altivec()) { + CompareOpc = 199; + isDot = 1; + } + else + return false; + + break; case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break; + case Intrinsic::ppc_altivec_vcmpgtsd_p: + if (Subtarget.hasP8Altivec()) { + CompareOpc = 967; + isDot = 1; + } + else + return false; + + break; case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break; + case Intrinsic::ppc_altivec_vcmpgtud_p: + if (Subtarget.hasP8Altivec()) { + CompareOpc = 711; + isDot = 1; + } + else + return false; + break; + // Normal Comparisons. case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break; + case Intrinsic::ppc_altivec_vcmpequd: + if (Subtarget.hasP8Altivec()) { + CompareOpc = 199; + isDot = 0; + } + else + return false; + + break; case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break; + case Intrinsic::ppc_altivec_vcmpgtsd: + if (Subtarget.hasP8Altivec()) { + CompareOpc = 967; + isDot = 0; + } + else + return false; + + break; case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break; + case Intrinsic::ppc_altivec_vcmpgtud: + if (Subtarget.hasP8Altivec()) { + CompareOpc = 711; + isDot = 0; + } + else + return false; + + break; } return true; } @@ -7076,7 +7135,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SDLoc dl(Op); int CompareOpc; bool isDot; - if (!getAltivecCompareInfo(Op, CompareOpc, isDot)) + if (!getAltivecCompareInfo(Op, CompareOpc, isDot, Subtarget)) return SDValue(); // Don't custom lower most intrinsics. // If this is a non-dot comparison, make the VCMP node and we are done.
@@ -10166,7 +10225,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN && isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) && - getAltivecCompareInfo(LHS, CompareOpc, isDot)) { + getAltivecCompareInfo(LHS, CompareOpc, isDot, Subtarget)) { assert(isDot && "Can't compare against a vector result!"); // If this is a comparison against something other than 0/1, then we know @@ -10279,14 +10338,17 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, case Intrinsic::ppc_altivec_vcmpequb_p: case Intrinsic::ppc_altivec_vcmpequh_p: case Intrinsic::ppc_altivec_vcmpequw_p: + case Intrinsic::ppc_altivec_vcmpequd_p: case Intrinsic::ppc_altivec_vcmpgefp_p: case Intrinsic::ppc_altivec_vcmpgtfp_p: case Intrinsic::ppc_altivec_vcmpgtsb_p: case Intrinsic::ppc_altivec_vcmpgtsh_p: case Intrinsic::ppc_altivec_vcmpgtsw_p: + case Intrinsic::ppc_altivec_vcmpgtsd_p: case Intrinsic::ppc_altivec_vcmpgtub_p: case Intrinsic::ppc_altivec_vcmpgtuh_p: case Intrinsic::ppc_altivec_vcmpgtuw_p: + case Intrinsic::ppc_altivec_vcmpgtud_p: KnownZero = ~1U; // All bits but the low one are known to be zero. break; } diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index f6acd6e485e..4e784a59350 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -750,7 +750,7 @@ def VCMPGTSW : VCMP <902, "vcmpgtsw $vD, $vA, $vB" , v4i32>; def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>; def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>; def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>; - + let isCodeGenOnly = 1 in { def V_SET0B : VXForm_setzero<1220, (outs vrrc:$vD), (ins), "vxor $vD, $vD, $vD", IIC_VecFP, @@ -941,6 +941,40 @@ def : Pat<(v4f32 (fnearbyint v4f32:$vA)), def HasP8Altivec : Predicate<"PPCSubTarget->hasP8Altivec()">; let Predicates = [HasP8Altivec] in { +let isCommutable = 1 in { +def VMULESW : VX1_Int_Ty2<904, "vmulesw", int_ppc_altivec_vmulesw, + v2i64, v4i32>; +def VMULEUW : VX1_Int_Ty2<648, "vmuleuw", int_ppc_altivec_vmuleuw, + v2i64, v4i32>; +def VMULOSW : VX1_Int_Ty2<392, "vmulosw", int_ppc_altivec_vmulosw, + v2i64, v4i32>; +def VMULOUW : VX1_Int_Ty2<136, "vmulouw", int_ppc_altivec_vmulouw, + v2i64, v4i32>; +def VMULUWM : VX1_Int_Ty<137, "vmuluwm", int_ppc_altivec_vmuluwm, + v4i32>; +def VMAXSD : VX1_Int_Ty<450, "vmaxsd", int_ppc_altivec_vmaxsd, v2i64>; +def VMAXUD : VX1_Int_Ty<194, "vmaxud", int_ppc_altivec_vmaxud, v2i64>; +def VMINSD : VX1_Int_Ty<962, "vminsd", int_ppc_altivec_vminsd, v2i64>; +def VMINUD : VX1_Int_Ty<706, "vminud", int_ppc_altivec_vminud, v2i64>; +} // isCommutable + +def VRLD : VX1_Int_Ty<196, "vrld", int_ppc_altivec_vrld, v2i64>; +def VSLD : VX1_Int_Ty<1476, "vsld", int_ppc_altivec_vsld, v2i64>; +def VSRD : VX1_Int_Ty<1732, "vsrd", int_ppc_altivec_vsrd, v2i64>; +def VSRAD : VX1_Int_Ty<964, "vsrad", int_ppc_altivec_vsrad, v2i64>; + + +// Vector Integer Arithmetic Instructions +let isCommutable = 1 in { +def VADDUDM : VXForm_1<192, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vaddudm $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (add v2i64:$vA, v2i64:$vB))]>; +} // isCommutable + +def VSUBUDM : VXForm_1<1216, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vsubudm $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (sub v2i64:$vA, v2i64:$vB))]>; + // Count Leading Zeros def VCLZB : VXForm_2<1794, (outs vrrc:$vD), (ins vrrc:$vB), "vclzb $vD, $vB", IIC_VecGeneral, @@ -992,4 +1026,13 @@ def VORC : VXForm_1<1348,
(outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vorc $vD, $vA, $vB", IIC_VecGeneral, [(set v4i32:$vD, (or v4i32:$vA, (vnot_ppc v4i32:$vB)))]>; + +// i64 element comparisons. +def VCMPEQUD : VCMP <199, "vcmpequd $vD, $vA, $vB" , v2i64>; +def VCMPEQUDo : VCMPo<199, "vcmpequd. $vD, $vA, $vB", v2i64>; +def VCMPGTSD : VCMP <967, "vcmpgtsd $vD, $vA, $vB" , v2i64>; +def VCMPGTSDo : VCMPo<967, "vcmpgtsd. $vD, $vA, $vB", v2i64>; +def VCMPGTUD : VCMP <711, "vcmpgtud $vD, $vA, $vB" , v2i64>; +def VCMPGTUDo : VCMPo<711, "vcmpgtud. $vD, $vA, $vB", v2i64>; + } // end HasP8Altivec diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td index 2f3a1f983bc..f34ff3e1baa 100644 --- a/lib/Target/PowerPC/PPCSchedule.td +++ b/lib/Target/PowerPC/PPCSchedule.td @@ -381,6 +381,7 @@ include "PPCScheduleE5500.td" // vaddsbs IIC_VecGeneral // vaddshs IIC_VecGeneral // vaddsws IIC_VecGeneral +// vaddudm IIC_VecGeneral // vaddubm IIC_VecGeneral // vaddubs IIC_VecGeneral // vadduhm IIC_VecGeneral @@ -402,14 +403,17 @@ include "PPCScheduleE5500.td" // vcmpequb IIC_VecGeneral // vcmpequh IIC_VecGeneral // vcmpequw IIC_VecGeneral +// vcmpequd IIC_VecGeneral // vcmpgefp IIC_VecFPCompare // vcmpgtfp IIC_VecFPCompare // vcmpgtsb IIC_VecGeneral // vcmpgtsh IIC_VecGeneral // vcmpgtsw IIC_VecGeneral +// vcmpgtsd IIC_VecGeneral // vcmpgtub IIC_VecGeneral // vcmpgtuh IIC_VecGeneral // vcmpgtuw IIC_VecGeneral +// vcmpgtud IIC_VecGeneral // vctsxs IIC_VecFP // vctuxs IIC_VecFP // vexptefp IIC_VecFP @@ -419,18 +423,22 @@ include "PPCScheduleE5500.td" // vmaxsb IIC_VecGeneral // vmaxsh IIC_VecGeneral // vmaxsw IIC_VecGeneral +// vmaxsd IIC_VecGeneral // vmaxub IIC_VecGeneral // vmaxuh IIC_VecGeneral // vmaxuw IIC_VecGeneral +// vmaxud IIC_VecGeneral // vmhaddshs IIC_VecComplex // vmhraddshs IIC_VecComplex // vminfp IIC_VecFPCompare // vminsb IIC_VecGeneral // vminsh IIC_VecGeneral // vminsw IIC_VecGeneral +// vminsd IIC_VecGeneral // vminub IIC_VecGeneral // vminuh IIC_VecGeneral // vminuw IIC_VecGeneral +// vminud IIC_VecGeneral // vmladduhm IIC_VecComplex // vmrghb IIC_VecPerm // vmrghh IIC_VecPerm @@ -447,12 +455,17 @@ include "PPCScheduleE5500.td" // vmsumuhs IIC_VecComplex // vmulesb IIC_VecComplex // vmulesh IIC_VecComplex +// vmulesw IIC_VecComplex // vmuleub IIC_VecComplex // vmuleuh IIC_VecComplex +// vmuleuw IIC_VecComplex // vmulosb IIC_VecComplex // vmulosh IIC_VecComplex +// vmulosw IIC_VecComplex // vmuloub IIC_VecComplex // vmulouh IIC_VecComplex +// vmulouw IIC_VecComplex +// vmuluwm IIC_VecComplex // vnor IIC_VecGeneral // vor IIC_VecGeneral // vperm IIC_VecPerm @@ -501,6 +514,7 @@ include "PPCScheduleE5500.td" // vsubshs IIC_VecGeneral // vsubsws IIC_VecGeneral // vsububm IIC_VecGeneral +// vsubudm IIC_VecGeneral // vsububs IIC_VecGeneral // vsubuhm IIC_VecGeneral // vsubuhs IIC_VecGeneral diff --git a/lib/Target/PowerPC/README_ALTIVEC.txt b/lib/Target/PowerPC/README_ALTIVEC.txt index 1e4c6fb9844..cb3ad5bd3a1 100644 --- a/lib/Target/PowerPC/README_ALTIVEC.txt +++ b/lib/Target/PowerPC/README_ALTIVEC.txt @@ -209,3 +209,29 @@ vector float f(vector float a, vector float b) { return b; } +//===----------------------------------------------------------------------===// + +The following example is found in test/CodeGen/PowerPC/vec_add_sub_doubleword.ll: + +define <2 x i64> @increment_by_val(<2 x i64> %x, i64 %val) nounwind { + %tmpvec = insertelement <2 x i64> undef, i64 %val, i32 0 + %tmpvec2 = insertelement <2 x i64> %tmpvec, i64 %val, i32 1 + %result = add <2 x i64> %x, %tmpvec2 + ret <2 x i64> %result +} +
+This will generate the following instruction sequence: + std 5, -8(1) + std 5, -16(1) + addi 3, 1, -16 + ori 2, 2, 0 + lxvd2x 35, 0, 3 + vaddudm 2, 2, 3 + blr + +This will almost certainly cause a load-hit-store hazard. +Since val is a value parameter, it should not need to be saved onto +the stack, unless it's being done to set up the vector register. Instead, +it would be better to splat the value into a vector register, and then +remove the (dead) stores to the stack. + + diff --git a/test/CodeGen/PowerPC/vec_add_sub_doubleword.ll b/test/CodeGen/PowerPC/vec_add_sub_doubleword.ll new file mode 100644 index 00000000000..013e16e2a73 --- /dev/null +++ b/test/CodeGen/PowerPC/vec_add_sub_doubleword.ll @@ -0,0 +1,62 @@ +; Check VMX 64-bit integer operations +; +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s + +define <2 x i64> @test_add(<2 x i64> %x, <2 x i64> %y) nounwind { + %result = add <2 x i64> %x, %y + ret <2 x i64> %result +; CHECK: vaddudm 2, 2, 3 +} + +define <2 x i64> @increment_by_one(<2 x i64> %x) nounwind { + %result = add <2 x i64> %x, <i64 1, i64 1> + ret <2 x i64> %result +; CHECK: vaddudm 2, 2, 3 +} + +define <2 x i64> @increment_by_val(<2 x i64> %x, i64 %val) nounwind { + %tmpvec = insertelement <2 x i64> undef, i64 %val, i32 0 + %tmpvec2 = insertelement <2 x i64> %tmpvec, i64 %val, i32 1 + %result = add <2 x i64> %x, %tmpvec2 + ret <2 x i64> %result +; CHECK: vaddudm 2, 2, 3 +; FIXME: This is currently generating the following instruction sequence +; +; std 5, -8(1) +; std 5, -16(1) +; addi 3, 1, -16 +; ori 2, 2, 0 +; lxvd2x 35, 0, 3 +; vaddudm 2, 2, 3 +; blr +; +; This will almost certainly cause a load-hit-store hazard. +; Since val is a value parameter, it should not need to be +; saved onto the stack at all (unless we're using this to set +; up the vector register). Instead, it would be better to splat +; the value into a vector register.
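+;
+; A stack-free sequence along those lines (a sketch only: it assumes
+; VSX is available for the GPR-to-vector move, and mtvsrd/xxpermdi
+; are illustrative rather than what the backend currently emits)
+; might look like:
+;
+; mtvsrd 35, 5 (move %val from GPR 5 into VSR 35, which overlaps v3)
+; xxpermdi 35, 35, 35, 0 (splat the doubleword into both elements)
+; vaddudm 2, 2, 3
+; blr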
+} + +define <2 x i64> @test_sub(<2 x i64> %x, <2 x i64> %y) nounwind { + %result = sub <2 x i64> %x, %y + ret <2 x i64> %result +; CHECK: vsubudm 2, 2, 3 +} + +define <2 x i64> @decrement_by_one(<2 x i64> %x) nounwind { + %result = sub <2 x i64> %x, <i64 1, i64 1> + ret <2 x i64> %result +; CHECK: vsubudm 2, 2, 3 +} + +define <2 x i64> @decrement_by_val(<2 x i64> %x, i64 %val) nounwind { + %tmpvec = insertelement <2 x i64> undef, i64 %val, i32 0 + %tmpvec2 = insertelement <2 x i64> %tmpvec, i64 %val, i32 1 + %result = sub <2 x i64> %x, %tmpvec2 + ret <2 x i64> %result +; CHECK: vsubudm 2, 2, 3 +} + + + diff --git a/test/CodeGen/PowerPC/vec_cmpd.ll b/test/CodeGen/PowerPC/vec_cmpd.ll new file mode 100644 index 00000000000..4a06ed9ffaf --- /dev/null +++ b/test/CodeGen/PowerPC/vec_cmpd.ll @@ -0,0 +1,258 @@ +; Test the doubleword comparison instructions that were added in POWER8 +; +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s + +define <2 x i64> @v2si64_cmp(<2 x i64> %x, <2 x i64> %y) nounwind readnone { + %cmp = icmp eq <2 x i64> %x, %y + %result = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %result +; CHECK-LABEL: v2si64_cmp: +; CHECK: vcmpequd 2, 2, 3 +} + +define <4 x i64> @v4si64_cmp(<4 x i64> %x, <4 x i64> %y) nounwind readnone { + %cmp = icmp eq <4 x i64> %x, %y + %result = sext <4 x i1> %cmp to <4 x i64> + ret <4 x i64> %result +; CHECK-LABEL: v4si64_cmp +; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +} + +define <8 x i64> @v8si64_cmp(<8 x i64> %x, <8 x i64> %y) nounwind readnone { + %cmp = icmp eq <8 x i64> %x, %y + %result = sext <8 x i1> %cmp to <8 x i64> + ret <8 x i64> %result +; CHECK-LABEL: v8si64_cmp +; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +} + +define <16 x i64> @v16si64_cmp(<16 x i64> %x, <16 x i64> %y) nounwind readnone { + %cmp = icmp eq <16 x i64> %x, %y + %result = sext <16 x i1> %cmp to <16 x i64> + ret <16 x i64> %result +; CHECK-LABEL: v16si64_cmp +; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +} + +define <32 x i64> @v32si64_cmp(<32 x i64> %x, <32 x i64> %y) nounwind readnone { + %cmp = icmp eq <32 x i64> %x, %y + %result = sext <32 x i1> %cmp to <32 x i64> + ret <32 x i64> %result +; CHECK-LABEL: v32si64_cmp +; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +;
CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +} + +; Greater than signed +define <2 x i64> @v2si64_cmp_gt(<2 x i64> %x, <2 x i64> %y) nounwind readnone { + %cmp = icmp sgt <2 x i64> %x, %y + %result = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %result +; CHECK-LABEL: v2si64_cmp_gt +; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +} + +define <4 x i64> @v4si64_cmp_gt(<4 x i64> %x, <4 x i64> %y) nounwind readnone { + %cmp = icmp sgt <4 x i64> %x, %y + %result = sext <4 x i1> %cmp to <4 x i64> + ret <4 x i64> %result +; CHECK-LABEL: v4si64_cmp_gt +; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +} + +define <8 x i64> @v8si64_cmp_gt(<8 x i64> %x, <8 x i64> %y) nounwind readnone { + %cmp = icmp sgt <8 x i64> %x, %y + %result = sext <8 x i1> %cmp to <8 x i64> + ret <8 x i64> %result +; CHECK-LABEL: v8si64_cmp_gt +; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +} + +define <16 x i64> @v16si64_cmp_gt(<16 x i64> %x, <16 x i64> %y) nounwind readnone { + %cmp = icmp sgt <16 x i64> %x, %y + %result = sext <16 x i1> %cmp to <16 x i64> + ret <16 x i64> %result +; CHECK-LABEL: v16si64_cmp_gt +; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +} + +define <32 x i64> @v32si64_cmp_gt(<32 x i64> %x, <32 x i64> %y) nounwind readnone { + %cmp = icmp sgt <32 x i64> %x, %y + %result = sext <32 x i1> %cmp to <32 x i64> + ret <32 x i64> %result +; CHECK-LABEL: v32si64_cmp_gt +; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +} + +; Greater than unsigned +define <2 x i64> @v2ui64_cmp_gt(<2 x i64> %x, <2 x i64> %y) nounwind readnone { + %cmp = icmp ugt <2 x i64> %x, %y + %result = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %result +; CHECK-LABEL: v2ui64_cmp_gt +; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +} + +define <4 x i64> @v4ui64_cmp_gt(<4 x i64> %x, <4 x i64> %y) nounwind readnone { + %cmp = icmp ugt <4 x i64> %x, %y + %result = sext <4 x i1> %cmp to <4 x i64> + ret <4 x i64> %result +; CHECK-LABEL: v4ui64_cmp_gt +; CHECK: 
vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +} + +define <8 x i64> @v8ui64_cmp_gt(<8 x i64> %x, <8 x i64> %y) nounwind readnone { + %cmp = icmp ugt <8 x i64> %x, %y + %result = sext <8 x i1> %cmp to <8 x i64> + ret <8 x i64> %result +; CHECK-LABEL: v8ui64_cmp_gt +; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +} + +define <16 x i64> @v16ui64_cmp_gt(<16 x i64> %x, <16 x i64> %y) nounwind readnone { + %cmp = icmp ugt <16 x i64> %x, %y + %result = sext <16 x i1> %cmp to <16 x i64> + ret <16 x i64> %result +; CHECK-LABEL: v16ui64_cmp_gt +; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +} + +define <32 x i64> @v32ui64_cmp_gt(<32 x i64> %x, <32 x i64> %y) nounwind readnone { + %cmp = icmp ugt <32 x i64> %x, %y + %result = sext <32 x i1> %cmp to <32 x i64> + ret <32 x i64> %result +; CHECK-LABEL: v32ui64_cmp_gt +; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +} + +; Check the intrinsics also +declare <2 x i64> @llvm.ppc.altivec.vcmpequd(<2 x i64>, <2 x i64>) nounwind readnone +declare i32 @llvm.ppc.altivec.vcmpequd.p(i32, <2 x i64>, <2 x i64>) nounwind readnone +declare <2 x i64> @llvm.ppc.altivec.vcmpgtsd(<2 x i64>, <2 x i64>) nounwind readnone +declare i32 @llvm.ppc.altivec.vcmpgtsd.p(i32, <2 x i64>, <2 x i64>) nounwind readnone +declare <2 x i64> @llvm.ppc.altivec.vcmpgtud(<2 x i64>, <2 x i64>) nounwind readnone +declare i32 @llvm.ppc.altivec.vcmpgtud.p(i32, <2 x i64>, <2 x i64>) nounwind readnone + +define <2 x i64> @test_vcmpequd(<2 x i64> %x, <2 x i64> %y) { + %tmp = tail call <2 x i64> @llvm.ppc.altivec.vcmpequd(<2 x i64> %x, <2 x i64> %y) + ret <2 x i64> %tmp +; CHECK-LABEL: test_vcmpequd: +; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +} + +define i32 @test_vcmpequd_p(<2 x i64> %x, <2 x i64> %y) { + %tmp = tail call i32 @llvm.ppc.altivec.vcmpequd.p(i32 2, <2 x i64> %x, <2 x i64> %y) + ret i32 %tmp +; CHECK-LABEL: test_vcmpequd_p: +; CHECK: vcmpequd. 
{{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +} + +define <2 x i64> @test_vcmpgtsd(<2 x i64> %x, <2 x i64> %y) { + %tmp = tail call <2 x i64> @llvm.ppc.altivec.vcmpgtsd(<2 x i64> %x, <2 x i64> %y) + ret <2 x i64> %tmp +; CHECK-LABEL: test_vcmpgtsd +; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +} + +define i32 @test_vcmpgtsd_p(<2 x i64> %x, <2 x i64> %y) { + %tmp = tail call i32 @llvm.ppc.altivec.vcmpgtsd.p(i32 2, <2 x i64> %x, <2 x i64> %y) + ret i32 %tmp +; CHECK-LABEL: test_vcmpgtsd_p +; CHECK: vcmpgtsd. {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +} + +define <2 x i64> @test_vcmpgtud(<2 x i64> %x, <2 x i64> %y) { + %tmp = tail call <2 x i64> @llvm.ppc.altivec.vcmpgtud(<2 x i64> %x, <2 x i64> %y) + ret <2 x i64> %tmp +; CHECK-LABEL: test_vcmpgtud +; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +} + +define i32 @test_vcmpgtud_p(<2 x i64> %x, <2 x i64> %y) { + %tmp = tail call i32 @llvm.ppc.altivec.vcmpgtud.p(i32 2, <2 x i64> %x, <2 x i64> %y) + ret i32 %tmp +; CHECK-LABEL: test_vcmpgtud_p +; CHECK: vcmpgtud. {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +} + + + + diff --git a/test/CodeGen/PowerPC/vec_minmax.ll b/test/CodeGen/PowerPC/vec_minmax.ll new file mode 100644 index 00000000000..e9ba6a01a9b --- /dev/null +++ b/test/CodeGen/PowerPC/vec_minmax.ll @@ -0,0 +1,34 @@ +; Test the vector min/max doubleword instructions added for P8 +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s + +declare <2 x i64> @llvm.ppc.altivec.vmaxsd(<2 x i64>, <2 x i64>) nounwind readnone +declare <2 x i64> @llvm.ppc.altivec.vmaxud(<2 x i64>, <2 x i64>) nounwind readnone +declare <2 x i64> @llvm.ppc.altivec.vminsd(<2 x i64>, <2 x i64>) nounwind readnone +declare <2 x i64> @llvm.ppc.altivec.vminud(<2 x i64>, <2 x i64>) nounwind readnone + +define <2 x i64> @test_vmaxsd(<2 x i64> %x, <2 x i64> %y) { + %tmp = tail call <2 x i64> @llvm.ppc.altivec.vmaxsd(<2 x i64> %x, <2 x i64> %y) + ret <2 x i64> %tmp +; CHECK: vmaxsd 2, 2, 3 +} + +define <2 x i64> @test_vmaxud(<2 x i64> %x, <2 x i64> %y) { + %tmp = tail call <2 x i64> @llvm.ppc.altivec.vmaxud(<2 x i64> %x, <2 x i64> %y) + ret <2 x i64> %tmp +; CHECK: vmaxud 2, 2, 3 +} + +define <2 x i64> @test_vminsd(<2 x i64> %x, <2 x i64> %y) { + %tmp = tail call <2 x i64> @llvm.ppc.altivec.vminsd(<2 x i64> %x, <2 x i64> %y) + ret <2 x i64> %tmp +; CHECK: vminsd 2, 2, 3 +} + +define <2 x i64> @test_vminud(<2 x i64> %x, <2 x i64> %y) { + %tmp = tail call <2 x i64> @llvm.ppc.altivec.vminud(<2 x i64> %x, <2 x i64> %y) + ret <2 x i64> %tmp +; CHECK: vminud 2, 2, 3 +} + + diff --git a/test/CodeGen/PowerPC/vec_mul_even_odd.ll b/test/CodeGen/PowerPC/vec_mul_even_odd.ll new file mode 100644 index 00000000000..2746ae54943 --- /dev/null +++ b/test/CodeGen/PowerPC/vec_mul_even_odd.ll @@ -0,0 +1,41 @@ +; Check the vector multiply even/odd word instructions that were added in P8 +; +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s + +declare <2 x i64> @llvm.ppc.altivec.vmuleuw(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.ppc.altivec.vmulesw(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.ppc.altivec.vmulouw(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.ppc.altivec.vmulosw(<4 x i32>, <4 x i32>) nounwind readnone +declare <4 x i32> @llvm.ppc.altivec.vmuluwm(<4 x i32>, <4 x i32>) nounwind readnone + +define <2 x 
i64> @test_vmuleuw(<4 x i32> %x, <4 x i32> %y) nounwind readnone { + %tmp = tail call <2 x i64> @llvm.ppc.altivec.vmuleuw(<4 x i32> %x, <4 x i32> %y) + ret <2 x i64> %tmp +; CHECK: vmuleuw 2, 2, 3 +} + +define <2 x i64> @test_vmulesw(<4 x i32> %x, <4 x i32> %y) nounwind readnone { + %tmp = tail call <2 x i64> @llvm.ppc.altivec.vmulesw(<4 x i32> %x, <4 x i32> %y) + ret <2 x i64> %tmp +; CHECK: vmulesw 2, 2, 3 +} + +define <2 x i64> @test_vmulouw(<4 x i32> %x, <4 x i32> %y) nounwind readnone { + %tmp = tail call <2 x i64> @llvm.ppc.altivec.vmulouw(<4 x i32> %x, <4 x i32> %y) + ret <2 x i64> %tmp +; CHECK: vmulouw 2, 2, 3 +} + +define <2 x i64> @test_vmulosw(<4 x i32> %x, <4 x i32> %y) nounwind readnone { + %tmp = tail call <2 x i64> @llvm.ppc.altivec.vmulosw(<4 x i32> %x, <4 x i32> %y) + ret <2 x i64> %tmp +; CHECK: vmulosw 2, 2, 3 +} + +define <4 x i32> @test_vmuluwm(<4 x i32> %x, <4 x i32> %y) nounwind readnone { + %tmp = tail call <4 x i32> @llvm.ppc.altivec.vmuluwm(<4 x i32> %x, <4 x i32> %y) + ret <4 x i32> %tmp +; CHECK: vmuluwm 2, 2, 3 +} + diff --git a/test/CodeGen/PowerPC/vec_rotate_shift.ll b/test/CodeGen/PowerPC/vec_rotate_shift.ll new file mode 100644 index 00000000000..4dd307c5e5f --- /dev/null +++ b/test/CodeGen/PowerPC/vec_rotate_shift.ll @@ -0,0 +1,33 @@ +; Test the vector rotate and shift doubleword instructions that were added in P8 +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s + +declare <2 x i64> @llvm.ppc.altivec.vrld(<2 x i64>, <2 x i64>) nounwind readnone +declare <2 x i64> @llvm.ppc.altivec.vsld(<2 x i64>, <2 x i64>) nounwind readnone +declare <2 x i64> @llvm.ppc.altivec.vsrd(<2 x i64>, <2 x i64>) nounwind readnone +declare <2 x i64> @llvm.ppc.altivec.vsrad(<2 x i64>, <2 x i64>) nounwind readnone + +define <2 x i64> @test_vrld(<2 x i64> %x, <2 x i64> %y) nounwind readnone { + %tmp = tail call <2 x i64> @llvm.ppc.altivec.vrld(<2 x i64> %x, <2 x i64> %y) + ret <2 x i64> %tmp +; CHECK: vrld 2, 2, 3 +} + +define <2 x i64> @test_vsld(<2 x i64> %x, <2 x i64> %y) nounwind readnone { + %tmp = tail call <2 x i64> @llvm.ppc.altivec.vsld(<2 x i64> %x, <2 x i64> %y) + ret <2 x i64> %tmp +; CHECK: vsld 2, 2, 3 +} + +define <2 x i64> @test_vsrd(<2 x i64> %x, <2 x i64> %y) nounwind readnone { + %tmp = tail call <2 x i64> @llvm.ppc.altivec.vsrd(<2 x i64> %x, <2 x i64> %y) + ret <2 x i64> %tmp +; CHECK: vsrd 2, 2, 3 +} + +define <2 x i64> @test_vsrad(<2 x i64> %x, <2 x i64> %y) nounwind readnone { + %tmp = tail call <2 x i64> @llvm.ppc.altivec.vsrad(<2 x i64> %x, <2 x i64> %y) + ret <2 x i64> %tmp +; CHECK: vsrad 2, 2, 3 +} + diff --git a/test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt b/test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt index fe62fdf3c71..0e56f8ed361 100644 --- a/test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt +++ b/test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt @@ -159,6 +159,9 @@ # CHECK: vadduwm 2, 3, 4 0x10 0x43 0x20 0x80 +# CHECK: vaddudm 2, 3, 4 +0x10 0x43 0x20 0xc0 + # CHECK: vaddubs 2, 3, 4 0x10 0x43 0x22 0x00 @@ -189,6 +192,9 @@ # CHECK: vsubuwm 2, 3, 4 0x10 0x43 0x24 0x80 +# CHECK: vsubudm 2, 3, 4 +0x10 0x43 0x24 0xc0 + # CHECK: vsububs 2, 3, 4 0x10 0x43 0x26 0x00 @@ -204,24 +210,39 @@ # CHECK: vmulesh 2, 3, 4 0x10 0x43 0x23 0x48 +# CHECK: vmulesw 2, 3, 4 +0x10 0x43 0x23 0x88 + # CHECK: vmuleub 2, 3, 4 0x10 0x43 0x22 0x08 # CHECK: vmuleuh 2, 3, 4 0x10 0x43 0x22 0x48 +# CHECK: vmuleuw 2, 3, 4 +0x10 0x43 0x22 0x88 + # 
CHECK: vmulosb 2, 3, 4 0x10 0x43 0x21 0x08 # CHECK: vmulosh 2, 3, 4 0x10 0x43 0x21 0x48 +# CHECK: vmulosw 2, 3, 4 +0x10 0x43 0x21 0x88 + # CHECK: vmuloub 2, 3, 4 0x10 0x43 0x20 0x08 # CHECK: vmulouh 2, 3, 4 0x10 0x43 0x20 0x48 +# CHECK: vmulouw 2, 3, 4 +0x10 0x43 0x20 0x88 + +# CHECK: vmuluwm 2, 3, 4 +0x10 0x43 0x20 0x89 + # CHECK: vmhaddshs 2, 3, 4, 5 0x10 0x43 0x21 0x60 @@ -291,6 +312,9 @@ # CHECK: vmaxsw 2, 3, 4 0x10 0x43 0x21 0x82 +# CHECK: vmaxsd 2, 3, 4 +0x10 0x43 0x21 0xc2 + # CHECK: vmaxub 2, 3, 4 0x10 0x43 0x20 0x02 @@ -300,6 +324,9 @@ # CHECK: vmaxuw 2, 3, 4 0x10 0x43 0x20 0x82 +# CHECK: vmaxud 2, 3, 4 +0x10 0x43 0x20 0xc2 + # CHECK: vminsb 2, 3, 4 0x10 0x43 0x23 0x02 @@ -309,6 +336,9 @@ # CHECK: vminsw 2, 3, 4 0x10 0x43 0x23 0x82 +# CHECK: vminsd 2, 3, 4 +0x10 0x43 0x23 0xc2 + # CHECK: vminub 2, 3, 4 0x10 0x43 0x22 0x02 @@ -318,6 +348,9 @@ # CHECK: vminuw 2, 3, 4 0x10 0x43 0x22 0x82 +# CHECK: vminud 2, 3, 4 +0x10 0x43 0x22 0xc2 + # CHECK: vcmpequb 2, 3, 4 0x10 0x43 0x20 0x06 @@ -336,6 +369,12 @@ # CHECK: vcmpequw. 2, 3, 4 0x10 0x43 0x24 0x86 +# CHECK: vcmpequd 2, 3, 4 +0x10 0x43 0x20 0xc7 + +# CHECK: vcmpequd. 2, 3, 4 +0x10 0x43 0x24 0xc7 + # CHECK: vcmpgtsb 2, 3, 4 0x10 0x43 0x23 0x06 @@ -354,6 +393,12 @@ # CHECK: vcmpgtsw. 2, 3, 4 0x10 0x43 0x27 0x86 +# CHECK: vcmpgtsd 2, 3, 4 +0x10 0x43 0x23 0xc7 + +# CHECK: vcmpgtsd. 2, 3, 4 +0x10 0x43 0x27 0xc7 + # CHECK: vcmpgtub 2, 3, 4 0x10 0x43 0x22 0x06 @@ -372,6 +417,12 @@ # CHECK: vcmpgtuw. 2, 3, 4 0x10 0x43 0x26 0x86 +# CHECK: vcmpgtud 2, 3, 4 +0x10 0x43 0x22 0xc7 + +# CHECK: vcmpgtud. 2, 3, 4 +0x10 0x43 0x26 0xc7 + # CHECK: vand 2, 3, 4 0x10 0x43 0x24 0x04 @@ -414,6 +465,9 @@ # CHECK: vslw 2, 3, 4 0x10 0x43 0x21 0x84 +# CHECK: vrld 2, 3, 4 +0x10 0x43 0x20 0xc4 + # CHECK: vsrb 2, 3, 4 0x10 0x43 0x22 0x04 @@ -423,6 +477,9 @@ # CHECK: vsrw 2, 3, 4 0x10 0x43 0x22 0x84 +# CHECK: vsrd 2, 3, 4 +0x10 0x43 0x26 0xc4 + # CHECK: vsrab 2, 3, 4 0x10 0x43 0x23 0x04 @@ -432,6 +489,9 @@ # CHECK: vsraw 2, 3, 4 0x10 0x43 0x23 0x84 +# CHECK: vsrad 2, 3, 4 +0x10 0x43 0x23 0xc4 + # CHECK: vaddfp 2, 3, 4 0x10 0x43 0x20 0x0a diff --git a/test/MC/PowerPC/ppc64-encoding-vmx.s b/test/MC/PowerPC/ppc64-encoding-vmx.s index 7641d1d7ac0..7267f34186d 100644 --- a/test/MC/PowerPC/ppc64-encoding-vmx.s +++ b/test/MC/PowerPC/ppc64-encoding-vmx.s @@ -176,6 +176,9 @@ # CHECK-BE: vadduwm 2, 3, 4 # encoding: [0x10,0x43,0x20,0x80] # CHECK-LE: vadduwm 2, 3, 4 # encoding: [0x80,0x20,0x43,0x10] vadduwm 2, 3, 4 +# CHECK-BE: vaddudm 2, 3, 4 # encoding: [0x10,0x43,0x20,0xc0] +# CHECK-LE: vaddudm 2, 3, 4 # encoding: [0xc0,0x20,0x43,0x10] + vaddudm 2, 3, 4 # CHECK-BE: vaddubs 2, 3, 4 # encoding: [0x10,0x43,0x22,0x00] # CHECK-LE: vaddubs 2, 3, 4 # encoding: [0x00,0x22,0x43,0x10] vaddubs 2, 3, 4 @@ -207,6 +210,9 @@ # CHECK-BE: vsubuwm 2, 3, 4 # encoding: [0x10,0x43,0x24,0x80] # CHECK-LE: vsubuwm 2, 3, 4 # encoding: [0x80,0x24,0x43,0x10] vsubuwm 2, 3, 4 +# CHECK-BE: vsubudm 2, 3, 4 # encoding: [0x10,0x43,0x24,0xc0] +# CHECK-LE: vsubudm 2, 3, 4 # encoding: [0xc0,0x24,0x43,0x10] + vsubudm 2, 3, 4 # CHECK-BE: vsububs 2, 3, 4 # encoding: [0x10,0x43,0x26,0x00] # CHECK-LE: vsububs 2, 3, 4 # encoding: [0x00,0x26,0x43,0x10] vsububs 2, 3, 4 @@ -223,24 +229,39 @@ # CHECK-BE: vmulesh 2, 3, 4 # encoding: [0x10,0x43,0x23,0x48] # CHECK-LE: vmulesh 2, 3, 4 # encoding: [0x48,0x23,0x43,0x10] vmulesh 2, 3, 4 +# CHECK-BE: vmulesw 2, 3, 4 # encoding: [0x10,0x43,0x23,0x88] +# CHECK-LE: vmulesw 2, 3, 4 # encoding: [0x88,0x23,0x43,0x10] + vmulesw 2, 3, 4 # CHECK-BE: vmuleub 2, 3, 4 # encoding: 
[0x10,0x43,0x22,0x08] # CHECK-LE: vmuleub 2, 3, 4 # encoding: [0x08,0x22,0x43,0x10] vmuleub 2, 3, 4 # CHECK-BE: vmuleuh 2, 3, 4 # encoding: [0x10,0x43,0x22,0x48] # CHECK-LE: vmuleuh 2, 3, 4 # encoding: [0x48,0x22,0x43,0x10] vmuleuh 2, 3, 4 +# CHECK-BE: vmuleuw 2, 3, 4 # encoding: [0x10,0x43,0x22,0x88] +# CHECK-LE: vmuleuw 2, 3, 4 # encoding: [0x88,0x22,0x43,0x10] + vmuleuw 2, 3, 4 # CHECK-BE: vmulosb 2, 3, 4 # encoding: [0x10,0x43,0x21,0x08] # CHECK-LE: vmulosb 2, 3, 4 # encoding: [0x08,0x21,0x43,0x10] vmulosb 2, 3, 4 # CHECK-BE: vmulosh 2, 3, 4 # encoding: [0x10,0x43,0x21,0x48] # CHECK-LE: vmulosh 2, 3, 4 # encoding: [0x48,0x21,0x43,0x10] vmulosh 2, 3, 4 +# CHECK-BE: vmulosw 2, 3, 4 # encoding: [0x10,0x43,0x21,0x88] +# CHECK-LE: vmulosw 2, 3, 4 # encoding: [0x88,0x21,0x43,0x10] + vmulosw 2, 3, 4 # CHECK-BE: vmuloub 2, 3, 4 # encoding: [0x10,0x43,0x20,0x08] # CHECK-LE: vmuloub 2, 3, 4 # encoding: [0x08,0x20,0x43,0x10] vmuloub 2, 3, 4 # CHECK-BE: vmulouh 2, 3, 4 # encoding: [0x10,0x43,0x20,0x48] # CHECK-LE: vmulouh 2, 3, 4 # encoding: [0x48,0x20,0x43,0x10] vmulouh 2, 3, 4 +# CHECK-BE: vmulouw 2, 3, 4 # encoding: [0x10,0x43,0x20,0x88] +# CHECK-LE: vmulouw 2, 3, 4 # encoding: [0x88,0x20,0x43,0x10] + vmulouw 2, 3, 4 +# CHECK-BE: vmuluwm 2, 3, 4 # encoding: [0x10,0x43,0x20,0x89] +# CHECK-LE: vmuluwm 2, 3, 4 # encoding: [0x89,0x20,0x43,0x10] + vmuluwm 2, 3, 4 # CHECK-BE: vmhaddshs 2, 3, 4, 5 # encoding: [0x10,0x43,0x21,0x60] # CHECK-LE: vmhaddshs 2, 3, 4, 5 # encoding: [0x60,0x21,0x43,0x10] @@ -314,6 +335,9 @@ # CHECK-BE: vmaxsw 2, 3, 4 # encoding: [0x10,0x43,0x21,0x82] # CHECK-LE: vmaxsw 2, 3, 4 # encoding: [0x82,0x21,0x43,0x10] vmaxsw 2, 3, 4 +# CHECK-BE: vmaxsd 2, 3, 4 # encoding: [0x10,0x43,0x21,0xc2] +# CHECK-LE: vmaxsd 2, 3, 4 # encoding: [0xc2,0x21,0x43,0x10] + vmaxsd 2, 3, 4 # CHECK-BE: vmaxub 2, 3, 4 # encoding: [0x10,0x43,0x20,0x02] # CHECK-LE: vmaxub 2, 3, 4 # encoding: [0x02,0x20,0x43,0x10] vmaxub 2, 3, 4 @@ -323,7 +347,10 @@ # CHECK-BE: vmaxuw 2, 3, 4 # encoding: [0x10,0x43,0x20,0x82] # CHECK-LE: vmaxuw 2, 3, 4 # encoding: [0x82,0x20,0x43,0x10] vmaxuw 2, 3, 4 - +# CHECK-BE: vmaxud 2, 3, 4 # encoding: [0x10,0x43,0x20,0xc2] +# CHECK-LE: vmaxud 2, 3, 4 # encoding: [0xc2,0x20,0x43,0x10] + vmaxud 2, 3, 4 + # CHECK-BE: vminsb 2, 3, 4 # encoding: [0x10,0x43,0x23,0x02] # CHECK-LE: vminsb 2, 3, 4 # encoding: [0x02,0x23,0x43,0x10] vminsb 2, 3, 4 @@ -333,6 +360,9 @@ # CHECK-BE: vminsw 2, 3, 4 # encoding: [0x10,0x43,0x23,0x82] # CHECK-LE: vminsw 2, 3, 4 # encoding: [0x82,0x23,0x43,0x10] vminsw 2, 3, 4 +# CHECK-BE: vminsd 2, 3, 4 # encoding: [0x10,0x43,0x23,0xc2] +# CHECK-LE: vminsd 2, 3, 4 # encoding: [0xc2,0x23,0x43,0x10] + vminsd 2, 3, 4 # CHECK-BE: vminub 2, 3, 4 # encoding: [0x10,0x43,0x22,0x02] # CHECK-LE: vminub 2, 3, 4 # encoding: [0x02,0x22,0x43,0x10] vminub 2, 3, 4 @@ -342,6 +372,9 @@ # CHECK-BE: vminuw 2, 3, 4 # encoding: [0x10,0x43,0x22,0x82] # CHECK-LE: vminuw 2, 3, 4 # encoding: [0x82,0x22,0x43,0x10] vminuw 2, 3, 4 +# CHECK-BE: vminud 2, 3, 4 # encoding: [0x10,0x43,0x22,0xc2] +# CHECK-LE: vminud 2, 3, 4 # encoding: [0xc2,0x22,0x43,0x10] + vminud 2, 3, 4 # Vector integer compare instructions @@ -363,6 +396,12 @@ # CHECK-BE: vcmpequw. 2, 3, 4 # encoding: [0x10,0x43,0x24,0x86] # CHECK-LE: vcmpequw. 2, 3, 4 # encoding: [0x86,0x24,0x43,0x10] vcmpequw. 2, 3, 4 +# CHECK-BE: vcmpequd 2, 3, 4 # encoding: [0x10,0x43,0x20,0xc7] +# CHECK-LE: vcmpequd 2, 3, 4 # encoding: [0xc7,0x20,0x43,0x10] + vcmpequd 2, 3, 4 +# CHECK-BE: vcmpequd. 
2, 3, 4 # encoding: [0x10,0x43,0x24,0xc7] +# CHECK-LE: vcmpequd. 2, 3, 4 # encoding: [0xc7,0x24,0x43,0x10] + vcmpequd. 2, 3, 4 # CHECK-BE: vcmpgtsb 2, 3, 4 # encoding: [0x10,0x43,0x23,0x06] # CHECK-LE: vcmpgtsb 2, 3, 4 # encoding: [0x06,0x23,0x43,0x10] vcmpgtsb 2, 3, 4 @@ -381,6 +420,12 @@ # CHECK-BE: vcmpgtsw. 2, 3, 4 # encoding: [0x10,0x43,0x27,0x86] # CHECK-LE: vcmpgtsw. 2, 3, 4 # encoding: [0x86,0x27,0x43,0x10] vcmpgtsw. 2, 3, 4 +# CHECK-BE: vcmpgtsd 2, 3, 4 # encoding: [0x10,0x43,0x23,0xc7] +# CHECK-LE: vcmpgtsd 2, 3, 4 # encoding: [0xc7,0x23,0x43,0x10] + vcmpgtsd 2, 3, 4 +# CHECK-BE: vcmpgtsd. 2, 3, 4 # encoding: [0x10,0x43,0x27,0xc7] +# CHECK-LE: vcmpgtsd. 2, 3, 4 # encoding: [0xc7,0x27,0x43,0x10] + vcmpgtsd. 2, 3, 4 # CHECK-BE: vcmpgtub 2, 3, 4 # encoding: [0x10,0x43,0x22,0x06] # CHECK-LE: vcmpgtub 2, 3, 4 # encoding: [0x06,0x22,0x43,0x10] vcmpgtub 2, 3, 4 @@ -399,7 +444,13 @@ # CHECK-BE: vcmpgtuw. 2, 3, 4 # encoding: [0x10,0x43,0x26,0x86] # CHECK-LE: vcmpgtuw. 2, 3, 4 # encoding: [0x86,0x26,0x43,0x10] vcmpgtuw. 2, 3, 4 - +# CHECK-BE: vcmpgtud 2, 3, 4 # encoding: [0x10,0x43,0x22,0xc7] +# CHECK-LE: vcmpgtud 2, 3, 4 # encoding: [0xc7,0x22,0x43,0x10] + vcmpgtud 2, 3, 4 +# CHECK-BE: vcmpgtud. 2, 3, 4 # encoding: [0x10,0x43,0x26,0xc7] +# CHECK-LE: vcmpgtud. 2, 3, 4 # encoding: [0xc7,0x26,0x43,0x10] + vcmpgtud. 2, 3, 4 + # Vector integer logical instructions # CHECK-BE: vand 2, 3, 4 # encoding: [0x10,0x43,0x24,0x04] @@ -438,7 +489,9 @@ # CHECK-BE: vrlw 2, 3, 4 # encoding: [0x10,0x43,0x20,0x84] # CHECK-LE: vrlw 2, 3, 4 # encoding: [0x84,0x20,0x43,0x10] vrlw 2, 3, 4 - +# CHECK-BE: vrld 2, 3, 4 # encoding: [0x10,0x43,0x20,0xc4] +# CHECK-LE: vrld 2, 3, 4 # encoding: [0xc4,0x20,0x43,0x10] + vrld 2, 3, 4 # CHECK-BE: vslb 2, 3, 4 # encoding: [0x10,0x43,0x21,0x04] # CHECK-LE: vslb 2, 3, 4 # encoding: [0x04,0x21,0x43,0x10] vslb 2, 3, 4 @@ -448,6 +501,9 @@ # CHECK-BE: vslw 2, 3, 4 # encoding: [0x10,0x43,0x21,0x84] # CHECK-LE: vslw 2, 3, 4 # encoding: [0x84,0x21,0x43,0x10] vslw 2, 3, 4 +# CHECK-BE: vsld 2, 3, 4 # encoding: [0x10,0x43,0x25,0xc4] +# CHECK-LE: vsld 2, 3, 4 # encoding: [0xc4,0x25,0x43,0x10] + vsld 2, 3, 4 # CHECK-BE: vsrb 2, 3, 4 # encoding: [0x10,0x43,0x22,0x04] # CHECK-LE: vsrb 2, 3, 4 # encoding: [0x04,0x22,0x43,0x10] vsrb 2, 3, 4 @@ -457,6 +513,9 @@ # CHECK-BE: vsrw 2, 3, 4 # encoding: [0x10,0x43,0x22,0x84] # CHECK-LE: vsrw 2, 3, 4 # encoding: [0x84,0x22,0x43,0x10] vsrw 2, 3, 4 +# CHECK-BE: vsrd 2, 3, 4 # encoding: [0x10,0x43,0x26,0xc4] +# CHECK-LE: vsrd 2, 3, 4 # encoding: [0xc4,0x26,0x43,0x10] + vsrd 2, 3, 4 # CHECK-BE: vsrab 2, 3, 4 # encoding: [0x10,0x43,0x23,0x04] # CHECK-LE: vsrab 2, 3, 4 # encoding: [0x04,0x23,0x43,0x10] vsrab 2, 3, 4 @@ -466,6 +525,9 @@ # CHECK-BE: vsraw 2, 3, 4 # encoding: [0x10,0x43,0x23,0x84] # CHECK-LE: vsraw 2, 3, 4 # encoding: [0x84,0x23,0x43,0x10] vsraw 2, 3, 4 +# CHECK-BE: vsrad 2, 3, 4 # encoding: [0x10,0x43,0x23,0xc4] +# CHECK-LE: vsrad 2, 3, 4 # encoding: [0xc4,0x23,0x43,0x10] + vsrad 2, 3, 4 # Vector floating-point instructions @@ -576,16 +638,16 @@ # CHECK-BE: vpopcnth 2, 3 # encoding: [0x10,0x40,0x1f,0x43] # CHECK-LE: vpopcnth 2, 3 # encoding: [0x43,0x1f,0x40,0x10] - vpopcnth 2, 3 + vpopcnth 2, 3 # CHECK-BE: vpopcntw 2, 3 # encoding: [0x10,0x40,0x1f,0x83] # CHECK-LE: vpopcntw 2, 3 # encoding: [0x83,0x1f,0x40,0x10] vpopcntw 2, 3 - + # BCHECK-BE: vpopcntd 2, 3 # encoding: [0x10,0x40,0x1f,0xC3] # BCHECK-LE: vpopcntd 2, 3 # encoding: [0xC3,0x1f,0x40,0x10] # vpopcntd 2, 3 - + # Vector status and control register instructions # CHECK-BE: mtvscr 2 # 
encoding: [0x10,0x00,0x16,0x44]