Add the following 64-bit vector integer arithmetic instructions added in POWER8:

vaddudm vsubudm vmulesw vmulosw vmuleuw vmulouw vmuluwm vmaxsd vmaxud vminsd vminud vcmpequd vcmpequd. vcmpgtsd vcmpgtsd. vcmpgtud vcmpgtud. vrld vsld vsrd vsrad Phabricator review: http://reviews.llvm.org/D7959 llvm-svn: 231115
2024-11-23 19:23:23 +01:00 · 2015-03-03 19:55:45 +00:00 · 2015-03-03 19:55:45 +00:00 · 2e98937142
commit 2e98937142
parent 0dc6751111
13 changed files with 768 additions and 18 deletions
--- a/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/include/llvm/IR/IntrinsicsPowerPC.td
@ -94,6 +94,12 @@ class PowerPC_Vec_WWW_Intrinsic<string GCCIntSuffix>
                          [llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
                          [IntrNoMem]>;

+/// PowerPC_Vec_DDD_Intrinsic - A PowerPC intrinsic that takes two v2i64
+/// vectors and returns one.  These intrinsics have no side effects.
+class PowerPC_Vec_DDD_Intrinsic<string GCCIntSuffix>
+  : PowerPC_Vec_Intrinsic<GCCIntSuffix,
+                          [llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+                          [IntrNoMem]>;

 //===----------------------------------------------------------------------===//
 // PowerPC VSX Intrinsic Class Definitions.
@ -198,7 +204,17 @@ let TargetPrefix = "ppc" in {  // All intrinsics start with "llvm.ppc.".
  def int_ppc_altivec_vcmpgtfp : GCCBuiltin<"__builtin_altivec_vcmpgtfp">,
              Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
                        [IntrNoMem]>;
-                        
+
+  def int_ppc_altivec_vcmpequd : GCCBuiltin<"__builtin_altivec_vcmpequd">,
+              Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+                        [IntrNoMem]>;
+  def int_ppc_altivec_vcmpgtsd : GCCBuiltin<"__builtin_altivec_vcmpgtsd">,
+              Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+                        [IntrNoMem]>;
+  def int_ppc_altivec_vcmpgtud : GCCBuiltin<"__builtin_altivec_vcmpgtud">,
+              Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+                        [IntrNoMem]>;
+                                                
  def int_ppc_altivec_vcmpequw : GCCBuiltin<"__builtin_altivec_vcmpequw">,
              Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
                        [IntrNoMem]>;
@ -242,7 +258,17 @@ let TargetPrefix = "ppc" in {  // All intrinsics start with "llvm.ppc.".
  def int_ppc_altivec_vcmpgtfp_p : GCCBuiltin<"__builtin_altivec_vcmpgtfp_p">,
              Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty],
                        [IntrNoMem]>;
-                        
+
+  def int_ppc_altivec_vcmpequd_p : GCCBuiltin<"__builtin_altivec_vcmpequd_p">,
+              Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v2i64_ty,llvm_v2i64_ty],
+                        [IntrNoMem]>;
+  def int_ppc_altivec_vcmpgtsd_p : GCCBuiltin<"__builtin_altivec_vcmpgtsd_p">,
+              Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v2i64_ty,llvm_v2i64_ty],
+                        [IntrNoMem]>;
+  def int_ppc_altivec_vcmpgtud_p : GCCBuiltin<"__builtin_altivec_vcmpgtud_p">,
+              Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v2i64_ty,llvm_v2i64_ty],
+                        [IntrNoMem]>;
+
  def int_ppc_altivec_vcmpequw_p : GCCBuiltin<"__builtin_altivec_vcmpequw_p">,
              Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4i32_ty,llvm_v4i32_ty],
                        [IntrNoMem]>;
@ -287,18 +313,22 @@ def int_ppc_altivec_vmaxfp : PowerPC_Vec_FFF_Intrinsic<"vmaxfp">;
 def int_ppc_altivec_vmaxsb : PowerPC_Vec_BBB_Intrinsic<"vmaxsb">;
 def int_ppc_altivec_vmaxsh : PowerPC_Vec_HHH_Intrinsic<"vmaxsh">;
 def int_ppc_altivec_vmaxsw : PowerPC_Vec_WWW_Intrinsic<"vmaxsw">;
+def int_ppc_altivec_vmaxsd : PowerPC_Vec_DDD_Intrinsic<"vmaxsd">;
 def int_ppc_altivec_vmaxub : PowerPC_Vec_BBB_Intrinsic<"vmaxub">;
 def int_ppc_altivec_vmaxuh : PowerPC_Vec_HHH_Intrinsic<"vmaxuh">;
 def int_ppc_altivec_vmaxuw : PowerPC_Vec_WWW_Intrinsic<"vmaxuw">;
+def int_ppc_altivec_vmaxud : PowerPC_Vec_DDD_Intrinsic<"vmaxud">;

 // Vector minimum.
 def int_ppc_altivec_vminfp : PowerPC_Vec_FFF_Intrinsic<"vminfp">;
 def int_ppc_altivec_vminsb : PowerPC_Vec_BBB_Intrinsic<"vminsb">;
 def int_ppc_altivec_vminsh : PowerPC_Vec_HHH_Intrinsic<"vminsh">;
 def int_ppc_altivec_vminsw : PowerPC_Vec_WWW_Intrinsic<"vminsw">;
+def int_ppc_altivec_vminsd : PowerPC_Vec_DDD_Intrinsic<"vminsd">;
 def int_ppc_altivec_vminub : PowerPC_Vec_BBB_Intrinsic<"vminub">;
 def int_ppc_altivec_vminuh : PowerPC_Vec_HHH_Intrinsic<"vminuh">;
 def int_ppc_altivec_vminuw : PowerPC_Vec_WWW_Intrinsic<"vminuw">;
+def int_ppc_altivec_vminud : PowerPC_Vec_DDD_Intrinsic<"vminud">;

 // Saturating adds.
 def int_ppc_altivec_vaddubs : PowerPC_Vec_BBB_Intrinsic<"vaddubs">;
@ -361,12 +391,18 @@ let TargetPrefix = "ppc" in {  // All PPC intrinsics start with "llvm.ppc.".
  def int_ppc_altivec_vmulesh : GCCBuiltin<"__builtin_altivec_vmulesh">,
          Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
                    [IntrNoMem]>;
+  def int_ppc_altivec_vmulesw : GCCBuiltin<"__builtin_altivec_vmulesw">,
+          Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+                    [IntrNoMem]>;
  def int_ppc_altivec_vmuleub : GCCBuiltin<"__builtin_altivec_vmuleub">,
          Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
                    [IntrNoMem]>;
  def int_ppc_altivec_vmuleuh : GCCBuiltin<"__builtin_altivec_vmuleuh">,
          Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
                    [IntrNoMem]>;
+  def int_ppc_altivec_vmuleuw : GCCBuiltin<"__builtin_altivec_vmuleuw">,
+          Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+                    [IntrNoMem]>;

  def int_ppc_altivec_vmulosb : GCCBuiltin<"__builtin_altivec_vmulosb">,
          Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
@ -374,12 +410,21 @@ let TargetPrefix = "ppc" in {  // All PPC intrinsics start with "llvm.ppc.".
  def int_ppc_altivec_vmulosh : GCCBuiltin<"__builtin_altivec_vmulosh">,
          Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
                    [IntrNoMem]>;
+  def int_ppc_altivec_vmulosw : GCCBuiltin<"__builtin_altivec_vmulosw">,
+          Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+                    [IntrNoMem]>;
  def int_ppc_altivec_vmuloub : GCCBuiltin<"__builtin_altivec_vmuloub">,
          Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
                    [IntrNoMem]>;
  def int_ppc_altivec_vmulouh : GCCBuiltin<"__builtin_altivec_vmulouh">,
          Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
                    [IntrNoMem]>;
+  def int_ppc_altivec_vmulouw : GCCBuiltin<"__builtin_altivec_vmulouw">,
+          Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+                    [IntrNoMem]>;
+  def int_ppc_altivec_vmuluwm : GCCBuiltin<"__builtin_altivec_vmuluwm">,
+          Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+                    [IntrNoMem]>;

  // Vector Sum Intructions.
  def int_ppc_altivec_vsumsws : GCCBuiltin<"__builtin_altivec_vsumsws">,
@ -473,6 +518,7 @@ def int_ppc_altivec_vslo  : PowerPC_Vec_WWW_Intrinsic<"vslo">;
 def int_ppc_altivec_vslb  : PowerPC_Vec_BBB_Intrinsic<"vslb">;
 def int_ppc_altivec_vslh  : PowerPC_Vec_HHH_Intrinsic<"vslh">;
 def int_ppc_altivec_vslw  : PowerPC_Vec_WWW_Intrinsic<"vslw">;
+def int_ppc_altivec_vsld  : PowerPC_Vec_DDD_Intrinsic<"vsld">;

 // Right Shifts.
 def int_ppc_altivec_vsr   : PowerPC_Vec_WWW_Intrinsic<"vsr">;
@ -481,14 +527,17 @@ def int_ppc_altivec_vsro  : PowerPC_Vec_WWW_Intrinsic<"vsro">;
 def int_ppc_altivec_vsrb  : PowerPC_Vec_BBB_Intrinsic<"vsrb">;
 def int_ppc_altivec_vsrh  : PowerPC_Vec_HHH_Intrinsic<"vsrh">;
 def int_ppc_altivec_vsrw  : PowerPC_Vec_WWW_Intrinsic<"vsrw">;
+def int_ppc_altivec_vsrd  : PowerPC_Vec_DDD_Intrinsic<"vsrd">;
 def int_ppc_altivec_vsrab : PowerPC_Vec_BBB_Intrinsic<"vsrab">;
 def int_ppc_altivec_vsrah : PowerPC_Vec_HHH_Intrinsic<"vsrah">;
 def int_ppc_altivec_vsraw : PowerPC_Vec_WWW_Intrinsic<"vsraw">;
+def int_ppc_altivec_vsrad : PowerPC_Vec_DDD_Intrinsic<"vsrad">;

 // Rotates.
 def int_ppc_altivec_vrlb  : PowerPC_Vec_BBB_Intrinsic<"vrlb">;
 def int_ppc_altivec_vrlh  : PowerPC_Vec_HHH_Intrinsic<"vrlh">;
 def int_ppc_altivec_vrlw  : PowerPC_Vec_WWW_Intrinsic<"vrlw">;
+def int_ppc_altivec_vrld  : PowerPC_Vec_DDD_Intrinsic<"vrld">;

 let TargetPrefix = "ppc" in {  // All PPC intrinsics start with "llvm.ppc.".
  // Miscellaneous.
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@ -2105,7 +2105,7 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {

 // getVCmpInst: return the vector compare instruction for the specified
 // vector type and condition code. Since this is for altivec specific code,
-// only support the altivec types (v16i8, v8i16, v4i32, and v4f32).
+// only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32).
 static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
                                bool HasVSX, bool &Swap, bool &Negate) {
  Swap = false;
@ -2184,6 +2184,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
          return PPC::VCMPEQUH;
        else if (VecVT == MVT::v4i32)
          return PPC::VCMPEQUW;
+        else if (VecVT == MVT::v2i64)
+          return PPC::VCMPEQUD;
        break;
      case ISD::SETGT:
        if (VecVT == MVT::v16i8)
@ -2192,6 +2194,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
          return PPC::VCMPGTSH;
        else if (VecVT == MVT::v4i32)
          return PPC::VCMPGTSW;
+        else if (VecVT == MVT::v2i64)
+          return PPC::VCMPGTSD;
        break;
      case ISD::SETUGT:
        if (VecVT == MVT::v16i8)
@ -2200,6 +2204,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
          return PPC::VCMPGTUH;
        else if (VecVT == MVT::v4i32)
          return PPC::VCMPGTUW;
+        else if (VecVT == MVT::v2i64)
+          return PPC::VCMPGTUD;
        break;
      default:
        break;
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@ -574,15 +574,20 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);

-      // VSX v2i64 only supports non-arithmetic operations.
-      setOperationAction(ISD::ADD, MVT::v2i64, Expand);
-      setOperationAction(ISD::SUB, MVT::v2i64, Expand);
-
      setOperationAction(ISD::SHL, MVT::v2i64, Expand);
      setOperationAction(ISD::SRA, MVT::v2i64, Expand);
      setOperationAction(ISD::SRL, MVT::v2i64, Expand);

-      setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
+      if (Subtarget.hasP8Altivec()) {
+        setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
+      }
+      else {
+        setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
+
+        // VSX v2i64 only supports non-arithmetic operations.
+        setOperationAction(ISD::ADD, MVT::v2i64, Expand);
+        setOperationAction(ISD::SUB, MVT::v2i64, Expand);
+      }

      setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
      AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
@ -7027,7 +7032,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
 /// altivec comparison.  If it is, return true and fill in Opc/isDot with
 /// information about the intrinsic.
 static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
-                                  bool &isDot) {
+                                  bool &isDot, const PPCSubtarget &Subtarget) {
  unsigned IntrinsicID =
    cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
  CompareOpc = -1;
@ -7040,29 +7045,83 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
  case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc =   6; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc =  70; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
+  case Intrinsic::ppc_altivec_vcmpequd_p: 
+    if (Subtarget.hasP8Altivec()) {
+      CompareOpc = 199; 
+      isDot = 1; 
+    }
+    else 
+      return false;
+
+    break;
  case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
+  case Intrinsic::ppc_altivec_vcmpgtsd_p: 
+    if (Subtarget.hasP8Altivec()) {
+      CompareOpc = 967; 
+      isDot = 1; 
+    }
+    else 
+      return false;
+
+    break;
  case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
+  case Intrinsic::ppc_altivec_vcmpgtud_p: 
+    if (Subtarget.hasP8Altivec()) {
+      CompareOpc = 711; 
+      isDot = 1; 
+    }
+    else 
+      return false;

+    break;
+      
    // Normal Comparisons.
  case Intrinsic::ppc_altivec_vcmpbfp:    CompareOpc = 966; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpeqfp:   CompareOpc = 198; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpequb:   CompareOpc =   6; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpequh:   CompareOpc =  70; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpequw:   CompareOpc = 134; isDot = 0; break;
+  case Intrinsic::ppc_altivec_vcmpequd:
+    if (Subtarget.hasP8Altivec()) {
+      CompareOpc = 199; 
+      isDot = 0; 
+    }
+    else
+      return false;
+
+    break;
  case Intrinsic::ppc_altivec_vcmpgefp:   CompareOpc = 454; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtfp:   CompareOpc = 710; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtsb:   CompareOpc = 774; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtsh:   CompareOpc = 838; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtsw:   CompareOpc = 902; isDot = 0; break;
+  case Intrinsic::ppc_altivec_vcmpgtsd:   
+    if (Subtarget.hasP8Altivec()) {
+      CompareOpc = 967; 
+      isDot = 0; 
+    }
+    else
+      return false;
+
+    break;
  case Intrinsic::ppc_altivec_vcmpgtub:   CompareOpc = 518; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtuh:   CompareOpc = 582; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtuw:   CompareOpc = 646; isDot = 0; break;
+  case Intrinsic::ppc_altivec_vcmpgtud:   
+    if (Subtarget.hasP8Altivec()) {
+      CompareOpc = 711; 
+      isDot = 0; 
+    }
+    else
+      return false;
+
+    break;
  }
  return true;
 }
@ -7076,7 +7135,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
  SDLoc dl(Op);
  int CompareOpc;
  bool isDot;
-  if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
+  if (!getAltivecCompareInfo(Op, CompareOpc, isDot, Subtarget))
    return SDValue();    // Don't custom lower most intrinsics.

  // If this is a non-dot comparison, make the VCMP node and we are done.
@ -10166,7 +10225,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,

    if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
        isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
-        getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
+        getAltivecCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
      assert(isDot && "Can't compare against a vector result!");

      // If this is a comparison against something other than 0/1, then we know
@ -10279,14 +10338,17 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
    case Intrinsic::ppc_altivec_vcmpequb_p:
    case Intrinsic::ppc_altivec_vcmpequh_p:
    case Intrinsic::ppc_altivec_vcmpequw_p:
+    case Intrinsic::ppc_altivec_vcmpequd_p:
    case Intrinsic::ppc_altivec_vcmpgefp_p:
    case Intrinsic::ppc_altivec_vcmpgtfp_p:
    case Intrinsic::ppc_altivec_vcmpgtsb_p:
    case Intrinsic::ppc_altivec_vcmpgtsh_p:
    case Intrinsic::ppc_altivec_vcmpgtsw_p:
+    case Intrinsic::ppc_altivec_vcmpgtsd_p:
    case Intrinsic::ppc_altivec_vcmpgtub_p:
    case Intrinsic::ppc_altivec_vcmpgtuh_p:
    case Intrinsic::ppc_altivec_vcmpgtuw_p:
+    case Intrinsic::ppc_altivec_vcmpgtud_p:
      KnownZero = ~1U;  // All bits but the low one are known to be zero.
      break;
    }
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@ -750,7 +750,7 @@ def VCMPGTSW  : VCMP <902, "vcmpgtsw $vD, $vA, $vB" , v4i32>;
 def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
 def VCMPGTUW  : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
 def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
-                      
+
 let isCodeGenOnly = 1 in {
 def V_SET0B : VXForm_setzero<1220, (outs vrrc:$vD), (ins),
                      "vxor $vD, $vD, $vD", IIC_VecFP,
@ -941,6 +941,40 @@ def : Pat<(v4f32 (fnearbyint v4f32:$vA)),
 def HasP8Altivec : Predicate<"PPCSubTarget->hasP8Altivec()">;
 let Predicates = [HasP8Altivec] in {

+let isCommutable = 1 in {
+def VMULESW : VX1_Int_Ty2<904, "vmulesw", int_ppc_altivec_vmulesw,
+                          v2i64, v4i32>;
+def VMULEUW : VX1_Int_Ty2<648, "vmuleuw", int_ppc_altivec_vmuleuw,
+                          v2i64, v4i32>;
+def VMULOSW : VX1_Int_Ty2<392, "vmulosw", int_ppc_altivec_vmulosw,
+                          v2i64, v4i32>;
+def VMULOUW : VX1_Int_Ty2<136, "vmulouw", int_ppc_altivec_vmulouw,
+                          v2i64, v4i32>;
+def VMULUWM : VX1_Int_Ty<137, "vmuluwm", int_ppc_altivec_vmuluwm,
+                         v4i32>;
+def VMAXSD : VX1_Int_Ty<450, "vmaxsd", int_ppc_altivec_vmaxsd, v2i64>;
+def VMAXUD : VX1_Int_Ty<194, "vmaxud", int_ppc_altivec_vmaxud, v2i64>;
+def VMINSD : VX1_Int_Ty<962, "vminsd", int_ppc_altivec_vminsd, v2i64>;
+def VMIDUD : VX1_Int_Ty<706, "vminud", int_ppc_altivec_vminud, v2i64>;
+} // isCommutable
+
+def VRLD : VX1_Int_Ty<196, "vrld", int_ppc_altivec_vrld, v2i64>;
+def VSLD : VX1_Int_Ty<1476, "vsld", int_ppc_altivec_vsld, v2i64>;
+def VSRD : VX1_Int_Ty<1732, "vsrd", int_ppc_altivec_vsrd, v2i64>;
+def VSRAD : VX1_Int_Ty<964, "vsrad", int_ppc_altivec_vsrad, v2i64>;
+
+
+// Vector Integer Arithmetic Instructions
+let isCommutable = 1 in {
+def VADDUDM : VXForm_1<192, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+                       "vaddudm $vD, $vA, $vB", IIC_VecGeneral,
+                       [(set v2i64:$vD, (add v2i64:$vA, v2i64:$vB))]>;
+} // isCommutable
+
+def VSUBUDM : VXForm_1<1216, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+                       "vsubudm $vD, $vA, $vB", IIC_VecGeneral,
+                       [(set v2i64:$vD, (sub v2i64:$vA, v2i64:$vB))]>;
+
 // Count Leading Zeros
 def VCLZB : VXForm_2<1794, (outs vrrc:$vD), (ins vrrc:$vB),
                     "vclzb $vD, $vB", IIC_VecGeneral,
@ -992,4 +1026,13 @@ def VORC : VXForm_1<1348, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                      "vorc $vD, $vA, $vB", IIC_VecGeneral,
                      [(set v4i32:$vD, (or v4i32:$vA,
                                           (vnot_ppc v4i32:$vB)))]>;
+
+// i64 element comparisons.
+def VCMPEQUD  : VCMP <199, "vcmpequd $vD, $vA, $vB" , v2i64>;
+def VCMPEQUDo : VCMPo<199, "vcmpequd. $vD, $vA, $vB", v2i64>;
+def VCMPGTSD  : VCMP <967, "vcmpgtsd $vD, $vA, $vB" , v2i64>;
+def VCMPGTSDo : VCMPo<967, "vcmpgtsd. $vD, $vA, $vB", v2i64>;
+def VCMPGTUD  : VCMP <711, "vcmpgtud $vD, $vA, $vB" , v2i64>;
+def VCMPGTUDo : VCMPo<711, "vcmpgtud. $vD, $vA, $vB", v2i64>;
+
 } // end HasP8Altivec
--- a/lib/Target/PowerPC/PPCSchedule.td
+++ b/lib/Target/PowerPC/PPCSchedule.td
@ -381,6 +381,7 @@ include "PPCScheduleE5500.td"
 //    vaddsbs    IIC_VecGeneral
 //    vaddshs    IIC_VecGeneral
 //    vaddsws    IIC_VecGeneral
+//    vaddudm    IIC_VecGeneral
 //    vaddubm    IIC_VecGeneral
 //    vaddubs    IIC_VecGeneral
 //    vadduhm    IIC_VecGeneral
@ -402,14 +403,17 @@ include "PPCScheduleE5500.td"
 //    vcmpequb   IIC_VecGeneral
 //    vcmpequh   IIC_VecGeneral
 //    vcmpequw   IIC_VecGeneral
+//    vcmpequd   IIC_VecGeneral
 //    vcmpgefp   IIC_VecFPCompare
 //    vcmpgtfp   IIC_VecFPCompare
 //    vcmpgtsb   IIC_VecGeneral
 //    vcmpgtsh   IIC_VecGeneral
 //    vcmpgtsw   IIC_VecGeneral
+//    vcmpgtsd   IIC_VecGeneral
 //    vcmpgtub   IIC_VecGeneral
 //    vcmpgtuh   IIC_VecGeneral
 //    vcmpgtuw   IIC_VecGeneral
+//    vcmpgtud   IIC_VecGeneral
 //    vctsxs     IIC_VecFP
 //    vctuxs     IIC_VecFP
 //    vexptefp   IIC_VecFP
@ -419,18 +423,22 @@ include "PPCScheduleE5500.td"
 //    vmaxsb     IIC_VecGeneral
 //    vmaxsh     IIC_VecGeneral
 //    vmaxsw     IIC_VecGeneral
+//    vmaxsd     IIC_VecGeneral
 //    vmaxub     IIC_VecGeneral
 //    vmaxuh     IIC_VecGeneral
 //    vmaxuw     IIC_VecGeneral
+//    vmaxud     IIC_VecGeneral
 //    vmhaddshs  IIC_VecComplex
 //    vmhraddshs IIC_VecComplex
 //    vminfp     IIC_VecFPCompare
 //    vminsb     IIC_VecGeneral
 //    vminsh     IIC_VecGeneral
 //    vminsw     IIC_VecGeneral
+//    vminsd     IIC_VecGeneral
 //    vminub     IIC_VecGeneral
 //    vminuh     IIC_VecGeneral
 //    vminuw     IIC_VecGeneral
+//    vminud     IIC_VecGeneral
 //    vmladduhm  IIC_VecComplex
 //    vmrghb     IIC_VecPerm
 //    vmrghh     IIC_VecPerm
@ -447,12 +455,17 @@ include "PPCScheduleE5500.td"
 //    vmsumuhs   IIC_VecComplex
 //    vmulesb    IIC_VecComplex
 //    vmulesh    IIC_VecComplex
+//    vmulesw    IIC_VecComplex
 //    vmuleub    IIC_VecComplex
 //    vmuleuh    IIC_VecComplex
+//    vmuleuw    IIC_VecComplex
 //    vmulosb    IIC_VecComplex
 //    vmulosh    IIC_VecComplex
+//    vmulosw    IIC_VecComplex
 //    vmuloub    IIC_VecComplex
 //    vmulouh    IIC_VecComplex
+//    vmulouw    IIC_VecComplex
+//    vmuluwm    IIC_VecComplex
 //    vnor       IIC_VecGeneral
 //    vor        IIC_VecGeneral
 //    vperm      IIC_VecPerm
@ -501,6 +514,7 @@ include "PPCScheduleE5500.td"
 //    vsubshs    IIC_VecGeneral
 //    vsubsws    IIC_VecGeneral
 //    vsububm    IIC_VecGeneral
+//    vsubudm    IIC_VecGeneral
 //    vsububs    IIC_VecGeneral
 //    vsubuhm    IIC_VecGeneral
 //    vsubuhs    IIC_VecGeneral
--- a/lib/Target/PowerPC/README_ALTIVEC.txt
+++ b/lib/Target/PowerPC/README_ALTIVEC.txt
@ -209,3 +209,29 @@ vector float f(vector float a, vector float b) {
    return b; 
 }

+//===----------------------------------------------------------------------===//
+
+The following example is found in test/CodeGen/PowerPC/vec_add_sub_doubleword.ll:
+
+define <2 x i64> @increment_by_val(<2 x i64> %x, i64 %val) nounwind {
+       %tmpvec = insertelement <2 x i64> <i64 0, i64 0>, i64 %val, i32 0
+       %tmpvec2 = insertelement <2 x i64> %tmpvec, i64 %val, i32 1
+       %result = add <2 x i64> %x, %tmpvec2
+       ret <2 x i64> %result
+
+This will generate the following instruction sequence:
+        std 5, -8(1)
+        std 5, -16(1)
+        addi 3, 1, -16
+        ori 2, 2, 0
+        lxvd2x 35, 0, 3
+        vaddudm 2, 2, 3
+        blr
+
+This will almost certainly cause a load-hit-store hazard.  
+Since val is a value parameter, it should not need to be saved onto
+the stack, unless it's being done set up the vector register. Instead,
+it would be better to splat teh value into a vector register, and then
+remove the (dead) stores to the stack.
+
+
--- a/test/CodeGen/PowerPC/vec_add_sub_doubleword.ll
+++ b/test/CodeGen/PowerPC/vec_add_sub_doubleword.ll
@ -0,0 +1,62 @@
+; Check VMX 64-bit integer operations
+;
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
+
+define <2 x i64> @test_add(<2 x i64> %x, <2 x i64> %y) nounwind {
+       %result = add <2 x i64> %x, %y
+       ret <2 x i64> %result
+; CHECK: vaddudm 2, 2, 3
+}
+
+define <2 x i64> @increment_by_one(<2 x i64> %x) nounwind {
+       %result = add <2 x i64> %x, <i64 1, i64 1>
+       ret <2 x i64> %result
+; CHECK vaddudm 2, 2, 3
+}
+
+define <2 x i64> @increment_by_val(<2 x i64> %x, i64 %val) nounwind {
+       %tmpvec = insertelement <2 x i64> <i64 0, i64 0>, i64 %val, i32 0
+       %tmpvec2 = insertelement <2 x i64> %tmpvec, i64 %val, i32 1
+       %result = add <2 x i64> %x, %tmpvec2
+       ret <2 x i64> %result
+; CHECK: vaddudm 2, 2, 3
+; FIXME: This is currently generating the following instruction sequence
+;
+;        std 5, -8(1)
+;        std 5, -16(1)
+;        addi 3, 1, -16
+;        ori 2, 2, 0
+;        lxvd2x 35, 0, 3
+;        vaddudm 2, 2, 3
+;        blr
+;        
+;        This will almost certainly cause a load-hit-store hazard.
+;        Since val is a value parameter, it should not need to be
+;        saved onto the stack at all (unless we're using this to set
+;        up the vector register). Instead, it would be better to splat
+;        the value into a vector register.
+}
+
+define <2 x i64> @test_sub(<2 x i64> %x, <2 x i64> %y) nounwind {
+       %result = sub <2 x i64> %x, %y
+       ret <2 x i64> %result
+; CHECK: vsubudm 2, 2, 3
+}
+
+define <2 x i64> @decrement_by_one(<2 x i64> %x) nounwind {
+       %result = sub <2 x i64> %x, <i64 -1, i64 -1>
+       ret <2 x i64> %result
+; CHECK vsubudm 2, 2, 3
+}
+
+define <2 x i64> @decrement_by_val(<2 x i64> %x, i64 %val) nounwind {
+       %tmpvec = insertelement <2 x i64> <i64 0, i64 0>, i64 %val, i32 0
+       %tmpvec2 = insertelement <2 x i64> %tmpvec, i64 %val, i32 1
+       %result = sub <2 x i64> %x, %tmpvec2
+       ret <2 x i64> %result
+; CHECK vsubudm 2, 2, 3
+}
+
+
+
--- a/test/CodeGen/PowerPC/vec_cmpd.ll
+++ b/test/CodeGen/PowerPC/vec_cmpd.ll
@ -0,0 +1,258 @@
+; Test the doubleword comparison instructions that were added in POWER8
+;
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
+
+define <2 x i64> @v2si64_cmp(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
+       %cmp = icmp eq <2 x i64> %x, %y
+       %result = sext <2 x i1> %cmp to <2 x i64>
+       ret <2 x i64> %result
+; CHECK-LABEL: v2si64_cmp:
+; CHECK: vcmpequd 2, 2, 3
+}
+
+define <4 x i64> @v4si64_cmp(<4 x i64> %x, <4 x i64> %y) nounwind readnone {
+       %cmp = icmp eq <4 x i64> %x, %y
+       %result = sext <4 x i1> %cmp to <4 x i64>
+       ret <4 x i64> %result
+; CHECK-LABEL: v4si64_cmp
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define <8 x i64> @v8si64_cmp(<8 x i64> %x, <8 x i64> %y) nounwind readnone {
+       %cmp = icmp eq <8 x i64> %x, %y
+       %result = sext <8 x i1> %cmp to <8 x i64>
+       ret <8 x i64> %result
+; CHECK-LABEL: v8si64_cmp
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define <16 x i64> @v16si64_cmp(<16 x i64> %x, <16 x i64> %y) nounwind readnone {
+       %cmp = icmp eq <16 x i64> %x, %y
+       %result = sext <16 x i1> %cmp to <16 x i64>
+       ret <16 x i64> %result
+; CHECK-LABEL: v16si64_cmp
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define <32 x i64> @v32si64_cmp(<32 x i64> %x, <32 x i64> %y) nounwind readnone {
+       %cmp = icmp eq <32 x i64> %x, %y
+       %result = sext <32 x i1> %cmp to <32 x i64>
+       ret <32 x i64> %result
+; CHECK-LABEL: v32si64_cmp
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+; Greater than signed
+define <2 x i64> @v2si64_cmp_gt(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
+       %cmp = icmp sgt <2 x i64> %x, %y
+       %result = sext <2 x i1> %cmp to <2 x i64>
+       ret <2 x i64> %result
+; CHECK-LABEL: v2si64_cmp_gt
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define <4 x i64> @v4si64_cmp_gt(<4 x i64> %x, <4 x i64> %y) nounwind readnone {
+       %cmp = icmp sgt <4 x i64> %x, %y
+       %result = sext <4 x i1> %cmp to <4 x i64>
+       ret <4 x i64> %result
+; CHECK-LABEL: v4si64_cmp_gt
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define <8 x i64> @v8si64_cmp_gt(<8 x i64> %x, <8 x i64> %y) nounwind readnone {
+       %cmp = icmp sgt <8 x i64> %x, %y
+       %result = sext <8 x i1> %cmp to <8 x i64>
+       ret <8 x i64> %result
+; CHECK-LABEL: v8si64_cmp_gt
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define <16 x i64> @v16si64_cmp_gt(<16 x i64> %x, <16 x i64> %y) nounwind readnone {
+       %cmp = icmp sgt <16 x i64> %x, %y
+       %result = sext <16 x i1> %cmp to <16 x i64>
+       ret <16 x i64> %result
+; CHECK-LABEL: v16si64_cmp_gt
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define <32 x i64> @v32si64_cmp_gt(<32 x i64> %x, <32 x i64> %y) nounwind readnone {
+       %cmp = icmp sgt <32 x i64> %x, %y
+       %result = sext <32 x i1> %cmp to <32 x i64>
+       ret <32 x i64> %result
+; CHECK-LABEL: v32si64_cmp_gt
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+; Greater than unsigned
+define <2 x i64> @v2ui64_cmp_gt(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
+       %cmp = icmp ugt <2 x i64> %x, %y
+       %result = sext <2 x i1> %cmp to <2 x i64>
+       ret <2 x i64> %result
+; CHECK-LABEL: v2ui64_cmp_gt
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define <4 x i64> @v4ui64_cmp_gt(<4 x i64> %x, <4 x i64> %y) nounwind readnone {
+       %cmp = icmp ugt <4 x i64> %x, %y
+       %result = sext <4 x i1> %cmp to <4 x i64>
+       ret <4 x i64> %result
+; CHECK-LABEL: v4ui64_cmp_gt
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define <8 x i64> @v8ui64_cmp_gt(<8 x i64> %x, <8 x i64> %y) nounwind readnone {
+       %cmp = icmp ugt <8 x i64> %x, %y
+       %result = sext <8 x i1> %cmp to <8 x i64>
+       ret <8 x i64> %result
+; CHECK-LABEL: v8ui64_cmp_gt
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define <16 x i64> @v16ui64_cmp_gt(<16 x i64> %x, <16 x i64> %y) nounwind readnone {
+       %cmp = icmp ugt <16 x i64> %x, %y
+       %result = sext <16 x i1> %cmp to <16 x i64>
+       ret <16 x i64> %result
+; CHECK-LABEL: v16ui64_cmp_gt
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define <32 x i64> @v32ui64_cmp_gt(<32 x i64> %x, <32 x i64> %y) nounwind readnone {
+       %cmp = icmp ugt <32 x i64> %x, %y
+       %result = sext <32 x i1> %cmp to <32 x i64>
+       ret <32 x i64> %result
+; CHECK-LABEL: v32ui64_cmp_gt
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+; Check the intrinsics also
+declare <2 x i64> @llvm.ppc.altivec.vcmpequd(<2 x i64>, <2 x i64>) nounwind readnone
+declare i32 @llvm.ppc.altivec.vcmpequd.p(i32, <2 x i64>, <2 x i64>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vcmpgtsd(<2 x i64>, <2 x i64>) nounwind readnone
+declare i32 @llvm.ppc.altivec.vcmpgtsd.p(i32, <2 x i64>, <2 x i64>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vcmpgtud(<2 x i64>, <2 x i64>) nounwind readnone
+declare i32 @llvm.ppc.altivec.vcmpgtud.p(i32, <2 x i64>, <2 x i64>) nounwind readnone
+
+define <2 x i64> @test_vcmpequd(<2 x i64> %x, <2 x i64> %y) {
+       %tmp = tail call <2 x i64> @llvm.ppc.altivec.vcmpequd(<2 x i64> %x, <2 x i64> %y)
+       ret <2 x i64> %tmp
+; CHECK-LABEL: test_vcmpequd:
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define i32 @test_vcmpequd_p(<2 x i64> %x, <2 x i64> %y) {
+       %tmp = tail call i32 @llvm.ppc.altivec.vcmpequd.p(i32 2, <2 x i64> %x, <2 x i64> %y)
+       ret i32 %tmp
+; CHECK-LABEL: test_vcmpequd_p:
+; CHECK: vcmpequd. {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define <2 x i64> @test_vcmpgtsd(<2 x i64> %x, <2 x i64> %y) {
+       %tmp = tail call <2 x i64> @llvm.ppc.altivec.vcmpgtsd(<2 x i64> %x, <2 x i64> %y)
+       ret <2 x i64> %tmp
+; CHECK-LABEL: test_vcmpgtsd
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define i32 @test_vcmpgtsd_p(<2 x i64> %x, <2 x i64> %y) {
+       %tmp = tail call i32 @llvm.ppc.altivec.vcmpgtsd.p(i32 2, <2 x i64> %x, <2 x i64> %y)
+       ret i32 %tmp
+; CHECK-LABEL: test_vcmpgtsd_p
+; CHECK: vcmpgtsd. {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define <2 x i64> @test_vcmpgtud(<2 x i64> %x, <2 x i64> %y) {
+       %tmp = tail call <2 x i64> @llvm.ppc.altivec.vcmpgtud(<2 x i64> %x, <2 x i64> %y)
+       ret <2 x i64> %tmp
+; CHECK-LABEL: test_vcmpgtud
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define i32 @test_vcmpgtud_p(<2 x i64> %x, <2 x i64> %y) {
+       %tmp = tail call i32 @llvm.ppc.altivec.vcmpgtud.p(i32 2, <2 x i64> %x, <2 x i64> %y)
+       ret i32 %tmp
+; CHECK-LABEL: test_vcmpgtud_p
+; CHECK: vcmpgtud. {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+
+
+
--- a/test/CodeGen/PowerPC/vec_minmax.ll
+++ b/test/CodeGen/PowerPC/vec_minmax.ll
@ -0,0 +1,34 @@
+; Test the vector min/max doubleword instructions added for P8
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
+
+declare <2 x i64> @llvm.ppc.altivec.vmaxsd(<2 x i64>, <2 x i64>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vmaxud(<2 x i64>, <2 x i64>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vminsd(<2 x i64>, <2 x i64>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vminud(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <2 x i64> @test_vmaxsd(<2 x i64> %x, <2 x i64> %y) {
+       %tmp = tail call <2 x i64> @llvm.ppc.altivec.vmaxsd(<2 x i64> %x, <2 x i64> %y)
+       ret <2 x i64> %tmp
+; CHECK: vmaxsd 2, 2, 3
+}
+
+define <2 x i64> @test_vmaxud(<2 x i64> %x, <2 x i64> %y) {
+       %tmp = tail call <2 x i64> @llvm.ppc.altivec.vmaxud(<2 x i64> %x, <2 x i64> %y)
+       ret <2 x i64> %tmp
+; CHECK: vmaxud 2, 2, 3
+}
+
+define <2 x i64> @test_vminsd(<2 x i64> %x, <2 x i64> %y) {
+       %tmp = tail call <2 x i64> @llvm.ppc.altivec.vminsd(<2 x i64> %x, <2 x i64> %y)
+       ret <2 x i64> %tmp
+; CHECK: vminsd 2, 2, 3
+}
+
+define <2 x i64> @test_vminud(<2 x i64> %x, <2 x i64> %y) {
+       %tmp = tail call <2 x i64> @llvm.ppc.altivec.vminud(<2 x i64> %x, <2 x i64> %y)
+       ret <2 x i64> %tmp
+; CHECK: vminud 2, 2, 3
+}
+
+
--- a/test/CodeGen/PowerPC/vec_mul_even_odd.ll
+++ b/test/CodeGen/PowerPC/vec_mul_even_odd.ll
@ -0,0 +1,41 @@
+; Check the vector multiply even/odd word instructions that were added in P8
+;
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
+
+declare <2 x i64> @llvm.ppc.altivec.vmuleuw(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vmulesw(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vmulouw(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vmulosw(<4 x i32>, <4 x i32>) nounwind readnone
+declare <4 x i32> @llvm.ppc.altivec.vmuluwm(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_vmuleuw(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+       %tmp = tail call <2 x i64> @llvm.ppc.altivec.vmuleuw(<4 x i32> %x, <4 x i32> %y)
+       ret <2 x i64> %tmp
+; CHECK: vmuleuw 2, 2, 3
+}
+
+define <2 x i64> @test_vmulesw(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+       %tmp = tail call <2 x i64> @llvm.ppc.altivec.vmulesw(<4 x i32> %x, <4 x i32> %y)
+       ret <2 x i64> %tmp
+; CHECK: vmulesw 2, 2, 3
+}
+
+define <2 x i64> @test_vmulouw(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+       %tmp = tail call <2 x i64> @llvm.ppc.altivec.vmulouw(<4 x i32> %x, <4 x i32> %y)
+       ret <2 x i64> %tmp
+; CHECK: vmulouw 2, 2, 3
+}
+
+define <2 x i64> @test_vmulosw(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+       %tmp = tail call <2 x i64> @llvm.ppc.altivec.vmulosw(<4 x i32> %x, <4 x i32> %y)
+       ret <2 x i64> %tmp
+; CHECK: vmulosw 2, 2, 3
+}
+
+define <4 x i32> @test_vmuluwm(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+       %tmp = tail call <4 x i32> @llvm.ppc.altivec.vmuluwm(<4 x i32> %x, <4 x i32> %y)
+       ret <4 x i32> %tmp
+; CHECK: vmuluwm 2, 2, 3
+}
+
--- a/test/CodeGen/PowerPC/vec_rotate_shift.ll
+++ b/test/CodeGen/PowerPC/vec_rotate_shift.ll
@ -0,0 +1,33 @@
+; Test the vector rotate and shift doubleword instructions that were added in P8
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
+
+declare <2 x i64> @llvm.ppc.altivec.vrld(<2 x i64>, <2 x i64>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vsld(<2 x i64>, <2 x i64>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vsrd(<2 x i64>, <2 x i64>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vsrad(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <2 x i64> @test_vrld(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
+       %tmp = tail call <2 x i64> @llvm.ppc.altivec.vrld(<2 x i64> %x, <2 x i64> %y)
+       ret <2 x i64> %tmp
+; CHECK: vrld 2, 2, 3
+}
+
+define <2 x i64> @test_vsld(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
+       %tmp = tail call <2 x i64> @llvm.ppc.altivec.vsld(<2 x i64> %x, <2 x i64> %y)
+       ret <2 x i64> %tmp
+; CHECK: vsld 2, 2, 3
+}
+
+define <2 x i64> @test_vsrd(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
+       %tmp = tail call <2 x i64> @llvm.ppc.altivec.vsrd(<2 x i64> %x, <2 x i64> %y)
+       ret <2 x i64> %tmp
+; CHECK: vsrd 2, 2, 3
+}
+
+define <2 x i64> @test_vsrad(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
+       %tmp = tail call <2 x i64> @llvm.ppc.altivec.vsrad(<2 x i64> %x, <2 x i64> %y)
+       ret <2 x i64> %tmp
+; CHECK: vsrad 2, 2, 3
+}
+       
--- a/test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt
+++ b/test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt
@ -159,6 +159,9 @@
 # CHECK: vadduwm 2, 3, 4                 
 0x10 0x43 0x20 0x80

+# CHECK: vaddudm 2, 3, 4
+0x10 0x43 0x20 0xc0
+
 # CHECK: vaddubs 2, 3, 4                 
 0x10 0x43 0x22 0x00

@ -189,6 +192,9 @@
 # CHECK: vsubuwm 2, 3, 4                 
 0x10 0x43 0x24 0x80

+# CHECK: vsubudm 2, 3, 4
+0x10 0x43 0x24 0xc0
+
 # CHECK: vsububs 2, 3, 4                 
 0x10 0x43 0x26 0x00

@ -204,24 +210,39 @@
 # CHECK: vmulesh 2, 3, 4                 
 0x10 0x43 0x23 0x48

+# CHECK: vmulesw 2, 3, 4
+0x10 0x43 0x23 0x88
+
 # CHECK: vmuleub 2, 3, 4                 
 0x10 0x43 0x22 0x08

 # CHECK: vmuleuh 2, 3, 4                 
 0x10 0x43 0x22 0x48

+# CHECK: vmuleuw 2, 3, 4
+0x10 0x43 0x22 0x88
+
 # CHECK: vmulosb 2, 3, 4                 
 0x10 0x43 0x21 0x08

 # CHECK: vmulosh 2, 3, 4                 
 0x10 0x43 0x21 0x48

+# CHECK: vmulosw 2, 3, 4
+0x10 0x43 0x21 0x88
+
 # CHECK: vmuloub 2, 3, 4                 
 0x10 0x43 0x20 0x08

 # CHECK: vmulouh 2, 3, 4                 
 0x10 0x43 0x20 0x48

+# CHECK: vmulouw 2, 3, 4
+0x10 0x43 0x20 0x88
+
+# CHECK: vmuluwm 2, 3, 4
+0x10 0x43 0x20 0x89
+
 # CHECK: vmhaddshs 2, 3, 4, 5            
 0x10 0x43 0x21 0x60

@ -291,6 +312,9 @@
 # CHECK: vmaxsw 2, 3, 4                  
 0x10 0x43 0x21 0x82

+# CHECK: vmaxsd 2, 3, 4
+0x10 0x43 0x21 0xc2
+
 # CHECK: vmaxub 2, 3, 4                  
 0x10 0x43 0x20 0x02

@ -300,6 +324,9 @@
 # CHECK: vmaxuw 2, 3, 4                  
 0x10 0x43 0x20 0x82

+# CHECK: vmaxud 2, 3, 4
+0x10 0x43 0x20 0xc2
+
 # CHECK: vminsb 2, 3, 4                  
 0x10 0x43 0x23 0x02

@ -309,6 +336,9 @@
 # CHECK: vminsw 2, 3, 4                  
 0x10 0x43 0x23 0x82

+# CHECK: vminsd 2, 3, 4
+0x10 0x43 0x23 0xc2
+
 # CHECK: vminub 2, 3, 4                  
 0x10 0x43 0x22 0x02

@ -318,6 +348,9 @@
 # CHECK: vminuw 2, 3, 4                  
 0x10 0x43 0x22 0x82

+# CHECK: vminud 2, 3, 4
+0x10 0x43 0x22 0xc2
+
 # CHECK: vcmpequb 2, 3, 4                
 0x10 0x43 0x20 0x06

@ -336,6 +369,12 @@
 # CHECK: vcmpequw. 2, 3, 4               
 0x10 0x43 0x24 0x86

+# CHECK: vcmpequd 2, 3, 4
+0x10 0x43 0x20 0xc7
+
+# CHECK: vcmpequd. 2, 3, 4
+0x10 0x43 0x24 0xc7
+
 # CHECK: vcmpgtsb 2, 3, 4                
 0x10 0x43 0x23 0x06

@ -354,6 +393,12 @@
 # CHECK: vcmpgtsw. 2, 3, 4               
 0x10 0x43 0x27 0x86

+# CHECK: vcmpgtsd 2, 3, 4
+0x10 0x43 0x23 0xc7
+
+# CHECK: vcmpgtsd. 2, 3, 4
+0x10 0x43 0x27 0xc7
+
 # CHECK: vcmpgtub 2, 3, 4                
 0x10 0x43 0x22 0x06

@ -372,6 +417,12 @@
 # CHECK: vcmpgtuw. 2, 3, 4               
 0x10 0x43 0x26 0x86

+# CHECK: vcmpgtud 2, 3, 4
+0x10 0x43 0x22 0xc7
+
+# CHECK: vcmpgtud. 2, 3, 4
+0x10 0x43 0x26 0xc7
+
 # CHECK: vand 2, 3, 4                    
 0x10 0x43 0x24 0x04

@ -414,6 +465,9 @@
 # CHECK: vslw 2, 3, 4                    
 0x10 0x43 0x21 0x84

+# CHECK: vrld 2, 3, 4
+0x10 0x43 0x20 0xc4
+
 # CHECK: vsrb 2, 3, 4                    
 0x10 0x43 0x22 0x04

@ -423,6 +477,9 @@
 # CHECK: vsrw 2, 3, 4                    
 0x10 0x43 0x22 0x84

+# CHECK: vsrd 2, 3, 4
+0x10 0x43 0x26 0xc4
+
 # CHECK: vsrab 2, 3, 4                   
 0x10 0x43 0x23 0x04

@ -432,6 +489,9 @@
 # CHECK: vsraw 2, 3, 4                   
 0x10 0x43 0x23 0x84

+# CHECK: vsrad 2, 3, 4
+0x10 0x43 0x23 0xc4
+
 # CHECK: vaddfp 2, 3, 4                  
 0x10 0x43 0x20 0x0a

--- a/test/MC/PowerPC/ppc64-encoding-vmx.s
+++ b/test/MC/PowerPC/ppc64-encoding-vmx.s
@ -176,6 +176,9 @@
 # CHECK-BE: vadduwm 2, 3, 4                 # encoding: [0x10,0x43,0x20,0x80]
 # CHECK-LE: vadduwm 2, 3, 4                 # encoding: [0x80,0x20,0x43,0x10]
            vadduwm 2, 3, 4
+# CHECK-BE: vaddudm 2, 3, 4                 # encoding: [0x10,0x43,0x20,0xc0]
+# CHECK-LE: vaddudm 2, 3, 4                 # encoding: [0xc0,0x20,0x43,0x10]
+            vaddudm 2, 3, 4
 # CHECK-BE: vaddubs 2, 3, 4                 # encoding: [0x10,0x43,0x22,0x00]
 # CHECK-LE: vaddubs 2, 3, 4                 # encoding: [0x00,0x22,0x43,0x10]
            vaddubs 2, 3, 4
@ -207,6 +210,9 @@
 # CHECK-BE: vsubuwm 2, 3, 4                 # encoding: [0x10,0x43,0x24,0x80]
 # CHECK-LE: vsubuwm 2, 3, 4                 # encoding: [0x80,0x24,0x43,0x10]
            vsubuwm 2, 3, 4
+# CHECK-BE: vsubudm 2, 3, 4                 # encoding: [0x10,0x43,0x24,0xc0]
+# CHECK-LE: vsubudm 2, 3, 4                 # encoding: [0xc0,0x24,0x43,0x10]
+            vsubudm 2, 3, 4
 # CHECK-BE: vsububs 2, 3, 4                 # encoding: [0x10,0x43,0x26,0x00]
 # CHECK-LE: vsububs 2, 3, 4                 # encoding: [0x00,0x26,0x43,0x10]
            vsububs 2, 3, 4
@ -223,24 +229,39 @@
 # CHECK-BE: vmulesh 2, 3, 4                 # encoding: [0x10,0x43,0x23,0x48]
 # CHECK-LE: vmulesh 2, 3, 4                 # encoding: [0x48,0x23,0x43,0x10]
            vmulesh 2, 3, 4
+# CHECK-BE: vmulesw 2, 3, 4                 # encoding: [0x10,0x43,0x23,0x88]
+# CHECK-LE: vmulesw 2, 3, 4                 # encoding: [0x88,0x23,0x43,0x10]
+            vmulesw 2, 3, 4
 # CHECK-BE: vmuleub 2, 3, 4                 # encoding: [0x10,0x43,0x22,0x08]
 # CHECK-LE: vmuleub 2, 3, 4                 # encoding: [0x08,0x22,0x43,0x10]
            vmuleub 2, 3, 4
 # CHECK-BE: vmuleuh 2, 3, 4                 # encoding: [0x10,0x43,0x22,0x48]
 # CHECK-LE: vmuleuh 2, 3, 4                 # encoding: [0x48,0x22,0x43,0x10]
            vmuleuh 2, 3, 4
+# CHECK-BE: vmuleuw 2, 3, 4                 # encoding: [0x10,0x43,0x22,0x88]
+# CHECK-LE: vmuleuw 2, 3, 4                 # encoding: [0x88,0x22,0x43,0x10]
+            vmuleuw 2, 3, 4
 # CHECK-BE: vmulosb 2, 3, 4                 # encoding: [0x10,0x43,0x21,0x08]
 # CHECK-LE: vmulosb 2, 3, 4                 # encoding: [0x08,0x21,0x43,0x10]
            vmulosb 2, 3, 4
 # CHECK-BE: vmulosh 2, 3, 4                 # encoding: [0x10,0x43,0x21,0x48]
 # CHECK-LE: vmulosh 2, 3, 4                 # encoding: [0x48,0x21,0x43,0x10]
            vmulosh 2, 3, 4
+# CHECK-BE: vmulosw 2, 3, 4                 # encoding: [0x10,0x43,0x21,0x88]
+# CHECK-LE: vmulosw 2, 3, 4                 # encoding: [0x88,0x21,0x43,0x10]
+            vmulosw 2, 3, 4
 # CHECK-BE: vmuloub 2, 3, 4                 # encoding: [0x10,0x43,0x20,0x08]
 # CHECK-LE: vmuloub 2, 3, 4                 # encoding: [0x08,0x20,0x43,0x10]
            vmuloub 2, 3, 4
 # CHECK-BE: vmulouh 2, 3, 4                 # encoding: [0x10,0x43,0x20,0x48]
 # CHECK-LE: vmulouh 2, 3, 4                 # encoding: [0x48,0x20,0x43,0x10]
            vmulouh 2, 3, 4
+# CHECK-BE: vmulouw 2, 3, 4                 # encoding: [0x10,0x43,0x20,0x88]
+# CHECK-LE: vmulouw 2, 3, 4                 # encoding: [0x88,0x20,0x43,0x10]
+            vmulouw 2, 3, 4
+# CHECK-BE: vmuluwm 2, 3, 4                 # encoding: [0x10,0x43,0x20,0x89]
+# CHECK-LE: vmuluwm 2, 3, 4                 # encoding: [0x89,0x20,0x43,0x10]
+            vmuluwm 2, 3, 4

 # CHECK-BE: vmhaddshs 2, 3, 4, 5            # encoding: [0x10,0x43,0x21,0x60]
 # CHECK-LE: vmhaddshs 2, 3, 4, 5            # encoding: [0x60,0x21,0x43,0x10]
@ -314,6 +335,9 @@
 # CHECK-BE: vmaxsw 2, 3, 4                  # encoding: [0x10,0x43,0x21,0x82]
 # CHECK-LE: vmaxsw 2, 3, 4                  # encoding: [0x82,0x21,0x43,0x10]
            vmaxsw 2, 3, 4
+# CHECK-BE: vmaxsd 2, 3, 4                  # encoding: [0x10,0x43,0x21,0xc2]
+# CHECK-LE: vmaxsd 2, 3, 4                    # encoding: [0xc2,0x21,0x43,0x10]
+            vmaxsd 2, 3, 4
 # CHECK-BE: vmaxub 2, 3, 4                  # encoding: [0x10,0x43,0x20,0x02]
 # CHECK-LE: vmaxub 2, 3, 4                  # encoding: [0x02,0x20,0x43,0x10]
            vmaxub 2, 3, 4
@ -323,7 +347,10 @@
 # CHECK-BE: vmaxuw 2, 3, 4                  # encoding: [0x10,0x43,0x20,0x82]
 # CHECK-LE: vmaxuw 2, 3, 4                  # encoding: [0x82,0x20,0x43,0x10]
            vmaxuw 2, 3, 4
-
+# CHECK-BE: vmaxud 2, 3, 4                  # encoding: [0x10,0x43,0x20,0xc2]
+# CHECK-LE: vmaxud 2, 3, 4                  # encoding: [0xc2,0x20,0x43,0x10]
+            vmaxud 2, 3, 4
+        
 # CHECK-BE: vminsb 2, 3, 4                  # encoding: [0x10,0x43,0x23,0x02]
 # CHECK-LE: vminsb 2, 3, 4                  # encoding: [0x02,0x23,0x43,0x10]
            vminsb 2, 3, 4
@ -333,6 +360,9 @@
 # CHECK-BE: vminsw 2, 3, 4                  # encoding: [0x10,0x43,0x23,0x82]
 # CHECK-LE: vminsw 2, 3, 4                  # encoding: [0x82,0x23,0x43,0x10]
            vminsw 2, 3, 4
+# CHECK-BE: vminsd 2, 3, 4                  # encoding: [0x10,0x43,0x23,0xc2]
+# CHECK-LE: vminsd 2, 3, 4                  # encoding: [0xc2,0x23,0x43,0x10]
+            vminsd 2, 3, 4
 # CHECK-BE: vminub 2, 3, 4                  # encoding: [0x10,0x43,0x22,0x02]
 # CHECK-LE: vminub 2, 3, 4                  # encoding: [0x02,0x22,0x43,0x10]
            vminub 2, 3, 4
@ -342,6 +372,9 @@
 # CHECK-BE: vminuw 2, 3, 4                  # encoding: [0x10,0x43,0x22,0x82]
 # CHECK-LE: vminuw 2, 3, 4                  # encoding: [0x82,0x22,0x43,0x10]
            vminuw 2, 3, 4
+# CHECK-BE: vminud 2, 3, 4                  # encoding: [0x10,0x43,0x22,0xc2]
+# CHECK-LE: vminud 2, 3, 4                  # encoding: [0xc2,0x22,0x43,0x10]
+            vminud 2, 3, 4

 # Vector integer compare instructions

@ -363,6 +396,12 @@
 # CHECK-BE: vcmpequw. 2, 3, 4               # encoding: [0x10,0x43,0x24,0x86]
 # CHECK-LE: vcmpequw. 2, 3, 4               # encoding: [0x86,0x24,0x43,0x10]
            vcmpequw. 2, 3, 4
+# CHECK-BE: vcmpequd 2, 3, 4                # encoding: [0x10,0x43,0x20,0xc7]
+# CHECK-LE: vcmpequd 2, 3, 4                # encoding: [0xc7,0x20,0x43,0x10]
+            vcmpequd 2, 3, 4
+# CHECK-BE: vcmpequd. 2, 3, 4               # encoding: [0x10,0x43,0x24,0xc7]
+# CHECK-LE: vcmpequd. 2, 3, 4               # encoding: [0xc7,0x24,0x43,0x10]
+            vcmpequd. 2, 3, 4
 # CHECK-BE: vcmpgtsb 2, 3, 4                # encoding: [0x10,0x43,0x23,0x06]
 # CHECK-LE: vcmpgtsb 2, 3, 4                # encoding: [0x06,0x23,0x43,0x10]
            vcmpgtsb 2, 3, 4
@ -381,6 +420,12 @@
 # CHECK-BE: vcmpgtsw. 2, 3, 4               # encoding: [0x10,0x43,0x27,0x86]
 # CHECK-LE: vcmpgtsw. 2, 3, 4               # encoding: [0x86,0x27,0x43,0x10]
            vcmpgtsw. 2, 3, 4
+# CHECK-BE: vcmpgtsd 2, 3, 4                # encoding: [0x10,0x43,0x23,0xc7]
+# CHECK-LE: vcmpgtsd 2, 3, 4                # encoding: [0xc7,0x23,0x43,0x10]
+            vcmpgtsd 2, 3, 4
+# CHECK-BE: vcmpgtsd. 2, 3, 4               # encoding: [0x10,0x43,0x27,0xc7]
+# CHECK-LE: vcmpgtsd. 2, 3, 4               # encoding: [0xc7,0x27,0x43,0x10]
+            vcmpgtsd. 2, 3, 4
 # CHECK-BE: vcmpgtub 2, 3, 4                # encoding: [0x10,0x43,0x22,0x06]
 # CHECK-LE: vcmpgtub 2, 3, 4                # encoding: [0x06,0x22,0x43,0x10]
            vcmpgtub 2, 3, 4
@ -399,7 +444,13 @@
 # CHECK-BE: vcmpgtuw. 2, 3, 4               # encoding: [0x10,0x43,0x26,0x86]
 # CHECK-LE: vcmpgtuw. 2, 3, 4               # encoding: [0x86,0x26,0x43,0x10]
            vcmpgtuw. 2, 3, 4
-
+# CHECK-BE: vcmpgtud 2, 3, 4                # encoding: [0x10,0x43,0x22,0xc7]
+# CHECK-LE: vcmpgtud 2, 3, 4                # encoding: [0xc7,0x22,0x43,0x10]
+            vcmpgtud 2, 3, 4
+# CHECK-BE: vcmpgtud. 2, 3, 4               # encoding: [0x10,0x43,0x26,0xc7]
+# CHECK-LE: vcmpgtud. 2, 3, 4               # encoding: [0xc7,0x26,0x43,0x10]
+            vcmpgtud. 2, 3, 4
+        
 # Vector integer logical instructions

 # CHECK-BE: vand 2, 3, 4                    # encoding: [0x10,0x43,0x24,0x04]
@ -438,7 +489,9 @@
 # CHECK-BE: vrlw 2, 3, 4                    # encoding: [0x10,0x43,0x20,0x84]
 # CHECK-LE: vrlw 2, 3, 4                    # encoding: [0x84,0x20,0x43,0x10]
            vrlw 2, 3, 4
-
+# CHECK-BE: vrld 2, 3, 4                    # encoding: [0x10,0x43,0x20,0xc4]
+# CHECK-LE: vrld 2, 3, 4                    # encoding: [0xc4,0x20,0x43,0x10]
+            vrld 2, 3, 4
 # CHECK-BE: vslb 2, 3, 4                    # encoding: [0x10,0x43,0x21,0x04]
 # CHECK-LE: vslb 2, 3, 4                    # encoding: [0x04,0x21,0x43,0x10]
            vslb 2, 3, 4
@ -448,6 +501,9 @@
 # CHECK-BE: vslw 2, 3, 4                    # encoding: [0x10,0x43,0x21,0x84]
 # CHECK-LE: vslw 2, 3, 4                    # encoding: [0x84,0x21,0x43,0x10]
            vslw 2, 3, 4
+# CHECK-BE: vsld 2, 3, 4                    # encoding: [0x10,0x43,0x25,0xc4]
+# CHECK-LE: vsld 2, 3, 4                    # encoding: [0xc4,0x25,0x43,0x10]
+            vsld 2, 3, 4
 # CHECK-BE: vsrb 2, 3, 4                    # encoding: [0x10,0x43,0x22,0x04]
 # CHECK-LE: vsrb 2, 3, 4                    # encoding: [0x04,0x22,0x43,0x10]
            vsrb 2, 3, 4
@ -457,6 +513,9 @@
 # CHECK-BE: vsrw 2, 3, 4                    # encoding: [0x10,0x43,0x22,0x84]
 # CHECK-LE: vsrw 2, 3, 4                    # encoding: [0x84,0x22,0x43,0x10]
            vsrw 2, 3, 4
+# CHECK-BE: vsrd 2, 3, 4                    # encoding: [0x10,0x43,0x26,0xc4]
+# CHECK-LE: vsrd 2, 3, 4                    # encoding: [0xc4,0x26,0x43,0x10]
+            vsrd 2, 3, 4
 # CHECK-BE: vsrab 2, 3, 4                   # encoding: [0x10,0x43,0x23,0x04]
 # CHECK-LE: vsrab 2, 3, 4                   # encoding: [0x04,0x23,0x43,0x10]
            vsrab 2, 3, 4
@ -466,6 +525,9 @@
 # CHECK-BE: vsraw 2, 3, 4                   # encoding: [0x10,0x43,0x23,0x84]
 # CHECK-LE: vsraw 2, 3, 4                   # encoding: [0x84,0x23,0x43,0x10]
            vsraw 2, 3, 4
+# CHECK-BE: vsrad 2, 3, 4                   # encoding: [0x10,0x43,0x23,0xc4]
+# CHECK-LE: vsrad 2, 3, 4                   # encoding: [0xc4,0x23,0x43,0x10]
+            vsrad 2, 3, 4

 # Vector floating-point instructions

@ -576,16 +638,16 @@

 # CHECK-BE: vpopcnth 2, 3                   # encoding: [0x10,0x40,0x1f,0x43]
 # CHECK-LE: vpopcnth 2, 3                   # encoding: [0x43,0x1f,0x40,0x10]
-            vpopcnth 2, 3	
+            vpopcnth 2, 3

 # CHECK-BE: vpopcntw 2, 3                   # encoding: [0x10,0x40,0x1f,0x83]
 # CHECK-LE: vpopcntw 2, 3                   # encoding: [0x83,0x1f,0x40,0x10]
            vpopcntw 2, 3
-	
+        
 # BCHECK-BE: vpopcntd 2, 3                   # encoding: [0x10,0x40,0x1f,0xC3]
 # BCHECK-LE: vpopcntd 2, 3                   # encoding: [0xC3,0x1f,0x40,0x10]
 #            vpopcntd 2, 3
-	
+        
 # Vector status and control register instructions

 # CHECK-BE: mtvscr 2                        # encoding: [0x10,0x00,0x16,0x44]