1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 19:23:23 +01:00

Add the following 64-bit vector integer arithmetic instructions added in POWER8:

vaddudm
vsubudm
vmulesw
vmulosw
vmuleuw
vmulouw
vmuluwm
vmaxsd
vmaxud
vminsd
vminud
vcmpequd
vcmpequd.
vcmpgtsd
vcmpgtsd.
vcmpgtud
vcmpgtud.
vrld
vsld
vsrd
vsrad

Phabricator review: http://reviews.llvm.org/D7959

llvm-svn: 231115
This commit is contained in:
Kit Barton 2015-03-03 19:55:45 +00:00
parent 0dc6751111
commit 2e98937142
13 changed files with 768 additions and 18 deletions

View File

@ -94,6 +94,12 @@ class PowerPC_Vec_WWW_Intrinsic<string GCCIntSuffix>
[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
/// PowerPC_Vec_DDD_Intrinsic - A PowerPC intrinsic that takes two v2i64
/// (doubleword-element) vectors and returns one v2i64 vector. It is marked
/// IntrNoMem, i.e. these intrinsics have no side effects.
class PowerPC_Vec_DDD_Intrinsic<string GCCIntSuffix>
: PowerPC_Vec_Intrinsic<GCCIntSuffix,
[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
//===----------------------------------------------------------------------===//
// PowerPC VSX Intrinsic Class Definitions.
@ -198,7 +204,17 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
def int_ppc_altivec_vcmpgtfp : GCCBuiltin<"__builtin_altivec_vcmpgtfp">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
[IntrNoMem]>;
// Doubleword element compares: two v2i64 operands, v2i64 result, no memory
// access (IntrNoMem).
def int_ppc_altivec_vcmpequd : GCCBuiltin<"__builtin_altivec_vcmpequd">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpgtsd : GCCBuiltin<"__builtin_altivec_vcmpgtsd">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpgtud : GCCBuiltin<"__builtin_altivec_vcmpgtud">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpequw : GCCBuiltin<"__builtin_altivec_vcmpequw">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
@ -242,7 +258,17 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
def int_ppc_altivec_vcmpgtfp_p : GCCBuiltin<"__builtin_altivec_vcmpgtfp_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty],
[IntrNoMem]>;
// Predicate ("_p") forms of the doubleword compares: a leading i32 operand
// followed by two v2i64 vectors, producing an i32 result.
def int_ppc_altivec_vcmpequd_p : GCCBuiltin<"__builtin_altivec_vcmpequd_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v2i64_ty,llvm_v2i64_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpgtsd_p : GCCBuiltin<"__builtin_altivec_vcmpgtsd_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v2i64_ty,llvm_v2i64_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpgtud_p : GCCBuiltin<"__builtin_altivec_vcmpgtud_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v2i64_ty,llvm_v2i64_ty],
[IntrNoMem]>;
def int_ppc_altivec_vcmpequw_p : GCCBuiltin<"__builtin_altivec_vcmpequw_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4i32_ty,llvm_v4i32_ty],
[IntrNoMem]>;
@ -287,18 +313,22 @@ def int_ppc_altivec_vmaxfp : PowerPC_Vec_FFF_Intrinsic<"vmaxfp">;
def int_ppc_altivec_vmaxsb : PowerPC_Vec_BBB_Intrinsic<"vmaxsb">;
def int_ppc_altivec_vmaxsh : PowerPC_Vec_HHH_Intrinsic<"vmaxsh">;
def int_ppc_altivec_vmaxsw : PowerPC_Vec_WWW_Intrinsic<"vmaxsw">;
def int_ppc_altivec_vmaxsd : PowerPC_Vec_DDD_Intrinsic<"vmaxsd">;
def int_ppc_altivec_vmaxub : PowerPC_Vec_BBB_Intrinsic<"vmaxub">;
def int_ppc_altivec_vmaxuh : PowerPC_Vec_HHH_Intrinsic<"vmaxuh">;
def int_ppc_altivec_vmaxuw : PowerPC_Vec_WWW_Intrinsic<"vmaxuw">;
def int_ppc_altivec_vmaxud : PowerPC_Vec_DDD_Intrinsic<"vmaxud">;
// Vector minimum.
def int_ppc_altivec_vminfp : PowerPC_Vec_FFF_Intrinsic<"vminfp">;
def int_ppc_altivec_vminsb : PowerPC_Vec_BBB_Intrinsic<"vminsb">;
def int_ppc_altivec_vminsh : PowerPC_Vec_HHH_Intrinsic<"vminsh">;
def int_ppc_altivec_vminsw : PowerPC_Vec_WWW_Intrinsic<"vminsw">;
def int_ppc_altivec_vminsd : PowerPC_Vec_DDD_Intrinsic<"vminsd">;
def int_ppc_altivec_vminub : PowerPC_Vec_BBB_Intrinsic<"vminub">;
def int_ppc_altivec_vminuh : PowerPC_Vec_HHH_Intrinsic<"vminuh">;
def int_ppc_altivec_vminuw : PowerPC_Vec_WWW_Intrinsic<"vminuw">;
def int_ppc_altivec_vminud : PowerPC_Vec_DDD_Intrinsic<"vminud">;
// Saturating adds.
def int_ppc_altivec_vaddubs : PowerPC_Vec_BBB_Intrinsic<"vaddubs">;
@ -361,12 +391,18 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
def int_ppc_altivec_vmulesh : GCCBuiltin<"__builtin_altivec_vmulesh">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
def int_ppc_altivec_vmulesw : GCCBuiltin<"__builtin_altivec_vmulesw">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vmuleub : GCCBuiltin<"__builtin_altivec_vmuleub">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
def int_ppc_altivec_vmuleuh : GCCBuiltin<"__builtin_altivec_vmuleuh">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
def int_ppc_altivec_vmuleuw : GCCBuiltin<"__builtin_altivec_vmuleuw">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vmulosb : GCCBuiltin<"__builtin_altivec_vmulosb">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
@ -374,12 +410,21 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
def int_ppc_altivec_vmulosh : GCCBuiltin<"__builtin_altivec_vmulosh">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
def int_ppc_altivec_vmulosw : GCCBuiltin<"__builtin_altivec_vmulosw">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vmuloub : GCCBuiltin<"__builtin_altivec_vmuloub">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
def int_ppc_altivec_vmulouh : GCCBuiltin<"__builtin_altivec_vmulouh">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
def int_ppc_altivec_vmulouw : GCCBuiltin<"__builtin_altivec_vmulouw">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vmuluwm : GCCBuiltin<"__builtin_altivec_vmuluwm">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
// Vector Sum Instructions.
def int_ppc_altivec_vsumsws : GCCBuiltin<"__builtin_altivec_vsumsws">,
@ -473,6 +518,7 @@ def int_ppc_altivec_vslo : PowerPC_Vec_WWW_Intrinsic<"vslo">;
def int_ppc_altivec_vslb : PowerPC_Vec_BBB_Intrinsic<"vslb">;
def int_ppc_altivec_vslh : PowerPC_Vec_HHH_Intrinsic<"vslh">;
def int_ppc_altivec_vslw : PowerPC_Vec_WWW_Intrinsic<"vslw">;
def int_ppc_altivec_vsld : PowerPC_Vec_DDD_Intrinsic<"vsld">;
// Right Shifts.
def int_ppc_altivec_vsr : PowerPC_Vec_WWW_Intrinsic<"vsr">;
@ -481,14 +527,17 @@ def int_ppc_altivec_vsro : PowerPC_Vec_WWW_Intrinsic<"vsro">;
def int_ppc_altivec_vsrb : PowerPC_Vec_BBB_Intrinsic<"vsrb">;
def int_ppc_altivec_vsrh : PowerPC_Vec_HHH_Intrinsic<"vsrh">;
def int_ppc_altivec_vsrw : PowerPC_Vec_WWW_Intrinsic<"vsrw">;
def int_ppc_altivec_vsrd : PowerPC_Vec_DDD_Intrinsic<"vsrd">;
def int_ppc_altivec_vsrab : PowerPC_Vec_BBB_Intrinsic<"vsrab">;
def int_ppc_altivec_vsrah : PowerPC_Vec_HHH_Intrinsic<"vsrah">;
def int_ppc_altivec_vsraw : PowerPC_Vec_WWW_Intrinsic<"vsraw">;
def int_ppc_altivec_vsrad : PowerPC_Vec_DDD_Intrinsic<"vsrad">;
// Rotates.
def int_ppc_altivec_vrlb : PowerPC_Vec_BBB_Intrinsic<"vrlb">;
def int_ppc_altivec_vrlh : PowerPC_Vec_HHH_Intrinsic<"vrlh">;
def int_ppc_altivec_vrlw : PowerPC_Vec_WWW_Intrinsic<"vrlw">;
def int_ppc_altivec_vrld : PowerPC_Vec_DDD_Intrinsic<"vrld">;
let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
// Miscellaneous.

View File

@ -2105,7 +2105,7 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
// getVCmpInst: return the vector compare instruction for the specified
// vector type and condition code. Since this is for altivec specific code,
// only support the altivec types (v16i8, v8i16, v4i32, and v4f32).
// only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32).
static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
bool HasVSX, bool &Swap, bool &Negate) {
Swap = false;
@ -2184,6 +2184,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
return PPC::VCMPEQUH;
else if (VecVT == MVT::v4i32)
return PPC::VCMPEQUW;
else if (VecVT == MVT::v2i64)
return PPC::VCMPEQUD;
break;
case ISD::SETGT:
if (VecVT == MVT::v16i8)
@ -2192,6 +2194,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
return PPC::VCMPGTSH;
else if (VecVT == MVT::v4i32)
return PPC::VCMPGTSW;
else if (VecVT == MVT::v2i64)
return PPC::VCMPGTSD;
break;
case ISD::SETUGT:
if (VecVT == MVT::v16i8)
@ -2200,6 +2204,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
return PPC::VCMPGTUH;
else if (VecVT == MVT::v4i32)
return PPC::VCMPGTUW;
else if (VecVT == MVT::v2i64)
return PPC::VCMPGTUD;
break;
default:
break;

View File

@ -574,15 +574,20 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
// VSX v2i64 only supports non-arithmetic operations.
setOperationAction(ISD::ADD, MVT::v2i64, Expand);
setOperationAction(ISD::SUB, MVT::v2i64, Expand);
setOperationAction(ISD::SHL, MVT::v2i64, Expand);
setOperationAction(ISD::SRA, MVT::v2i64, Expand);
setOperationAction(ISD::SRL, MVT::v2i64, Expand);
setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
if (Subtarget.hasP8Altivec()) {
setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
}
else {
setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
// VSX v2i64 only supports non-arithmetic operations.
setOperationAction(ISD::ADD, MVT::v2i64, Expand);
setOperationAction(ISD::SUB, MVT::v2i64, Expand);
}
setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
@ -7027,7 +7032,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
/// altivec comparison. If it is, return true and fill in Opc/isDot with
/// information about the intrinsic.
static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
bool &isDot) {
bool &isDot, const PPCSubtarget &Subtarget) {
unsigned IntrinsicID =
cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
CompareOpc = -1;
@ -7040,29 +7045,83 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpequd_p:
if (Subtarget.hasP8Altivec()) {
CompareOpc = 199;
isDot = 1;
}
else
return false;
break;
case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtsd_p:
if (Subtarget.hasP8Altivec()) {
CompareOpc = 967;
isDot = 1;
}
else
return false;
break;
case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtud_p:
if (Subtarget.hasP8Altivec()) {
CompareOpc = 711;
isDot = 1;
}
else
return false;
break;
// Normal Comparisons.
case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpequd:
if (Subtarget.hasP8Altivec()) {
CompareOpc = 199;
isDot = 0;
}
else
return false;
break;
case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtsd:
if (Subtarget.hasP8Altivec()) {
CompareOpc = 967;
isDot = 0;
}
else
return false;
break;
case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtud:
if (Subtarget.hasP8Altivec()) {
CompareOpc = 711;
isDot = 0;
}
else
return false;
break;
}
return true;
}
@ -7076,7 +7135,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDLoc dl(Op);
int CompareOpc;
bool isDot;
if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
if (!getAltivecCompareInfo(Op, CompareOpc, isDot, Subtarget))
return SDValue(); // Don't custom lower most intrinsics.
// If this is a non-dot comparison, make the VCMP node and we are done.
@ -10166,7 +10225,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
getAltivecCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
assert(isDot && "Can't compare against a vector result!");
// If this is a comparison against something other than 0/1, then we know
@ -10279,14 +10338,17 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
case Intrinsic::ppc_altivec_vcmpequb_p:
case Intrinsic::ppc_altivec_vcmpequh_p:
case Intrinsic::ppc_altivec_vcmpequw_p:
case Intrinsic::ppc_altivec_vcmpequd_p:
case Intrinsic::ppc_altivec_vcmpgefp_p:
case Intrinsic::ppc_altivec_vcmpgtfp_p:
case Intrinsic::ppc_altivec_vcmpgtsb_p:
case Intrinsic::ppc_altivec_vcmpgtsh_p:
case Intrinsic::ppc_altivec_vcmpgtsw_p:
case Intrinsic::ppc_altivec_vcmpgtsd_p:
case Intrinsic::ppc_altivec_vcmpgtub_p:
case Intrinsic::ppc_altivec_vcmpgtuh_p:
case Intrinsic::ppc_altivec_vcmpgtuw_p:
case Intrinsic::ppc_altivec_vcmpgtud_p:
KnownZero = ~1U; // All bits but the low one are known to be zero.
break;
}

View File

@ -750,7 +750,7 @@ def VCMPGTSW : VCMP <902, "vcmpgtsw $vD, $vA, $vB" , v4i32>;
def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
let isCodeGenOnly = 1 in {
def V_SET0B : VXForm_setzero<1220, (outs vrrc:$vD), (ins),
"vxor $vD, $vD, $vD", IIC_VecFP,
@ -941,6 +941,40 @@ def : Pat<(v4f32 (fnearbyint v4f32:$vA)),
def HasP8Altivec : Predicate<"PPCSubTarget->hasP8Altivec()">;
let Predicates = [HasP8Altivec] in {
// POWER8 integer multiply and doubleword min/max instructions. The whole
// group is marked isCommutable.
let isCommutable = 1 in {
// Even/odd word multiplies: v4i32 operands, v2i64 result.
def VMULESW : VX1_Int_Ty2<904, "vmulesw", int_ppc_altivec_vmulesw,
v2i64, v4i32>;
def VMULEUW : VX1_Int_Ty2<648, "vmuleuw", int_ppc_altivec_vmuleuw,
v2i64, v4i32>;
def VMULOSW : VX1_Int_Ty2<392, "vmulosw", int_ppc_altivec_vmulosw,
v2i64, v4i32>;
def VMULOUW : VX1_Int_Ty2<136, "vmulouw", int_ppc_altivec_vmulouw,
v2i64, v4i32>;
// Word multiply with a v4i32 result.
def VMULUWM : VX1_Int_Ty<137, "vmuluwm", int_ppc_altivec_vmuluwm,
v4i32>;
// Doubleword (v2i64) signed/unsigned maximum and minimum.
def VMAXSD : VX1_Int_Ty<450, "vmaxsd", int_ppc_altivec_vmaxsd, v2i64>;
def VMAXUD : VX1_Int_Ty<194, "vmaxud", int_ppc_altivec_vmaxud, v2i64>;
def VMINSD : VX1_Int_Ty<962, "vminsd", int_ppc_altivec_vminsd, v2i64>;
// Record name now matches the "vminud" mnemonic (was misspelled VMIDUD).
def VMINUD : VX1_Int_Ty<706, "vminud", int_ppc_altivec_vminud, v2i64>;
} // isCommutable
def VRLD : VX1_Int_Ty<196, "vrld", int_ppc_altivec_vrld, v2i64>;
def VSLD : VX1_Int_Ty<1476, "vsld", int_ppc_altivec_vsld, v2i64>;
def VSRD : VX1_Int_Ty<1732, "vsrd", int_ppc_altivec_vsrd, v2i64>;
def VSRAD : VX1_Int_Ty<964, "vsrad", int_ppc_altivec_vsrad, v2i64>;
// Vector Integer Arithmetic Instructions
let isCommutable = 1 in {
def VADDUDM : VXForm_1<192, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vaddudm $vD, $vA, $vB", IIC_VecGeneral,
[(set v2i64:$vD, (add v2i64:$vA, v2i64:$vB))]>;
} // isCommutable
def VSUBUDM : VXForm_1<1216, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vsubudm $vD, $vA, $vB", IIC_VecGeneral,
[(set v2i64:$vD, (sub v2i64:$vA, v2i64:$vB))]>;
// Count Leading Zeros
def VCLZB : VXForm_2<1794, (outs vrrc:$vD), (ins vrrc:$vB),
"vclzb $vD, $vB", IIC_VecGeneral,
@ -992,4 +1026,13 @@ def VORC : VXForm_1<1348, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vorc $vD, $vA, $vB", IIC_VecGeneral,
[(set v4i32:$vD, (or v4i32:$vA,
(vnot_ppc v4i32:$vB)))]>;
// i64 element comparisons.
def VCMPEQUD : VCMP <199, "vcmpequd $vD, $vA, $vB" , v2i64>;
def VCMPEQUDo : VCMPo<199, "vcmpequd. $vD, $vA, $vB", v2i64>;
def VCMPGTSD : VCMP <967, "vcmpgtsd $vD, $vA, $vB" , v2i64>;
def VCMPGTSDo : VCMPo<967, "vcmpgtsd. $vD, $vA, $vB", v2i64>;
def VCMPGTUD : VCMP <711, "vcmpgtud $vD, $vA, $vB" , v2i64>;
def VCMPGTUDo : VCMPo<711, "vcmpgtud. $vD, $vA, $vB", v2i64>;
} // end HasP8Altivec

View File

@ -381,6 +381,7 @@ include "PPCScheduleE5500.td"
// vaddsbs IIC_VecGeneral
// vaddshs IIC_VecGeneral
// vaddsws IIC_VecGeneral
// vaddudm IIC_VecGeneral
// vaddubm IIC_VecGeneral
// vaddubs IIC_VecGeneral
// vadduhm IIC_VecGeneral
@ -402,14 +403,17 @@ include "PPCScheduleE5500.td"
// vcmpequb IIC_VecGeneral
// vcmpequh IIC_VecGeneral
// vcmpequw IIC_VecGeneral
// vcmpequd IIC_VecGeneral
// vcmpgefp IIC_VecFPCompare
// vcmpgtfp IIC_VecFPCompare
// vcmpgtsb IIC_VecGeneral
// vcmpgtsh IIC_VecGeneral
// vcmpgtsw IIC_VecGeneral
// vcmpgtsd IIC_VecGeneral
// vcmpgtub IIC_VecGeneral
// vcmpgtuh IIC_VecGeneral
// vcmpgtuw IIC_VecGeneral
// vcmpgtud IIC_VecGeneral
// vctsxs IIC_VecFP
// vctuxs IIC_VecFP
// vexptefp IIC_VecFP
@ -419,18 +423,22 @@ include "PPCScheduleE5500.td"
// vmaxsb IIC_VecGeneral
// vmaxsh IIC_VecGeneral
// vmaxsw IIC_VecGeneral
// vmaxsd IIC_VecGeneral
// vmaxub IIC_VecGeneral
// vmaxuh IIC_VecGeneral
// vmaxuw IIC_VecGeneral
// vmaxud IIC_VecGeneral
// vmhaddshs IIC_VecComplex
// vmhraddshs IIC_VecComplex
// vminfp IIC_VecFPCompare
// vminsb IIC_VecGeneral
// vminsh IIC_VecGeneral
// vminsw IIC_VecGeneral
// vminsd IIC_VecGeneral
// vminub IIC_VecGeneral
// vminuh IIC_VecGeneral
// vminuw IIC_VecGeneral
// vminud IIC_VecGeneral
// vmladduhm IIC_VecComplex
// vmrghb IIC_VecPerm
// vmrghh IIC_VecPerm
@ -447,12 +455,17 @@ include "PPCScheduleE5500.td"
// vmsumuhs IIC_VecComplex
// vmulesb IIC_VecComplex
// vmulesh IIC_VecComplex
// vmulesw IIC_VecComplex
// vmuleub IIC_VecComplex
// vmuleuh IIC_VecComplex
// vmuleuw IIC_VecComplex
// vmulosb IIC_VecComplex
// vmulosh IIC_VecComplex
// vmulosw IIC_VecComplex
// vmuloub IIC_VecComplex
// vmulouh IIC_VecComplex
// vmulouw IIC_VecComplex
// vmuluwm IIC_VecComplex
// vnor IIC_VecGeneral
// vor IIC_VecGeneral
// vperm IIC_VecPerm
@ -501,6 +514,7 @@ include "PPCScheduleE5500.td"
// vsubshs IIC_VecGeneral
// vsubsws IIC_VecGeneral
// vsububm IIC_VecGeneral
// vsubudm IIC_VecGeneral
// vsububs IIC_VecGeneral
// vsubuhm IIC_VecGeneral
// vsubuhs IIC_VecGeneral

View File

@ -209,3 +209,29 @@ vector float f(vector float a, vector float b) {
return b;
}
//===----------------------------------------------------------------------===//
The following example is found in test/CodeGen/PowerPC/vec_add_sub_doubleword.ll:
define <2 x i64> @increment_by_val(<2 x i64> %x, i64 %val) nounwind {
%tmpvec = insertelement <2 x i64> <i64 0, i64 0>, i64 %val, i32 0
%tmpvec2 = insertelement <2 x i64> %tmpvec, i64 %val, i32 1
%result = add <2 x i64> %x, %tmpvec2
ret <2 x i64> %result
This will generate the following instruction sequence:
std 5, -8(1)
std 5, -16(1)
addi 3, 1, -16
ori 2, 2, 0
lxvd2x 35, 0, 3
vaddudm 2, 2, 3
blr
This will almost certainly cause a load-hit-store hazard.
Since val is a value parameter, it should not need to be saved onto
the stack, unless it's being done to set up the vector register. Instead,
it would be better to splat the value into a vector register, and then
remove the (dead) stores to the stack.

View File

@ -0,0 +1,62 @@
; Check VMX 64-bit integer operations
;
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
define <2 x i64> @test_add(<2 x i64> %x, <2 x i64> %y) nounwind {
%result = add <2 x i64> %x, %y
ret <2 x i64> %result
; CHECK: vaddudm 2, 2, 3
}
; Adding a constant splat must select vaddudm. The register that ends up
; holding the constant operand is not fixed by this test, so the register
; numbers are matched loosely.
define <2 x i64> @increment_by_one(<2 x i64> %x) nounwind {
%result = add <2 x i64> %x, <i64 1, i64 1>
ret <2 x i64> %result
; CHECK-LABEL: increment_by_one:
; CHECK: vaddudm {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
}
define <2 x i64> @increment_by_val(<2 x i64> %x, i64 %val) nounwind {
%tmpvec = insertelement <2 x i64> <i64 0, i64 0>, i64 %val, i32 0
%tmpvec2 = insertelement <2 x i64> %tmpvec, i64 %val, i32 1
%result = add <2 x i64> %x, %tmpvec2
ret <2 x i64> %result
; CHECK: vaddudm 2, 2, 3
; FIXME: This is currently generating the following instruction sequence
;
; std 5, -8(1)
; std 5, -16(1)
; addi 3, 1, -16
; ori 2, 2, 0
; lxvd2x 35, 0, 3
; vaddudm 2, 2, 3
; blr
;
; This will almost certainly cause a load-hit-store hazard.
; Since val is a value parameter, it should not need to be
; saved onto the stack at all (unless we're using this to set
; up the vector register). Instead, it would be better to splat
; the value into a vector register.
}
define <2 x i64> @test_sub(<2 x i64> %x, <2 x i64> %y) nounwind {
%result = sub <2 x i64> %x, %y
ret <2 x i64> %result
; CHECK: vsubudm 2, 2, 3
}
; NOTE(review): the directive at the end of this function has no ':' after
; its prefix, so FileCheck treats it as an ordinary comment and nothing is
; verified for this function. Confirm which instruction llc actually emits
; for a subtract of a constant splat before turning it into a real check.
define <2 x i64> @decrement_by_one(<2 x i64> %x) nounwind {
%result = sub <2 x i64> %x, <i64 -1, i64 -1>
ret <2 x i64> %result
; CHECK vsubudm 2, 2, 3
}
; Subtracting a splat of a scalar argument must select vsubudm. This mirrors
; increment_by_val: %x arrives in v2 and the splat is built in v3.
define <2 x i64> @decrement_by_val(<2 x i64> %x, i64 %val) nounwind {
%tmpvec = insertelement <2 x i64> <i64 0, i64 0>, i64 %val, i32 0
%tmpvec2 = insertelement <2 x i64> %tmpvec, i64 %val, i32 1
%result = sub <2 x i64> %x, %tmpvec2
ret <2 x i64> %result
; CHECK-LABEL: decrement_by_val:
; CHECK: vsubudm 2, 2, 3
}

View File

@ -0,0 +1,258 @@
; Test the doubleword comparison instructions that were added in POWER8
;
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
define <2 x i64> @v2si64_cmp(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
%cmp = icmp eq <2 x i64> %x, %y
%result = sext <2 x i1> %cmp to <2 x i64>
ret <2 x i64> %result
; CHECK-LABEL: v2si64_cmp:
; CHECK: vcmpequd 2, 2, 3
}
define <4 x i64> @v4si64_cmp(<4 x i64> %x, <4 x i64> %y) nounwind readnone {
%cmp = icmp eq <4 x i64> %x, %y
%result = sext <4 x i1> %cmp to <4 x i64>
ret <4 x i64> %result
; CHECK-LABEL: v4si64_cmp
; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
}
define <8 x i64> @v8si64_cmp(<8 x i64> %x, <8 x i64> %y) nounwind readnone {
%cmp = icmp eq <8 x i64> %x, %y
%result = sext <8 x i1> %cmp to <8 x i64>
ret <8 x i64> %result
; CHECK-LABEL: v8si64_cmp
; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
}
define <16 x i64> @v16si64_cmp(<16 x i64> %x, <16 x i64> %y) nounwind readnone {
%cmp = icmp eq <16 x i64> %x, %y
%result = sext <16 x i1> %cmp to <16 x i64>
ret <16 x i64> %result
; CHECK-LABEL: v16si64_cmp
; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
}
define <32 x i64> @v32si64_cmp(<32 x i64> %x, <32 x i64> %y) nounwind readnone {
%cmp = icmp eq <32 x i64> %x, %y
%result = sext <32 x i1> %cmp to <32 x i64>
ret <32 x i64> %result
; CHECK-LABEL: v32si64_cmp
; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
}
; Greater than signed
define <2 x i64> @v2si64_cmp_gt(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
%cmp = icmp sgt <2 x i64> %x, %y
%result = sext <2 x i1> %cmp to <2 x i64>
ret <2 x i64> %result
; CHECK-LABEL: v2si64_cmp_gt
; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
}
define <4 x i64> @v4si64_cmp_gt(<4 x i64> %x, <4 x i64> %y) nounwind readnone {
%cmp = icmp sgt <4 x i64> %x, %y
%result = sext <4 x i1> %cmp to <4 x i64>
ret <4 x i64> %result
; CHECK-LABEL: v4si64_cmp_gt
; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
}
define <8 x i64> @v8si64_cmp_gt(<8 x i64> %x, <8 x i64> %y) nounwind readnone {
%cmp = icmp sgt <8 x i64> %x, %y
%result = sext <8 x i1> %cmp to <8 x i64>
ret <8 x i64> %result
; CHECK-LABEL: v8si64_cmp_gt
; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
}
define <16 x i64> @v16si64_cmp_gt(<16 x i64> %x, <16 x i64> %y) nounwind readnone {
%cmp = icmp sgt <16 x i64> %x, %y
%result = sext <16 x i1> %cmp to <16 x i64>
ret <16 x i64> %result
; CHECK-LABEL: v16si64_cmp_gt
; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
}
define <32 x i64> @v32si64_cmp_gt(<32 x i64> %x, <32 x i64> %y) nounwind readnone {
%cmp = icmp sgt <32 x i64> %x, %y
%result = sext <32 x i1> %cmp to <32 x i64>
ret <32 x i64> %result
; CHECK-LABEL: v32si64_cmp_gt
; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
}
; Greater than unsigned
define <2 x i64> @v2ui64_cmp_gt(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
%cmp = icmp ugt <2 x i64> %x, %y
%result = sext <2 x i1> %cmp to <2 x i64>
ret <2 x i64> %result
; CHECK-LABEL: v2ui64_cmp_gt
; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
}
define <4 x i64> @v4ui64_cmp_gt(<4 x i64> %x, <4 x i64> %y) nounwind readnone {
%cmp = icmp ugt <4 x i64> %x, %y
%result = sext <4 x i1> %cmp to <4 x i64>
ret <4 x i64> %result
; CHECK-LABEL: v4ui64_cmp_gt
; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
}
define <8 x i64> @v8ui64_cmp_gt(<8 x i64> %x, <8 x i64> %y) nounwind readnone {
%cmp = icmp ugt <8 x i64> %x, %y
%result = sext <8 x i1> %cmp to <8 x i64>
ret <8 x i64> %result
; CHECK-LABEL: v8ui64_cmp_gt
; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
}
define <16 x i64> @v16ui64_cmp_gt(<16 x i64> %x, <16 x i64> %y) nounwind readnone {
%cmp = icmp ugt <16 x i64> %x, %y
%result = sext <16 x i1> %cmp to <16 x i64>
ret <16 x i64> %result
; CHECK-LABEL: v16ui64_cmp_gt
; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
}
define <32 x i64> @v32ui64_cmp_gt(<32 x i64> %x, <32 x i64> %y) nounwind readnone {
%cmp = icmp ugt <32 x i64> %x, %y
%result = sext <32 x i1> %cmp to <32 x i64>
ret <32 x i64> %result
; CHECK-LABEL: v32ui64_cmp_gt
; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
}
; Check the intrinsics also
declare <2 x i64> @llvm.ppc.altivec.vcmpequd(<2 x i64>, <2 x i64>) nounwind readnone
declare i32 @llvm.ppc.altivec.vcmpequd.p(i32, <2 x i64>, <2 x i64>) nounwind readnone
declare <2 x i64> @llvm.ppc.altivec.vcmpgtsd(<2 x i64>, <2 x i64>) nounwind readnone
declare i32 @llvm.ppc.altivec.vcmpgtsd.p(i32, <2 x i64>, <2 x i64>) nounwind readnone
declare <2 x i64> @llvm.ppc.altivec.vcmpgtud(<2 x i64>, <2 x i64>) nounwind readnone
declare i32 @llvm.ppc.altivec.vcmpgtud.p(i32, <2 x i64>, <2 x i64>) nounwind readnone
define <2 x i64> @test_vcmpequd(<2 x i64> %x, <2 x i64> %y) {
%tmp = tail call <2 x i64> @llvm.ppc.altivec.vcmpequd(<2 x i64> %x, <2 x i64> %y)
ret <2 x i64> %tmp
; CHECK-LABEL: test_vcmpequd:
; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
}
define i32 @test_vcmpequd_p(<2 x i64> %x, <2 x i64> %y) {
%tmp = tail call i32 @llvm.ppc.altivec.vcmpequd.p(i32 2, <2 x i64> %x, <2 x i64> %y)
ret i32 %tmp
; CHECK-LABEL: test_vcmpequd_p:
; CHECK: vcmpequd. {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
}
define <2 x i64> @test_vcmpgtsd(<2 x i64> %x, <2 x i64> %y) {
%tmp = tail call <2 x i64> @llvm.ppc.altivec.vcmpgtsd(<2 x i64> %x, <2 x i64> %y)
ret <2 x i64> %tmp
; CHECK-LABEL: test_vcmpgtsd
; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
}
define i32 @test_vcmpgtsd_p(<2 x i64> %x, <2 x i64> %y) {
%tmp = tail call i32 @llvm.ppc.altivec.vcmpgtsd.p(i32 2, <2 x i64> %x, <2 x i64> %y)
ret i32 %tmp
; CHECK-LABEL: test_vcmpgtsd_p
; CHECK: vcmpgtsd. {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
}
define <2 x i64> @test_vcmpgtud(<2 x i64> %x, <2 x i64> %y) {
%tmp = tail call <2 x i64> @llvm.ppc.altivec.vcmpgtud(<2 x i64> %x, <2 x i64> %y)
ret <2 x i64> %tmp
; CHECK-LABEL: test_vcmpgtud
; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
}
define i32 @test_vcmpgtud_p(<2 x i64> %x, <2 x i64> %y) {
%tmp = tail call i32 @llvm.ppc.altivec.vcmpgtud.p(i32 2, <2 x i64> %x, <2 x i64> %y)
ret i32 %tmp
; CHECK-LABEL: test_vcmpgtud_p
; CHECK: vcmpgtud. {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
}

View File

@ -0,0 +1,34 @@
; Test the vector min/max doubleword instructions added for P8
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
declare <2 x i64> @llvm.ppc.altivec.vmaxsd(<2 x i64>, <2 x i64>) nounwind readnone
declare <2 x i64> @llvm.ppc.altivec.vmaxud(<2 x i64>, <2 x i64>) nounwind readnone
declare <2 x i64> @llvm.ppc.altivec.vminsd(<2 x i64>, <2 x i64>) nounwind readnone
declare <2 x i64> @llvm.ppc.altivec.vminud(<2 x i64>, <2 x i64>) nounwind readnone
; Calls the llvm.ppc.altivec.vmaxsd intrinsic on two <2 x i64> operands and
; returns its result. The function label scopes the instruction check to this
; function's output; the instruction is expected with operands 2, 2, 3.
define <2 x i64> @test_vmaxsd(<2 x i64> %x, <2 x i64> %y) {
  %tmp = tail call <2 x i64> @llvm.ppc.altivec.vmaxsd(<2 x i64> %x, <2 x i64> %y)
  ret <2 x i64> %tmp
; CHECK-LABEL: test_vmaxsd:
; CHECK: vmaxsd 2, 2, 3
}
; Calls the llvm.ppc.altivec.vmaxud intrinsic on two <2 x i64> operands and
; returns its result. The function label scopes the instruction check to this
; function's output; the instruction is expected with operands 2, 2, 3.
define <2 x i64> @test_vmaxud(<2 x i64> %x, <2 x i64> %y) {
  %tmp = tail call <2 x i64> @llvm.ppc.altivec.vmaxud(<2 x i64> %x, <2 x i64> %y)
  ret <2 x i64> %tmp
; CHECK-LABEL: test_vmaxud:
; CHECK: vmaxud 2, 2, 3
}
; Calls the llvm.ppc.altivec.vminsd intrinsic on two <2 x i64> operands and
; returns its result. The function label scopes the instruction check to this
; function's output; the instruction is expected with operands 2, 2, 3.
define <2 x i64> @test_vminsd(<2 x i64> %x, <2 x i64> %y) {
  %tmp = tail call <2 x i64> @llvm.ppc.altivec.vminsd(<2 x i64> %x, <2 x i64> %y)
  ret <2 x i64> %tmp
; CHECK-LABEL: test_vminsd:
; CHECK: vminsd 2, 2, 3
}
; Calls the llvm.ppc.altivec.vminud intrinsic on two <2 x i64> operands and
; returns its result. The function label scopes the instruction check to this
; function's output; the instruction is expected with operands 2, 2, 3.
define <2 x i64> @test_vminud(<2 x i64> %x, <2 x i64> %y) {
  %tmp = tail call <2 x i64> @llvm.ppc.altivec.vminud(<2 x i64> %x, <2 x i64> %y)
  ret <2 x i64> %tmp
; CHECK-LABEL: test_vminud:
; CHECK: vminud 2, 2, 3
}

View File

@ -0,0 +1,41 @@
; Check the vector multiply even/odd word instructions that were added in P8
;
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
declare <2 x i64> @llvm.ppc.altivec.vmuleuw(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.ppc.altivec.vmulesw(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.ppc.altivec.vmulouw(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.ppc.altivec.vmulosw(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i32> @llvm.ppc.altivec.vmuluwm(<4 x i32>, <4 x i32>) nounwind readnone
; Calls the llvm.ppc.altivec.vmuleuw intrinsic, which here takes two <4 x i32>
; operands and yields a <2 x i64> result. The function label scopes the
; instruction check to this function's output (operands expected: 2, 2, 3).
define <2 x i64> @test_vmuleuw(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
  %tmp = tail call <2 x i64> @llvm.ppc.altivec.vmuleuw(<4 x i32> %x, <4 x i32> %y)
  ret <2 x i64> %tmp
; CHECK-LABEL: test_vmuleuw:
; CHECK: vmuleuw 2, 2, 3
}
; Calls the llvm.ppc.altivec.vmulesw intrinsic, which here takes two <4 x i32>
; operands and yields a <2 x i64> result. The function label scopes the
; instruction check to this function's output (operands expected: 2, 2, 3).
define <2 x i64> @test_vmulesw(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
  %tmp = tail call <2 x i64> @llvm.ppc.altivec.vmulesw(<4 x i32> %x, <4 x i32> %y)
  ret <2 x i64> %tmp
; CHECK-LABEL: test_vmulesw:
; CHECK: vmulesw 2, 2, 3
}
; Calls the llvm.ppc.altivec.vmulouw intrinsic, which here takes two <4 x i32>
; operands and yields a <2 x i64> result. The function label scopes the
; instruction check to this function's output (operands expected: 2, 2, 3).
define <2 x i64> @test_vmulouw(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
  %tmp = tail call <2 x i64> @llvm.ppc.altivec.vmulouw(<4 x i32> %x, <4 x i32> %y)
  ret <2 x i64> %tmp
; CHECK-LABEL: test_vmulouw:
; CHECK: vmulouw 2, 2, 3
}
; Calls the llvm.ppc.altivec.vmulosw intrinsic, which here takes two <4 x i32>
; operands and yields a <2 x i64> result. The function label scopes the
; instruction check to this function's output (operands expected: 2, 2, 3).
define <2 x i64> @test_vmulosw(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
  %tmp = tail call <2 x i64> @llvm.ppc.altivec.vmulosw(<4 x i32> %x, <4 x i32> %y)
  ret <2 x i64> %tmp
; CHECK-LABEL: test_vmulosw:
; CHECK: vmulosw 2, 2, 3
}
; Calls the llvm.ppc.altivec.vmuluwm intrinsic, which here takes two <4 x i32>
; operands and yields a <4 x i32> result. The function label scopes the
; instruction check to this function's output (operands expected: 2, 2, 3).
define <4 x i32> @test_vmuluwm(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
  %tmp = tail call <4 x i32> @llvm.ppc.altivec.vmuluwm(<4 x i32> %x, <4 x i32> %y)
  ret <4 x i32> %tmp
; CHECK-LABEL: test_vmuluwm:
; CHECK: vmuluwm 2, 2, 3
}

View File

@ -0,0 +1,33 @@
; Test the vector rotate and shift doubleword instructions that were added in P8
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
declare <2 x i64> @llvm.ppc.altivec.vrld(<2 x i64>, <2 x i64>) nounwind readnone
declare <2 x i64> @llvm.ppc.altivec.vsld(<2 x i64>, <2 x i64>) nounwind readnone
declare <2 x i64> @llvm.ppc.altivec.vsrd(<2 x i64>, <2 x i64>) nounwind readnone
declare <2 x i64> @llvm.ppc.altivec.vsrad(<2 x i64>, <2 x i64>) nounwind readnone
; Calls the llvm.ppc.altivec.vrld intrinsic on two <2 x i64> operands and
; returns its result. The function label scopes the instruction check to this
; function's output; the instruction is expected with operands 2, 2, 3.
define <2 x i64> @test_vrld(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
  %tmp = tail call <2 x i64> @llvm.ppc.altivec.vrld(<2 x i64> %x, <2 x i64> %y)
  ret <2 x i64> %tmp
; CHECK-LABEL: test_vrld:
; CHECK: vrld 2, 2, 3
}
; Calls the llvm.ppc.altivec.vsld intrinsic on two <2 x i64> operands and
; returns its result. The function label scopes the instruction check to this
; function's output; the instruction is expected with operands 2, 2, 3.
define <2 x i64> @test_vsld(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
  %tmp = tail call <2 x i64> @llvm.ppc.altivec.vsld(<2 x i64> %x, <2 x i64> %y)
  ret <2 x i64> %tmp
; CHECK-LABEL: test_vsld:
; CHECK: vsld 2, 2, 3
}
; Calls the llvm.ppc.altivec.vsrd intrinsic on two <2 x i64> operands and
; returns its result. The function label scopes the instruction check to this
; function's output; the instruction is expected with operands 2, 2, 3.
define <2 x i64> @test_vsrd(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
  %tmp = tail call <2 x i64> @llvm.ppc.altivec.vsrd(<2 x i64> %x, <2 x i64> %y)
  ret <2 x i64> %tmp
; CHECK-LABEL: test_vsrd:
; CHECK: vsrd 2, 2, 3
}
; Calls the llvm.ppc.altivec.vsrad intrinsic on two <2 x i64> operands and
; returns its result. The function label scopes the instruction check to this
; function's output; the instruction is expected with operands 2, 2, 3.
define <2 x i64> @test_vsrad(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
  %tmp = tail call <2 x i64> @llvm.ppc.altivec.vsrad(<2 x i64> %x, <2 x i64> %y)
  ret <2 x i64> %tmp
; CHECK-LABEL: test_vsrad:
; CHECK: vsrad 2, 2, 3
}

View File

@ -159,6 +159,9 @@
# CHECK: vadduwm 2, 3, 4
0x10 0x43 0x20 0x80
# CHECK: vaddudm 2, 3, 4
0x10 0x43 0x20 0xc0
# CHECK: vaddubs 2, 3, 4
0x10 0x43 0x22 0x00
@ -189,6 +192,9 @@
# CHECK: vsubuwm 2, 3, 4
0x10 0x43 0x24 0x80
# CHECK: vsubudm 2, 3, 4
0x10 0x43 0x24 0xc0
# CHECK: vsububs 2, 3, 4
0x10 0x43 0x26 0x00
@ -204,24 +210,39 @@
# CHECK: vmulesh 2, 3, 4
0x10 0x43 0x23 0x48
# CHECK: vmulesw 2, 3, 4
0x10 0x43 0x23 0x88
# CHECK: vmuleub 2, 3, 4
0x10 0x43 0x22 0x08
# CHECK: vmuleuh 2, 3, 4
0x10 0x43 0x22 0x48
# CHECK: vmuleuw 2, 3, 4
0x10 0x43 0x22 0x88
# CHECK: vmulosb 2, 3, 4
0x10 0x43 0x21 0x08
# CHECK: vmulosh 2, 3, 4
0x10 0x43 0x21 0x48
# CHECK: vmulosw 2, 3, 4
0x10 0x43 0x21 0x88
# CHECK: vmuloub 2, 3, 4
0x10 0x43 0x20 0x08
# CHECK: vmulouh 2, 3, 4
0x10 0x43 0x20 0x48
# CHECK: vmulouw 2, 3, 4
0x10 0x43 0x20 0x88
# CHECK: vmuluwm 2, 3, 4
0x10 0x43 0x20 0x89
# CHECK: vmhaddshs 2, 3, 4, 5
0x10 0x43 0x21 0x60
@ -291,6 +312,9 @@
# CHECK: vmaxsw 2, 3, 4
0x10 0x43 0x21 0x82
# CHECK: vmaxsd 2, 3, 4
0x10 0x43 0x21 0xc2
# CHECK: vmaxub 2, 3, 4
0x10 0x43 0x20 0x02
@ -300,6 +324,9 @@
# CHECK: vmaxuw 2, 3, 4
0x10 0x43 0x20 0x82
# CHECK: vmaxud 2, 3, 4
0x10 0x43 0x20 0xc2
# CHECK: vminsb 2, 3, 4
0x10 0x43 0x23 0x02
@ -309,6 +336,9 @@
# CHECK: vminsw 2, 3, 4
0x10 0x43 0x23 0x82
# CHECK: vminsd 2, 3, 4
0x10 0x43 0x23 0xc2
# CHECK: vminub 2, 3, 4
0x10 0x43 0x22 0x02
@ -318,6 +348,9 @@
# CHECK: vminuw 2, 3, 4
0x10 0x43 0x22 0x82
# CHECK: vminud 2, 3, 4
0x10 0x43 0x22 0xc2
# CHECK: vcmpequb 2, 3, 4
0x10 0x43 0x20 0x06
@ -336,6 +369,12 @@
# CHECK: vcmpequw. 2, 3, 4
0x10 0x43 0x24 0x86
# CHECK: vcmpequd 2, 3, 4
0x10 0x43 0x20 0xc7
# CHECK: vcmpequd. 2, 3, 4
0x10 0x43 0x24 0xc7
# CHECK: vcmpgtsb 2, 3, 4
0x10 0x43 0x23 0x06
@ -354,6 +393,12 @@
# CHECK: vcmpgtsw. 2, 3, 4
0x10 0x43 0x27 0x86
# CHECK: vcmpgtsd 2, 3, 4
0x10 0x43 0x23 0xc7
# CHECK: vcmpgtsd. 2, 3, 4
0x10 0x43 0x27 0xc7
# CHECK: vcmpgtub 2, 3, 4
0x10 0x43 0x22 0x06
@ -372,6 +417,12 @@
# CHECK: vcmpgtuw. 2, 3, 4
0x10 0x43 0x26 0x86
# CHECK: vcmpgtud 2, 3, 4
0x10 0x43 0x22 0xc7
# CHECK: vcmpgtud. 2, 3, 4
0x10 0x43 0x26 0xc7
# CHECK: vand 2, 3, 4
0x10 0x43 0x24 0x04
@ -414,6 +465,9 @@
# CHECK: vslw 2, 3, 4
0x10 0x43 0x21 0x84
# CHECK: vrld 2, 3, 4
0x10 0x43 0x20 0xc4
# CHECK: vsrb 2, 3, 4
0x10 0x43 0x22 0x04
@ -423,6 +477,9 @@
# CHECK: vsrw 2, 3, 4
0x10 0x43 0x22 0x84
# CHECK: vsrd 2, 3, 4
0x10 0x43 0x26 0xc4
# CHECK: vsrab 2, 3, 4
0x10 0x43 0x23 0x04
@ -432,6 +489,9 @@
# CHECK: vsraw 2, 3, 4
0x10 0x43 0x23 0x84
# CHECK: vsrad 2, 3, 4
0x10 0x43 0x23 0xc4
# CHECK: vaddfp 2, 3, 4
0x10 0x43 0x20 0x0a

View File

@ -176,6 +176,9 @@
# CHECK-BE: vadduwm 2, 3, 4 # encoding: [0x10,0x43,0x20,0x80]
# CHECK-LE: vadduwm 2, 3, 4 # encoding: [0x80,0x20,0x43,0x10]
vadduwm 2, 3, 4
# CHECK-BE: vaddudm 2, 3, 4 # encoding: [0x10,0x43,0x20,0xc0]
# CHECK-LE: vaddudm 2, 3, 4 # encoding: [0xc0,0x20,0x43,0x10]
vaddudm 2, 3, 4
# CHECK-BE: vaddubs 2, 3, 4 # encoding: [0x10,0x43,0x22,0x00]
# CHECK-LE: vaddubs 2, 3, 4 # encoding: [0x00,0x22,0x43,0x10]
vaddubs 2, 3, 4
@ -207,6 +210,9 @@
# CHECK-BE: vsubuwm 2, 3, 4 # encoding: [0x10,0x43,0x24,0x80]
# CHECK-LE: vsubuwm 2, 3, 4 # encoding: [0x80,0x24,0x43,0x10]
vsubuwm 2, 3, 4
# CHECK-BE: vsubudm 2, 3, 4 # encoding: [0x10,0x43,0x24,0xc0]
# CHECK-LE: vsubudm 2, 3, 4 # encoding: [0xc0,0x24,0x43,0x10]
vsubudm 2, 3, 4
# CHECK-BE: vsububs 2, 3, 4 # encoding: [0x10,0x43,0x26,0x00]
# CHECK-LE: vsububs 2, 3, 4 # encoding: [0x00,0x26,0x43,0x10]
vsububs 2, 3, 4
@ -223,24 +229,39 @@
# CHECK-BE: vmulesh 2, 3, 4 # encoding: [0x10,0x43,0x23,0x48]
# CHECK-LE: vmulesh 2, 3, 4 # encoding: [0x48,0x23,0x43,0x10]
vmulesh 2, 3, 4
# CHECK-BE: vmulesw 2, 3, 4 # encoding: [0x10,0x43,0x23,0x88]
# CHECK-LE: vmulesw 2, 3, 4 # encoding: [0x88,0x23,0x43,0x10]
vmulesw 2, 3, 4
# CHECK-BE: vmuleub 2, 3, 4 # encoding: [0x10,0x43,0x22,0x08]
# CHECK-LE: vmuleub 2, 3, 4 # encoding: [0x08,0x22,0x43,0x10]
vmuleub 2, 3, 4
# CHECK-BE: vmuleuh 2, 3, 4 # encoding: [0x10,0x43,0x22,0x48]
# CHECK-LE: vmuleuh 2, 3, 4 # encoding: [0x48,0x22,0x43,0x10]
vmuleuh 2, 3, 4
# CHECK-BE: vmuleuw 2, 3, 4 # encoding: [0x10,0x43,0x22,0x88]
# CHECK-LE: vmuleuw 2, 3, 4 # encoding: [0x88,0x22,0x43,0x10]
vmuleuw 2, 3, 4
# CHECK-BE: vmulosb 2, 3, 4 # encoding: [0x10,0x43,0x21,0x08]
# CHECK-LE: vmulosb 2, 3, 4 # encoding: [0x08,0x21,0x43,0x10]
vmulosb 2, 3, 4
# CHECK-BE: vmulosh 2, 3, 4 # encoding: [0x10,0x43,0x21,0x48]
# CHECK-LE: vmulosh 2, 3, 4 # encoding: [0x48,0x21,0x43,0x10]
vmulosh 2, 3, 4
# CHECK-BE: vmulosw 2, 3, 4 # encoding: [0x10,0x43,0x21,0x88]
# CHECK-LE: vmulosw 2, 3, 4 # encoding: [0x88,0x21,0x43,0x10]
vmulosw 2, 3, 4
# CHECK-BE: vmuloub 2, 3, 4 # encoding: [0x10,0x43,0x20,0x08]
# CHECK-LE: vmuloub 2, 3, 4 # encoding: [0x08,0x20,0x43,0x10]
vmuloub 2, 3, 4
# CHECK-BE: vmulouh 2, 3, 4 # encoding: [0x10,0x43,0x20,0x48]
# CHECK-LE: vmulouh 2, 3, 4 # encoding: [0x48,0x20,0x43,0x10]
vmulouh 2, 3, 4
# CHECK-BE: vmulouw 2, 3, 4 # encoding: [0x10,0x43,0x20,0x88]
# CHECK-LE: vmulouw 2, 3, 4 # encoding: [0x88,0x20,0x43,0x10]
vmulouw 2, 3, 4
# CHECK-BE: vmuluwm 2, 3, 4 # encoding: [0x10,0x43,0x20,0x89]
# CHECK-LE: vmuluwm 2, 3, 4 # encoding: [0x89,0x20,0x43,0x10]
vmuluwm 2, 3, 4
# CHECK-BE: vmhaddshs 2, 3, 4, 5 # encoding: [0x10,0x43,0x21,0x60]
# CHECK-LE: vmhaddshs 2, 3, 4, 5 # encoding: [0x60,0x21,0x43,0x10]
@ -314,6 +335,9 @@
# CHECK-BE: vmaxsw 2, 3, 4 # encoding: [0x10,0x43,0x21,0x82]
# CHECK-LE: vmaxsw 2, 3, 4 # encoding: [0x82,0x21,0x43,0x10]
vmaxsw 2, 3, 4
# CHECK-BE: vmaxsd 2, 3, 4 # encoding: [0x10,0x43,0x21,0xc2]
# CHECK-LE: vmaxsd 2, 3, 4 # encoding: [0xc2,0x21,0x43,0x10]
vmaxsd 2, 3, 4
# CHECK-BE: vmaxub 2, 3, 4 # encoding: [0x10,0x43,0x20,0x02]
# CHECK-LE: vmaxub 2, 3, 4 # encoding: [0x02,0x20,0x43,0x10]
vmaxub 2, 3, 4
@ -323,7 +347,10 @@
# CHECK-BE: vmaxuw 2, 3, 4 # encoding: [0x10,0x43,0x20,0x82]
# CHECK-LE: vmaxuw 2, 3, 4 # encoding: [0x82,0x20,0x43,0x10]
vmaxuw 2, 3, 4
# CHECK-BE: vmaxud 2, 3, 4 # encoding: [0x10,0x43,0x20,0xc2]
# CHECK-LE: vmaxud 2, 3, 4 # encoding: [0xc2,0x20,0x43,0x10]
vmaxud 2, 3, 4
# CHECK-BE: vminsb 2, 3, 4 # encoding: [0x10,0x43,0x23,0x02]
# CHECK-LE: vminsb 2, 3, 4 # encoding: [0x02,0x23,0x43,0x10]
vminsb 2, 3, 4
@ -333,6 +360,9 @@
# CHECK-BE: vminsw 2, 3, 4 # encoding: [0x10,0x43,0x23,0x82]
# CHECK-LE: vminsw 2, 3, 4 # encoding: [0x82,0x23,0x43,0x10]
vminsw 2, 3, 4
# CHECK-BE: vminsd 2, 3, 4 # encoding: [0x10,0x43,0x23,0xc2]
# CHECK-LE: vminsd 2, 3, 4 # encoding: [0xc2,0x23,0x43,0x10]
vminsd 2, 3, 4
# CHECK-BE: vminub 2, 3, 4 # encoding: [0x10,0x43,0x22,0x02]
# CHECK-LE: vminub 2, 3, 4 # encoding: [0x02,0x22,0x43,0x10]
vminub 2, 3, 4
@ -342,6 +372,9 @@
# CHECK-BE: vminuw 2, 3, 4 # encoding: [0x10,0x43,0x22,0x82]
# CHECK-LE: vminuw 2, 3, 4 # encoding: [0x82,0x22,0x43,0x10]
vminuw 2, 3, 4
# CHECK-BE: vminud 2, 3, 4 # encoding: [0x10,0x43,0x22,0xc2]
# CHECK-LE: vminud 2, 3, 4 # encoding: [0xc2,0x22,0x43,0x10]
vminud 2, 3, 4
# Vector integer compare instructions
@ -363,6 +396,12 @@
# CHECK-BE: vcmpequw. 2, 3, 4 # encoding: [0x10,0x43,0x24,0x86]
# CHECK-LE: vcmpequw. 2, 3, 4 # encoding: [0x86,0x24,0x43,0x10]
vcmpequw. 2, 3, 4
# CHECK-BE: vcmpequd 2, 3, 4 # encoding: [0x10,0x43,0x20,0xc7]
# CHECK-LE: vcmpequd 2, 3, 4 # encoding: [0xc7,0x20,0x43,0x10]
vcmpequd 2, 3, 4
# CHECK-BE: vcmpequd. 2, 3, 4 # encoding: [0x10,0x43,0x24,0xc7]
# CHECK-LE: vcmpequd. 2, 3, 4 # encoding: [0xc7,0x24,0x43,0x10]
vcmpequd. 2, 3, 4
# CHECK-BE: vcmpgtsb 2, 3, 4 # encoding: [0x10,0x43,0x23,0x06]
# CHECK-LE: vcmpgtsb 2, 3, 4 # encoding: [0x06,0x23,0x43,0x10]
vcmpgtsb 2, 3, 4
@ -381,6 +420,12 @@
# CHECK-BE: vcmpgtsw. 2, 3, 4 # encoding: [0x10,0x43,0x27,0x86]
# CHECK-LE: vcmpgtsw. 2, 3, 4 # encoding: [0x86,0x27,0x43,0x10]
vcmpgtsw. 2, 3, 4
# CHECK-BE: vcmpgtsd 2, 3, 4 # encoding: [0x10,0x43,0x23,0xc7]
# CHECK-LE: vcmpgtsd 2, 3, 4 # encoding: [0xc7,0x23,0x43,0x10]
vcmpgtsd 2, 3, 4
# CHECK-BE: vcmpgtsd. 2, 3, 4 # encoding: [0x10,0x43,0x27,0xc7]
# CHECK-LE: vcmpgtsd. 2, 3, 4 # encoding: [0xc7,0x27,0x43,0x10]
vcmpgtsd. 2, 3, 4
# CHECK-BE: vcmpgtub 2, 3, 4 # encoding: [0x10,0x43,0x22,0x06]
# CHECK-LE: vcmpgtub 2, 3, 4 # encoding: [0x06,0x22,0x43,0x10]
vcmpgtub 2, 3, 4
@ -399,7 +444,13 @@
# CHECK-BE: vcmpgtuw. 2, 3, 4 # encoding: [0x10,0x43,0x26,0x86]
# CHECK-LE: vcmpgtuw. 2, 3, 4 # encoding: [0x86,0x26,0x43,0x10]
vcmpgtuw. 2, 3, 4
# CHECK-BE: vcmpgtud 2, 3, 4 # encoding: [0x10,0x43,0x22,0xc7]
# CHECK-LE: vcmpgtud 2, 3, 4 # encoding: [0xc7,0x22,0x43,0x10]
vcmpgtud 2, 3, 4
# CHECK-BE: vcmpgtud. 2, 3, 4 # encoding: [0x10,0x43,0x26,0xc7]
# CHECK-LE: vcmpgtud. 2, 3, 4 # encoding: [0xc7,0x26,0x43,0x10]
vcmpgtud. 2, 3, 4
# Vector integer logical instructions
# CHECK-BE: vand 2, 3, 4 # encoding: [0x10,0x43,0x24,0x04]
@ -438,7 +489,9 @@
# CHECK-BE: vrlw 2, 3, 4 # encoding: [0x10,0x43,0x20,0x84]
# CHECK-LE: vrlw 2, 3, 4 # encoding: [0x84,0x20,0x43,0x10]
vrlw 2, 3, 4
# CHECK-BE: vrld 2, 3, 4 # encoding: [0x10,0x43,0x20,0xc4]
# CHECK-LE: vrld 2, 3, 4 # encoding: [0xc4,0x20,0x43,0x10]
vrld 2, 3, 4
# CHECK-BE: vslb 2, 3, 4 # encoding: [0x10,0x43,0x21,0x04]
# CHECK-LE: vslb 2, 3, 4 # encoding: [0x04,0x21,0x43,0x10]
vslb 2, 3, 4
@ -448,6 +501,9 @@
# CHECK-BE: vslw 2, 3, 4 # encoding: [0x10,0x43,0x21,0x84]
# CHECK-LE: vslw 2, 3, 4 # encoding: [0x84,0x21,0x43,0x10]
vslw 2, 3, 4
# CHECK-BE: vsld 2, 3, 4 # encoding: [0x10,0x43,0x25,0xc4]
# CHECK-LE: vsld 2, 3, 4 # encoding: [0xc4,0x25,0x43,0x10]
vsld 2, 3, 4
# CHECK-BE: vsrb 2, 3, 4 # encoding: [0x10,0x43,0x22,0x04]
# CHECK-LE: vsrb 2, 3, 4 # encoding: [0x04,0x22,0x43,0x10]
vsrb 2, 3, 4
@ -457,6 +513,9 @@
# CHECK-BE: vsrw 2, 3, 4 # encoding: [0x10,0x43,0x22,0x84]
# CHECK-LE: vsrw 2, 3, 4 # encoding: [0x84,0x22,0x43,0x10]
vsrw 2, 3, 4
# CHECK-BE: vsrd 2, 3, 4 # encoding: [0x10,0x43,0x26,0xc4]
# CHECK-LE: vsrd 2, 3, 4 # encoding: [0xc4,0x26,0x43,0x10]
vsrd 2, 3, 4
# CHECK-BE: vsrab 2, 3, 4 # encoding: [0x10,0x43,0x23,0x04]
# CHECK-LE: vsrab 2, 3, 4 # encoding: [0x04,0x23,0x43,0x10]
vsrab 2, 3, 4
@ -466,6 +525,9 @@
# CHECK-BE: vsraw 2, 3, 4 # encoding: [0x10,0x43,0x23,0x84]
# CHECK-LE: vsraw 2, 3, 4 # encoding: [0x84,0x23,0x43,0x10]
vsraw 2, 3, 4
# CHECK-BE: vsrad 2, 3, 4 # encoding: [0x10,0x43,0x23,0xc4]
# CHECK-LE: vsrad 2, 3, 4 # encoding: [0xc4,0x23,0x43,0x10]
vsrad 2, 3, 4
# Vector floating-point instructions
@ -576,16 +638,16 @@
# CHECK-BE: vpopcnth 2, 3 # encoding: [0x10,0x40,0x1f,0x43]
# CHECK-LE: vpopcnth 2, 3 # encoding: [0x43,0x1f,0x40,0x10]
vpopcnth 2, 3
vpopcnth 2, 3
# CHECK-BE: vpopcntw 2, 3 # encoding: [0x10,0x40,0x1f,0x83]
# CHECK-LE: vpopcntw 2, 3 # encoding: [0x83,0x1f,0x40,0x10]
vpopcntw 2, 3
# BCHECK-BE: vpopcntd 2, 3 # encoding: [0x10,0x40,0x1f,0xC3]
# BCHECK-LE: vpopcntd 2, 3 # encoding: [0xC3,0x1f,0x40,0x10]
# vpopcntd 2, 3
# Vector status and control register instructions
# CHECK-BE: mtvscr 2 # encoding: [0x10,0x00,0x16,0x44]