1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 19:12:56 +02:00

Merge floating point and integer UNPCK X86ISD node types.

llvm-svn: 145926
This commit is contained in:
Craig Topper 2011-12-06 08:21:25 +00:00
parent ed2be25205
commit 846d53deed
4 changed files with 104 additions and 167 deletions

View File

@ -2851,10 +2851,8 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::MOVDDUP:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
case X86ISD::UNPCKLP:
case X86ISD::PUNPCKL:
case X86ISD::UNPCKHP:
case X86ISD::PUNPCKH:
case X86ISD::UNPCKL:
case X86ISD::UNPCKH:
case X86ISD::VPERMILP:
case X86ISD::VPERM2X128:
return true;
@ -2914,10 +2912,8 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
case X86ISD::MOVLPD:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
case X86ISD::UNPCKLP:
case X86ISD::PUNPCKL:
case X86ISD::UNPCKHP:
case X86ISD::PUNPCKH:
case X86ISD::UNPCKL:
case X86ISD::UNPCKH:
return DAG.getNode(Opc, dl, VT, V1, V2);
}
return SDValue();
@ -4460,12 +4456,10 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
ShuffleMask);
break;
case X86ISD::PUNPCKH:
case X86ISD::UNPCKHP:
case X86ISD::UNPCKH:
DecodeUNPCKHMask(VT, ShuffleMask);
break;
case X86ISD::PUNPCKL:
case X86ISD::UNPCKLP:
case X86ISD::UNPCKL:
DecodeUNPCKLMask(VT, ShuffleMask);
break;
case X86ISD::MOVHLPS:
@ -6364,50 +6358,6 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) {
X86::getShuffleSHUFImmediate(SVOp), DAG);
}
static inline unsigned getUNPCKLOpcode(EVT VT, bool HasAVX2) {
switch(VT.getSimpleVT().SimpleTy) {
case MVT::v32i8:
case MVT::v16i8:
case MVT::v16i16:
case MVT::v8i16:
case MVT::v4i32:
case MVT::v2i64: return X86ISD::PUNPCKL;
case MVT::v8i32:
case MVT::v4i64:
if (HasAVX2) return X86ISD::PUNPCKL;
// else use fp unit for int unpack.
case MVT::v8f32:
case MVT::v4f32:
case MVT::v4f64:
case MVT::v2f64: return X86ISD::UNPCKLP;
default:
llvm_unreachable("Unknown type for unpckl");
}
return 0;
}
static inline unsigned getUNPCKHOpcode(EVT VT, bool HasAVX2) {
switch(VT.getSimpleVT().SimpleTy) {
case MVT::v32i8:
case MVT::v16i8:
case MVT::v16i16:
case MVT::v8i16:
case MVT::v4i32:
case MVT::v2i64: return X86ISD::PUNPCKH;
case MVT::v4i64:
case MVT::v8i32:
if (HasAVX2) return X86ISD::PUNPCKH;
// else use fp unit for int unpack.
case MVT::v8f32:
case MVT::v4f32:
case MVT::v4f64:
case MVT::v2f64: return X86ISD::UNPCKHP;
default:
llvm_unreachable("Unknown type for unpckh");
}
return 0;
}
static
SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI,
@ -6518,11 +6468,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and
// unpckh_undef). Only use pshufd if speed is more important than size.
if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp))
return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1,
DAG);
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp))
return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1,
DAG);
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
if (X86::isMOVDDUPMask(SVOp) && Subtarget->hasSSE3orAVX() &&
V2IsUndef && RelaxedMayFoldVectorLoad(V1))
@ -6534,8 +6482,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// Use to match splats
if (HasXMMInt && X86::isUNPCKHMask(SVOp, HasAVX2) && V2IsUndef &&
(VT == MVT::v2f64 || VT == MVT::v2i64))
return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1,
DAG);
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
if (X86::isPSHUFDMask(SVOp)) {
// The actual implementation will match the mask in the if above and then
@ -6635,12 +6582,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
}
if (isUNPCKLMask(M, VT, HasAVX2))
return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V2,
DAG);
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
if (isUNPCKHMask(M, VT, HasAVX2))
return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V2,
DAG);
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
if (V2IsSplat) {
// Normalize mask so all entries that point to V2 points to its first
@ -6664,12 +6609,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
if (X86::isUNPCKLMask(NewSVOp, HasAVX2))
return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V2, V1,
DAG);
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V2, V1, DAG);
if (X86::isUNPCKHMask(NewSVOp, HasAVX2))
return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V2, V1,
DAG);
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V2, V1, DAG);
}
// Normalize the node to match x86 shuffle ops if needed
@ -6689,8 +6632,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&
SVOp->getSplatIndex() == 0 && V2IsUndef) {
if (VT == MVT::v2f64 || VT == MVT::v2i64)
return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1,
DAG);
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
}
if (isPSHUFHWMask(M, VT))
@ -6708,11 +6650,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
X86::getShuffleSHUFImmediate(SVOp), DAG);
if (isUNPCKL_v_undef_Mask(M, VT))
return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1,
DAG);
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
if (isUNPCKH_v_undef_Mask(M, VT))
return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1,
DAG);
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
//===--------------------------------------------------------------------===//
// Generate target specific nodes for 128 or 256-bit shuffles only
@ -11023,10 +10963,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::MOVSLDUP_LD: return "X86ISD::MOVSLDUP_LD";
case X86ISD::MOVSD: return "X86ISD::MOVSD";
case X86ISD::MOVSS: return "X86ISD::MOVSS";
case X86ISD::UNPCKLP: return "X86ISD::UNPCKLP";
case X86ISD::UNPCKHP: return "X86ISD::UNPCKHP";
case X86ISD::PUNPCKL: return "X86ISD::PUNPCKL";
case X86ISD::PUNPCKH: return "X86ISD::PUNPCKH";
case X86ISD::UNPCKL: return "X86ISD::UNPCKL";
case X86ISD::UNPCKH: return "X86ISD::UNPCKH";
case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST";
case X86ISD::VPERMILP: return "X86ISD::VPERMILP";
case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128";
@ -14616,10 +14554,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::SHUFPS: // Handle all target specific shuffles
case X86ISD::SHUFPD:
case X86ISD::PALIGN:
case X86ISD::PUNPCKH:
case X86ISD::UNPCKHP:
case X86ISD::PUNPCKL:
case X86ISD::UNPCKLP:
case X86ISD::UNPCKH:
case X86ISD::UNPCKL:
case X86ISD::MOVHLPS:
case X86ISD::MOVLHPS:
case X86ISD::PSHUFD:

View File

@ -273,10 +273,8 @@ namespace llvm {
MOVLPD,
MOVSD,
MOVSS,
UNPCKLP,
UNPCKHP,
PUNPCKL,
PUNPCKH,
UNPCKL,
UNPCKH,
VPERMILP,
VPERM2X128,
VBROADCAST,

View File

@ -130,11 +130,8 @@ def X86Movhlpd : SDNode<"X86ISD::MOVHLPD", SDTShuff2Op>;
def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>;
def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
def X86Unpcklp : SDNode<"X86ISD::UNPCKLP", SDTShuff2Op>;
def X86Unpckhp : SDNode<"X86ISD::UNPCKHP", SDTShuff2Op>;
def X86Punpckl : SDNode<"X86ISD::PUNPCKL", SDTShuff2Op>;
def X86Punpckh : SDNode<"X86ISD::PUNPCKH", SDTShuff2Op>;
def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>;
def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>;
def X86VPermilp : SDNode<"X86ISD::VPERMILP", SDTShuff2OpI>;

View File

@ -1157,11 +1157,11 @@ let Predicates = [HasAVX] in {
(bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
(VMOVHPSrm VR128:$src1, addr:$src2)>;
// FIXME: Instead of X86Unpcklp, there should be a X86Movlhpd here, the problem
// FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem
// is during lowering, where it's not possible to recognize the load fold cause
// it has two uses through a bitcast. One use disappears at isel time and the
// fold opportunity reappears.
def : Pat<(v2f64 (X86Unpcklp VR128:$src1,
def : Pat<(v2f64 (X86Unpckl VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))),
(VMOVHPDrm VR128:$src1, addr:$src2)>;
@ -1172,10 +1172,10 @@ let Predicates = [HasAVX] in {
// Store patterns
def : Pat<(store (f64 (vector_extract
(v2f64 (X86Unpckhp VR128:$src, (undef))), (iPTR 0))), addr:$dst),
(v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst),
(VMOVHPSmr addr:$dst, VR128:$src)>;
def : Pat<(store (f64 (vector_extract
(v2f64 (X86Unpckhp VR128:$src, (undef))), (iPTR 0))), addr:$dst),
(v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst),
(VMOVHPDmr addr:$dst, VR128:$src)>;
}
@ -1195,16 +1195,16 @@ let Predicates = [HasSSE1] in {
// Store patterns
def : Pat<(store (f64 (vector_extract
(v2f64 (X86Unpckhp VR128:$src, (undef))), (iPTR 0))), addr:$dst),
(v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst),
(MOVHPSmr addr:$dst, VR128:$src)>;
}
let Predicates = [HasSSE2] in {
// FIXME: Instead of X86Unpcklp, there should be a X86Movlhpd here, the problem
// FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem
// is during lowering, where it's not possible to recognize the load fold cause
// it has two uses through a bitcast. One use disappears at isel time and the
// fold opportunity reappears.
def : Pat<(v2f64 (X86Unpcklp VR128:$src1,
def : Pat<(v2f64 (X86Unpckl VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))),
(MOVHPDrm VR128:$src1, addr:$src2)>;
@ -1215,7 +1215,7 @@ let Predicates = [HasSSE2] in {
// Store patterns
def : Pat<(store (f64 (vector_extract
(v2f64 (X86Unpckhp VR128:$src, (undef))), (iPTR 0))),addr:$dst),
(v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))),addr:$dst),
(MOVHPDmr addr:$dst, VR128:$src)>;
}
@ -2431,27 +2431,27 @@ let AddedComplexity = 10 in {
} // AddedComplexity
let Predicates = [HasSSE1] in {
def : Pat<(v4f32 (X86Unpcklp VR128:$src1, (memopv4f32 addr:$src2))),
def : Pat<(v4f32 (X86Unpckl VR128:$src1, (memopv4f32 addr:$src2))),
(UNPCKLPSrm VR128:$src1, addr:$src2)>;
def : Pat<(v4f32 (X86Unpcklp VR128:$src1, VR128:$src2)),
def : Pat<(v4f32 (X86Unpckl VR128:$src1, VR128:$src2)),
(UNPCKLPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4f32 (X86Unpckhp VR128:$src1, (memopv4f32 addr:$src2))),
def : Pat<(v4f32 (X86Unpckh VR128:$src1, (memopv4f32 addr:$src2))),
(UNPCKHPSrm VR128:$src1, addr:$src2)>;
def : Pat<(v4f32 (X86Unpckhp VR128:$src1, VR128:$src2)),
def : Pat<(v4f32 (X86Unpckh VR128:$src1, VR128:$src2)),
(UNPCKHPSrr VR128:$src1, VR128:$src2)>;
}
let Predicates = [HasSSE2] in {
def : Pat<(v2f64 (X86Unpcklp VR128:$src1, (memopv2f64 addr:$src2))),
def : Pat<(v2f64 (X86Unpckl VR128:$src1, (memopv2f64 addr:$src2))),
(UNPCKLPDrm VR128:$src1, addr:$src2)>;
def : Pat<(v2f64 (X86Unpcklp VR128:$src1, VR128:$src2)),
def : Pat<(v2f64 (X86Unpckl VR128:$src1, VR128:$src2)),
(UNPCKLPDrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2f64 (X86Unpckhp VR128:$src1, (memopv2f64 addr:$src2))),
def : Pat<(v2f64 (X86Unpckh VR128:$src1, (memopv2f64 addr:$src2))),
(UNPCKHPDrm VR128:$src1, addr:$src2)>;
def : Pat<(v2f64 (X86Unpckhp VR128:$src1, VR128:$src2)),
def : Pat<(v2f64 (X86Unpckh VR128:$src1, VR128:$src2)),
(UNPCKHPDrr VR128:$src1, VR128:$src2)>;
// FIXME: Instead of X86Movddup, there should be a X86Unpcklp here, the
// FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
// problem is during lowering, where it's not possible to recognize the load
// fold cause it has two uses through a bitcast. One use disappears at isel
// time and the fold opportunity reappears.
@ -2464,59 +2464,43 @@ let Predicates = [HasSSE2] in {
}
let Predicates = [HasAVX] in {
def : Pat<(v4f32 (X86Unpcklp VR128:$src1, (memopv4f32 addr:$src2))),
def : Pat<(v4f32 (X86Unpckl VR128:$src1, (memopv4f32 addr:$src2))),
(VUNPCKLPSrm VR128:$src1, addr:$src2)>;
def : Pat<(v4f32 (X86Unpcklp VR128:$src1, VR128:$src2)),
def : Pat<(v4f32 (X86Unpckl VR128:$src1, VR128:$src2)),
(VUNPCKLPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4f32 (X86Unpckhp VR128:$src1, (memopv4f32 addr:$src2))),
def : Pat<(v4f32 (X86Unpckh VR128:$src1, (memopv4f32 addr:$src2))),
(VUNPCKHPSrm VR128:$src1, addr:$src2)>;
def : Pat<(v4f32 (X86Unpckhp VR128:$src1, VR128:$src2)),
def : Pat<(v4f32 (X86Unpckh VR128:$src1, VR128:$src2)),
(VUNPCKHPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(v8f32 (X86Unpcklp VR256:$src1, (memopv8f32 addr:$src2))),
def : Pat<(v8f32 (X86Unpckl VR256:$src1, (memopv8f32 addr:$src2))),
(VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(v8f32 (X86Unpcklp VR256:$src1, VR256:$src2)),
def : Pat<(v8f32 (X86Unpckl VR256:$src1, VR256:$src2)),
(VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v8i32 (X86Unpcklp VR256:$src1, VR256:$src2)),
(VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v8i32 (X86Unpcklp VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
(VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(v8f32 (X86Unpckhp VR256:$src1, (memopv8f32 addr:$src2))),
def : Pat<(v8f32 (X86Unpckh VR256:$src1, (memopv8f32 addr:$src2))),
(VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(v8f32 (X86Unpckhp VR256:$src1, VR256:$src2)),
(VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v8i32 (X86Unpckhp VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
(VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(v8i32 (X86Unpckhp VR256:$src1, VR256:$src2)),
def : Pat<(v8f32 (X86Unpckh VR256:$src1, VR256:$src2)),
(VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v2f64 (X86Unpcklp VR128:$src1, (memopv2f64 addr:$src2))),
def : Pat<(v2f64 (X86Unpckl VR128:$src1, (memopv2f64 addr:$src2))),
(VUNPCKLPDrm VR128:$src1, addr:$src2)>;
def : Pat<(v2f64 (X86Unpcklp VR128:$src1, VR128:$src2)),
def : Pat<(v2f64 (X86Unpckl VR128:$src1, VR128:$src2)),
(VUNPCKLPDrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2f64 (X86Unpckhp VR128:$src1, (memopv2f64 addr:$src2))),
def : Pat<(v2f64 (X86Unpckh VR128:$src1, (memopv2f64 addr:$src2))),
(VUNPCKHPDrm VR128:$src1, addr:$src2)>;
def : Pat<(v2f64 (X86Unpckhp VR128:$src1, VR128:$src2)),
def : Pat<(v2f64 (X86Unpckh VR128:$src1, VR128:$src2)),
(VUNPCKHPDrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4f64 (X86Unpcklp VR256:$src1, (memopv4f64 addr:$src2))),
def : Pat<(v4f64 (X86Unpckl VR256:$src1, (memopv4f64 addr:$src2))),
(VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
def : Pat<(v4f64 (X86Unpcklp VR256:$src1, VR256:$src2)),
def : Pat<(v4f64 (X86Unpckl VR256:$src1, VR256:$src2)),
(VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v4i64 (X86Unpcklp VR256:$src1, (memopv4i64 addr:$src2))),
(VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
def : Pat<(v4i64 (X86Unpcklp VR256:$src1, VR256:$src2)),
(VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v4f64 (X86Unpckhp VR256:$src1, (memopv4f64 addr:$src2))),
def : Pat<(v4f64 (X86Unpckh VR256:$src1, (memopv4f64 addr:$src2))),
(VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
def : Pat<(v4f64 (X86Unpckhp VR256:$src1, VR256:$src2)),
(VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v4i64 (X86Unpckhp VR256:$src1, (memopv4i64 addr:$src2))),
(VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
def : Pat<(v4i64 (X86Unpckhp VR256:$src1, VR256:$src2)),
def : Pat<(v4f64 (X86Unpckh VR256:$src1, VR256:$src2)),
(VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
// FIXME: Instead of X86Movddup, there should be a X86Unpcklp here, the
// FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
// problem is during lowering, where it's not possible to recognize the load
// fold cause it has two uses through a bitcast. One use disappears at isel
// time and the fold opportunity reappears.
@ -4199,66 +4183,88 @@ multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,
}
let Predicates = [HasAVX] in {
defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Punpckl,
defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl,
bc_v16i8, 0>, VEX_4V;
defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Punpckl,
defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl,
bc_v8i16, 0>, VEX_4V;
defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Punpckl,
defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl,
bc_v4i32, 0>, VEX_4V;
defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Punpckl,
defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl,
bc_v2i64, 0>, VEX_4V;
defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Punpckh,
defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh,
bc_v16i8, 0>, VEX_4V;
defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Punpckh,
defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh,
bc_v8i16, 0>, VEX_4V;
defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Punpckh,
defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh,
bc_v4i32, 0>, VEX_4V;
defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Punpckh,
defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh,
bc_v2i64, 0>, VEX_4V;
}
let Predicates = [HasAVX2] in {
defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Punpckl,
defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl,
bc_v32i8>, VEX_4V;
defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Punpckl,
defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Unpckl,
bc_v16i16>, VEX_4V;
defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Punpckl,
defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl,
bc_v8i32>, VEX_4V;
defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Punpckl,
defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl,
bc_v4i64>, VEX_4V;
defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Punpckh,
defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh,
bc_v32i8>, VEX_4V;
defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Punpckh,
defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh,
bc_v16i16>, VEX_4V;
defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Punpckh,
defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh,
bc_v8i32>, VEX_4V;
defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Punpckh,
defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh,
bc_v4i64>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpckl,
defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl,
bc_v16i8>;
defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpckl,
defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl,
bc_v8i16>;
defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckl,
defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl,
bc_v4i32>;
defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Punpckl,
defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl,
bc_v2i64>;
defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckh,
defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh,
bc_v16i8>;
defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckh,
defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh,
bc_v8i16>;
defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckh,
defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh,
bc_v4i32>;
defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Punpckh,
defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh,
bc_v2i64>;
}
} // ExeDomain = SSEPackedInt
// Patterns for using AVX1 instructions with integer vectors
// Here to give AVX2 priority
let Predicates = [HasAVX] in {
def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
(VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
(VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
(VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
(VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v4i64 (X86Unpckl VR256:$src1, (memopv4i64 addr:$src2))),
(VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)),
(VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v4i64 (X86Unpckh VR256:$src1, (memopv4i64 addr:$src2))),
(VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)),
(VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
}
// Splat v2f64 / v2i64
let AddedComplexity = 10 in {
def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),