mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-20 03:23:01 +02:00
Merge floating point and integer UNPCK X86ISD node types.
llvm-svn: 145926
This commit is contained in:
parent
ed2be25205
commit
846d53deed
@ -2851,10 +2851,8 @@ static bool isTargetShuffle(unsigned Opcode) {
|
|||||||
case X86ISD::MOVDDUP:
|
case X86ISD::MOVDDUP:
|
||||||
case X86ISD::MOVSS:
|
case X86ISD::MOVSS:
|
||||||
case X86ISD::MOVSD:
|
case X86ISD::MOVSD:
|
||||||
case X86ISD::UNPCKLP:
|
case X86ISD::UNPCKL:
|
||||||
case X86ISD::PUNPCKL:
|
case X86ISD::UNPCKH:
|
||||||
case X86ISD::UNPCKHP:
|
|
||||||
case X86ISD::PUNPCKH:
|
|
||||||
case X86ISD::VPERMILP:
|
case X86ISD::VPERMILP:
|
||||||
case X86ISD::VPERM2X128:
|
case X86ISD::VPERM2X128:
|
||||||
return true;
|
return true;
|
||||||
@ -2914,10 +2912,8 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
|
|||||||
case X86ISD::MOVLPD:
|
case X86ISD::MOVLPD:
|
||||||
case X86ISD::MOVSS:
|
case X86ISD::MOVSS:
|
||||||
case X86ISD::MOVSD:
|
case X86ISD::MOVSD:
|
||||||
case X86ISD::UNPCKLP:
|
case X86ISD::UNPCKL:
|
||||||
case X86ISD::PUNPCKL:
|
case X86ISD::UNPCKH:
|
||||||
case X86ISD::UNPCKHP:
|
|
||||||
case X86ISD::PUNPCKH:
|
|
||||||
return DAG.getNode(Opc, dl, VT, V1, V2);
|
return DAG.getNode(Opc, dl, VT, V1, V2);
|
||||||
}
|
}
|
||||||
return SDValue();
|
return SDValue();
|
||||||
@ -4460,12 +4456,10 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
|
|||||||
DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
|
DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
|
||||||
ShuffleMask);
|
ShuffleMask);
|
||||||
break;
|
break;
|
||||||
case X86ISD::PUNPCKH:
|
case X86ISD::UNPCKH:
|
||||||
case X86ISD::UNPCKHP:
|
|
||||||
DecodeUNPCKHMask(VT, ShuffleMask);
|
DecodeUNPCKHMask(VT, ShuffleMask);
|
||||||
break;
|
break;
|
||||||
case X86ISD::PUNPCKL:
|
case X86ISD::UNPCKL:
|
||||||
case X86ISD::UNPCKLP:
|
|
||||||
DecodeUNPCKLMask(VT, ShuffleMask);
|
DecodeUNPCKLMask(VT, ShuffleMask);
|
||||||
break;
|
break;
|
||||||
case X86ISD::MOVHLPS:
|
case X86ISD::MOVHLPS:
|
||||||
@ -6364,50 +6358,6 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) {
|
|||||||
X86::getShuffleSHUFImmediate(SVOp), DAG);
|
X86::getShuffleSHUFImmediate(SVOp), DAG);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline unsigned getUNPCKLOpcode(EVT VT, bool HasAVX2) {
|
|
||||||
switch(VT.getSimpleVT().SimpleTy) {
|
|
||||||
case MVT::v32i8:
|
|
||||||
case MVT::v16i8:
|
|
||||||
case MVT::v16i16:
|
|
||||||
case MVT::v8i16:
|
|
||||||
case MVT::v4i32:
|
|
||||||
case MVT::v2i64: return X86ISD::PUNPCKL;
|
|
||||||
case MVT::v8i32:
|
|
||||||
case MVT::v4i64:
|
|
||||||
if (HasAVX2) return X86ISD::PUNPCKL;
|
|
||||||
// else use fp unit for int unpack.
|
|
||||||
case MVT::v8f32:
|
|
||||||
case MVT::v4f32:
|
|
||||||
case MVT::v4f64:
|
|
||||||
case MVT::v2f64: return X86ISD::UNPCKLP;
|
|
||||||
default:
|
|
||||||
llvm_unreachable("Unknown type for unpckl");
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline unsigned getUNPCKHOpcode(EVT VT, bool HasAVX2) {
|
|
||||||
switch(VT.getSimpleVT().SimpleTy) {
|
|
||||||
case MVT::v32i8:
|
|
||||||
case MVT::v16i8:
|
|
||||||
case MVT::v16i16:
|
|
||||||
case MVT::v8i16:
|
|
||||||
case MVT::v4i32:
|
|
||||||
case MVT::v2i64: return X86ISD::PUNPCKH;
|
|
||||||
case MVT::v4i64:
|
|
||||||
case MVT::v8i32:
|
|
||||||
if (HasAVX2) return X86ISD::PUNPCKH;
|
|
||||||
// else use fp unit for int unpack.
|
|
||||||
case MVT::v8f32:
|
|
||||||
case MVT::v4f32:
|
|
||||||
case MVT::v4f64:
|
|
||||||
case MVT::v2f64: return X86ISD::UNPCKHP;
|
|
||||||
default:
|
|
||||||
llvm_unreachable("Unknown type for unpckh");
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static
|
static
|
||||||
SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
|
SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
|
||||||
const TargetLowering &TLI,
|
const TargetLowering &TLI,
|
||||||
@ -6518,11 +6468,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
// NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and
|
// NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and
|
||||||
// unpckh_undef). Only use pshufd if speed is more important than size.
|
// unpckh_undef). Only use pshufd if speed is more important than size.
|
||||||
if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp))
|
if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp))
|
||||||
return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1,
|
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
|
||||||
DAG);
|
|
||||||
if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp))
|
if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp))
|
||||||
return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1,
|
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
|
||||||
DAG);
|
|
||||||
|
|
||||||
if (X86::isMOVDDUPMask(SVOp) && Subtarget->hasSSE3orAVX() &&
|
if (X86::isMOVDDUPMask(SVOp) && Subtarget->hasSSE3orAVX() &&
|
||||||
V2IsUndef && RelaxedMayFoldVectorLoad(V1))
|
V2IsUndef && RelaxedMayFoldVectorLoad(V1))
|
||||||
@ -6534,8 +6482,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
// Use to match splats
|
// Use to match splats
|
||||||
if (HasXMMInt && X86::isUNPCKHMask(SVOp, HasAVX2) && V2IsUndef &&
|
if (HasXMMInt && X86::isUNPCKHMask(SVOp, HasAVX2) && V2IsUndef &&
|
||||||
(VT == MVT::v2f64 || VT == MVT::v2i64))
|
(VT == MVT::v2f64 || VT == MVT::v2i64))
|
||||||
return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1,
|
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
|
||||||
DAG);
|
|
||||||
|
|
||||||
if (X86::isPSHUFDMask(SVOp)) {
|
if (X86::isPSHUFDMask(SVOp)) {
|
||||||
// The actual implementation will match the mask in the if above and then
|
// The actual implementation will match the mask in the if above and then
|
||||||
@ -6635,12 +6582,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (isUNPCKLMask(M, VT, HasAVX2))
|
if (isUNPCKLMask(M, VT, HasAVX2))
|
||||||
return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V2,
|
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
|
||||||
DAG);
|
|
||||||
|
|
||||||
if (isUNPCKHMask(M, VT, HasAVX2))
|
if (isUNPCKHMask(M, VT, HasAVX2))
|
||||||
return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V2,
|
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
|
||||||
DAG);
|
|
||||||
|
|
||||||
if (V2IsSplat) {
|
if (V2IsSplat) {
|
||||||
// Normalize mask so all entries that point to V2 points to its first
|
// Normalize mask so all entries that point to V2 points to its first
|
||||||
@ -6664,12 +6609,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
|
ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
|
||||||
|
|
||||||
if (X86::isUNPCKLMask(NewSVOp, HasAVX2))
|
if (X86::isUNPCKLMask(NewSVOp, HasAVX2))
|
||||||
return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V2, V1,
|
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V2, V1, DAG);
|
||||||
DAG);
|
|
||||||
|
|
||||||
if (X86::isUNPCKHMask(NewSVOp, HasAVX2))
|
if (X86::isUNPCKHMask(NewSVOp, HasAVX2))
|
||||||
return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V2, V1,
|
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V2, V1, DAG);
|
||||||
DAG);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Normalize the node to match x86 shuffle ops if needed
|
// Normalize the node to match x86 shuffle ops if needed
|
||||||
@ -6689,8 +6632,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&
|
if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&
|
||||||
SVOp->getSplatIndex() == 0 && V2IsUndef) {
|
SVOp->getSplatIndex() == 0 && V2IsUndef) {
|
||||||
if (VT == MVT::v2f64 || VT == MVT::v2i64)
|
if (VT == MVT::v2f64 || VT == MVT::v2i64)
|
||||||
return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1,
|
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
|
||||||
DAG);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isPSHUFHWMask(M, VT))
|
if (isPSHUFHWMask(M, VT))
|
||||||
@ -6708,11 +6650,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
X86::getShuffleSHUFImmediate(SVOp), DAG);
|
X86::getShuffleSHUFImmediate(SVOp), DAG);
|
||||||
|
|
||||||
if (isUNPCKL_v_undef_Mask(M, VT))
|
if (isUNPCKL_v_undef_Mask(M, VT))
|
||||||
return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1,
|
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
|
||||||
DAG);
|
|
||||||
if (isUNPCKH_v_undef_Mask(M, VT))
|
if (isUNPCKH_v_undef_Mask(M, VT))
|
||||||
return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1,
|
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
|
||||||
DAG);
|
|
||||||
|
|
||||||
//===--------------------------------------------------------------------===//
|
//===--------------------------------------------------------------------===//
|
||||||
// Generate target specific nodes for 128 or 256-bit shuffles only
|
// Generate target specific nodes for 128 or 256-bit shuffles only
|
||||||
@ -11023,10 +10963,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||||||
case X86ISD::MOVSLDUP_LD: return "X86ISD::MOVSLDUP_LD";
|
case X86ISD::MOVSLDUP_LD: return "X86ISD::MOVSLDUP_LD";
|
||||||
case X86ISD::MOVSD: return "X86ISD::MOVSD";
|
case X86ISD::MOVSD: return "X86ISD::MOVSD";
|
||||||
case X86ISD::MOVSS: return "X86ISD::MOVSS";
|
case X86ISD::MOVSS: return "X86ISD::MOVSS";
|
||||||
case X86ISD::UNPCKLP: return "X86ISD::UNPCKLP";
|
case X86ISD::UNPCKL: return "X86ISD::UNPCKL";
|
||||||
case X86ISD::UNPCKHP: return "X86ISD::UNPCKHP";
|
case X86ISD::UNPCKH: return "X86ISD::UNPCKH";
|
||||||
case X86ISD::PUNPCKL: return "X86ISD::PUNPCKL";
|
|
||||||
case X86ISD::PUNPCKH: return "X86ISD::PUNPCKH";
|
|
||||||
case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST";
|
case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST";
|
||||||
case X86ISD::VPERMILP: return "X86ISD::VPERMILP";
|
case X86ISD::VPERMILP: return "X86ISD::VPERMILP";
|
||||||
case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128";
|
case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128";
|
||||||
@ -14616,10 +14554,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
|||||||
case X86ISD::SHUFPS: // Handle all target specific shuffles
|
case X86ISD::SHUFPS: // Handle all target specific shuffles
|
||||||
case X86ISD::SHUFPD:
|
case X86ISD::SHUFPD:
|
||||||
case X86ISD::PALIGN:
|
case X86ISD::PALIGN:
|
||||||
case X86ISD::PUNPCKH:
|
case X86ISD::UNPCKH:
|
||||||
case X86ISD::UNPCKHP:
|
case X86ISD::UNPCKL:
|
||||||
case X86ISD::PUNPCKL:
|
|
||||||
case X86ISD::UNPCKLP:
|
|
||||||
case X86ISD::MOVHLPS:
|
case X86ISD::MOVHLPS:
|
||||||
case X86ISD::MOVLHPS:
|
case X86ISD::MOVLHPS:
|
||||||
case X86ISD::PSHUFD:
|
case X86ISD::PSHUFD:
|
||||||
|
@ -273,10 +273,8 @@ namespace llvm {
|
|||||||
MOVLPD,
|
MOVLPD,
|
||||||
MOVSD,
|
MOVSD,
|
||||||
MOVSS,
|
MOVSS,
|
||||||
UNPCKLP,
|
UNPCKL,
|
||||||
UNPCKHP,
|
UNPCKH,
|
||||||
PUNPCKL,
|
|
||||||
PUNPCKH,
|
|
||||||
VPERMILP,
|
VPERMILP,
|
||||||
VPERM2X128,
|
VPERM2X128,
|
||||||
VBROADCAST,
|
VBROADCAST,
|
||||||
|
@ -130,11 +130,8 @@ def X86Movhlpd : SDNode<"X86ISD::MOVHLPD", SDTShuff2Op>;
|
|||||||
def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>;
|
def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>;
|
||||||
def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
|
def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
|
||||||
|
|
||||||
def X86Unpcklp : SDNode<"X86ISD::UNPCKLP", SDTShuff2Op>;
|
def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>;
|
||||||
def X86Unpckhp : SDNode<"X86ISD::UNPCKHP", SDTShuff2Op>;
|
def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>;
|
||||||
|
|
||||||
def X86Punpckl : SDNode<"X86ISD::PUNPCKL", SDTShuff2Op>;
|
|
||||||
def X86Punpckh : SDNode<"X86ISD::PUNPCKH", SDTShuff2Op>;
|
|
||||||
|
|
||||||
def X86VPermilp : SDNode<"X86ISD::VPERMILP", SDTShuff2OpI>;
|
def X86VPermilp : SDNode<"X86ISD::VPERMILP", SDTShuff2OpI>;
|
||||||
|
|
||||||
|
@ -1157,11 +1157,11 @@ let Predicates = [HasAVX] in {
|
|||||||
(bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
|
(bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
|
||||||
(VMOVHPSrm VR128:$src1, addr:$src2)>;
|
(VMOVHPSrm VR128:$src1, addr:$src2)>;
|
||||||
|
|
||||||
// FIXME: Instead of X86Unpcklp, there should be a X86Movlhpd here, the problem
|
// FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem
|
||||||
// is during lowering, where it's not possible to recognize the load fold cause
|
// is during lowering, where it's not possible to recognize the load fold cause
|
||||||
// it has two uses through a bitcast. One use disappears at isel time and the
|
// it has two uses through a bitcast. One use disappears at isel time and the
|
||||||
// fold opportunity reappears.
|
// fold opportunity reappears.
|
||||||
def : Pat<(v2f64 (X86Unpcklp VR128:$src1,
|
def : Pat<(v2f64 (X86Unpckl VR128:$src1,
|
||||||
(scalar_to_vector (loadf64 addr:$src2)))),
|
(scalar_to_vector (loadf64 addr:$src2)))),
|
||||||
(VMOVHPDrm VR128:$src1, addr:$src2)>;
|
(VMOVHPDrm VR128:$src1, addr:$src2)>;
|
||||||
|
|
||||||
@ -1172,10 +1172,10 @@ let Predicates = [HasAVX] in {
|
|||||||
|
|
||||||
// Store patterns
|
// Store patterns
|
||||||
def : Pat<(store (f64 (vector_extract
|
def : Pat<(store (f64 (vector_extract
|
||||||
(v2f64 (X86Unpckhp VR128:$src, (undef))), (iPTR 0))), addr:$dst),
|
(v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst),
|
||||||
(VMOVHPSmr addr:$dst, VR128:$src)>;
|
(VMOVHPSmr addr:$dst, VR128:$src)>;
|
||||||
def : Pat<(store (f64 (vector_extract
|
def : Pat<(store (f64 (vector_extract
|
||||||
(v2f64 (X86Unpckhp VR128:$src, (undef))), (iPTR 0))), addr:$dst),
|
(v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst),
|
||||||
(VMOVHPDmr addr:$dst, VR128:$src)>;
|
(VMOVHPDmr addr:$dst, VR128:$src)>;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1195,16 +1195,16 @@ let Predicates = [HasSSE1] in {
|
|||||||
|
|
||||||
// Store patterns
|
// Store patterns
|
||||||
def : Pat<(store (f64 (vector_extract
|
def : Pat<(store (f64 (vector_extract
|
||||||
(v2f64 (X86Unpckhp VR128:$src, (undef))), (iPTR 0))), addr:$dst),
|
(v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst),
|
||||||
(MOVHPSmr addr:$dst, VR128:$src)>;
|
(MOVHPSmr addr:$dst, VR128:$src)>;
|
||||||
}
|
}
|
||||||
|
|
||||||
let Predicates = [HasSSE2] in {
|
let Predicates = [HasSSE2] in {
|
||||||
// FIXME: Instead of X86Unpcklp, there should be a X86Movlhpd here, the problem
|
// FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem
|
||||||
// is during lowering, where it's not possible to recognize the load fold cause
|
// is during lowering, where it's not possible to recognize the load fold cause
|
||||||
// it has two uses through a bitcast. One use disappears at isel time and the
|
// it has two uses through a bitcast. One use disappears at isel time and the
|
||||||
// fold opportunity reappears.
|
// fold opportunity reappears.
|
||||||
def : Pat<(v2f64 (X86Unpcklp VR128:$src1,
|
def : Pat<(v2f64 (X86Unpckl VR128:$src1,
|
||||||
(scalar_to_vector (loadf64 addr:$src2)))),
|
(scalar_to_vector (loadf64 addr:$src2)))),
|
||||||
(MOVHPDrm VR128:$src1, addr:$src2)>;
|
(MOVHPDrm VR128:$src1, addr:$src2)>;
|
||||||
|
|
||||||
@ -1215,7 +1215,7 @@ let Predicates = [HasSSE2] in {
|
|||||||
|
|
||||||
// Store patterns
|
// Store patterns
|
||||||
def : Pat<(store (f64 (vector_extract
|
def : Pat<(store (f64 (vector_extract
|
||||||
(v2f64 (X86Unpckhp VR128:$src, (undef))), (iPTR 0))),addr:$dst),
|
(v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))),addr:$dst),
|
||||||
(MOVHPDmr addr:$dst, VR128:$src)>;
|
(MOVHPDmr addr:$dst, VR128:$src)>;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2431,27 +2431,27 @@ let AddedComplexity = 10 in {
|
|||||||
} // AddedComplexity
|
} // AddedComplexity
|
||||||
|
|
||||||
let Predicates = [HasSSE1] in {
|
let Predicates = [HasSSE1] in {
|
||||||
def : Pat<(v4f32 (X86Unpcklp VR128:$src1, (memopv4f32 addr:$src2))),
|
def : Pat<(v4f32 (X86Unpckl VR128:$src1, (memopv4f32 addr:$src2))),
|
||||||
(UNPCKLPSrm VR128:$src1, addr:$src2)>;
|
(UNPCKLPSrm VR128:$src1, addr:$src2)>;
|
||||||
def : Pat<(v4f32 (X86Unpcklp VR128:$src1, VR128:$src2)),
|
def : Pat<(v4f32 (X86Unpckl VR128:$src1, VR128:$src2)),
|
||||||
(UNPCKLPSrr VR128:$src1, VR128:$src2)>;
|
(UNPCKLPSrr VR128:$src1, VR128:$src2)>;
|
||||||
def : Pat<(v4f32 (X86Unpckhp VR128:$src1, (memopv4f32 addr:$src2))),
|
def : Pat<(v4f32 (X86Unpckh VR128:$src1, (memopv4f32 addr:$src2))),
|
||||||
(UNPCKHPSrm VR128:$src1, addr:$src2)>;
|
(UNPCKHPSrm VR128:$src1, addr:$src2)>;
|
||||||
def : Pat<(v4f32 (X86Unpckhp VR128:$src1, VR128:$src2)),
|
def : Pat<(v4f32 (X86Unpckh VR128:$src1, VR128:$src2)),
|
||||||
(UNPCKHPSrr VR128:$src1, VR128:$src2)>;
|
(UNPCKHPSrr VR128:$src1, VR128:$src2)>;
|
||||||
}
|
}
|
||||||
|
|
||||||
let Predicates = [HasSSE2] in {
|
let Predicates = [HasSSE2] in {
|
||||||
def : Pat<(v2f64 (X86Unpcklp VR128:$src1, (memopv2f64 addr:$src2))),
|
def : Pat<(v2f64 (X86Unpckl VR128:$src1, (memopv2f64 addr:$src2))),
|
||||||
(UNPCKLPDrm VR128:$src1, addr:$src2)>;
|
(UNPCKLPDrm VR128:$src1, addr:$src2)>;
|
||||||
def : Pat<(v2f64 (X86Unpcklp VR128:$src1, VR128:$src2)),
|
def : Pat<(v2f64 (X86Unpckl VR128:$src1, VR128:$src2)),
|
||||||
(UNPCKLPDrr VR128:$src1, VR128:$src2)>;
|
(UNPCKLPDrr VR128:$src1, VR128:$src2)>;
|
||||||
def : Pat<(v2f64 (X86Unpckhp VR128:$src1, (memopv2f64 addr:$src2))),
|
def : Pat<(v2f64 (X86Unpckh VR128:$src1, (memopv2f64 addr:$src2))),
|
||||||
(UNPCKHPDrm VR128:$src1, addr:$src2)>;
|
(UNPCKHPDrm VR128:$src1, addr:$src2)>;
|
||||||
def : Pat<(v2f64 (X86Unpckhp VR128:$src1, VR128:$src2)),
|
def : Pat<(v2f64 (X86Unpckh VR128:$src1, VR128:$src2)),
|
||||||
(UNPCKHPDrr VR128:$src1, VR128:$src2)>;
|
(UNPCKHPDrr VR128:$src1, VR128:$src2)>;
|
||||||
|
|
||||||
// FIXME: Instead of X86Movddup, there should be a X86Unpcklp here, the
|
// FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
|
||||||
// problem is during lowering, where it's not possible to recognize the load
|
// problem is during lowering, where it's not possible to recognize the load
|
||||||
// fold cause it has two uses through a bitcast. One use disappears at isel
|
// fold cause it has two uses through a bitcast. One use disappears at isel
|
||||||
// time and the fold opportunity reappears.
|
// time and the fold opportunity reappears.
|
||||||
@ -2464,59 +2464,43 @@ let Predicates = [HasSSE2] in {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let Predicates = [HasAVX] in {
|
let Predicates = [HasAVX] in {
|
||||||
def : Pat<(v4f32 (X86Unpcklp VR128:$src1, (memopv4f32 addr:$src2))),
|
def : Pat<(v4f32 (X86Unpckl VR128:$src1, (memopv4f32 addr:$src2))),
|
||||||
(VUNPCKLPSrm VR128:$src1, addr:$src2)>;
|
(VUNPCKLPSrm VR128:$src1, addr:$src2)>;
|
||||||
def : Pat<(v4f32 (X86Unpcklp VR128:$src1, VR128:$src2)),
|
def : Pat<(v4f32 (X86Unpckl VR128:$src1, VR128:$src2)),
|
||||||
(VUNPCKLPSrr VR128:$src1, VR128:$src2)>;
|
(VUNPCKLPSrr VR128:$src1, VR128:$src2)>;
|
||||||
def : Pat<(v4f32 (X86Unpckhp VR128:$src1, (memopv4f32 addr:$src2))),
|
def : Pat<(v4f32 (X86Unpckh VR128:$src1, (memopv4f32 addr:$src2))),
|
||||||
(VUNPCKHPSrm VR128:$src1, addr:$src2)>;
|
(VUNPCKHPSrm VR128:$src1, addr:$src2)>;
|
||||||
def : Pat<(v4f32 (X86Unpckhp VR128:$src1, VR128:$src2)),
|
def : Pat<(v4f32 (X86Unpckh VR128:$src1, VR128:$src2)),
|
||||||
(VUNPCKHPSrr VR128:$src1, VR128:$src2)>;
|
(VUNPCKHPSrr VR128:$src1, VR128:$src2)>;
|
||||||
|
|
||||||
def : Pat<(v8f32 (X86Unpcklp VR256:$src1, (memopv8f32 addr:$src2))),
|
def : Pat<(v8f32 (X86Unpckl VR256:$src1, (memopv8f32 addr:$src2))),
|
||||||
(VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
|
(VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
|
||||||
def : Pat<(v8f32 (X86Unpcklp VR256:$src1, VR256:$src2)),
|
def : Pat<(v8f32 (X86Unpckl VR256:$src1, VR256:$src2)),
|
||||||
(VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
|
(VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
|
||||||
def : Pat<(v8i32 (X86Unpcklp VR256:$src1, VR256:$src2)),
|
def : Pat<(v8f32 (X86Unpckh VR256:$src1, (memopv8f32 addr:$src2))),
|
||||||
(VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
|
|
||||||
def : Pat<(v8i32 (X86Unpcklp VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
|
|
||||||
(VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
|
|
||||||
def : Pat<(v8f32 (X86Unpckhp VR256:$src1, (memopv8f32 addr:$src2))),
|
|
||||||
(VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
|
(VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
|
||||||
def : Pat<(v8f32 (X86Unpckhp VR256:$src1, VR256:$src2)),
|
def : Pat<(v8f32 (X86Unpckh VR256:$src1, VR256:$src2)),
|
||||||
(VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
|
|
||||||
def : Pat<(v8i32 (X86Unpckhp VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
|
|
||||||
(VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
|
|
||||||
def : Pat<(v8i32 (X86Unpckhp VR256:$src1, VR256:$src2)),
|
|
||||||
(VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
|
(VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
|
||||||
|
|
||||||
def : Pat<(v2f64 (X86Unpcklp VR128:$src1, (memopv2f64 addr:$src2))),
|
def : Pat<(v2f64 (X86Unpckl VR128:$src1, (memopv2f64 addr:$src2))),
|
||||||
(VUNPCKLPDrm VR128:$src1, addr:$src2)>;
|
(VUNPCKLPDrm VR128:$src1, addr:$src2)>;
|
||||||
def : Pat<(v2f64 (X86Unpcklp VR128:$src1, VR128:$src2)),
|
def : Pat<(v2f64 (X86Unpckl VR128:$src1, VR128:$src2)),
|
||||||
(VUNPCKLPDrr VR128:$src1, VR128:$src2)>;
|
(VUNPCKLPDrr VR128:$src1, VR128:$src2)>;
|
||||||
def : Pat<(v2f64 (X86Unpckhp VR128:$src1, (memopv2f64 addr:$src2))),
|
def : Pat<(v2f64 (X86Unpckh VR128:$src1, (memopv2f64 addr:$src2))),
|
||||||
(VUNPCKHPDrm VR128:$src1, addr:$src2)>;
|
(VUNPCKHPDrm VR128:$src1, addr:$src2)>;
|
||||||
def : Pat<(v2f64 (X86Unpckhp VR128:$src1, VR128:$src2)),
|
def : Pat<(v2f64 (X86Unpckh VR128:$src1, VR128:$src2)),
|
||||||
(VUNPCKHPDrr VR128:$src1, VR128:$src2)>;
|
(VUNPCKHPDrr VR128:$src1, VR128:$src2)>;
|
||||||
|
|
||||||
def : Pat<(v4f64 (X86Unpcklp VR256:$src1, (memopv4f64 addr:$src2))),
|
def : Pat<(v4f64 (X86Unpckl VR256:$src1, (memopv4f64 addr:$src2))),
|
||||||
(VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
|
(VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
|
||||||
def : Pat<(v4f64 (X86Unpcklp VR256:$src1, VR256:$src2)),
|
def : Pat<(v4f64 (X86Unpckl VR256:$src1, VR256:$src2)),
|
||||||
(VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
|
(VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
|
||||||
def : Pat<(v4i64 (X86Unpcklp VR256:$src1, (memopv4i64 addr:$src2))),
|
def : Pat<(v4f64 (X86Unpckh VR256:$src1, (memopv4f64 addr:$src2))),
|
||||||
(VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
|
|
||||||
def : Pat<(v4i64 (X86Unpcklp VR256:$src1, VR256:$src2)),
|
|
||||||
(VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
|
|
||||||
def : Pat<(v4f64 (X86Unpckhp VR256:$src1, (memopv4f64 addr:$src2))),
|
|
||||||
(VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
|
(VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
|
||||||
def : Pat<(v4f64 (X86Unpckhp VR256:$src1, VR256:$src2)),
|
def : Pat<(v4f64 (X86Unpckh VR256:$src1, VR256:$src2)),
|
||||||
(VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
|
|
||||||
def : Pat<(v4i64 (X86Unpckhp VR256:$src1, (memopv4i64 addr:$src2))),
|
|
||||||
(VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
|
|
||||||
def : Pat<(v4i64 (X86Unpckhp VR256:$src1, VR256:$src2)),
|
|
||||||
(VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
|
(VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
|
||||||
|
|
||||||
// FIXME: Instead of X86Movddup, there should be a X86Unpcklp here, the
|
// FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
|
||||||
// problem is during lowering, where it's not possible to recognize the load
|
// problem is during lowering, where it's not possible to recognize the load
|
||||||
// fold cause it has two uses through a bitcast. One use disappears at isel
|
// fold cause it has two uses through a bitcast. One use disappears at isel
|
||||||
// time and the fold opportunity reappears.
|
// time and the fold opportunity reappears.
|
||||||
@ -4199,66 +4183,88 @@ multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,
|
|||||||
}
|
}
|
||||||
|
|
||||||
let Predicates = [HasAVX] in {
|
let Predicates = [HasAVX] in {
|
||||||
defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Punpckl,
|
defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl,
|
||||||
bc_v16i8, 0>, VEX_4V;
|
bc_v16i8, 0>, VEX_4V;
|
||||||
defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Punpckl,
|
defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl,
|
||||||
bc_v8i16, 0>, VEX_4V;
|
bc_v8i16, 0>, VEX_4V;
|
||||||
defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Punpckl,
|
defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl,
|
||||||
bc_v4i32, 0>, VEX_4V;
|
bc_v4i32, 0>, VEX_4V;
|
||||||
defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Punpckl,
|
defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl,
|
||||||
bc_v2i64, 0>, VEX_4V;
|
bc_v2i64, 0>, VEX_4V;
|
||||||
|
|
||||||
defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Punpckh,
|
defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh,
|
||||||
bc_v16i8, 0>, VEX_4V;
|
bc_v16i8, 0>, VEX_4V;
|
||||||
defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Punpckh,
|
defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh,
|
||||||
bc_v8i16, 0>, VEX_4V;
|
bc_v8i16, 0>, VEX_4V;
|
||||||
defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Punpckh,
|
defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh,
|
||||||
bc_v4i32, 0>, VEX_4V;
|
bc_v4i32, 0>, VEX_4V;
|
||||||
defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Punpckh,
|
defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh,
|
||||||
bc_v2i64, 0>, VEX_4V;
|
bc_v2i64, 0>, VEX_4V;
|
||||||
}
|
}
|
||||||
|
|
||||||
let Predicates = [HasAVX2] in {
|
let Predicates = [HasAVX2] in {
|
||||||
defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Punpckl,
|
defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl,
|
||||||
bc_v32i8>, VEX_4V;
|
bc_v32i8>, VEX_4V;
|
||||||
defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Punpckl,
|
defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Unpckl,
|
||||||
bc_v16i16>, VEX_4V;
|
bc_v16i16>, VEX_4V;
|
||||||
defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Punpckl,
|
defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl,
|
||||||
bc_v8i32>, VEX_4V;
|
bc_v8i32>, VEX_4V;
|
||||||
defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Punpckl,
|
defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl,
|
||||||
bc_v4i64>, VEX_4V;
|
bc_v4i64>, VEX_4V;
|
||||||
|
|
||||||
defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Punpckh,
|
defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh,
|
||||||
bc_v32i8>, VEX_4V;
|
bc_v32i8>, VEX_4V;
|
||||||
defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Punpckh,
|
defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh,
|
||||||
bc_v16i16>, VEX_4V;
|
bc_v16i16>, VEX_4V;
|
||||||
defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Punpckh,
|
defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh,
|
||||||
bc_v8i32>, VEX_4V;
|
bc_v8i32>, VEX_4V;
|
||||||
defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Punpckh,
|
defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh,
|
||||||
bc_v4i64>, VEX_4V;
|
bc_v4i64>, VEX_4V;
|
||||||
}
|
}
|
||||||
|
|
||||||
let Constraints = "$src1 = $dst" in {
|
let Constraints = "$src1 = $dst" in {
|
||||||
defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpckl,
|
defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl,
|
||||||
bc_v16i8>;
|
bc_v16i8>;
|
||||||
defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpckl,
|
defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl,
|
||||||
bc_v8i16>;
|
bc_v8i16>;
|
||||||
defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckl,
|
defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl,
|
||||||
bc_v4i32>;
|
bc_v4i32>;
|
||||||
defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Punpckl,
|
defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl,
|
||||||
bc_v2i64>;
|
bc_v2i64>;
|
||||||
|
|
||||||
defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckh,
|
defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh,
|
||||||
bc_v16i8>;
|
bc_v16i8>;
|
||||||
defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckh,
|
defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh,
|
||||||
bc_v8i16>;
|
bc_v8i16>;
|
||||||
defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckh,
|
defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh,
|
||||||
bc_v4i32>;
|
bc_v4i32>;
|
||||||
defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Punpckh,
|
defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh,
|
||||||
bc_v2i64>;
|
bc_v2i64>;
|
||||||
}
|
}
|
||||||
} // ExeDomain = SSEPackedInt
|
} // ExeDomain = SSEPackedInt
|
||||||
|
|
||||||
|
// Patterns for using AVX1 instructions with integer vectors
|
||||||
|
// Here to give AVX2 priority
|
||||||
|
let Predicates = [HasAVX] in {
|
||||||
|
def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
|
||||||
|
(VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
|
||||||
|
def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
|
||||||
|
(VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
|
||||||
|
def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
|
||||||
|
(VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
|
||||||
|
def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
|
||||||
|
(VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
|
||||||
|
|
||||||
|
def : Pat<(v4i64 (X86Unpckl VR256:$src1, (memopv4i64 addr:$src2))),
|
||||||
|
(VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
|
||||||
|
def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)),
|
||||||
|
(VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
|
||||||
|
def : Pat<(v4i64 (X86Unpckh VR256:$src1, (memopv4i64 addr:$src2))),
|
||||||
|
(VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
|
||||||
|
def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)),
|
||||||
|
(VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
|
||||||
|
}
|
||||||
|
|
||||||
// Splat v2f64 / v2i64
|
// Splat v2f64 / v2i64
|
||||||
let AddedComplexity = 10 in {
|
let AddedComplexity = 10 in {
|
||||||
def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
|
def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
|
||||||
|
Loading…
Reference in New Issue
Block a user