mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 11:42:57 +01:00
With SSE3, when the source is a load or has multiple uses, favor movddup over shufp*, pshufd, etc. Without SSE3, or when the source comes from a register, use movlhps.
llvm-svn: 56620
This commit is contained in:
parent
62f64ab4c8
commit
d190aeb62d
@ -2516,6 +2516,21 @@ bool X86::isSplatLoMask(SDNode *N) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
|
||||||
|
/// specifies a shuffle of elements that is suitable for input to MOVDDUP.
|
||||||
|
/// The mask qualifies when its low half is <0, 1, ..., e-1> and its high half
/// repeats those same indices (e being half the number of mask operands),
/// with every position checked through isUndefOrEqual.
/// NOTE(review): isUndefOrEqual is defined outside this view; by its name it
/// accepts either an undef operand or one equal to the given index - confirm.
bool X86::isMOVDDUPMask(SDNode *N) {
|
||||||
|
// The mask must be given as a BUILD_VECTOR node.
assert(N->getOpcode() == ISD::BUILD_VECTOR);
|
||||||
|
|
||||||
|
// e is the number of mask operands in each half.
unsigned e = N->getNumOperands() / 2;
|
||||||
|
// Low half: operand i must pass the check against index i.
for (unsigned i = 0; i < e; ++i)
|
||||||
|
if (!isUndefOrEqual(N->getOperand(i), i))
|
||||||
|
return false;
|
||||||
|
// High half: operand e+i must pass the check against index i as well, so the
// high half duplicates the low half.
for (unsigned i = 0; i < e; ++i)
|
||||||
|
if (!isUndefOrEqual(N->getOperand(e+i), i))
|
||||||
|
return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
|
/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
|
||||||
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
|
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
|
||||||
/// instructions.
|
/// instructions.
|
||||||
@ -2683,15 +2698,14 @@ static bool ShouldXformToMOVHLPS(SDNode *Mask) {
|
|||||||
/// is promoted to a vector. It also returns the LoadSDNode by reference if
|
/// is promoted to a vector. It also returns the LoadSDNode by reference if
|
||||||
/// required.
|
/// required.
|
||||||
static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) {
|
static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) {
|
||||||
if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
|
if (N->getOpcode() != ISD::SCALAR_TO_VECTOR)
|
||||||
N = N->getOperand(0).getNode();
|
return false;
|
||||||
if (ISD::isNON_EXTLoad(N)) {
|
N = N->getOperand(0).getNode();
|
||||||
if (LD)
|
if (!ISD::isNON_EXTLoad(N))
|
||||||
*LD = cast<LoadSDNode>(N);
|
return false;
|
||||||
return true;
|
if (LD)
|
||||||
}
|
*LD = cast<LoadSDNode>(N);
|
||||||
}
|
return true;
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
|
/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
|
||||||
@ -2943,6 +2957,46 @@ static SDValue PromoteSplat(SDValue Op, SelectionDAG &DAG, bool HasSSE2) {
|
|||||||
return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
|
return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// isVectorLoad - Returns true if the node is a vector load, a scalar
|
||||||
|
/// load that's promoted to vector, or a load bitcasted.
|
||||||
|
/// Only a single SCALAR_TO_VECTOR or BIT_CONVERT wrapper is looked through;
/// the wrapped operand itself must be a LoadSDNode.
static bool isVectorLoad(SDValue Op) {
|
||||||
|
assert(Op.getValueType().isVector() && "Expected a vector type");
|
||||||
|
// For a promoted scalar or a bitcast, inspect the wrapped operand rather than
// the wrapper node.
if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR ||
|
||||||
|
Op.getOpcode() == ISD::BIT_CONVERT) {
|
||||||
|
return isa<LoadSDNode>(Op.getOperand(0));
|
||||||
|
}
|
||||||
|
// Otherwise the value itself must be a load.
return isa<LoadSDNode>(Op);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// CanonicalizeMovddup - Canonicalize a movddup-style shuffle to v2f64 (when
/// movddup is preferred) or to v4f32 (when movlhps is preferred).
|
||||||
|
/// Op is the shuffle whose value type is the result type, V1 is the input that
/// gets shuffled, and HasSSE3 says whether movddup may be chosen. The incoming
/// Mask value is never read: it is overwritten with a freshly built mask.
/// Returns Op unchanged when its type already equals the chosen type;
/// otherwise returns a BIT_CONVERT back to Op's type of a new VECTOR_SHUFFLE
/// performed in the chosen type.
///
|
||||||
|
static SDValue CanonicalizeMovddup(SDValue Op, SDValue V1, SDValue Mask,
|
||||||
|
SelectionDAG &DAG, bool HasSSE3) {
|
||||||
|
// If we have sse3 and shuffle has more than one use or input is a load, then
|
||||||
|
// use movddup. Otherwise, use movlhps.
|
||||||
|
bool UseMovddup = HasSSE3 && (!Op.hasOneUse() || isVectorLoad(V1));
|
||||||
|
// PVT is the type the shuffle is rewritten in: v2f64 for movddup, v4f32 for
// movlhps.
MVT PVT = UseMovddup ? MVT::v2f64 : MVT::v4f32;
|
||||||
|
MVT VT = Op.getValueType();
|
||||||
|
// Already in the chosen type: nothing to rewrite.
if (VT == PVT)
|
||||||
|
return Op;
|
||||||
|
unsigned NumElems = PVT.getVectorNumElements();
|
||||||
|
if (NumElems == 2) {
|
||||||
|
// v2f64: mask <0, 0>.
SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
|
||||||
|
Mask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst);
|
||||||
|
} else {
|
||||||
|
// v4f32: mask <0, 1, 0, 1>.
assert(NumElems == 4);
|
||||||
|
SDValue Cst0 = DAG.getTargetConstant(0, MVT::i32);
|
||||||
|
SDValue Cst1 = DAG.getTargetConstant(1, MVT::i32);
|
||||||
|
Mask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst0, Cst1, Cst0, Cst1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bitcast the input to PVT, shuffle it against an undef second operand, and
// bitcast the result back to the original type.
V1 = DAG.getNode(ISD::BIT_CONVERT, PVT, V1);
|
||||||
|
SDValue Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, PVT, V1,
|
||||||
|
DAG.getNode(ISD::UNDEF, PVT), Mask);
|
||||||
|
return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
|
||||||
|
}
|
||||||
|
|
||||||
/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
|
/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
|
||||||
/// vector of zero or undef vector. This produces a shuffle where the low
|
/// vector of zero or undef vector. This produces a shuffle where the low
|
||||||
/// element of V2 is swizzled into the zero/undef vector, landing at element
|
/// element of V2 is swizzled into the zero/undef vector, landing at element
|
||||||
@ -3894,6 +3948,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
|
|||||||
return PromoteSplat(Op, DAG, Subtarget->hasSSE2());
|
return PromoteSplat(Op, DAG, Subtarget->hasSSE2());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Canonicalize movddup shuffles.
|
||||||
|
if (V2IsUndef && Subtarget->hasSSE2() &&
|
||||||
|
X86::isMOVDDUPMask(PermMask.getNode()))
|
||||||
|
return CanonicalizeMovddup(Op, V1, PermMask, DAG, Subtarget->hasSSE3());
|
||||||
|
|
||||||
// If the shuffle can be profitably rewritten as a narrower shuffle, then
|
// If the shuffle can be profitably rewritten as a narrower shuffle, then
|
||||||
// do it!
|
// do it!
|
||||||
if (VT == MVT::v8i16 || VT == MVT::v16i8) {
|
if (VT == MVT::v8i16 || VT == MVT::v16i8) {
|
||||||
|
@ -295,6 +295,10 @@ namespace llvm {
|
|||||||
/// specifies a splat of zero element.
|
/// specifies a splat of zero element.
|
||||||
bool isSplatLoMask(SDNode *N);
|
bool isSplatLoMask(SDNode *N);
|
||||||
|
|
||||||
|
/// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
|
||||||
|
/// specifies a shuffle of elements that is suitable for input to MOVDDUP.
|
||||||
|
bool isMOVDDUPMask(SDNode *N);
|
||||||
|
|
||||||
/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
|
/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
|
||||||
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
|
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
|
||||||
/// instructions.
|
/// instructions.
|
||||||
|
@ -208,6 +208,10 @@ def SSE_splat_lo_mask : PatLeaf<(build_vector), [{
|
|||||||
return X86::isSplatLoMask(N);
|
return X86::isSplatLoMask(N);
|
||||||
}]>;
|
}]>;
|
||||||
|
|
||||||
|
// Pattern leaf matching a build_vector shuffle mask for which
// X86::isMOVDDUPMask returns true.
def MOVDDUP_shuffle_mask : PatLeaf<(build_vector), [{
|
||||||
|
return X86::isMOVDDUPMask(N);
|
||||||
|
}]>;
|
||||||
|
|
||||||
def MOVHLPS_shuffle_mask : PatLeaf<(build_vector), [{
|
def MOVHLPS_shuffle_mask : PatLeaf<(build_vector), [{
|
||||||
return X86::isMOVHLPSMask(N);
|
return X86::isMOVHLPSMask(N);
|
||||||
}]>;
|
}]>;
|
||||||
@ -755,6 +759,11 @@ def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:
|
|||||||
} // AddedComplexity
|
} // AddedComplexity
|
||||||
} // Constraints = "$src1 = $dst"
|
} // Constraints = "$src1 = $dst"
|
||||||
|
|
||||||
|
// A v4f32 shuffle of $src with undef whose mask satisfies
// MOVDDUP_shuffle_mask is selected as MOVLHPSrr with $src as both operands.
let AddedComplexity = 15 in
|
||||||
|
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef), MOVDDUP_shuffle_mask)),
|
||||||
|
(MOVLHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// Arithmetic
|
// Arithmetic
|
||||||
@ -2452,16 +2461,24 @@ def MOVSLDUPrm : S3SI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
|
|||||||
|
|
||||||
def MOVDDUPrr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
def MOVDDUPrr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||||
"movddup\t{$src, $dst|$dst, $src}",
|
"movddup\t{$src, $dst|$dst, $src}",
|
||||||
[(set VR128:$dst, (v2f64 (vector_shuffle
|
[(set VR128:$dst,
|
||||||
VR128:$src, (undef),
|
(v2f64 (vector_shuffle VR128:$src, (undef),
|
||||||
SSE_splat_lo_mask)))]>;
|
MOVDDUP_shuffle_mask)))]>;
|
||||||
def MOVDDUPrm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
|
def MOVDDUPrm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
|
||||||
"movddup\t{$src, $dst|$dst, $src}",
|
"movddup\t{$src, $dst|$dst, $src}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(v2f64 (vector_shuffle
|
(v2f64 (vector_shuffle
|
||||||
(scalar_to_vector (loadf64 addr:$src)),
|
(scalar_to_vector (loadf64 addr:$src)),
|
||||||
(undef),
|
(undef), MOVDDUP_shuffle_mask)))]>;
|
||||||
SSE_splat_lo_mask)))]>;
|
|
||||||
|
// With SSE3, select MOVDDUPrm on the address when the shuffled source is an
// i64 scalar load promoted to v2i64 and bitcast to v2f64.
def : Pat<(vector_shuffle
|
||||||
|
(bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))),
|
||||||
|
(undef), MOVDDUP_shuffle_mask),
|
||||||
|
(MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
|
||||||
|
// With SSE3, select MOVDDUPrm on the address when the shuffled source is a
// memopv2f64 load.
def : Pat<(vector_shuffle
|
||||||
|
(memopv2f64 addr:$src), (undef), MOVDDUP_shuffle_mask),
|
||||||
|
(MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
|
||||||
|
|
||||||
|
|
||||||
// Arithmetic
|
// Arithmetic
|
||||||
let Constraints = "$src1 = $dst" in {
|
let Constraints = "$src1 = $dst" in {
|
||||||
|
16
test/CodeGen/X86/vec_shuffle-22.ll
Normal file
16
test/CodeGen/X86/vec_shuffle-22.ll
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep shuf
|
||||||
|
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse3 | grep movlhps | count 2
|
||||||
|
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse3 | grep movddup | count 1
|
||||||
|
|
||||||
|
; t1: shuffle mask <0, 1, 0, 1> applied to a <4 x float> function argument
; (the source is not a load).
define <4 x float> @t1(<4 x float> %a) nounwind {
|
||||||
|
entry:
|
||||||
|
%tmp1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> < i32 0, i32 1, i32 0, i32 1 > ; <<4 x float>> [#uses=1]
|
||||||
|
ret <4 x float> %tmp1
|
||||||
|
}
|
||||||
|
|
||||||
|
; t2: shuffle mask <0, 1, 0, 1> applied to a <4 x i32> value loaded from
; memory (the source is a load).
define <4 x i32> @t2(<4 x i32>* %a) nounwind {
|
||||||
|
entry:
|
||||||
|
%tmp1 = load <4 x i32>* %a;
|
||||||
|
%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> < i32 0, i32 1, i32 0, i32 1 > ; <<4 x i32>> [#uses=1]
|
||||||
|
ret <4 x i32> %tmp2
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user