Add support for AVX 256-bit version of MOVDDUP!
llvm-svn: 138588
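
For context, the 256-bit VMOVDDUP duplicates the even-indexed double-precision element within each 128-bit lane, i.e. it performs the shuffle <0, 0, 2, 2> on a v4f64 or v4i64 vector. Below is a minimal C++ sketch of that semantics using the corresponding AVX intrinsic; it is not part of the commit (compile with -mavx):

// Illustration only: _mm256_movedup_pd compiles to the 256-bit VMOVDDUP.
// It maps {a0, a1, a2, a3} to {a0, a0, a2, a2}.
#include <immintrin.h>
#include <cstdio>

int main() {
  __m256d a = _mm256_set_pd(4.0, 3.0, 2.0, 1.0); // a = {1, 2, 3, 4}
  __m256d r = _mm256_movedup_pd(a);              // r = {1, 1, 3, 3}
  double out[4];
  _mm256_storeu_pd(out, r);
  printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); // 1 1 3 3
  return 0;
}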
Commit 5b3d2c9e17 (parent dedd2ffa0b)
lib/Target/X86/X86ISelLowering.cpp
@@ -3560,6 +3560,13 @@ static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) {
   if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
     return false;

+  // For 256-bit i64/f64, use MOVDDUPY instead, so reject the matching pattern
+  // FIXME: Need a better way to get rid of this, there's no latency difference
+  // between UNPCKLPD and MOVDDUP; the latter should always be checked first and
+  // the former later. We should also remove the "_undef" special mask.
+  if (NumElems == 4 && VT.getSizeInBits() == 256)
+    return false;
+
   // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
   // independently on 128-bit lanes.
   unsigned NumLanes = VT.getSizeInBits() / 128;
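
Why the early return above matters: for a 256-bit v4f64, the lane-local "unpckl with self" mask that isUNPCKL_v_undef_Mask accepts is exactly the <0,0,2,2> mask that MOVDDUPY wants, so without the rejection UNPCKLPD would claim the shuffle before MOVDDUPY is ever tried. A standalone C++ sketch (not LLVM code) that builds that mask:

// Build the unpckl_v_undef mask for v4f64: within each 128-bit lane,
// interleave the low half of the lane with itself.
#include <cstdio>

int main() {
  const int NumElems = 4, NumLanes = 2, LaneSize = NumElems / NumLanes;
  int Mask[NumElems];
  for (int l = 0; l != NumLanes; ++l)
    for (int i = 0; i != LaneSize; ++i)
      Mask[l * LaneSize + i] = l * LaneSize + i / 2;
  // Prints <0,0,2,2> -- the same mask the 256-bit MOVDDUP implements.
  printf("<%d,%d,%d,%d>\n", Mask[0], Mask[1], Mask[2], Mask[3]);
  return 0;
}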
@@ -3913,6 +3920,28 @@ bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N,
   return true;
 }

+/// isMOVDDUPYMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to 256-bit
+/// version of MOVDDUP.
+static bool isMOVDDUPYMask(ShuffleVectorSDNode *N,
+                           const X86Subtarget *Subtarget) {
+  EVT VT = N->getValueType(0);
+  int NumElts = VT.getVectorNumElements();
+  bool V2IsUndef = N->getOperand(1).getOpcode() == ISD::UNDEF;
+
+  if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256 ||
+      !V2IsUndef || NumElts != 4)
+    return false;
+
+  for (int i = 0; i != NumElts/2; ++i)
+    if (!isUndefOrEqual(N->getMaskElt(i), 0))
+      return false;
+  for (int i = NumElts/2; i != NumElts; ++i)
+    if (!isUndefOrEqual(N->getMaskElt(i), NumElts/2))
+      return false;
+  return true;
+}
+
 /// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
 /// specifies a shuffle of elements that is suitable for input to 128-bit
 /// version of MOVDDUP.
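
The predicate added above accepts only 4-element, 256-bit shuffles of a single input whose low half reads element 0 and whose high half reads element NumElts/2 (= 2), with undef mask entries acting as wildcards. A self-contained C++ sketch of the same check over a plain mask array (assumption: -1 stands in for an undef mask element, which is how ShuffleVectorSDNode encodes it):

#include <cstdio>

// Same contract as LLVM's isUndefOrEqual helper: undef (-1) always matches.
static bool isUndefOrEqual(int Val, int CmpVal) {
  return Val < 0 || Val == CmpVal;
}

static bool isMOVDDUPYMaskSketch(const int Mask[], int NumElts) {
  if (NumElts != 4)                 // 256-bit v4f64/v4i64 only
    return false;
  for (int i = 0; i != NumElts / 2; ++i)
    if (!isUndefOrEqual(Mask[i], 0))
      return false;
  for (int i = NumElts / 2; i != NumElts; ++i)
    if (!isUndefOrEqual(Mask[i], NumElts / 2))
      return false;
  return true;
}

int main() {
  int M1[] = {0, 0, 2, 2};  // matches: the canonical vmovddup mask
  int M2[] = {0, -1, 2, 2}; // still matches: undef lane is a wildcard
  int M3[] = {0, 1, 2, 3};  // identity shuffle: no match
  printf("%d %d %d\n", isMOVDDUPYMaskSketch(M1, 4),
         isMOVDDUPYMaskSketch(M2, 4), isMOVDDUPYMaskSketch(M3, 4)); // 1 1 0
  return 0;
}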
@@ -6691,6 +6720,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
   // supported in the AVX instruction set.
   //

+  // Handle VMOVDDUPY permutations
+  if (isMOVDDUPYMask(SVOp, Subtarget))
+    return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG);
+
   // Handle VPERMILPS* permutations
   if (isVPERMILPSMask(M, VT, Subtarget))
     return getTargetShuffleNode(getVPERMILOpcode(VT), dl, VT, V1,
lib/Target/X86/X86InstrSSE.td
@@ -4010,6 +4010,20 @@ let Predicates = [HasAVX] in {
   def : Pat<(X86Movddup (bc_v2f64
                          (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
             (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+
+  // 256-bit version
+  def : Pat<(X86Movddup (memopv4f64 addr:$src)),
+            (VMOVDDUPYrm addr:$src)>;
+  def : Pat<(X86Movddup (memopv4i64 addr:$src)),
+            (VMOVDDUPYrm addr:$src)>;
+  def : Pat<(X86Movddup (v4f64 (scalar_to_vector (loadf64 addr:$src)))),
+            (VMOVDDUPYrm addr:$src)>;
+  def : Pat<(X86Movddup (v4i64 (scalar_to_vector (loadi64 addr:$src)))),
+            (VMOVDDUPYrm addr:$src)>;
+  def : Pat<(X86Movddup (v4f64 VR256:$src)),
+            (VMOVDDUPYrr VR256:$src)>;
+  def : Pat<(X86Movddup (v4i64 VR256:$src)),
+            (VMOVDDUPYrr VR256:$src)>;
 }

 //===---------------------------------------------------------------------===//
test/CodeGen/X86/avx-vmovddup.ll (new file, 14 lines)
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+; CHECK: vmovddup %ymm
+define <4 x i64> @A(<4 x i64> %a) {
+ %c = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+ ret <4 x i64> %c
+}
+
+; CHECK: vmovddup (%
+define <4 x i64> @B(<4 x i64>* %ptr) {
+ %a = load <4 x i64>* %ptr
+ %c = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+ ret <4 x i64> %c
+}