mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
Get rid of some bogus patterns for X86vzmovl. Don't create VZEXT_MOVL nodes for vectors with an i16 element type. Add an optimization for building a vector which is all zeros/undef except for the bottom element, where the bottom element is an i8 or i16.
llvm-svn: 72988
This commit is contained in:
parent
539325c8e7
commit
e546f94ef5
@ -2428,9 +2428,10 @@ bool X86::isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N) {
|
||||
/// specifies a shuffle of elements that is suitable for input to MOVSS,
|
||||
/// MOVSD, and MOVD, i.e. setting the lowest element.
|
||||
static bool isMOVLMask(const SmallVectorImpl<int> &Mask, MVT VT) {
|
||||
int NumElts = VT.getVectorNumElements();
|
||||
if (NumElts != 2 && NumElts != 4)
|
||||
if (VT.getVectorElementType().getSizeInBits() < 32)
|
||||
return false;
|
||||
|
||||
int NumElts = VT.getVectorNumElements();
|
||||
|
||||
if (!isUndefOrEqual(Mask[0], NumElts))
|
||||
return false;
|
||||
@ -3082,7 +3083,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
|
||||
}
|
||||
|
||||
// Special case for single non-zero, non-undef, element.
|
||||
if (NumNonZero == 1 && NumElems <= 4) {
|
||||
if (NumNonZero == 1) {
|
||||
unsigned Idx = CountTrailingZeros_32(NonZeros);
|
||||
SDValue Item = Op.getOperand(Idx);
|
||||
|
||||
@ -3123,15 +3124,24 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
|
||||
// If we have a constant or non-constant insertion into the low element of
|
||||
// a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into
|
||||
// the rest of the elements. This will be matched as movd/movq/movss/movsd
|
||||
// depending on what the source datatype is. Because we can only get here
|
||||
// when NumElems <= 4, this only needs to handle i32/f32/i64/f64.
|
||||
if (Idx == 0 &&
|
||||
// Don't do this for i64 values on x86-32.
|
||||
(EVT != MVT::i64 || Subtarget->is64Bit())) {
|
||||
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
|
||||
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
|
||||
return getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
|
||||
Subtarget->hasSSE2(), DAG);
|
||||
// depending on what the source datatype is.
|
||||
if (Idx == 0) {
|
||||
if (NumZero == 0) {
|
||||
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
|
||||
} else if (EVT == MVT::i32 || EVT == MVT::f32 || EVT == MVT::f64 ||
|
||||
(EVT == MVT::i64 && Subtarget->is64Bit())) {
|
||||
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
|
||||
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
|
||||
return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget->hasSSE2(),
|
||||
DAG);
|
||||
} else if (EVT == MVT::i16 || EVT == MVT::i8) {
|
||||
Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
|
||||
MVT MiddleVT = VT.getSizeInBits() == 64 ? MVT::v2i32 : MVT::v4i32;
|
||||
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item);
|
||||
Item = getShuffleVectorZeroOrUndef(Item, 0, true,
|
||||
Subtarget->hasSSE2(), DAG);
|
||||
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Item);
|
||||
}
|
||||
}
|
||||
|
||||
// Is it a vector logical left shift?
|
||||
|
@ -577,30 +577,13 @@ def : Pat<(f64 (bitconvert (v4i16 VR64:$src))),
|
||||
def : Pat<(f64 (bitconvert (v8i8 VR64:$src))),
|
||||
(MMX_MOVQ2FR64rr VR64:$src)>;
|
||||
|
||||
// Move scalar to MMX zero-extended
|
||||
// movd to MMX register zero-extends
|
||||
let AddedComplexity = 15 in {
|
||||
def : Pat<(v8i8 (X86vzmovl (bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))))),
|
||||
(MMX_MOVZDI2PDIrr GR32:$src)>;
|
||||
def : Pat<(v4i16 (X86vzmovl (bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))))),
|
||||
(MMX_MOVZDI2PDIrr GR32:$src)>;
|
||||
}
|
||||
|
||||
let AddedComplexity = 20 in {
|
||||
def : Pat<(v8i8 (X86vzmovl (bc_v8i8 (load_mmx addr:$src)))),
|
||||
(MMX_MOVZDI2PDIrm addr:$src)>;
|
||||
def : Pat<(v4i16 (X86vzmovl (bc_v4i16 (load_mmx addr:$src)))),
|
||||
(MMX_MOVZDI2PDIrm addr:$src)>;
|
||||
def : Pat<(v2i32 (X86vzmovl (bc_v2i32 (load_mmx addr:$src)))),
|
||||
(MMX_MOVZDI2PDIrm addr:$src)>;
|
||||
}
|
||||
|
||||
// Clear top half.
|
||||
let AddedComplexity = 15 in {
|
||||
def : Pat<(v8i8 (X86vzmovl VR64:$src)),
|
||||
(MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
|
||||
def : Pat<(v4i16 (X86vzmovl VR64:$src)),
|
||||
(MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
|
||||
def : Pat<(v2i32 (X86vzmovl VR64:$src)),
|
||||
(MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
|
||||
}
|
||||
|
37
test/CodeGen/X86/2009-06-05-VZextByteShort.ll
Normal file
37
test/CodeGen/X86/2009-06-05-VZextByteShort.ll
Normal file
@ -0,0 +1,37 @@
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx,+sse2 > %t1
|
||||
; RUN: grep movzwl %t1 | count 2
|
||||
; RUN: grep movzbl %t1 | count 2
|
||||
; RUN: grep movd %t1 | count 4
|
||||
|
||||
; @a: loads an i32 through %x1, logically shifts it right by one bit,
; truncates the result to i16, and inserts that value into element 0 of an
; all-zero <4 x i16>. Every other element of the returned vector stays zero.
; NOTE(review): presumably one of the two i16 cases counted by the
; "grep movzwl" RUN line at the top of this file -- confirm against llc output.
define <4 x i16> @a(i32* %x1) nounwind {
|
||||
%x2 = load i32* %x1
|
||||
%x3 = lshr i32 %x2, 1
|
||||
%x = trunc i32 %x3 to i16
|
||||
%r = insertelement <4 x i16> zeroinitializer, i16 %x, i32 0
|
||||
ret <4 x i16> %r
|
||||
}
|
||||
|
||||
; @b: loads an i32 through %x1, logically shifts it right by one bit,
; truncates the result to i16, and inserts that value into element 0 of an
; all-zero <8 x i16>. Every other element of the returned vector stays zero.
; NOTE(review): presumably one of the two i16 cases counted by the
; "grep movzwl" RUN line at the top of this file -- confirm against llc output.
define <8 x i16> @b(i32* %x1) nounwind {
|
||||
%x2 = load i32* %x1
|
||||
%x3 = lshr i32 %x2, 1
|
||||
%x = trunc i32 %x3 to i16
|
||||
%r = insertelement <8 x i16> zeroinitializer, i16 %x, i32 0
|
||||
ret <8 x i16> %r
|
||||
}
|
||||
|
||||
; @c: loads an i32 through %x1, logically shifts it right by one bit,
; truncates the result to i8, and inserts that value into element 0 of an
; all-zero <8 x i8>. Every other element of the returned vector stays zero.
; NOTE(review): presumably one of the two i8 cases counted by the
; "grep movzbl" RUN line at the top of this file -- confirm against llc output.
define <8 x i8> @c(i32* %x1) nounwind {
|
||||
%x2 = load i32* %x1
|
||||
%x3 = lshr i32 %x2, 1
|
||||
%x = trunc i32 %x3 to i8
|
||||
%r = insertelement <8 x i8> zeroinitializer, i8 %x, i32 0
|
||||
ret <8 x i8> %r
|
||||
}
|
||||
|
||||
; @d: loads an i32 through %x1, logically shifts it right by one bit,
; truncates the result to i8, and inserts that value into element 0 of an
; all-zero <16 x i8>. Every other element of the returned vector stays zero.
; NOTE(review): presumably one of the two i8 cases counted by the
; "grep movzbl" RUN line at the top of this file -- confirm against llc output.
define <16 x i8> @d(i32* %x1) nounwind {
|
||||
%x2 = load i32* %x1
|
||||
%x3 = lshr i32 %x2, 1
|
||||
%x = trunc i32 %x3 to i8
|
||||
%r = insertelement <16 x i8> zeroinitializer, i8 %x, i32 0
|
||||
ret <16 x i8> %r
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user