diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 83751412e95..67217fd9924 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -3922,53 +3922,35 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { SDOperand X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { - // Transform it so it match pinsrw which expects a 16-bit value in a GR32 - // as its second argument. MVT::ValueType VT = Op.getValueType(); - MVT::ValueType BaseVT = MVT::getVectorElementType(VT); + MVT::ValueType EVT = MVT::getVectorElementType(VT); + if (EVT == MVT::i8) + return SDOperand(); + SDOperand N0 = Op.getOperand(0); SDOperand N1 = Op.getOperand(1); SDOperand N2 = Op.getOperand(2); - if (MVT::getSizeInBits(BaseVT) == 16) { + + if (MVT::getSizeInBits(EVT) == 16) { + // Transform it so it match pinsrw which expects a 16-bit value in a GR32 + // as its second argument. if (N1.getValueType() != MVT::i32) N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1); if (N2.getValueType() != MVT::i32) N2 = DAG.getConstant(cast(N2)->getValue(),getPointerTy()); return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2); - } else if (MVT::getSizeInBits(BaseVT) == 32) { - unsigned Idx = cast(N2)->getValue(); - if (Idx == 0) { - // Use a movss. - N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1); - MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); - MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); - SmallVector MaskVec; - MaskVec.push_back(DAG.getConstant(4, BaseVT)); - for (unsigned i = 1; i <= 3; ++i) - MaskVec.push_back(DAG.getConstant(i, BaseVT)); - return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1, - DAG.getNode(ISD::BUILD_VECTOR, MaskVT, - &MaskVec[0], MaskVec.size())); - } else { - // Use two pinsrw instructions to insert a 32 bit value. - Idx <<= 1; - if (MVT::isFloatingPoint(N1.getValueType())) { - N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1); - N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1); - N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1, - DAG.getConstant(0, getPointerTy())); - } - N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0); - N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1, - DAG.getConstant(Idx, getPointerTy())); - N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8)); - N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1, - DAG.getConstant(Idx+1, getPointerTy())); - return DAG.getNode(ISD::BIT_CONVERT, VT, N0); - } } - return SDOperand(); + N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1); + unsigned Idx = cast(N2)->getValue(); + MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); + MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT); + SmallVector MaskVec; + for (unsigned i = 0; i < 4; ++i) + MaskVec.push_back(DAG.getConstant((i == Idx) ? i+4 : i, MaskEVT)); + return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1, + DAG.getNode(ISD::BUILD_VECTOR, MaskVT, + &MaskVec[0], MaskVec.size())); } SDOperand diff --git a/test/CodeGen/X86/2007-07-31-VInsertBug.ll b/test/CodeGen/X86/2007-07-31-VInsertBug.ll deleted file mode 100644 index 1b13b080454..00000000000 --- a/test/CodeGen/X86/2007-07-31-VInsertBug.ll +++ /dev/null @@ -1,16 +0,0 @@ -; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -mattr=+sse2 | %prcontext {pinsrw \$2} 1 | grep "movl \$1" -; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -mattr=+sse2 | not grep movss - -@G = global <4 x float> zeroinitializer - -define void @test(i32 *%P1, i32* %P2, float *%FP) { - %T = load float* %FP - store i32 0, i32* %P1 - - %U = load <4 x float>* @G - store i32 1, i32* %P1 - %V = insertelement <4 x float> %U, float %T, i32 1 - store <4 x float> %V, <4 x float>* @G - - ret void -} diff --git a/test/CodeGen/X86/vec_insert-2.ll b/test/CodeGen/X86/vec_insert-2.ll new file mode 100644 index 00000000000..8fc97bfd211 --- /dev/null +++ b/test/CodeGen/X86/vec_insert-2.ll @@ -0,0 +1,23 @@ +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pinsrw | count 1 +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movhpd | count 1 +; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2 | grep unpcklpd | count 1 + +define <4 x float> @t1(float %s, <4 x float> %tmp) { + %tmp1 = insertelement <4 x float> %tmp, float %s, i32 3 + ret <4 x float> %tmp1 +} + +define <4 x i32> @t2(i32 %s, <4 x i32> %tmp) { + %tmp1 = insertelement <4 x i32> %tmp, i32 %s, i32 3 + ret <4 x i32> %tmp1 +} + +define <2 x double> @t3(double %s, <2 x double> %tmp) { + %tmp1 = insertelement <2 x double> %tmp, double %s, i32 1 + ret <2 x double> %tmp1 +} + +define <8 x i16> @t4(i16 %s, <8 x i16> %tmp) { + %tmp1 = insertelement <8 x i16> %tmp, i16 %s, i32 5 + ret <8 x i16> %tmp1 +} diff --git a/test/CodeGen/X86/vec_insert-3.ll b/test/CodeGen/X86/vec_insert-3.ll new file mode 100644 index 00000000000..1d374b4b9c6 --- /dev/null +++ b/test/CodeGen/X86/vec_insert-3.ll @@ -0,0 +1,6 @@ +; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2 | grep punpcklqdq | count 1 + +define <2 x i64> @t1(i64 %s, <2 x i64> %tmp) { + %tmp1 = insertelement <2 x i64> %tmp, i64 %s, i32 1 + ret <2 x i64> %tmp1 +} diff --git a/test/CodeGen/X86/vec_insert.ll b/test/CodeGen/X86/vec_insert.ll index 8ee0484fda1..c8c9f141ec1 100644 --- a/test/CodeGen/X86/vec_insert.ll +++ b/test/CodeGen/X86/vec_insert.ll @@ -1,20 +1,19 @@ -; RUN: llvm-upgrade < %s | llvm-as | llc -march=x86 -mattr=+sse2 -o %t -f -; RUN: grep movss %t | count 1 -; RUN: grep pinsrw %t | count 2 +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movss | count 1 +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep pinsrw -void %test(<4 x float>* %F, int %I) { - %tmp = load <4 x float>* %F - %f = cast int %I to float - %tmp1 = insertelement <4 x float> %tmp, float %f, uint 0 - %tmp18 = add <4 x float> %tmp1, %tmp1 +define void @test(<4 x float>* %F, i32 %I) { + %tmp = load <4 x float>* %F ; <<4 x float>> [#uses=1] + %f = sitofp i32 %I to float ; [#uses=1] + %tmp1 = insertelement <4 x float> %tmp, float %f, i32 0 ; <<4 x float>> [#uses=2] + %tmp18 = add <4 x float> %tmp1, %tmp1 ; <<4 x float>> [#uses=1] store <4 x float> %tmp18, <4 x float>* %F ret void } -void %test2(<4 x float>* %F, int %I, float %g) { - %tmp = load <4 x float>* %F - %f = cast int %I to float - %tmp1 = insertelement <4 x float> %tmp, float %f, uint 2 +define void @test2(<4 x float>* %F, i32 %I, float %g) { + %tmp = load <4 x float>* %F ; <<4 x float>> [#uses=1] + %f = sitofp i32 %I to float ; [#uses=1] + %tmp1 = insertelement <4 x float> %tmp, float %f, i32 2 ; <<4 x float>> [#uses=1] store <4 x float> %tmp1, <4 x float>* %F ret void }