mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
Use shuffles to implement insert_vector_elt for i32, i64, f32, and f64.
llvm-svn: 44929
This commit is contained in:
parent
af6ba4dfd4
commit
ad3e7f3286
@ -3922,53 +3922,35 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
|
||||
|
||||
SDOperand
|
||||
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
|
||||
// Transform it so it matches pinsrw, which expects a 16-bit value in a GR32
|
||||
// as its second argument.
|
||||
MVT::ValueType VT = Op.getValueType();
|
||||
MVT::ValueType BaseVT = MVT::getVectorElementType(VT);
|
||||
MVT::ValueType EVT = MVT::getVectorElementType(VT);
|
||||
if (EVT == MVT::i8)
|
||||
return SDOperand();
|
||||
|
||||
SDOperand N0 = Op.getOperand(0);
|
||||
SDOperand N1 = Op.getOperand(1);
|
||||
SDOperand N2 = Op.getOperand(2);
|
||||
if (MVT::getSizeInBits(BaseVT) == 16) {
|
||||
|
||||
if (MVT::getSizeInBits(EVT) == 16) {
|
||||
// Transform it so it matches pinsrw, which expects a 16-bit value in a GR32
|
||||
// as its second argument.
|
||||
if (N1.getValueType() != MVT::i32)
|
||||
N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
|
||||
if (N2.getValueType() != MVT::i32)
|
||||
N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(),getPointerTy());
|
||||
return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
|
||||
} else if (MVT::getSizeInBits(BaseVT) == 32) {
|
||||
unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
|
||||
if (Idx == 0) {
|
||||
// Use a movss.
|
||||
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
|
||||
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
|
||||
MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
|
||||
SmallVector<SDOperand, 8> MaskVec;
|
||||
MaskVec.push_back(DAG.getConstant(4, BaseVT));
|
||||
for (unsigned i = 1; i <= 3; ++i)
|
||||
MaskVec.push_back(DAG.getConstant(i, BaseVT));
|
||||
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
|
||||
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
|
||||
&MaskVec[0], MaskVec.size()));
|
||||
} else {
|
||||
// Use two pinsrw instructions to insert a 32-bit value.
|
||||
Idx <<= 1;
|
||||
if (MVT::isFloatingPoint(N1.getValueType())) {
|
||||
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1);
|
||||
N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1);
|
||||
N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1,
|
||||
DAG.getConstant(0, getPointerTy()));
|
||||
}
|
||||
N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0);
|
||||
N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
|
||||
DAG.getConstant(Idx, getPointerTy()));
|
||||
N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8));
|
||||
N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
|
||||
DAG.getConstant(Idx+1, getPointerTy()));
|
||||
return DAG.getNode(ISD::BIT_CONVERT, VT, N0);
|
||||
}
|
||||
}
|
||||
|
||||
return SDOperand();
|
||||
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
|
||||
unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
|
||||
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
|
||||
MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
|
||||
SmallVector<SDOperand, 4> MaskVec;
|
||||
for (unsigned i = 0; i < 4; ++i)
|
||||
MaskVec.push_back(DAG.getConstant((i == Idx) ? i+4 : i, MaskEVT));
|
||||
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
|
||||
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
|
||||
&MaskVec[0], MaskVec.size()));
|
||||
}
|
||||
|
||||
SDOperand
|
||||
|
@ -1,16 +0,0 @@
|
||||
; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -mattr=+sse2 | %prcontext {pinsrw \$2} 1 | grep "movl \$1"
|
||||
; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -mattr=+sse2 | not grep movss
|
||||
|
||||
@G = global <4 x float> zeroinitializer
|
||||
|
||||
define void @test(i32 *%P1, i32* %P2, float *%FP) {
|
||||
%T = load float* %FP
|
||||
store i32 0, i32* %P1
|
||||
|
||||
%U = load <4 x float>* @G
|
||||
store i32 1, i32* %P1
|
||||
%V = insertelement <4 x float> %U, float %T, i32 1
|
||||
store <4 x float> %V, <4 x float>* @G
|
||||
|
||||
ret void
|
||||
}
|
23
test/CodeGen/X86/vec_insert-2.ll
Normal file
23
test/CodeGen/X86/vec_insert-2.ll
Normal file
@ -0,0 +1,23 @@
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pinsrw | count 1
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movhpd | count 1
|
||||
; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2 | grep unpcklpd | count 1
|
||||
|
||||
define <4 x float> @t1(float %s, <4 x float> %tmp) {
|
||||
%tmp1 = insertelement <4 x float> %tmp, float %s, i32 3
|
||||
ret <4 x float> %tmp1
|
||||
}
|
||||
|
||||
define <4 x i32> @t2(i32 %s, <4 x i32> %tmp) {
|
||||
%tmp1 = insertelement <4 x i32> %tmp, i32 %s, i32 3
|
||||
ret <4 x i32> %tmp1
|
||||
}
|
||||
|
||||
define <2 x double> @t3(double %s, <2 x double> %tmp) {
|
||||
%tmp1 = insertelement <2 x double> %tmp, double %s, i32 1
|
||||
ret <2 x double> %tmp1
|
||||
}
|
||||
|
||||
define <8 x i16> @t4(i16 %s, <8 x i16> %tmp) {
|
||||
%tmp1 = insertelement <8 x i16> %tmp, i16 %s, i32 5
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
6
test/CodeGen/X86/vec_insert-3.ll
Normal file
6
test/CodeGen/X86/vec_insert-3.ll
Normal file
@ -0,0 +1,6 @@
|
||||
; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2 | grep punpcklqdq | count 1
|
||||
|
||||
define <2 x i64> @t1(i64 %s, <2 x i64> %tmp) {
|
||||
%tmp1 = insertelement <2 x i64> %tmp, i64 %s, i32 1
|
||||
ret <2 x i64> %tmp1
|
||||
}
|
@ -1,20 +1,19 @@
|
||||
; RUN: llvm-upgrade < %s | llvm-as | llc -march=x86 -mattr=+sse2 -o %t -f
|
||||
; RUN: grep movss %t | count 1
|
||||
; RUN: grep pinsrw %t | count 2
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movss | count 1
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep pinsrw
|
||||
|
||||
void %test(<4 x float>* %F, int %I) {
|
||||
%tmp = load <4 x float>* %F
|
||||
%f = cast int %I to float
|
||||
%tmp1 = insertelement <4 x float> %tmp, float %f, uint 0
|
||||
%tmp18 = add <4 x float> %tmp1, %tmp1
|
||||
define void @test(<4 x float>* %F, i32 %I) {
|
||||
%tmp = load <4 x float>* %F ; <<4 x float>> [#uses=1]
|
||||
%f = sitofp i32 %I to float ; <float> [#uses=1]
|
||||
%tmp1 = insertelement <4 x float> %tmp, float %f, i32 0 ; <<4 x float>> [#uses=2]
|
||||
%tmp18 = add <4 x float> %tmp1, %tmp1 ; <<4 x float>> [#uses=1]
|
||||
store <4 x float> %tmp18, <4 x float>* %F
|
||||
ret void
|
||||
}
|
||||
|
||||
void %test2(<4 x float>* %F, int %I, float %g) {
|
||||
%tmp = load <4 x float>* %F
|
||||
%f = cast int %I to float
|
||||
%tmp1 = insertelement <4 x float> %tmp, float %f, uint 2
|
||||
define void @test2(<4 x float>* %F, i32 %I, float %g) {
|
||||
%tmp = load <4 x float>* %F ; <<4 x float>> [#uses=1]
|
||||
%f = sitofp i32 %I to float ; <float> [#uses=1]
|
||||
%tmp1 = insertelement <4 x float> %tmp, float %f, i32 2 ; <<4 x float>> [#uses=1]
|
||||
store <4 x float> %tmp1, <4 x float>* %F
|
||||
ret void
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user