mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-24 13:33:37 +02:00
[x86] Inline an SSE4 helper function for INSERT_VECTOR_ELT lowering, no functionality changed.

Separating this into two functions wasn't helping. There was a decent amount of boilerplate duplicated, and some subsequent refactorings here will pull even more common code out.

llvm-svn: 216644
This commit is contained in:
parent
c3b1f18af4
commit
fd8c86913c
@ -10653,59 +10653,6 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
|
|||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
static SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
|
|
||||||
MVT VT = Op.getSimpleValueType();
|
|
||||||
MVT EltVT = VT.getVectorElementType();
|
|
||||||
SDLoc dl(Op);
|
|
||||||
|
|
||||||
SDValue N0 = Op.getOperand(0);
|
|
||||||
SDValue N1 = Op.getOperand(1);
|
|
||||||
SDValue N2 = Op.getOperand(2);
|
|
||||||
|
|
||||||
if (!VT.is128BitVector())
|
|
||||||
return SDValue();
|
|
||||||
|
|
||||||
if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) &&
|
|
||||||
isa<ConstantSDNode>(N2)) {
|
|
||||||
unsigned Opc;
|
|
||||||
if (VT == MVT::v8i16) {
|
|
||||||
Opc = X86ISD::PINSRW;
|
|
||||||
} else {
|
|
||||||
assert(VT == MVT::v16i8);
|
|
||||||
Opc = X86ISD::PINSRB;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Transform it so it match pinsr{b,w} which expects a GR32 as its second
|
|
||||||
// argument.
|
|
||||||
if (N1.getValueType() != MVT::i32)
|
|
||||||
N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
|
|
||||||
if (N2.getValueType() != MVT::i32)
|
|
||||||
N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
|
|
||||||
return DAG.getNode(Opc, dl, VT, N0, N1, N2);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (EltVT == MVT::f32 && isa<ConstantSDNode>(N2)) {
|
|
||||||
// Bits [7:6] of the constant are the source select. This will always be
|
|
||||||
// zero here. The DAG Combiner may combine an extract_elt index into these
|
|
||||||
// bits. For example (insert (extract, 3), 2) could be matched by putting
|
|
||||||
// the '3' into bits [7:6] of X86ISD::INSERTPS.
|
|
||||||
// Bits [5:4] of the constant are the destination select. This is the
|
|
||||||
// value of the incoming immediate.
|
|
||||||
// Bits [3:0] of the constant are the zero mask. The DAG Combiner may
|
|
||||||
// combine either bitwise AND or insert of float 0.0 to set these bits.
|
|
||||||
N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue() << 4);
|
|
||||||
// Create this as a scalar to vector..
|
|
||||||
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
|
|
||||||
return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((EltVT == MVT::i32 || EltVT == MVT::i64) && isa<ConstantSDNode>(N2)) {
|
|
||||||
// PINSR* works with constant index.
|
|
||||||
return Op;
|
|
||||||
}
|
|
||||||
return SDValue();
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Insert one bit to mask vector, like v16i1 or v8i1.
|
/// Insert one bit to mask vector, like v16i1 or v8i1.
|
||||||
/// AVX-512 feature.
|
/// AVX-512 feature.
|
||||||
SDValue
|
SDValue
|
||||||
@ -10744,7 +10691,7 @@ SDValue
|
|||||||
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
|
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
|
||||||
MVT VT = Op.getSimpleValueType();
|
MVT VT = Op.getSimpleValueType();
|
||||||
MVT EltVT = VT.getVectorElementType();
|
MVT EltVT = VT.getVectorElementType();
|
||||||
|
|
||||||
if (EltVT == MVT::i1)
|
if (EltVT == MVT::i1)
|
||||||
return InsertBitToMaskVector(Op, DAG);
|
return InsertBitToMaskVector(Op, DAG);
|
||||||
|
|
||||||
@ -10753,8 +10700,8 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
SDValue N1 = Op.getOperand(1);
|
SDValue N1 = Op.getOperand(1);
|
||||||
SDValue N2 = Op.getOperand(2);
|
SDValue N2 = Op.getOperand(2);
|
||||||
|
|
||||||
// If this is a 256-bit vector result, first extract the 128-bit vector,
|
// If the vector is wider than 128 bits, extract the 128-bit subvector, insert
|
||||||
// insert the element into the extracted half and then place it back.
|
// into that, and then insert the subvector back into the result.
|
||||||
if (VT.is256BitVector() || VT.is512BitVector()) {
|
if (VT.is256BitVector() || VT.is512BitVector()) {
|
||||||
if (!isa<ConstantSDNode>(N2))
|
if (!isa<ConstantSDNode>(N2))
|
||||||
return SDValue();
|
return SDValue();
|
||||||
@ -10774,8 +10721,48 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
return Insert128BitVector(N0, V, IdxVal, DAG, dl);
|
return Insert128BitVector(N0, V, IdxVal, DAG, dl);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Subtarget->hasSSE41())
|
if (Subtarget->hasSSE41()) {
|
||||||
return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);
|
if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) &&
|
||||||
|
isa<ConstantSDNode>(N2)) {
|
||||||
|
unsigned Opc;
|
||||||
|
if (VT == MVT::v8i16) {
|
||||||
|
Opc = X86ISD::PINSRW;
|
||||||
|
} else {
|
||||||
|
assert(VT == MVT::v16i8);
|
||||||
|
Opc = X86ISD::PINSRB;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Transform it so it match pinsr{b,w} which expects a GR32 as its second
|
||||||
|
// argument.
|
||||||
|
if (N1.getValueType() != MVT::i32)
|
||||||
|
N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
|
||||||
|
if (N2.getValueType() != MVT::i32)
|
||||||
|
N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
|
||||||
|
return DAG.getNode(Opc, dl, VT, N0, N1, N2);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (EltVT == MVT::f32 && isa<ConstantSDNode>(N2)) {
|
||||||
|
// Bits [7:6] of the constant are the source select. This will always be
|
||||||
|
// zero here. The DAG Combiner may combine an extract_elt index into
|
||||||
|
// these
|
||||||
|
// bits. For example (insert (extract, 3), 2) could be matched by
|
||||||
|
// putting
|
||||||
|
// the '3' into bits [7:6] of X86ISD::INSERTPS.
|
||||||
|
// Bits [5:4] of the constant are the destination select. This is the
|
||||||
|
// value of the incoming immediate.
|
||||||
|
// Bits [3:0] of the constant are the zero mask. The DAG Combiner may
|
||||||
|
// combine either bitwise AND or insert of float 0.0 to set these bits.
|
||||||
|
N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue() << 4);
|
||||||
|
// Create this as a scalar to vector..
|
||||||
|
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
|
||||||
|
return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((EltVT == MVT::i32 || EltVT == MVT::i64) && isa<ConstantSDNode>(N2)) {
|
||||||
|
// PINSR* works with constant index.
|
||||||
|
return Op;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (EltVT == MVT::i8)
|
if (EltVT == MVT::i8)
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
Loading…
Reference in New Issue
Block a user