mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 05:01:59 +01:00
[X86] Improve lowering of vXi1 insert_subvectors to better utilize (insert_subvector zero, vec, 0) for zeroing upper bits.
This can be better recognized during isel when the producer already zeroed the upper bits. llvm-svn: 320267
This commit is contained in:
parent
4f4608b667
commit
4d36bf76a1
@ -5013,6 +5013,10 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
|
||||
if (!isa<ConstantSDNode>(Idx))
|
||||
return SDValue();
|
||||
|
||||
// Inserting undef is a nop. We can just return the original vector.
|
||||
if (SubVec.isUndef())
|
||||
return Vec;
|
||||
|
||||
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
|
||||
if (IdxVal == 0 && Vec.isUndef()) // the operation is legal
|
||||
return Op;
|
||||
@ -5020,19 +5024,21 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
|
||||
MVT OpVT = Op.getSimpleValueType();
|
||||
unsigned NumElems = OpVT.getVectorNumElements();
|
||||
|
||||
SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
|
||||
|
||||
// Extend to natively supported kshift.
|
||||
MVT WideOpVT = OpVT;
|
||||
if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8)
|
||||
WideOpVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
|
||||
|
||||
// Inserting into the lsbs of a zero vector is legal. ISel will insert shifts
|
||||
// if necessary.
|
||||
if (IdxVal == 0 && ISD::isBuildVectorAllZeros(Vec.getNode())) {
|
||||
if ((!Subtarget.hasDQI() && NumElems == 8) || (NumElems < 8)) {
|
||||
// Need to promote to v16i1, do the insert, then extract back.
|
||||
Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
|
||||
getZeroVector(MVT::v16i1, Subtarget, DAG, dl),
|
||||
SubVec, Idx);
|
||||
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
}
|
||||
|
||||
return Op;
|
||||
// May need to promote to a legal type.
|
||||
Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
|
||||
getZeroVector(WideOpVT, Subtarget, DAG, dl),
|
||||
SubVec, Idx);
|
||||
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
|
||||
}
|
||||
|
||||
MVT SubVecVT = SubVec.getSimpleValueType();
|
||||
@ -5042,30 +5048,32 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
|
||||
IdxVal % SubVecVT.getSizeInBits() == 0 &&
|
||||
"Unexpected index value in INSERT_SUBVECTOR");
|
||||
|
||||
// extend to natively supported kshift
|
||||
MVT MinVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
|
||||
MVT WideOpVT = OpVT;
|
||||
if (OpVT.getSizeInBits() < MinVT.getStoreSizeInBits())
|
||||
WideOpVT = MinVT;
|
||||
|
||||
SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
|
||||
SDValue Undef = DAG.getUNDEF(WideOpVT);
|
||||
SDValue WideSubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
|
||||
Undef, SubVec, ZeroIdx);
|
||||
|
||||
// Extract sub-vector if required.
|
||||
auto ExtractSubVec = [&](SDValue V) {
|
||||
return (WideOpVT == OpVT) ? V : DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
|
||||
OpVT, V, ZeroIdx);
|
||||
};
|
||||
if (IdxVal == 0) {
|
||||
// Zero lower bits of the Vec
|
||||
SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
|
||||
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec,
|
||||
ZeroIdx);
|
||||
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
|
||||
Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
|
||||
// Merge them together, SubVec should be zero extended.
|
||||
SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
|
||||
getZeroVector(WideOpVT, Subtarget, DAG, dl),
|
||||
SubVec, ZeroIdx);
|
||||
Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
|
||||
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op,
|
||||
ZeroIdx);
|
||||
}
|
||||
|
||||
SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
|
||||
Undef, SubVec, ZeroIdx);
|
||||
|
||||
if (Vec.isUndef()) {
|
||||
if (IdxVal != 0) {
|
||||
SDValue ShiftBits = DAG.getConstant(IdxVal, dl, MVT::i8);
|
||||
WideSubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
|
||||
ShiftBits);
|
||||
}
|
||||
return ExtractSubVec(WideSubVec);
|
||||
assert(IdxVal != 0 && "Unexpected index");
|
||||
Op = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
|
||||
DAG.getConstant(IdxVal, dl, MVT::i8));
|
||||
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
|
||||
}
|
||||
|
||||
if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
|
||||
@ -5073,48 +5081,60 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
|
||||
NumElems = WideOpVT.getVectorNumElements();
|
||||
unsigned ShiftLeft = NumElems - SubVecNumElems;
|
||||
unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
|
||||
Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
|
||||
DAG.getConstant(ShiftLeft, dl, MVT::i8));
|
||||
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
|
||||
DAG.getConstant(ShiftRight, dl, MVT::i8));
|
||||
return ExtractSubVec(Vec);
|
||||
}
|
||||
|
||||
if (IdxVal == 0) {
|
||||
// Zero lower bits of the Vec
|
||||
SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
|
||||
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
|
||||
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
|
||||
Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
|
||||
// Merge them together, SubVec should be zero extended.
|
||||
WideSubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
|
||||
getZeroVector(WideOpVT, Subtarget, DAG, dl),
|
||||
SubVec, ZeroIdx);
|
||||
Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, WideSubVec);
|
||||
return ExtractSubVec(Vec);
|
||||
SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
|
||||
DAG.getConstant(ShiftLeft, dl, MVT::i8));
|
||||
Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Op,
|
||||
DAG.getConstant(ShiftRight, dl, MVT::i8));
|
||||
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
|
||||
}
|
||||
|
||||
// Simple case when we put subvector in the upper part
|
||||
if (IdxVal + SubVecNumElems == NumElems) {
|
||||
// Zero upper bits of the Vec
|
||||
WideSubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
|
||||
DAG.getConstant(IdxVal, dl, MVT::i8));
|
||||
NumElems = WideOpVT.getVectorNumElements();
|
||||
SDValue ShiftBits = DAG.getConstant(NumElems - IdxVal, dl, MVT::i8);
|
||||
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
|
||||
Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
|
||||
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
|
||||
Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, WideSubVec);
|
||||
return ExtractSubVec(Vec);
|
||||
SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
|
||||
DAG.getConstant(IdxVal, dl, MVT::i8));
|
||||
if (SubVecNumElems * 2 == NumElems) {
|
||||
// Special case, use legal zero extending insert_subvector. This allows
|
||||
// isel to optimize when bits are known zero.
|
||||
Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVecVT, Vec, ZeroIdx);
|
||||
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
|
||||
getZeroVector(WideOpVT, Subtarget, DAG, dl),
|
||||
Vec, ZeroIdx);
|
||||
} else {
|
||||
// Otherwise use explicit shifts to zero the bits.
|
||||
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
|
||||
Undef, Vec, ZeroIdx);
|
||||
NumElems = WideOpVT.getVectorNumElements();
|
||||
SDValue ShiftBits = DAG.getConstant(NumElems - IdxVal, dl, MVT::i8);
|
||||
Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
|
||||
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
|
||||
}
|
||||
Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
|
||||
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
|
||||
}
|
||||
// Subvector should be inserted in the middle - use shuffle
|
||||
WideSubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Undef,
|
||||
SubVec, ZeroIdx);
|
||||
SmallVector<int, 64> Mask;
|
||||
for (unsigned i = 0; i < NumElems; ++i)
|
||||
Mask.push_back(i >= IdxVal && i < IdxVal + SubVecNumElems ?
|
||||
i : i + NumElems);
|
||||
return DAG.getVectorShuffle(OpVT, dl, WideSubVec, Vec, Mask);
|
||||
|
||||
// Inserting into the middle is more complicated.
|
||||
|
||||
NumElems = WideOpVT.getVectorNumElements();
|
||||
|
||||
// Widen the vector if needed.
|
||||
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
|
||||
// Move the current value of the bit to be replaced to the lsbs.
|
||||
Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
|
||||
DAG.getConstant(IdxVal, dl, MVT::i8));
|
||||
// Xor with the new bit.
|
||||
Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Op, SubVec);
|
||||
// Shift to MSB, filling bottom bits with 0.
|
||||
unsigned ShiftLeft = NumElems - SubVecNumElems;
|
||||
Op = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Op,
|
||||
DAG.getConstant(ShiftLeft, dl, MVT::i8));
|
||||
// Shift to the final position, filling upper bits with 0.
|
||||
unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
|
||||
Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Op,
|
||||
DAG.getConstant(ShiftRight, dl, MVT::i8));
|
||||
// Xor with original vector leaving the new value.
|
||||
Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Vec, Op);
|
||||
// Reduce to original width if needed.
|
||||
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
|
||||
}
|
||||
|
||||
/// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128
|
||||
|
@ -56,14 +56,12 @@ define <8 x i1> @test3(<4 x i1> %a) {
|
||||
define <8 x i1> @test4(<4 x i1> %a, <4 x i1>%b) {
|
||||
; CHECK-LABEL: test4:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vpslld $31, %xmm1, %xmm1
|
||||
; CHECK-NEXT: vptestmd %xmm1, %xmm1, %k0
|
||||
; CHECK-NEXT: vpslld $31, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k0
|
||||
; CHECK-NEXT: vpslld $31, %xmm1, %xmm0
|
||||
; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1
|
||||
; CHECK-NEXT: kshiftlb $4, %k1, %k1
|
||||
; CHECK-NEXT: kshiftlb $4, %k0, %k0
|
||||
; CHECK-NEXT: kshiftrb $4, %k0, %k0
|
||||
; CHECK-NEXT: korb %k1, %k0, %k0
|
||||
; CHECK-NEXT: korb %k0, %k1, %k0
|
||||
; CHECK-NEXT: vpmovm2w %k0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
|
||||
@ -74,14 +72,12 @@ define <8 x i1> @test4(<4 x i1> %a, <4 x i1>%b) {
|
||||
define <4 x i1> @test5(<2 x i1> %a, <2 x i1>%b) {
|
||||
; CHECK-LABEL: test5:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vpsllq $63, %xmm1, %xmm1
|
||||
; CHECK-NEXT: vptestmq %xmm1, %xmm1, %k0
|
||||
; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k0
|
||||
; CHECK-NEXT: vpsllq $63, %xmm1, %xmm0
|
||||
; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k1
|
||||
; CHECK-NEXT: kshiftlb $2, %k1, %k1
|
||||
; CHECK-NEXT: kshiftlb $6, %k0, %k0
|
||||
; CHECK-NEXT: kshiftrb $6, %k0, %k0
|
||||
; CHECK-NEXT: korb %k1, %k0, %k0
|
||||
; CHECK-NEXT: kshiftlb $2, %k0, %k0
|
||||
; CHECK-NEXT: korb %k0, %k1, %k0
|
||||
; CHECK-NEXT: vpmovm2d %k0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
|
||||
@ -92,14 +88,12 @@ define <4 x i1> @test5(<2 x i1> %a, <2 x i1>%b) {
|
||||
define <16 x i1> @test6(<2 x i1> %a, <2 x i1>%b) {
|
||||
; CHECK-LABEL: test6:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vpsllq $63, %xmm1, %xmm1
|
||||
; CHECK-NEXT: vptestmq %xmm1, %xmm1, %k0
|
||||
; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k0
|
||||
; CHECK-NEXT: vpsllq $63, %xmm1, %xmm0
|
||||
; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k1
|
||||
; CHECK-NEXT: kshiftlb $2, %k1, %k1
|
||||
; CHECK-NEXT: kshiftlb $6, %k0, %k0
|
||||
; CHECK-NEXT: kshiftrb $6, %k0, %k0
|
||||
; CHECK-NEXT: korb %k1, %k0, %k0
|
||||
; CHECK-NEXT: kshiftlb $2, %k0, %k0
|
||||
; CHECK-NEXT: korb %k0, %k1, %k0
|
||||
; CHECK-NEXT: vpmovm2b %k0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
|
||||
@ -110,14 +104,12 @@ define <16 x i1> @test6(<2 x i1> %a, <2 x i1>%b) {
|
||||
define <32 x i1> @test7(<4 x i1> %a, <4 x i1>%b) {
|
||||
; CHECK-LABEL: test7:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vpslld $31, %xmm1, %xmm1
|
||||
; CHECK-NEXT: vptestmd %xmm1, %xmm1, %k0
|
||||
; CHECK-NEXT: vpslld $31, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k0
|
||||
; CHECK-NEXT: vpslld $31, %xmm1, %xmm0
|
||||
; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1
|
||||
; CHECK-NEXT: kshiftlb $4, %k1, %k1
|
||||
; CHECK-NEXT: kshiftlb $4, %k0, %k0
|
||||
; CHECK-NEXT: kshiftrb $4, %k0, %k0
|
||||
; CHECK-NEXT: korb %k1, %k0, %k0
|
||||
; CHECK-NEXT: korb %k0, %k1, %k0
|
||||
; CHECK-NEXT: vpmovm2b %k0, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user