Lower some BUILD_VECTORS using VEXT+shuffle.

Patch by Tim Northover.

llvm-svn: 123035
commit c485ff3ced
parent 48469b566a
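For context: NEON's VEXT instruction selects a contiguous window of N elements out of the 2N-element concatenation of two vectors, starting at a given offset, which is what makes it useful for stitching a BUILD_VECTOR out of pieces of two sources. Below is a minimal standalone sketch of that semantics; the vext helper and the values in main are an editor's illustration, not part of this patch.

// Hypothetical model of NEON VEXT semantics (illustration only).
#include <array>
#include <cassert>
#include <cstddef>

template <typename T, std::size_t N>
std::array<T, N> vext(const std::array<T, N> &Lo, const std::array<T, N> &Hi,
                      std::size_t Offset) {
  assert(Offset < N && "VEXT offset must be less than the vector length");
  std::array<T, N> Out{};
  for (std::size_t i = 0; i < N; ++i) {
    std::size_t Src = Offset + i; // index into the Lo:Hi concatenation
    Out[i] = Src < N ? Lo[Src] : Hi[Src - N];
  }
  return Out;
}

int main() {
  // vext with offset 1 over {0,1,2,3} and {4,5,6,7} yields {1,2,3,4}.
  std::array<int, 4> A{0, 1, 2, 3}, B{4, 5, 6, 7};
  std::array<int, 4> R = vext(A, B, 1);
  assert(R[0] == 1 && R[1] == 2 && R[2] == 3 && R[3] == 4);
  return 0;
}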
lib/Target/ARM/ARMISelLowering.cpp
@@ -3530,8 +3530,8 @@ static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
 // If this is a case we can't handle, return null and let the default
 // expansion code take care of it.
-static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
-                                 const ARMSubtarget *ST) {
+SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
+                                             const ARMSubtarget *ST) const {
   BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
   DebugLoc dl = Op.getDebugLoc();
   EVT VT = Op.getValueType();
@@ -3622,6 +3622,13 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
   if (isConstant)
     return SDValue();
 
+  // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
+  if (NumElts >= 4) {
+    SDValue shuffle = ReconstructShuffle(Op, DAG);
+    if (shuffle != SDValue())
+      return shuffle;
+  }
+
   // Vectors with 32- or 64-bit elements can be built by directly assigning
   // the subregisters.  Lower it to an ARMISD::BUILD_VECTOR so the operands
   // will be legalized.
@@ -3640,6 +3647,130 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
   return SDValue();
 }
 
+// Gather data to see if the operation can be modelled as a
+// shuffle in combination with VEXTs.
+SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const {
+  DebugLoc dl = Op.getDebugLoc();
+  EVT VT = Op.getValueType();
+  unsigned NumElts = VT.getVectorNumElements();
+
+  SmallVector<SDValue, 2> SourceVecs;
+  SmallVector<unsigned, 2> MinElts;
+  SmallVector<unsigned, 2> MaxElts;
+
+  for (unsigned i = 0; i < NumElts; ++i) {
+    SDValue V = Op.getOperand(i);
+    if (V.getOpcode() == ISD::UNDEF)
+      continue;
+    else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
+      // A shuffle can only come from building a vector from various
+      // elements of other vectors.
+      return SDValue();
+    }
+
+    // Record this extraction against the appropriate vector if possible...
+    SDValue SourceVec = V.getOperand(0);
+    unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
+    bool FoundSource = false;
+    for (unsigned j = 0; j < SourceVecs.size(); ++j) {
+      if (SourceVecs[j] == SourceVec) {
+        if (MinElts[j] > EltNo)
+          MinElts[j] = EltNo;
+        if (MaxElts[j] < EltNo)
+          MaxElts[j] = EltNo;
+        FoundSource = true;
+        break;
+      }
+    }
+
+    // Or record a new source if not...
+    if (!FoundSource) {
+      SourceVecs.push_back(SourceVec);
+      MinElts.push_back(EltNo);
+      MaxElts.push_back(EltNo);
+    }
+  }
+
+  // Currently only do something sane when at most two source vectors
+  // involved.
+  if (SourceVecs.size() > 2)
+    return SDValue();
+
+  SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};
+  int VEXTOffsets[2] = {0, 0};
+
+  // This loop extracts the usage patterns of the source vectors
+  // and prepares appropriate SDValues for a shuffle if possible.
+  for (unsigned i = 0; i < SourceVecs.size(); ++i) {
+    if (SourceVecs[i].getValueType() == VT) {
+      // No VEXT necessary
+      ShuffleSrcs[i] = SourceVecs[i];
+      VEXTOffsets[i] = 0;
+      continue;
+    } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
+      // It probably isn't worth padding out a smaller vector just to
+      // break it down again in a shuffle.
+      return SDValue();
+    }
+
+    unsigned SrcNumElts = SourceVecs[i].getValueType().getVectorNumElements();
+
+    // Since only 64-bit and 128-bit vectors are legal on ARM and
+    // we've eliminated the other cases...
+    assert(SrcNumElts == 2*NumElts);
+
+    if (MaxElts[i] - MinElts[i] >= NumElts) {
+      // Span too large for a VEXT to cope
+      return SDValue();
+    }
+
+    if (MinElts[i] >= NumElts) {
+      // The extraction can just take the second half
+      VEXTOffsets[i] = NumElts;
+      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i],
+                                   DAG.getIntPtrConstant(NumElts));
+    } else if (MaxElts[i] < NumElts) {
+      // The extraction can just take the first half
+      VEXTOffsets[i] = 0;
+      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i],
+                                   DAG.getIntPtrConstant(0));
+    } else {
+      // An actual VEXT is needed
+      VEXTOffsets[i] = MinElts[i];
+      SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i],
+                                     DAG.getIntPtrConstant(0));
+      SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i],
+                                     DAG.getIntPtrConstant(NumElts));
+      ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2,
+                                   DAG.getConstant(VEXTOffsets[i], MVT::i32));
+    }
+  }
+
+  SmallVector<int, 8> Mask;
+
+  for (unsigned i = 0; i < NumElts; ++i) {
+    SDValue Entry = Op.getOperand(i);
+    if (Entry.getOpcode() == ISD::UNDEF) {
+      Mask.push_back(-1);
+      continue;
+    }
+
+    SDValue ExtractVec = Entry.getOperand(0);
+    int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i).getOperand(1))->getSExtValue();
+    if (ExtractVec == SourceVecs[0]) {
+      Mask.push_back(ExtractElt - VEXTOffsets[0]);
+    } else {
+      Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
+    }
+  }
+
+  // Final check before we try to produce nonsense...
+  if (isShuffleMaskLegal(Mask, VT))
+    return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1], &Mask[0]);
+
+  return SDValue();
+}
+
 /// isShuffleMaskLegal - Targets can use this to indicate that they only
 /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
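As a worked illustration of the bookkeeping above (an editor's sketch, not part of the patch): in the test_interleaved case added below, the BUILD_VECTOR draws elements 3 and 5 from one <8 x i16> source and elements 0 and 1 from another. This self-contained snippet mirrors the min/max span tracking and mask arithmetic with invented names.

// Hypothetical trace of ReconstructShuffle's mask computation.
#include <cassert>
#include <cstdio>
#include <utility>
#include <vector>

int main() {
  // A 4-lane BUILD_VECTOR whose lanes come from two 8-element sources via
  // EXTRACT_VECTOR_ELT, written as (source index, element index) pairs.
  const unsigned NumElts = 4;
  const std::pair<unsigned, unsigned> Extracts[4] = {
      {0, 3}, {1, 0}, {0, 5}, {1, 1}};

  // Track the span of elements used from each source, as the patch does.
  unsigned MinElts[2] = {~0u, ~0u}, MaxElts[2] = {0, 0};
  for (const auto &E : Extracts) {
    if (E.second < MinElts[E.first]) MinElts[E.first] = E.second;
    if (E.second > MaxElts[E.first]) MaxElts[E.first] = E.second;
  }

  // Source 0 spans elements 3..5: neither half alone covers it, but the span
  // fits in a 4-element VEXT window starting at MinElts[0] == 3.
  // Source 1 spans 0..1, so its first half suffices and its offset stays 0.
  assert(MaxElts[0] - MinElts[0] < NumElts && "span too large for a VEXT");
  unsigned VEXTOffsets[2] = {MinElts[0], 0};

  // Rebase each lane onto the VEXT/subvector results, as the mask loop does.
  std::vector<int> Mask;
  for (const auto &E : Extracts)
    Mask.push_back(E.first == 0
                       ? static_cast<int>(E.second - VEXTOffsets[0])
                       : static_cast<int>(E.second + NumElts - VEXTOffsets[1]));

  // Prints "0 4 2 5": lanes 0 and 2 of the VEXT result interleaved with
  // lanes 0 and 1 of the second source's low half.
  for (int M : Mask) std::printf("%d ", M);
  std::printf("\n");
  return 0;
}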
lib/Target/ARM/ARMISelLowering.h
@@ -378,6 +378,10 @@ namespace llvm {
     SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
+                              const ARMSubtarget *ST) const;
+
+    SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
 
     SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                             CallingConv::ID CallConv, bool isVarArg,
test/CodeGen/ARM/vext.ll
@@ -74,3 +74,62 @@ define <16 x i8> @test_vextRq_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
   ret <16 x i8> %tmp3
 }
 
+; Tests for the ReconstructShuffle function. Indices have to be carefully
+; chosen to reach the lowering phase as a BUILD_VECTOR.
+
+; One vector needs vext, the other can be handled by extract_subvector.
+; Also checks that interleaving of sources is handled correctly.
+; Essence: a vext is used on %A, and something saner than a stack load/store for the final result.
+define <4 x i16> @test_interleaved(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: test_interleaved:
+;CHECK: vext.16
+;CHECK-NOT: vext.16
+;CHECK: vzip.16
+  %tmp1 = load <8 x i16>* %A
+  %tmp2 = load <8 x i16>* %B
+  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <4 x i32> <i32 3, i32 8, i32 5, i32 9>
+  ret <4 x i16> %tmp3
+}
+
+; An undef in the shuffle list should still be optimizable.
+define <4 x i16> @test_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: test_undef:
+;CHECK: vzip.16
+  %tmp1 = load <8 x i16>* %A
+  %tmp2 = load <8 x i16>* %B
+  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <4 x i32> <i32 undef, i32 8, i32 5, i32 9>
+  ret <4 x i16> %tmp3
+}
+
+; We should ignore a build_vector with more than two sources.
+; Use the illegal <32 x i16> type to produce such a shuffle after legalizing types.
+; Check for the fallback to stack expansion.
+define <4 x i16> @test_multisource(<32 x i16>* %B) nounwind {
+;CHECK: test_multisource:
+;CHECK: vst1.16
+  %tmp1 = load <32 x i16>* %B
+  %tmp2 = shufflevector <32 x i16> %tmp1, <32 x i16> undef, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
+  ret <4 x i16> %tmp2
+}
+
+; We don't handle shuffles using more than half of a 128-bit vector.
+; Again, check for the fallback to stack expansion.
+define <4 x i16> @test_largespan(<8 x i16>* %B) nounwind {
+;CHECK: test_largespan:
+;CHECK: vst1.16
+  %tmp1 = load <8 x i16>* %B
+  %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  ret <4 x i16> %tmp2
+}
+
+; The actual shuffle code only handles some cases; make sure we check
+; this rather than blindly emitting a VECTOR_SHUFFLE (an infinite
+; lowering loop can result otherwise).
+define <8 x i8> @test_illegal(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: test_illegal:
+;CHECK: vst1.8
+  %tmp1 = load <16 x i8>* %A
+  %tmp2 = load <16 x i8>* %B
+  %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <8 x i32> <i32 0, i32 7, i32 5, i32 25, i32 3, i32 2, i32 2, i32 26>
+  ret <8 x i8> %tmp3
+}