mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-22 04:22:57 +02:00
[ARM] Update ReconstructShuffle to handle mismatched types
Summary: Port the ReconstructShuffle function from AArch64 to ARM to handle mismatched incoming types in the BUILD_VECTOR node. This fixes an outstanding FIXME in the ReconstructShuffle code. Reviewers: t.p.northover, rengolin Subscribers: aemerson, llvm-commits, rengolin Differential Revision: http://reviews.llvm.org/D11720 llvm-svn: 244314
This commit is contained in:
parent
093484721d
commit
f1f3f2018c
@ -5529,18 +5529,46 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/// getExtFactor - Determine the adjustment factor for the position when
|
||||
/// generating an "extract from vector registers" instruction.
|
||||
static unsigned getExtFactor(SDValue &V) {
|
||||
EVT EltType = V.getValueType().getVectorElementType();
|
||||
return EltType.getSizeInBits() / 8;
|
||||
}
|
||||
|
||||
// Gather data to see if the operation can be modelled as a
|
||||
// shuffle in combination with VEXTs.
|
||||
SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
|
||||
SDLoc dl(Op);
|
||||
EVT VT = Op.getValueType();
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
|
||||
SmallVector<SDValue, 2> SourceVecs;
|
||||
SmallVector<unsigned, 2> MinElts;
|
||||
SmallVector<unsigned, 2> MaxElts;
|
||||
struct ShuffleSourceInfo {
|
||||
SDValue Vec;
|
||||
unsigned MinElt;
|
||||
unsigned MaxElt;
|
||||
|
||||
// We may insert some combination of BITCASTs and VEXT nodes to force Vec to
|
||||
// be compatible with the shuffle we intend to construct. As a result
|
||||
// ShuffleVec will be some sliding window into the original Vec.
|
||||
SDValue ShuffleVec;
|
||||
|
||||
// Code should guarantee that element i in Vec starts at element "WindowBase
|
||||
// + i * WindowScale in ShuffleVec".
|
||||
int WindowBase;
|
||||
int WindowScale;
|
||||
|
||||
bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
|
||||
ShuffleSourceInfo(SDValue Vec)
|
||||
: Vec(Vec), MinElt(UINT_MAX), MaxElt(0), ShuffleVec(Vec), WindowBase(0),
|
||||
WindowScale(1) {}
|
||||
};
|
||||
|
||||
// First gather all vectors used as an immediate source for this BUILD_VECTOR
|
||||
// node.
|
||||
SmallVector<ShuffleSourceInfo, 2> Sources;
|
||||
for (unsigned i = 0; i < NumElts; ++i) {
|
||||
SDValue V = Op.getOperand(i);
|
||||
if (V.getOpcode() == ISD::UNDEF)
|
||||
@ -5549,127 +5577,161 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
|
||||
// A shuffle can only come from building a vector from various
|
||||
// elements of other vectors.
|
||||
return SDValue();
|
||||
} else if (V.getOperand(0).getValueType().getVectorElementType() !=
|
||||
VT.getVectorElementType()) {
|
||||
// This code doesn't know how to handle shuffles where the vector
|
||||
// element types do not match (this happens because type legalization
|
||||
// promotes the return type of EXTRACT_VECTOR_ELT).
|
||||
// FIXME: It might be appropriate to extend this code to handle
|
||||
// mismatched types.
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
// Record this extraction against the appropriate vector if possible...
|
||||
// Add this element source to the list if it's not already there.
|
||||
SDValue SourceVec = V.getOperand(0);
|
||||
// If the element number isn't a constant, we can't effectively
|
||||
// analyze what's going on.
|
||||
if (!isa<ConstantSDNode>(V.getOperand(1)))
|
||||
return SDValue();
|
||||
unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
|
||||
bool FoundSource = false;
|
||||
for (unsigned j = 0; j < SourceVecs.size(); ++j) {
|
||||
if (SourceVecs[j] == SourceVec) {
|
||||
if (MinElts[j] > EltNo)
|
||||
MinElts[j] = EltNo;
|
||||
if (MaxElts[j] < EltNo)
|
||||
MaxElts[j] = EltNo;
|
||||
FoundSource = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
auto Source = std::find(Sources.begin(), Sources.end(), SourceVec);
|
||||
if (Source == Sources.end())
|
||||
Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
|
||||
|
||||
// Or record a new source if not...
|
||||
if (!FoundSource) {
|
||||
SourceVecs.push_back(SourceVec);
|
||||
MinElts.push_back(EltNo);
|
||||
MaxElts.push_back(EltNo);
|
||||
}
|
||||
// Update the minimum and maximum lane number seen.
|
||||
unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
|
||||
Source->MinElt = std::min(Source->MinElt, EltNo);
|
||||
Source->MaxElt = std::max(Source->MaxElt, EltNo);
|
||||
}
|
||||
|
||||
// Currently only do something sane when at most two source vectors
|
||||
// involved.
|
||||
if (SourceVecs.size() > 2)
|
||||
// are involved.
|
||||
if (Sources.size() > 2)
|
||||
return SDValue();
|
||||
|
||||
SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
|
||||
int VEXTOffsets[2] = {0, 0};
|
||||
// Find out the smallest element size among result and two sources, and use
|
||||
// it as element size to build the shuffle_vector.
|
||||
EVT SmallestEltTy = VT.getVectorElementType();
|
||||
for (auto &Source : Sources) {
|
||||
EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
|
||||
if (SrcEltTy.bitsLT(SmallestEltTy))
|
||||
SmallestEltTy = SrcEltTy;
|
||||
}
|
||||
unsigned ResMultiplier =
|
||||
VT.getVectorElementType().getSizeInBits() / SmallestEltTy.getSizeInBits();
|
||||
NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
|
||||
EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
|
||||
|
||||
// This loop extracts the usage patterns of the source vectors
|
||||
// and prepares appropriate SDValues for a shuffle if possible.
|
||||
for (unsigned i = 0; i < SourceVecs.size(); ++i) {
|
||||
if (SourceVecs[i].getValueType() == VT) {
|
||||
// No VEXT necessary
|
||||
ShuffleSrcs[i] = SourceVecs[i];
|
||||
VEXTOffsets[i] = 0;
|
||||
// If the source vector is too wide or too narrow, we may nevertheless be able
|
||||
// to construct a compatible shuffle either by concatenating it with UNDEF or
|
||||
// extracting a suitable range of elements.
|
||||
for (auto &Src : Sources) {
|
||||
EVT SrcVT = Src.ShuffleVec.getValueType();
|
||||
|
||||
if (SrcVT.getSizeInBits() == VT.getSizeInBits())
|
||||
continue;
|
||||
} else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
|
||||
// It probably isn't worth padding out a smaller vector just to
|
||||
// break it down again in a shuffle.
|
||||
|
||||
// This stage of the search produces a source with the same element type as
|
||||
// the original, but with a total width matching the BUILD_VECTOR output.
|
||||
EVT EltVT = SrcVT.getVectorElementType();
|
||||
unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();
|
||||
EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
|
||||
|
||||
if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
|
||||
if (2 * SrcVT.getSizeInBits() != VT.getSizeInBits())
|
||||
return SDValue();
|
||||
// We can pad out the smaller vector for free, so if it's part of a
|
||||
// shuffle...
|
||||
Src.ShuffleVec =
|
||||
DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
|
||||
DAG.getUNDEF(Src.ShuffleVec.getValueType()));
|
||||
continue;
|
||||
}
|
||||
|
||||
// Since only 64-bit and 128-bit vectors are legal on ARM and
|
||||
// we've eliminated the other cases...
|
||||
assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts &&
|
||||
"unexpected vector sizes in ReconstructShuffle");
|
||||
if (SrcVT.getSizeInBits() != 2 * VT.getSizeInBits())
|
||||
return SDValue();
|
||||
|
||||
if (MaxElts[i] - MinElts[i] >= NumElts) {
|
||||
if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
|
||||
// Span too large for a VEXT to cope
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
if (MinElts[i] >= NumElts) {
|
||||
if (Src.MinElt >= NumSrcElts) {
|
||||
// The extraction can just take the second half
|
||||
VEXTOffsets[i] = NumElts;
|
||||
ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
|
||||
SourceVecs[i],
|
||||
DAG.getIntPtrConstant(NumElts, dl));
|
||||
} else if (MaxElts[i] < NumElts) {
|
||||
Src.ShuffleVec =
|
||||
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
|
||||
DAG.getConstant(NumSrcElts, dl, MVT::i32));
|
||||
Src.WindowBase = -NumSrcElts;
|
||||
} else if (Src.MaxElt < NumSrcElts) {
|
||||
// The extraction can just take the first half
|
||||
VEXTOffsets[i] = 0;
|
||||
ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
|
||||
SourceVecs[i],
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
Src.ShuffleVec =
|
||||
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
|
||||
DAG.getConstant(0, dl, MVT::i32));
|
||||
} else {
|
||||
// An actual VEXT is needed
|
||||
VEXTOffsets[i] = MinElts[i];
|
||||
SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
|
||||
SourceVecs[i],
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
|
||||
SourceVecs[i],
|
||||
DAG.getIntPtrConstant(NumElts, dl));
|
||||
ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2,
|
||||
DAG.getConstant(VEXTOffsets[i], dl,
|
||||
MVT::i32));
|
||||
SDValue VEXTSrc1 =
|
||||
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
|
||||
DAG.getConstant(0, dl, MVT::i32));
|
||||
SDValue VEXTSrc2 =
|
||||
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
|
||||
DAG.getConstant(NumSrcElts, dl, MVT::i32));
|
||||
unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
|
||||
|
||||
Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
|
||||
VEXTSrc2,
|
||||
DAG.getConstant(Imm, dl, MVT::i32));
|
||||
Src.WindowBase = -Src.MinElt;
|
||||
}
|
||||
}
|
||||
|
||||
SmallVector<int, 8> Mask;
|
||||
|
||||
for (unsigned i = 0; i < NumElts; ++i) {
|
||||
SDValue Entry = Op.getOperand(i);
|
||||
if (Entry.getOpcode() == ISD::UNDEF) {
|
||||
Mask.push_back(-1);
|
||||
// Another possible incompatibility occurs from the vector element types. We
|
||||
// can fix this by bitcasting the source vectors to the same type we intend
|
||||
// for the shuffle.
|
||||
for (auto &Src : Sources) {
|
||||
EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
|
||||
if (SrcEltTy == SmallestEltTy)
|
||||
continue;
|
||||
assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
|
||||
Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
|
||||
Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
|
||||
Src.WindowBase *= Src.WindowScale;
|
||||
}
|
||||
|
||||
SDValue ExtractVec = Entry.getOperand(0);
|
||||
int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i)
|
||||
.getOperand(1))->getSExtValue();
|
||||
if (ExtractVec == SourceVecs[0]) {
|
||||
Mask.push_back(ExtractElt - VEXTOffsets[0]);
|
||||
} else {
|
||||
Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
|
||||
}
|
||||
// Final sanity check before we try to actually produce a shuffle.
|
||||
for (auto Src : Sources)
|
||||
assert(Src.ShuffleVec.getValueType() == ShuffleVT);
|
||||
|
||||
// The stars all align, our next step is to produce the mask for the shuffle.
|
||||
SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
|
||||
int BitsPerShuffleLane = ShuffleVT.getVectorElementType().getSizeInBits();
|
||||
for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
|
||||
SDValue Entry = Op.getOperand(i);
|
||||
if (Entry.getOpcode() == ISD::UNDEF)
|
||||
continue;
|
||||
|
||||
auto Src = std::find(Sources.begin(), Sources.end(), Entry.getOperand(0));
|
||||
int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
|
||||
|
||||
// EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
|
||||
// trunc. So only std::min(SrcBits, DestBits) actually get defined in this
|
||||
// segment.
|
||||
EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
|
||||
int BitsDefined = std::min(OrigEltTy.getSizeInBits(),
|
||||
VT.getVectorElementType().getSizeInBits());
|
||||
int LanesDefined = BitsDefined / BitsPerShuffleLane;
|
||||
|
||||
// This source is expected to fill ResMultiplier lanes of the final shuffle,
|
||||
// starting at the appropriate offset.
|
||||
int *LaneMask = &Mask[i * ResMultiplier];
|
||||
|
||||
int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
|
||||
ExtractBase += NumElts * (Src - Sources.begin());
|
||||
for (int j = 0; j < LanesDefined; ++j)
|
||||
LaneMask[j] = ExtractBase + j;
|
||||
}
|
||||
|
||||
// Final check before we try to produce nonsense...
|
||||
if (isShuffleMaskLegal(Mask, VT))
|
||||
return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],
|
||||
&Mask[0]);
|
||||
|
||||
if (!isShuffleMaskLegal(Mask, ShuffleVT))
|
||||
return SDValue();
|
||||
|
||||
// We can't handle more than two sources. This should have already
|
||||
// been checked before this point.
|
||||
assert(Sources.size() <= 2 && "Too many sources!");
|
||||
|
||||
SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
|
||||
for (unsigned i = 0; i < Sources.size(); ++i)
|
||||
ShuffleOps[i] = Sources[i].ShuffleVec;
|
||||
|
||||
SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
|
||||
ShuffleOps[1], &Mask[0]);
|
||||
return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
|
||||
}
|
||||
|
||||
/// isShuffleMaskLegal - Targets can use this to indicate that they only
|
||||
|
@ -335,3 +335,39 @@ entry:
|
||||
%0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 5, i32 3, i32 7>
|
||||
ret <8 x i16> %0
|
||||
}
|
||||
|
||||
; Here we get a build_vector node, where all the incoming extract_element
|
||||
; values do modify the type. However, we get different input types, as some of
|
||||
; them get truncated from i32 to i8 (from comparing cmp0 with cmp1) and some of
|
||||
; them get truncated from i16 to i8 (from comparing cmp2 with cmp3).
|
||||
define <8 x i8> @vtrn_mismatched_builvector0(<8 x i8> %tr0, <8 x i8> %tr1,
|
||||
<4 x i32> %cmp0, <4 x i32> %cmp1,
|
||||
<4 x i16> %cmp2, <4 x i16> %cmp3) {
|
||||
; CHECK-LABEL: vtrn_mismatched_builvector0
|
||||
; CHECK: vmovn.i32
|
||||
; CHECK: vtrn
|
||||
; CHECK: vbsl
|
||||
%c0 = icmp ult <4 x i32> %cmp0, %cmp1
|
||||
%c1 = icmp ult <4 x i16> %cmp2, %cmp3
|
||||
%c = shufflevector <4 x i1> %c0, <4 x i1> %c1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
|
||||
%rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1
|
||||
ret <8 x i8> %rv
|
||||
}
|
||||
|
||||
; Here we get a build_vector node, where half the incoming extract_element
|
||||
; values do not modify the type (the values form cmp2), but half of them do
|
||||
; (from the icmp operation).
|
||||
define <8 x i8> @vtrn_mismatched_builvector1(<8 x i8> %tr0, <8 x i8> %tr1,
|
||||
<4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {
|
||||
; CHECK-LABEL: vtrn_mismatched_builvector1
|
||||
; We need to extend the 4 x i8 to 4 x i16 in order to perform the vtrn
|
||||
; CHECK: vmovl
|
||||
; CHECK: vtrn.8
|
||||
; CHECK: vbsl
|
||||
%cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4
|
||||
%cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1>
|
||||
%c0 = icmp ult <4 x i32> %cmp0, %cmp1
|
||||
%c = shufflevector <4 x i1> %c0, <4 x i1> %cmp2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
|
||||
%rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1
|
||||
ret <8 x i8> %rv
|
||||
}
|
||||
|
@ -285,3 +285,76 @@ entry:
|
||||
%0 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 0, i32 0, i32 1, i32 3>
|
||||
ret <4 x i32> %0
|
||||
}
|
||||
|
||||
define <8 x i8> @vuzp_trunc(<8 x i8> %in0, <8 x i8> %in1, <8 x i32> %cmp0, <8 x i32> %cmp1) {
|
||||
; In order to create the select we need to truncate the vcgt result from a vector of i32 to a vector of i8.
|
||||
; This results in a build_vector with mismatched types. We will generate two vmovn.i32 instructions to
|
||||
; truncate from i32 to i16 and one vuzp to perform the final truncation for i8.
|
||||
; CHECK-LABEL: vuzp_trunc
|
||||
; CHECK: vmovn.i32
|
||||
; CHECK: vmovn.i32
|
||||
; CHECK: vuzp
|
||||
; CHECK: vbsl
|
||||
%c = icmp ult <8 x i32> %cmp0, %cmp1
|
||||
%res = select <8 x i1> %c, <8 x i8> %in0, <8 x i8> %in1
|
||||
ret <8 x i8> %res
|
||||
}
|
||||
|
||||
; Shuffle the result from the compare with a <4 x i8>.
|
||||
; We need to extend the loaded <4 x i8> to <4 x i16>. Otherwise we wouldn't be able
|
||||
; to perform the vuzp and get the vbsl mask.
|
||||
define <8 x i8> @vuzp_trunc_and_shuffle(<8 x i8> %tr0, <8 x i8> %tr1,
|
||||
<4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {
|
||||
; CHECK-LABEL: vuzp_trunc_and_shuffle
|
||||
; CHECK: vmovl
|
||||
; CHECK: vuzp
|
||||
; CHECK: vbsl
|
||||
%cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4
|
||||
%cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1>
|
||||
%c0 = icmp ult <4 x i32> %cmp0, %cmp1
|
||||
%c = shufflevector <4 x i1> %c0, <4 x i1> %cmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
%rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1
|
||||
ret <8 x i8> %rv
|
||||
}
|
||||
|
||||
; Use an undef value for the <4 x i8> that is being shuffled with the compare result.
|
||||
; This produces a build_vector with some of the operands undefs.
|
||||
define <8 x i8> @vuzp_trunc_and_shuffle_undef_right(<8 x i8> %tr0, <8 x i8> %tr1,
|
||||
<4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {
|
||||
; CHECK-LABEL: vuzp_trunc_and_shuffle_undef_right
|
||||
; CHECK: vuzp
|
||||
; CHECK: vbsl
|
||||
%cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4
|
||||
%cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1>
|
||||
%c0 = icmp ult <4 x i32> %cmp0, %cmp1
|
||||
%c = shufflevector <4 x i1> %c0, <4 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
%rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1
|
||||
ret <8 x i8> %rv
|
||||
}
|
||||
|
||||
define <8 x i8> @vuzp_trunc_and_shuffle_undef_left(<8 x i8> %tr0, <8 x i8> %tr1,
|
||||
<4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {
|
||||
; CHECK-LABEL: vuzp_trunc_and_shuffle_undef_left
|
||||
; CHECK: vuzp
|
||||
; CHECK: vbsl
|
||||
%cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4
|
||||
%cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1>
|
||||
%c0 = icmp ult <4 x i32> %cmp0, %cmp1
|
||||
%c = shufflevector <4 x i1> undef, <4 x i1> %c0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
%rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1
|
||||
ret <8 x i8> %rv
|
||||
}
|
||||
|
||||
; We're using large data types here, and we have to fill with undef values until we
|
||||
; get some vector size that we can represent.
|
||||
define <10 x i8> @vuzp_wide_type(<10 x i8> %tr0, <10 x i8> %tr1,
|
||||
<5 x i32> %cmp0, <5 x i32> %cmp1, <5 x i8> *%cmp2_ptr) {
|
||||
; CHECK-LABEL: vuzp_wide_type
|
||||
; CHECK: vbsl
|
||||
%cmp2_load = load <5 x i8>, <5 x i8> * %cmp2_ptr, align 4
|
||||
%cmp2 = trunc <5 x i8> %cmp2_load to <5 x i1>
|
||||
%c0 = icmp ult <5 x i32> %cmp0, %cmp1
|
||||
%c = shufflevector <5 x i1> %c0, <5 x i1> %cmp2, <10 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
|
||||
%rv = select <10 x i1> %c, <10 x i8> %tr0, <10 x i8> %tr1
|
||||
ret <10 x i8> %rv
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user