mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 20:23:11 +01:00
[Hexagon] Improve casting of boolean HVX vectors to scalars
- Mark memory access for bool vectors as disallowed in target lowering. This will prevent combining bitcasts of bool vectors with stores. - Replace the actual bitcasting code with a faster version. - Handle casting of v16i1 to i16.
This commit is contained in:
parent
d5b09af4ad
commit
7752ba906a
@ -1681,8 +1681,6 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
|
||||
setOperationAction(ISD::STORE, VT, Custom);
|
||||
}
|
||||
|
||||
setOperationAction(ISD::STORE, MVT::v128i1, Custom);
|
||||
|
||||
for (MVT VT : {MVT::v2i16, MVT::v4i8, MVT::v8i8, MVT::v2i32, MVT::v4i16,
|
||||
MVT::v2i32}) {
|
||||
setCondCodeAction(ISD::SETNE, VT, Expand);
|
||||
@ -1696,8 +1694,6 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
|
||||
|
||||
// Custom-lower bitcasts from i8 to v8i1.
|
||||
setOperationAction(ISD::BITCAST, MVT::i8, Custom);
|
||||
setOperationAction(ISD::BITCAST, MVT::i32, Custom);
|
||||
setOperationAction(ISD::BITCAST, MVT::i64, Custom);
|
||||
setOperationAction(ISD::SETCC, MVT::v2i16, Custom);
|
||||
setOperationAction(ISD::VSELECT, MVT::v4i8, Custom);
|
||||
setOperationAction(ISD::VSELECT, MVT::v2i16, Custom);
|
||||
@ -3081,6 +3077,12 @@ void
|
||||
HexagonTargetLowering::LowerOperationWrapper(SDNode *N,
|
||||
SmallVectorImpl<SDValue> &Results,
|
||||
SelectionDAG &DAG) const {
|
||||
if (isHvxOperation(N)) {
|
||||
LowerHvxOperationWrapper(N, Results, DAG);
|
||||
if (!Results.empty())
|
||||
return;
|
||||
}
|
||||
|
||||
// We are only custom-lowering stores to verify the alignment of the
|
||||
// address if it is a compile-time constant. Since a store can be modified
|
||||
// during type-legalization (the value being stored may need legalization),
|
||||
@ -3094,6 +3096,12 @@ void
|
||||
HexagonTargetLowering::ReplaceNodeResults(SDNode *N,
|
||||
SmallVectorImpl<SDValue> &Results,
|
||||
SelectionDAG &DAG) const {
|
||||
if (isHvxOperation(N)) {
|
||||
ReplaceHvxNodeResults(N, Results, DAG);
|
||||
if (!Results.empty())
|
||||
return;
|
||||
}
|
||||
|
||||
const SDLoc &dl(N);
|
||||
switch (N->getOpcode()) {
|
||||
case ISD::SRL:
|
||||
@ -3378,12 +3386,25 @@ EVT HexagonTargetLowering::getOptimalMemOpType(
|
||||
return MVT::Other;
|
||||
}
|
||||
|
||||
bool HexagonTargetLowering::allowsMemoryAccess(LLVMContext &Context,
|
||||
const DataLayout &DL, EVT VT, unsigned AddrSpace, unsigned Alignment,
|
||||
MachineMemOperand::Flags Flags, bool *Fast) const {
|
||||
MVT SVT = VT.getSimpleVT();
|
||||
if (Subtarget.isHVXVectorType(SVT, true))
|
||||
return allowsHvxMemoryAccess(SVT, Alignment, Flags, Fast);
|
||||
return TargetLoweringBase::allowsMemoryAccess(
|
||||
Context, DL, VT, AddrSpace, Alignment, Flags, Fast);
|
||||
}
|
||||
|
||||
bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(
|
||||
EVT VT, unsigned AS, unsigned Align, MachineMemOperand::Flags Flags,
|
||||
bool *Fast) const {
|
||||
EVT VT, unsigned AddrSpace, unsigned Alignment,
|
||||
MachineMemOperand::Flags Flags, bool *Fast) const {
|
||||
MVT SVT = VT.getSimpleVT();
|
||||
if (Subtarget.isHVXVectorType(SVT, true))
|
||||
return allowsHvxMisalignedMemoryAccesses(SVT, Alignment, Flags, Fast);
|
||||
if (Fast)
|
||||
*Fast = false;
|
||||
return Subtarget.isHVXVectorType(VT.getSimpleVT());
|
||||
return false;
|
||||
}
|
||||
|
||||
std::pair<const TargetRegisterClass*, uint8_t>
|
||||
|
@ -305,8 +305,12 @@ namespace HexagonISD {
|
||||
EVT getOptimalMemOpType(const MemOp &Op,
|
||||
const AttributeList &FuncAttributes) const override;
|
||||
|
||||
bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
|
||||
unsigned AddrSpace, unsigned Alignment, MachineMemOperand::Flags Flags,
|
||||
bool *Fast) const override;
|
||||
|
||||
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
|
||||
unsigned Align, MachineMemOperand::Flags Flags, bool *Fast)
|
||||
unsigned Alignment, MachineMemOperand::Flags Flags, bool *Fast)
|
||||
const override;
|
||||
|
||||
/// Returns relocation base for the given PIC jumptable.
|
||||
@ -404,6 +408,11 @@ namespace HexagonISD {
|
||||
VectorPair opSplit(SDValue Vec, const SDLoc &dl, SelectionDAG &DAG) const;
|
||||
SDValue opCastElem(SDValue Vec, MVT ElemTy, SelectionDAG &DAG) const;
|
||||
|
||||
bool allowsHvxMemoryAccess(MVT VecTy, unsigned Alignment,
|
||||
MachineMemOperand::Flags Flags, bool *Fast) const;
|
||||
bool allowsHvxMisalignedMemoryAccesses(MVT VecTy, unsigned Align,
|
||||
MachineMemOperand::Flags Flags, bool *Fast) const;
|
||||
|
||||
bool isHvxSingleTy(MVT Ty) const;
|
||||
bool isHvxPairTy(MVT Ty) const;
|
||||
bool isHvxBoolTy(MVT Ty) const;
|
||||
@ -438,6 +447,8 @@ namespace HexagonISD {
|
||||
const SDLoc &dl, SelectionDAG &DAG) const;
|
||||
SDValue extendHvxVectorPred(SDValue VecV, const SDLoc &dl, MVT ResTy,
|
||||
bool ZeroExt, SelectionDAG &DAG) const;
|
||||
SDValue compressHvxPred(SDValue VecQ, const SDLoc &dl, MVT ResTy,
|
||||
SelectionDAG &DAG) const;
|
||||
|
||||
SDValue LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG) const;
|
||||
@ -467,8 +478,12 @@ namespace HexagonISD {
|
||||
const override;
|
||||
|
||||
bool isHvxOperation(SDValue Op) const;
|
||||
bool isHvxOperation(SDNode *N) const;
|
||||
SDValue LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
void LowerHvxOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results,
|
||||
SelectionDAG &DAG) const;
|
||||
void ReplaceHvxNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
|
||||
SelectionDAG &DAG) const;
|
||||
SDValue PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||
};
|
||||
|
||||
|
@ -65,6 +65,15 @@ HexagonTargetLowering::initializeHVXLowering() {
|
||||
AddPromotedToType(Opc, FromTy, ToTy);
|
||||
};
|
||||
|
||||
// Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32).
|
||||
// Note: v16i1 -> i16 is handled in type legalization instead of op
|
||||
// legalization.
|
||||
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
|
||||
setOperationAction(ISD::BITCAST, MVT::i32, Custom);
|
||||
setOperationAction(ISD::BITCAST, MVT::i64, Custom);
|
||||
setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
|
||||
setOperationAction(ISD::BITCAST, MVT::v128i1, Custom);
|
||||
setOperationAction(ISD::BITCAST, MVT::i128, Custom);
|
||||
setOperationAction(ISD::VECTOR_SHUFFLE, ByteV, Legal);
|
||||
setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal);
|
||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
|
||||
@ -290,6 +299,25 @@ HexagonTargetLowering::isHvxBoolTy(MVT Ty) const {
|
||||
Ty.getVectorElementType() == MVT::i1;
|
||||
}
|
||||
|
||||
bool
|
||||
HexagonTargetLowering::allowsHvxMemoryAccess(MVT VecTy, unsigned Alignment,
|
||||
MachineMemOperand::Flags Flags, bool *Fast) const {
|
||||
// Bool vectors are excluded by default, but make it explicit to
|
||||
// emphasize that bool vectors cannot be loaded or stored.
|
||||
return Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false);
|
||||
}
|
||||
|
||||
bool
|
||||
HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(MVT VecTy,
|
||||
unsigned Align, MachineMemOperand::Flags Flags, bool *Fast) const {
|
||||
if (!Subtarget.isHVXVectorType(VecTy))
|
||||
return false;
|
||||
// XXX Should this be false? vmemu are a bit slower than vmem.
|
||||
if (Fast)
|
||||
*Fast = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
SDValue
|
||||
HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
|
||||
SelectionDAG &DAG) const {
|
||||
@ -1029,6 +1057,61 @@ HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl,
|
||||
return DAG.getSelect(dl, ResTy, VecV, True, False);
|
||||
}
|
||||
|
||||
SDValue
|
||||
HexagonTargetLowering::compressHvxPred(SDValue VecQ, const SDLoc &dl,
|
||||
MVT ResTy, SelectionDAG &DAG) const {
|
||||
// Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1]
|
||||
// (i.e. the entire predicate register) to bits [0..HwLen-1] of a
|
||||
// vector register. The remaining bits of the vector register are
|
||||
// unspecified.
|
||||
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
unsigned HwLen = Subtarget.getVectorLength();
|
||||
MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
|
||||
MVT PredTy = ty(VecQ);
|
||||
unsigned PredLen = PredTy.getVectorNumElements();
|
||||
assert(HwLen % PredLen == 0);
|
||||
MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(8*HwLen/PredLen), PredLen);
|
||||
|
||||
Type *Int8Ty = Type::getInt8Ty(*DAG.getContext());
|
||||
SmallVector<Constant*, 128> Tmp;
|
||||
// Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,...
|
||||
// These are bytes with the LSB rotated left with respect to their index.
|
||||
for (unsigned i = 0; i != HwLen/8; ++i) {
|
||||
for (unsigned j = 0; j != 8; ++j)
|
||||
Tmp.push_back(ConstantInt::get(Int8Ty, 1u << j));
|
||||
}
|
||||
Constant *CV = ConstantVector::get(Tmp);
|
||||
unsigned Align = HwLen;
|
||||
SDValue CP = LowerConstantPool(DAG.getConstantPool(CV, ByteTy, Align), DAG);
|
||||
SDValue Bytes = DAG.getLoad(ByteTy, dl, DAG.getEntryNode(), CP,
|
||||
MachinePointerInfo::getConstantPool(MF), Align);
|
||||
|
||||
// Select the bytes that correspond to true bits in the vector predicate.
|
||||
SDValue Sel = DAG.getSelect(dl, VecTy, VecQ, DAG.getBitcast(VecTy, Bytes),
|
||||
getZero(dl, VecTy, DAG));
|
||||
// Calculate the OR of all bytes in each group of 8. That will compress
|
||||
// all the individual bits into a single byte.
|
||||
// First, OR groups of 4, via vrmpy with 0x01010101.
|
||||
SDValue All1 =
|
||||
DAG.getSplatBuildVector(MVT::v4i8, dl, DAG.getConstant(1, dl, MVT::i32));
|
||||
SDValue Vrmpy = getInstr(Hexagon::V6_vrmpyub, dl, ByteTy, {Sel, All1}, DAG);
|
||||
// Then rotate the accumulated vector by 4 bytes, and do the final OR.
|
||||
SDValue Rot = getInstr(Hexagon::V6_valignbi, dl, ByteTy,
|
||||
{Vrmpy, Vrmpy, DAG.getTargetConstant(4, dl, MVT::i32)}, DAG);
|
||||
SDValue Vor = DAG.getNode(ISD::OR, dl, ByteTy, {Vrmpy, Rot});
|
||||
|
||||
// Pick every 8th byte and coalesce them at the beginning of the output.
|
||||
// For symmetry, coalesce every 1+8th byte after that, then every 2+8th
|
||||
// byte and so on.
|
||||
SmallVector<int,128> Mask;
|
||||
for (unsigned i = 0; i != HwLen; ++i)
|
||||
Mask.push_back((8*i) % HwLen + i/(HwLen/8));
|
||||
SDValue Collect =
|
||||
DAG.getVectorShuffle(ByteTy, dl, Vor, DAG.getUNDEF(ByteTy), Mask);
|
||||
return DAG.getBitcast(ResTy, Collect);
|
||||
}
|
||||
|
||||
SDValue
|
||||
HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
|
||||
const {
|
||||
@ -1437,192 +1520,58 @@ HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
|
||||
return T7;
|
||||
}
|
||||
|
||||
// This function does the computation needed to bitcast a vector of predicate
|
||||
// register to a vector of integers.
|
||||
SDValue
|
||||
HexagonTargetLowering::HvxVecPredBitcastComputation(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDValue ValQ = Op.getOperand(0);
|
||||
MVT ResTy = ty(Op);
|
||||
MVT VecTy = ty(ValQ);
|
||||
const SDLoc &dl(Op);
|
||||
MVT VecTy;
|
||||
int Length;
|
||||
if (Subtarget.useHVX64BOps()) {
|
||||
VecTy = MVT::getVectorVT(MVT::i32, 16);
|
||||
Length = 2;
|
||||
}
|
||||
if (Subtarget.useHVX128BOps()) {
|
||||
VecTy = MVT::getVectorVT(MVT::i32, 32);
|
||||
Length = 4;
|
||||
}
|
||||
// r0 = ##0x08040201 // Pre-rotated bits per 4 consecutive bytes.
|
||||
SDValue C8421 = DAG.getTargetConstant(0x08040201, dl, MVT::i32);
|
||||
SDValue InstrC8421 = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, C8421, DAG);
|
||||
// v0 = vand(q0,r0)
|
||||
SDValue Vand =
|
||||
getInstr(Hexagon::V6_vandqrt, dl, VecTy, {Op, InstrC8421}, DAG);
|
||||
|
||||
// Or the bytes in each word into a single byte: that will form packs
|
||||
// of 4 bits of the output.
|
||||
// v1 = valign(v0,v0,#2)
|
||||
SDValue C2 = DAG.getTargetConstant(2, dl, MVT::i32);
|
||||
SDValue Valign =
|
||||
getInstr(Hexagon::V6_valignbi, dl, VecTy, {Vand, Vand, C2}, DAG);
|
||||
// v0 = vor(v0,v1)
|
||||
SDValue Vor = getInstr(Hexagon::V6_vor, dl, VecTy, {Vand, Valign}, DAG);
|
||||
// v1 = valign(v0,v0,#1)
|
||||
SDValue C1 = DAG.getTargetConstant(1, dl, MVT::i32);
|
||||
SDValue Valign1 =
|
||||
getInstr(Hexagon::V6_valignbi, dl, VecTy, {Vor, Vor, C1}, DAG);
|
||||
// v0 = vor(v0,v1)
|
||||
SDValue Vor1 = getInstr(Hexagon::V6_vor, dl, VecTy, {Vor, Valign1}, DAG);
|
||||
if (isHvxBoolTy(VecTy) && ResTy.isScalarInteger()) {
|
||||
unsigned HwLen = Subtarget.getVectorLength();
|
||||
MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
|
||||
SDValue VQ = compressHvxPred(ValQ, dl, WordTy, DAG);
|
||||
unsigned BitWidth = ResTy.getSizeInBits();
|
||||
|
||||
// Clear all the bytes per word except the lowest one.
|
||||
// r0 = #0xff
|
||||
SDValue Cff = DAG.getTargetConstant(0xff, dl, MVT::i32);
|
||||
SDValue InstrCff = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, Cff, DAG);
|
||||
// v1 = vsplat(r0)
|
||||
SDValue Vsplat = getInstr(Hexagon::V6_lvsplatw, dl, VecTy, InstrCff, DAG);
|
||||
// v0 = vand(v0,v1)
|
||||
SDValue Vand1 = getInstr(Hexagon::V6_vand, dl, VecTy, {Vor1, Vsplat}, DAG);
|
||||
if (BitWidth < 64) {
|
||||
SDValue W0 = extractHvxElementReg(VQ, DAG.getConstant(0, dl, MVT::i32),
|
||||
dl, MVT::i32, DAG);
|
||||
if (BitWidth == 32)
|
||||
return W0;
|
||||
assert(BitWidth < 32u);
|
||||
return DAG.getZExtOrTrunc(W0, dl, ResTy);
|
||||
}
|
||||
|
||||
// Shift each word left by its index to position the 4-bit packs for oring.
|
||||
// The words 0..8 and 16..31 need to be ored to form the 64-bit output.
|
||||
// r0 = ##.Lshifts
|
||||
// .Lshifts:
|
||||
// .word 0
|
||||
// .word 4
|
||||
// .word 8
|
||||
// .word 12
|
||||
// .word 16
|
||||
// .word 20
|
||||
// .word 24
|
||||
// .word 28
|
||||
// .word 0
|
||||
// .word 4
|
||||
// .word 8
|
||||
// .word 12
|
||||
// .word 16
|
||||
// .word 20
|
||||
// .word 24
|
||||
// .word 28
|
||||
// v1 = vmem(r0+#0)
|
||||
SmallVector<SDValue, 32> Elems;
|
||||
for (int i = 0; i < Length; ++i) {
|
||||
Elems.push_back(DAG.getConstant(0, dl, MVT::i32));
|
||||
Elems.push_back(DAG.getConstant(4, dl, MVT::i32));
|
||||
Elems.push_back(DAG.getConstant(8, dl, MVT::i32));
|
||||
Elems.push_back(DAG.getConstant(12, dl, MVT::i32));
|
||||
Elems.push_back(DAG.getConstant(16, dl, MVT::i32));
|
||||
Elems.push_back(DAG.getConstant(20, dl, MVT::i32));
|
||||
Elems.push_back(DAG.getConstant(24, dl, MVT::i32));
|
||||
Elems.push_back(DAG.getConstant(28, dl, MVT::i32));
|
||||
// The result is >= 64 bits. The only options are 64 or 128.
|
||||
assert(BitWidth == 64 || BitWidth == 128);
|
||||
SmallVector<SDValue,4> Words;
|
||||
for (unsigned i = 0; i != BitWidth/32; ++i) {
|
||||
SDValue W = extractHvxElementReg(
|
||||
VQ, DAG.getConstant(i, dl, MVT::i32), dl, MVT::i32, DAG);
|
||||
Words.push_back(W);
|
||||
}
|
||||
SmallVector<SDValue,2> Combines;
|
||||
assert(Words.size() % 2 == 0);
|
||||
for (unsigned i = 0, e = Words.size(); i < e; i += 2) {
|
||||
SDValue C = DAG.getNode(
|
||||
HexagonISD::COMBINE, dl, MVT::i64, {Words[i], Words[i+1]});
|
||||
Combines.push_back(C);
|
||||
}
|
||||
|
||||
if (BitWidth == 64)
|
||||
return Combines[0];
|
||||
|
||||
// It must be i128. I128 is not a legal type, so this part will be
|
||||
// executed during type legalization. We need to generate code that
|
||||
// the default expansion can break up into smaller pieces.
|
||||
SDValue C0 = DAG.getZExtOrTrunc(Combines[0], dl, ResTy);
|
||||
SDValue C1 = DAG.getNode(ISD::SHL, dl, ResTy,
|
||||
DAG.getZExtOrTrunc(Combines[1], dl, ResTy),
|
||||
DAG.getConstant(64, dl, MVT::i32));
|
||||
return DAG.getNode(ISD::OR, dl, ResTy, C0, C1);
|
||||
}
|
||||
|
||||
SDValue BV = DAG.getBuildVector(VecTy, dl, Elems);
|
||||
// v0.w = vasl(v0.w,v1.w)
|
||||
SDValue Vasl = getInstr(Hexagon::V6_vaslwv, dl, VecTy, {Vand1, BV}, DAG);
|
||||
|
||||
// 3 rounds of oring.
|
||||
// r0 = #16 // HwLen/4
|
||||
SDValue C16 = DAG.getTargetConstant(16, dl, MVT::i32);
|
||||
SDValue InstrC16 = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, C16, DAG);
|
||||
// v1 = vror(v0,r0)
|
||||
SDValue Vror = getInstr(Hexagon::V6_vror, dl, VecTy, {Vasl, InstrC16}, DAG);
|
||||
// v0 = vor(v0,v1)
|
||||
SDValue Vor2 = getInstr(Hexagon::V6_vor, dl, VecTy, {Vasl, Vror}, DAG);
|
||||
// r0 = #8 // HwLen/8
|
||||
SDValue C8 = DAG.getTargetConstant(8, dl, MVT::i32);
|
||||
SDValue InstrC8 = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, C8, DAG);
|
||||
// v1 = vror(v0,r0)
|
||||
SDValue Vror1 = getInstr(Hexagon::V6_vror, dl, VecTy, {Vor2, InstrC8}, DAG);
|
||||
// v0 = vor(v0,v1)
|
||||
SDValue Vor3 = getInstr(Hexagon::V6_vor, dl, VecTy, {Vor2, Vror1}, DAG);
|
||||
// r0 = #4 // HwLen/16
|
||||
SDValue C4 = DAG.getTargetConstant(4, dl, MVT::i32);
|
||||
SDValue InstrC4 = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, C4, DAG);
|
||||
// v1 = vror(v0,r0)
|
||||
SDValue Vror2 = getInstr(Hexagon::V6_vror, dl, VecTy, {Vor3, InstrC4}, DAG);
|
||||
// v0 = vor(v0,v1)
|
||||
SDValue Vor4 = getInstr(Hexagon::V6_vor, dl, VecTy, {Vor3, Vror2}, DAG);
|
||||
return Vor4;
|
||||
}
|
||||
|
||||
SDValue HexagonTargetLowering::LowerHvxBitcast(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
auto *N = Op.getNode();
|
||||
EVT VT = N->getValueType(0);
|
||||
const SDLoc &dl(Op);
|
||||
SDValue Q0 = N->getOperand(0);
|
||||
EVT VTOp = Q0.getNode()->getValueType(0);
|
||||
if (!(VT == MVT::i64 || VT == MVT::i32) ||
|
||||
!(VTOp == MVT::v64i1 || VTOp == MVT::v32i1))
|
||||
return Op;
|
||||
|
||||
SDValue Vor4 = HvxVecPredBitcastComputation(Q0, DAG);
|
||||
|
||||
// The output is v.w[8]:v.w[0]
|
||||
// r3 = #0
|
||||
SDValue C0 = DAG.getTargetConstant(0, dl, MVT::i32);
|
||||
SDValue InstrC0 = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, C0, DAG);
|
||||
// r0 = vextract(v0,r3)
|
||||
SDValue Res =
|
||||
getInstr(Hexagon::V6_extractw, dl, MVT::i32, {Vor4, InstrC0}, DAG);
|
||||
if (VT == MVT::i64) {
|
||||
// r3 = #32
|
||||
SDValue C32 = DAG.getTargetConstant(32, dl, MVT::i32);
|
||||
SDValue InstrC32 = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, C32, DAG);
|
||||
// r1 = vextract(v0,r3)
|
||||
SDValue Vextract =
|
||||
getInstr(Hexagon::V6_extractw, dl, MVT::i32, {Vor4, InstrC32}, DAG);
|
||||
Res = getInstr(Hexagon::A2_combinew, dl, MVT::i64, {Vextract, Res}, DAG);
|
||||
}
|
||||
return Res;
|
||||
}
|
||||
|
||||
SDValue HexagonTargetLowering::LowerHvxStore(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
auto *N = Op.getNode();
|
||||
const SDLoc &dl(Op);
|
||||
SDValue Q0 = N->getOperand(1);
|
||||
EVT VTOp = Q0.getNode()->getValueType(0);
|
||||
if (Op.getOpcode() != ISD::STORE || VTOp != MVT::v128i1)
|
||||
return Op;
|
||||
SDValue Vor4 = HvxVecPredBitcastComputation(Q0, DAG);
|
||||
// The output is v.w[8]:v.w[0]
|
||||
// r3 = #0
|
||||
SDValue C0 = DAG.getTargetConstant(0, dl, MVT::i32);
|
||||
SDValue InstrC0 = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, C0, DAG);
|
||||
// r0 = vextract(v0,r3)
|
||||
SDValue Vextract0 =
|
||||
getInstr(Hexagon::V6_extractw, dl, MVT::i32, {Vor4, InstrC0}, DAG);
|
||||
// r3 = #32
|
||||
SDValue C32 = DAG.getTargetConstant(32, dl, MVT::i32);
|
||||
SDValue InstrC32 = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, C32, DAG);
|
||||
// r1 = vextract(v0,r3)
|
||||
SDValue Vextract1 =
|
||||
getInstr(Hexagon::V6_extractw, dl, MVT::i32, {Vor4, InstrC32}, DAG);
|
||||
SDValue Combine0 =
|
||||
getInstr(Hexagon::A2_combinew, dl, MVT::i64, {Vextract1, Vextract0}, DAG);
|
||||
// r3 = #64
|
||||
SDValue C64 = DAG.getTargetConstant(64, dl, MVT::i32);
|
||||
SDValue InstrC64 = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, C64, DAG);
|
||||
// r0 = vextract(v0,r3)
|
||||
SDValue Vextract2 =
|
||||
getInstr(Hexagon::V6_extractw, dl, MVT::i32, {Vor4, InstrC64}, DAG);
|
||||
// r3 = #96
|
||||
SDValue C96 = DAG.getTargetConstant(96, dl, MVT::i32);
|
||||
SDValue InstrC96 = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, C96, DAG);
|
||||
// r1 = vextract(v0,r3)
|
||||
SDValue Vextract3 =
|
||||
getInstr(Hexagon::V6_extractw, dl, MVT::i32, {Vor4, InstrC96}, DAG);
|
||||
SDValue Combine1 =
|
||||
getInstr(Hexagon::A2_combinew, dl, MVT::i64, {Vextract3, Vextract2}, DAG);
|
||||
StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
|
||||
SDValue C8 = DAG.getTargetConstant(8, dl, MVT::i32);
|
||||
const SDValue Ops1[] = {ST->getBasePtr(), C8, Combine1, ST->getChain()};
|
||||
SDValue Store1 = getInstr(Hexagon::S2_storerd_io, dl, MVT::Other, Ops1, DAG);
|
||||
const SDValue Ops0[] = {ST->getBasePtr(), C0, Combine0, Store1};
|
||||
SDValue Store0 = getInstr(Hexagon::S2_storerd_io, dl, MVT::Other, Ops0, DAG);
|
||||
return Store0;
|
||||
return Op;
|
||||
}
|
||||
|
||||
SDValue
|
||||
@ -1798,7 +1747,6 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
case ISD::SETCC:
|
||||
case ISD::INTRINSIC_VOID: return Op;
|
||||
case ISD::INTRINSIC_WO_CHAIN: return LowerHvxIntrinsic(Op, DAG);
|
||||
case ISD::STORE: return LowerHvxStore(Op, DAG);
|
||||
// Unaligned loads will be handled by the default lowering.
|
||||
case ISD::LOAD: return SDValue();
|
||||
}
|
||||
@ -1808,6 +1756,28 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
llvm_unreachable("Unhandled HVX operation");
|
||||
}
|
||||
|
||||
void
|
||||
HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N,
|
||||
SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
|
||||
}
|
||||
|
||||
void
|
||||
HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N,
|
||||
SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
|
||||
unsigned Opc = N->getOpcode();
|
||||
switch (Opc) {
|
||||
case ISD::BITCAST:
|
||||
if (isHvxBoolTy(ty(N->getOperand(0)))) {
|
||||
SDValue Op(N, 0);
|
||||
SDValue C = LowerHvxBitcast(Op, DAG);
|
||||
Results.push_back(C);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
SDValue
|
||||
HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
|
||||
const {
|
||||
@ -1840,3 +1810,16 @@ HexagonTargetLowering::isHvxOperation(SDValue Op) const {
|
||||
return Subtarget.isHVXVectorType(ty(V), true);
|
||||
});
|
||||
}
|
||||
|
||||
bool
|
||||
HexagonTargetLowering::isHvxOperation(SDNode *N) const {
|
||||
// If the type of any result, or any operand type are HVX vector types,
|
||||
// this is an HVX operation.
|
||||
auto IsHvxTy = [this] (EVT Ty) {
|
||||
return Ty.isSimple() && Subtarget.isHVXVectorType(Ty.getSimpleVT(), true);
|
||||
};
|
||||
auto IsHvxOp = [this] (SDValue Op) {
|
||||
return Subtarget.isHVXVectorType(ty(Op), true);
|
||||
};
|
||||
return llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp);
|
||||
}
|
||||
|
@ -1,7 +1,7 @@
|
||||
; RUN: llc -march=hexagon < %s | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: danny:
|
||||
; CHECK: vand
|
||||
; CHECK: vrmpy
|
||||
define i64 @danny(<64 x i8> %a0, <64 x i8> %a1) #0 {
|
||||
%v0 = icmp eq <64 x i8> %a0, %a1
|
||||
%v1 = bitcast <64 x i1> %v0 to i64
|
||||
@ -9,18 +9,19 @@ define i64 @danny(<64 x i8> %a0, <64 x i8> %a1) #0 {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: sammy:
|
||||
; CHECK: vand
|
||||
; CHECK: vrmpy
|
||||
define i32 @sammy(<32 x i16> %a0, <32 x i16> %a1) #0 {
|
||||
%v0 = icmp eq <32 x i16> %a0, %a1
|
||||
%v1 = bitcast <32 x i1> %v0 to i32
|
||||
ret i32 %v1
|
||||
}
|
||||
|
||||
; This one still doesn't work.
|
||||
; define i16 @kirby(<16 x i32> %a0, <16 x i32> %a1) #0 {
|
||||
; %v0 = icmp eq <16 x i32> %a0, %a1
|
||||
; %v1 = bitcast <16 x i1> %v0 to i16
|
||||
; ret i16 %v1
|
||||
; }
|
||||
; CHECK-LABEL: kirby:
|
||||
; CHECK: vrmpy
|
||||
define i16 @kirby(<16 x i32> %a0, <16 x i32> %a1) #0 {
|
||||
%v0 = icmp eq <16 x i32> %a0, %a1
|
||||
%v1 = bitcast <16 x i1> %v0 to i16
|
||||
ret i16 %v1
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind "target-cpu"="hexagonv66" "target-features"="+v66,+hvx,+hvxv66,+hvx-length64b" }
|
||||
|
15
test/CodeGen/Hexagon/autohvx/isel-store-bitcast-v128i1.ll
Normal file
15
test/CodeGen/Hexagon/autohvx/isel-store-bitcast-v128i1.ll
Normal file
@ -0,0 +1,15 @@
|
||||
; RUN: llc -march=hexagon < %s | FileCheck %s
|
||||
|
||||
; Primarily check if this compiles without failing.
|
||||
|
||||
; CHECK-LABEL: fred:
|
||||
; CHECK: memd
|
||||
define void @fred(<128 x i8> %a0, <128 x i8> %a1, i128* %a2) #0 {
|
||||
%v0 = icmp eq <128 x i8> %a0, %a1
|
||||
%v1 = bitcast <128 x i1> %v0 to i128
|
||||
store i128 %v1, i128* %a2, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind "target-cpu"="hexagonv66" "target-features"="+v66,+hvx,+hvxv66,+hvx-length128b" }
|
||||
|
@ -1,51 +1,47 @@
|
||||
; RUN: llc -march=hexagon < %s | FileCheck %s
|
||||
|
||||
; Test that LLVM does not assert and bitcast v64i1 to i64 is lowered.
|
||||
|
||||
; CHECK: v[[REG1:[0-9]+]] = valign(v{{[0-9]+}},v{{[0-9]+}},#2)
|
||||
; CHECK: v[[REG2:[0-9]+]] = vor(v{{[0-9]+}},v[[REG1]])
|
||||
; CHECK: v[[REG3:[0-9]+]] = valign(v[[REG2]],v[[REG2]],#1)
|
||||
; CHECK: v[[REG4:[0-9]+]] = vor(v{{[0-9]+}},v[[REG3]])
|
||||
; CHECK: v[[REG5:[0-9]+]] = vand(v[[REG4]],v{{[0-9]+}})
|
||||
; CHECK: v{{[0-9]+}}.w = vasl(v[[REG5]].w,v{{[0-9]+}}.w)
|
||||
; Test that LLVM does not assert and bitcast v64i1 to i64 is lowered
|
||||
; without crashing.
|
||||
; CHECK: valign
|
||||
|
||||
target triple = "hexagon"
|
||||
|
||||
define dso_local void @fun() local_unnamed_addr #0 {
|
||||
entry:
|
||||
br i1 undef, label %cleanup, label %if.end
|
||||
define dso_local void @f0() local_unnamed_addr #0 {
|
||||
b0:
|
||||
br i1 undef, label %b2, label %b1
|
||||
|
||||
if.end:
|
||||
%0 = load i8, i8* undef, align 1
|
||||
%conv13.i = zext i8 %0 to i32
|
||||
%trip.count.minus.1216 = add nsw i32 %conv13.i, -1
|
||||
%broadcast.splatinsert221 = insertelement <64 x i32> undef, i32 %trip.count.minus.1216, i32 0
|
||||
%broadcast.splat222 = shufflevector <64 x i32> %broadcast.splatinsert221, <64 x i32> undef, <64 x i32> zeroinitializer
|
||||
%1 = icmp ule <64 x i32> undef, %broadcast.splat222
|
||||
%wide.masked.load223 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* nonnull undef, i32 1, <64 x i1> %1, <64 x i8> undef)
|
||||
%2 = lshr <64 x i8> %wide.masked.load223, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
|
||||
%3 = and <64 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
|
||||
%4 = zext <64 x i8> %3 to <64 x i32>
|
||||
%5 = add nsw <64 x i32> undef, %4
|
||||
%6 = select <64 x i1> %1, <64 x i32> %5, <64 x i32> undef
|
||||
%bin.rdx225 = add <64 x i32> %6, undef
|
||||
%bin.rdx227 = add <64 x i32> %bin.rdx225, undef
|
||||
%bin.rdx229 = add <64 x i32> %bin.rdx227, undef
|
||||
%bin.rdx231 = add <64 x i32> %bin.rdx229, undef
|
||||
%bin.rdx233 = add <64 x i32> %bin.rdx231, undef
|
||||
%bin.rdx235 = add <64 x i32> %bin.rdx233, undef
|
||||
%bin.rdx237 = add <64 x i32> %bin.rdx235, undef
|
||||
%7 = extractelement <64 x i32> %bin.rdx237, i32 0
|
||||
%nChans = getelementptr inbounds i8, i8* null, i32 2160
|
||||
%8 = bitcast i8* %nChans to i32*
|
||||
store i32 %7, i32* %8, align 4
|
||||
br label %cleanup
|
||||
b1: ; preds = %b0
|
||||
%v0 = load i8, i8* undef, align 1
|
||||
%v1 = zext i8 %v0 to i32
|
||||
%v2 = add nsw i32 %v1, -1
|
||||
%v3 = insertelement <64 x i32> undef, i32 %v2, i32 0
|
||||
%v4 = shufflevector <64 x i32> %v3, <64 x i32> undef, <64 x i32> zeroinitializer
|
||||
%v5 = icmp ule <64 x i32> undef, %v4
|
||||
%v6 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* nonnull undef, i32 1, <64 x i1> %v5, <64 x i8> undef)
|
||||
%v7 = lshr <64 x i8> %v6, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
|
||||
%v8 = and <64 x i8> %v7, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
|
||||
%v9 = zext <64 x i8> %v8 to <64 x i32>
|
||||
%v10 = add nsw <64 x i32> undef, %v9
|
||||
%v11 = select <64 x i1> %v5, <64 x i32> %v10, <64 x i32> undef
|
||||
%v12 = add <64 x i32> %v11, undef
|
||||
%v13 = add <64 x i32> %v12, undef
|
||||
%v14 = add <64 x i32> %v13, undef
|
||||
%v15 = add <64 x i32> %v14, undef
|
||||
%v16 = add <64 x i32> %v15, undef
|
||||
%v17 = add <64 x i32> %v16, undef
|
||||
%v18 = add <64 x i32> %v17, undef
|
||||
%v19 = extractelement <64 x i32> %v18, i32 0
|
||||
%v20 = getelementptr inbounds i8, i8* null, i32 2160
|
||||
%v21 = bitcast i8* %v20 to i32*
|
||||
store i32 %v19, i32* %v21, align 4
|
||||
br label %b2
|
||||
|
||||
cleanup:
|
||||
b2: ; preds = %b1, %b0
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: argmemonly nounwind readonly willreturn
|
||||
declare <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>*, i32, <64 x i1>, <64 x i8>)
|
||||
declare <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>*, i32 immarg, <64 x i1>, <64 x i8>) #1
|
||||
|
||||
attributes #0 = { "target-features"="+hvx-length64b,+hvxv67,+v67,-long-calls" }
|
||||
attributes #1 = { argmemonly nounwind readonly willreturn }
|
||||
|
@ -1,47 +1,47 @@
|
||||
; RUN: llc < %s | FileCheck %s
|
||||
; RUN: llc -march=hexagon < %s | FileCheck %s
|
||||
|
||||
; This test checks that store a vector predicate of type v128i1 is lowered
|
||||
; and two double stores are generated.
|
||||
|
||||
; CHECK-DAG: memd(r{{[0-9]+}}+#0) = r{{[0-9]+}}:{{[0-9]+}}
|
||||
; CHECK-DAG: memd(r{{[0-9]+}}+#8) = r{{[0-9]+}}:{{[0-9]+}}
|
||||
; without crashing.
|
||||
; CHECK: valign
|
||||
|
||||
target triple = "hexagon"
|
||||
|
||||
define dso_local void @raac_UnpackADIFHeader() local_unnamed_addr #0 {
|
||||
entry:
|
||||
br i1 undef, label %cleanup, label %if.end
|
||||
define dso_local void @f0() local_unnamed_addr #0 {
|
||||
b0:
|
||||
br i1 undef, label %b2, label %b1
|
||||
|
||||
if.end:
|
||||
%0 = load i8, i8* undef, align 1
|
||||
%conv13.i = zext i8 %0 to i32
|
||||
%trip.count.minus.1216 = add nsw i32 %conv13.i, -1
|
||||
%broadcast.splatinsert221 = insertelement <128 x i32> undef, i32 %trip.count.minus.1216, i32 0
|
||||
%broadcast.splat222 = shufflevector <128 x i32> %broadcast.splatinsert221, <128 x i32> undef, <128 x i32> zeroinitializer
|
||||
%1 = icmp ule <128 x i32> undef, %broadcast.splat222
|
||||
%wide.masked.load223 = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* nonnull undef, i32 1, <128 x i1> %1, <128 x i8> undef)
|
||||
%2 = lshr <128 x i8> %wide.masked.load223, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
|
||||
%3 = and <128 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
|
||||
%4 = zext <128 x i8> %3 to <128 x i32>
|
||||
%5 = add nsw <128 x i32> undef, %4
|
||||
%6 = select <128 x i1> %1, <128 x i32> %5, <128 x i32> undef
|
||||
%bin.rdx225 = add <128 x i32> %6, undef
|
||||
%bin.rdx227 = add <128 x i32> %bin.rdx225, undef
|
||||
%bin.rdx229 = add <128 x i32> %bin.rdx227, undef
|
||||
%bin.rdx231 = add <128 x i32> %bin.rdx229, undef
|
||||
%bin.rdx233 = add <128 x i32> %bin.rdx231, undef
|
||||
%bin.rdx235 = add <128 x i32> %bin.rdx233, undef
|
||||
%bin.rdx237 = add <128 x i32> %bin.rdx235, undef
|
||||
%7 = extractelement <128 x i32> %bin.rdx237, i32 0
|
||||
%nChans = getelementptr inbounds i8, i8* null, i32 2160
|
||||
%8 = bitcast i8* %nChans to i32*
|
||||
store i32 %7, i32* %8, align 4
|
||||
br label %cleanup
|
||||
b1: ; preds = %b0
|
||||
%v0 = load i8, i8* undef, align 1
|
||||
%v1 = zext i8 %v0 to i32
|
||||
%v2 = add nsw i32 %v1, -1
|
||||
%v3 = insertelement <128 x i32> undef, i32 %v2, i32 0
|
||||
%v4 = shufflevector <128 x i32> %v3, <128 x i32> undef, <128 x i32> zeroinitializer
|
||||
%v5 = icmp ule <128 x i32> undef, %v4
|
||||
%v6 = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* nonnull undef, i32 1, <128 x i1> %v5, <128 x i8> undef)
|
||||
%v7 = lshr <128 x i8> %v6, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
|
||||
%v8 = and <128 x i8> %v7, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
|
||||
%v9 = zext <128 x i8> %v8 to <128 x i32>
|
||||
%v10 = add nsw <128 x i32> undef, %v9
|
||||
%v11 = select <128 x i1> %v5, <128 x i32> %v10, <128 x i32> undef
|
||||
%v12 = add <128 x i32> %v11, undef
|
||||
%v13 = add <128 x i32> %v12, undef
|
||||
%v14 = add <128 x i32> %v13, undef
|
||||
%v15 = add <128 x i32> %v14, undef
|
||||
%v16 = add <128 x i32> %v15, undef
|
||||
%v17 = add <128 x i32> %v16, undef
|
||||
%v18 = add <128 x i32> %v17, undef
|
||||
%v19 = extractelement <128 x i32> %v18, i32 0
|
||||
%v20 = getelementptr inbounds i8, i8* null, i32 2160
|
||||
%v21 = bitcast i8* %v20 to i32*
|
||||
store i32 %v19, i32* %v21, align 4
|
||||
br label %b2
|
||||
|
||||
cleanup:
|
||||
b2: ; preds = %b1, %b0
|
||||
ret void
|
||||
}
|
||||
}
|
||||
|
||||
declare <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>*, i32 immarg, <128 x i1>, <128 x i8>)
|
||||
; Function Attrs: argmemonly nounwind readonly willreturn
|
||||
declare <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>*, i32 immarg, <128 x i1>, <128 x i8>) #1
|
||||
|
||||
attributes #0 = { "target-features"="+hvx-length128b,+hvxv67,+v67,-long-calls" }
|
||||
attributes #1 = { argmemonly nounwind readonly willreturn }
|
||||
|
Loading…
Reference in New Issue
Block a user