1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 02:33:06 +01:00

Revert "[X86][AVX] Add getBROADCAST_LOAD helper function. NFCI."

This reverts commit 1cfecf4fc4278afb0005923f6dff595cd372da5c.

The reverted commit broke LLVM code generated through XLA by removing a
conditional check on Ld->getExtensionType() == ISD::NON_EXTLOAD.
This commit is contained in:
Tres Popp 2021-07-27 16:21:10 +02:00
parent adf33c8470
commit 9d32182a3a

View File

@ -7988,30 +7988,6 @@ static bool getTargetShuffleInputs(SDValue Op, SmallVectorImpl<SDValue> &Inputs,
KnownZero, DAG, Depth, ResolveKnownElts); KnownZero, DAG, Depth, ResolveKnownElts);
} }
/// Attempt to create a scalar/subvector broadcast load from the base
/// MemSDNode.
///
/// \param Opcode  X86ISD::VBROADCAST_LOAD or X86ISD::SUBV_BROADCAST_LOAD.
/// \param DL      Location attached to the newly created nodes.
/// \param VT      Result vector type of the broadcast.
/// \param MemVT   Memory type read by the broadcast load.
/// \param Mem     Memory node supplying the chain, base pointer and memory
///                operand; may be null.
/// \param Offset  Offset added to the base pointer of \p Mem and to its
///                memory operand.
/// \param DAG     SelectionDAG used to build the nodes.
/// \returns the broadcast load node, or an empty SDValue if \p Mem is not a
/// suitable source (null, not a read, not simple, non-temporal, or an
/// extending load).
static SDValue getBROADCAST_LOAD(unsigned Opcode, const SDLoc &DL, EVT VT,
                                 EVT MemVT, MemSDNode *Mem, unsigned Offset,
                                 SelectionDAG &DAG) {
  assert((Opcode == X86ISD::VBROADCAST_LOAD ||
          Opcode == X86ISD::SUBV_BROADCAST_LOAD) &&
         "Unknown broadcast load type");

  // Ensure this is a simple (non-atomic, non-volatile), temporal read memop.
  if (!Mem || !Mem->readMem() || !Mem->isSimple() || Mem->isNonTemporal())
    return SDValue();

  // Reject extending loads: the broadcast load built below reads MemVT with
  // no extension step, so folding an extending load would change the value.
  // The open-coded folds guarded on this; keep the guard in the helper so
  // every caller gets it.
  if (auto *Ld = dyn_cast<LoadSDNode>(Mem))
    if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
      return SDValue();

  SDValue Ptr =
      DAG.getMemBasePlusOffset(Mem->getBasePtr(), TypeSize::Fixed(Offset), DL);
  SDVTList Tys = DAG.getVTList(VT, MVT::Other);
  SDValue Ops[] = {Mem->getChain(), Ptr};
  SDValue BcstLd = DAG.getMemIntrinsicNode(
      Opcode, DL, Tys, Ops, MemVT,
      DAG.getMachineFunction().getMachineMemOperand(
          Mem->getMemOperand(), Offset, MemVT.getStoreSize()));

  // Keep the new load ordered with respect to the original node's chain users.
  DAG.makeEquivalentMemoryOrdering(SDValue(Mem, 1), BcstLd.getValue(1));
  return BcstLd;
}
/// Returns the scalar element that will make up the i'th /// Returns the scalar element that will make up the i'th
/// element of the result of the vector shuffle. /// element of the result of the vector shuffle.
static SDValue getShuffleScalarElt(SDValue Op, unsigned Index, static SDValue getShuffleScalarElt(SDValue Op, unsigned Index,
@ -16084,12 +16060,21 @@ static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1,
bool SplatHi = isShuffleEquivalent(Mask, {2, 3, 2, 3}, V1); bool SplatHi = isShuffleEquivalent(Mask, {2, 3, 2, 3}, V1);
if ((SplatLo || SplatHi) && !Subtarget.hasAVX512() && V1.hasOneUse() && if ((SplatLo || SplatHi) && !Subtarget.hasAVX512() && V1.hasOneUse() &&
MayFoldLoad(peekThroughOneUseBitcasts(V1))) { MayFoldLoad(peekThroughOneUseBitcasts(V1))) {
MVT MemVT = VT.getHalfNumVectorElementsVT();
unsigned Ofs = SplatLo ? 0 : MemVT.getStoreSize();
auto *Ld = cast<LoadSDNode>(peekThroughOneUseBitcasts(V1)); auto *Ld = cast<LoadSDNode>(peekThroughOneUseBitcasts(V1));
if (SDValue BcstLd = getBROADCAST_LOAD(X86ISD::SUBV_BROADCAST_LOAD, DL, if (!Ld->isNonTemporal()) {
VT, MemVT, Ld, Ofs, DAG)) MVT MemVT = VT.getHalfNumVectorElementsVT();
return BcstLd; unsigned Ofs = SplatLo ? 0 : MemVT.getStoreSize();
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
SDValue Ptr = DAG.getMemBasePlusOffset(Ld->getBasePtr(),
TypeSize::Fixed(Ofs), DL);
SDValue Ops[] = {Ld->getChain(), Ptr};
SDValue BcastLd = DAG.getMemIntrinsicNode(
X86ISD::SUBV_BROADCAST_LOAD, DL, Tys, Ops, MemVT,
DAG.getMachineFunction().getMachineMemOperand(
Ld->getMemOperand(), Ofs, MemVT.getStoreSize()));
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), BcastLd.getValue(1));
return BcastLd;
}
} }
// With AVX2, use VPERMQ/VPERMPD for unary shuffles to allow memory folding. // With AVX2, use VPERMQ/VPERMPD for unary shuffles to allow memory folding.
@ -38992,10 +38977,10 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
} }
// Subvector broadcast. // Subvector broadcast.
case X86ISD::SUBV_BROADCAST_LOAD: { case X86ISD::SUBV_BROADCAST_LOAD: {
SDLoc DL(Op);
auto *MemIntr = cast<MemIntrinsicSDNode>(Op); auto *MemIntr = cast<MemIntrinsicSDNode>(Op);
EVT MemVT = MemIntr->getMemoryVT(); EVT MemVT = MemIntr->getMemoryVT();
if (ExtSizeInBits == MemVT.getStoreSizeInBits()) { if (ExtSizeInBits == MemVT.getStoreSizeInBits()) {
SDLoc DL(Op);
SDValue Ld = SDValue Ld =
TLO.DAG.getLoad(MemVT, DL, MemIntr->getChain(), TLO.DAG.getLoad(MemVT, DL, MemIntr->getChain(),
MemIntr->getBasePtr(), MemIntr->getMemOperand()); MemIntr->getBasePtr(), MemIntr->getMemOperand());
@ -39004,13 +38989,18 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Ld, 0, return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Ld, 0,
TLO.DAG, DL, ExtSizeInBits)); TLO.DAG, DL, ExtSizeInBits));
} else if ((ExtSizeInBits % MemVT.getStoreSizeInBits()) == 0) { } else if ((ExtSizeInBits % MemVT.getStoreSizeInBits()) == 0) {
SDLoc DL(Op);
EVT BcstVT = EVT::getVectorVT(*TLO.DAG.getContext(), VT.getScalarType(), EVT BcstVT = EVT::getVectorVT(*TLO.DAG.getContext(), VT.getScalarType(),
ExtSizeInBits / VT.getScalarSizeInBits()); ExtSizeInBits / VT.getScalarSizeInBits());
if (SDValue BcstLd = SDVTList Tys = TLO.DAG.getVTList(BcstVT, MVT::Other);
getBROADCAST_LOAD(Opc, DL, BcstVT, MemVT, MemIntr, 0, TLO.DAG)) SDValue Ops[] = {MemIntr->getOperand(0), MemIntr->getOperand(1)};
return TLO.CombineTo(Op, SDValue Bcst =
insertSubVector(TLO.DAG.getUNDEF(VT), BcstLd, 0, TLO.DAG.getMemIntrinsicNode(X86ISD::SUBV_BROADCAST_LOAD, DL, Tys,
TLO.DAG, DL, ExtSizeInBits)); Ops, MemVT, MemIntr->getMemOperand());
TLO.DAG.makeEquivalentMemoryOrdering(SDValue(MemIntr, 1),
Bcst.getValue(1));
return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Bcst, 0,
TLO.DAG, DL, ExtSizeInBits));
} }
break; break;
} }
@ -50083,21 +50073,36 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
if (Op0.getOpcode() == X86ISD::VBROADCAST) if (Op0.getOpcode() == X86ISD::VBROADCAST)
return DAG.getNode(Op0.getOpcode(), DL, VT, Op0.getOperand(0)); return DAG.getNode(Op0.getOpcode(), DL, VT, Op0.getOperand(0));
// If this simple subvector or scalar/subvector broadcast_load is inserted // If this scalar/subvector broadcast_load is inserted into both halves, use
// into both halves, use a larger broadcast_load. Update other uses to use // a larger broadcast_load. Update other uses to use an extracted subvector.
// an extracted subvector. if (Op0.getOpcode() == X86ISD::VBROADCAST_LOAD ||
if (Op0.getOpcode() == ISD::LOAD ||
Op0.getOpcode() == X86ISD::VBROADCAST_LOAD ||
Op0.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) { Op0.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) {
auto *Mem = cast<MemSDNode>(Op0); auto *MemIntr = cast<MemIntrinsicSDNode>(Op0);
unsigned Opcode = Op0.getOpcode() == X86ISD::VBROADCAST_LOAD SDVTList Tys = DAG.getVTList(VT, MVT::Other);
? X86ISD::VBROADCAST_LOAD SDValue Ops[] = {MemIntr->getChain(), MemIntr->getBasePtr()};
: X86ISD::SUBV_BROADCAST_LOAD; SDValue BcastLd = DAG.getMemIntrinsicNode(Op0.getOpcode(), DL, Tys, Ops,
if (SDValue BcastLd = getBROADCAST_LOAD( MemIntr->getMemoryVT(),
Opcode, DL, VT, Mem->getMemoryVT(), Mem, 0, DAG)) { MemIntr->getMemOperand());
DAG.ReplaceAllUsesOfValueWith(
Op0, extractSubVector(BcastLd, 0, DAG, DL, Op0.getValueSizeInBits()));
DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), BcastLd.getValue(1));
return BcastLd;
}
// If this is a simple subvector load repeated across multiple lanes, then
// broadcast the load. Update other uses to use an extracted subvector.
if (auto *Ld = dyn_cast<LoadSDNode>(Op0)) {
if (Ld->isSimple() && !Ld->isNonTemporal() &&
Ld->getExtensionType() == ISD::NON_EXTLOAD) {
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
SDValue Ops[] = {Ld->getChain(), Ld->getBasePtr()};
SDValue BcastLd =
DAG.getMemIntrinsicNode(X86ISD::SUBV_BROADCAST_LOAD, DL, Tys, Ops,
Ld->getMemoryVT(), Ld->getMemOperand());
DAG.ReplaceAllUsesOfValueWith( DAG.ReplaceAllUsesOfValueWith(
Op0, Op0,
extractSubVector(BcastLd, 0, DAG, DL, Op0.getValueSizeInBits())); extractSubVector(BcastLd, 0, DAG, DL, Op0.getValueSizeInBits()));
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), BcastLd.getValue(1));
return BcastLd; return BcastLd;
} }
} }
@ -50461,8 +50466,14 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
if (Vec.isUndef() && IdxVal != 0 && SubVec.hasOneUse() && if (Vec.isUndef() && IdxVal != 0 && SubVec.hasOneUse() &&
SubVec.getOpcode() == X86ISD::VBROADCAST_LOAD) { SubVec.getOpcode() == X86ISD::VBROADCAST_LOAD) {
auto *MemIntr = cast<MemIntrinsicSDNode>(SubVec); auto *MemIntr = cast<MemIntrinsicSDNode>(SubVec);
return getBROADCAST_LOAD(X86ISD::VBROADCAST_LOAD, dl, OpVT, SDVTList Tys = DAG.getVTList(OpVT, MVT::Other);
MemIntr->getMemoryVT(), MemIntr, 0, DAG); SDValue Ops[] = { MemIntr->getChain(), MemIntr->getBasePtr() };
SDValue BcastLd =
DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, dl, Tys, Ops,
MemIntr->getMemoryVT(),
MemIntr->getMemOperand());
DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), BcastLd.getValue(1));
return BcastLd;
} }
// If we're splatting the lower half subvector of a full vector load into the // If we're splatting the lower half subvector of a full vector load into the