mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-19 11:02:59 +02:00
AMDGPU: Split R600 and SI load lowering
These weren't actually sharing anything in the common LowerLOAD. llvm-svn: 260398
This commit is contained in:
parent
7bee7c2cd0
commit
2c8154f002
@ -671,18 +671,6 @@ void AMDGPUTargetLowering::ReplaceNodeResults(SDNode *N,
|
||||
// ReplaceNodeResults to sext_in_reg to an illegal type, so we'll just do
|
||||
// nothing here and let the illegal result integer be handled normally.
|
||||
return;
|
||||
case ISD::LOAD: {
|
||||
SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
|
||||
if (!Node)
|
||||
return;
|
||||
|
||||
Results.push_back(SDValue(Node, 0));
|
||||
Results.push_back(SDValue(Node, 1));
|
||||
// XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
|
||||
// function
|
||||
DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
|
||||
return;
|
||||
}
|
||||
case ISD::STORE: {
|
||||
if (SDValue Lowered = LowerSTORE(SDValue(N, 0), DAG))
|
||||
Results.push_back(Lowered);
|
||||
@ -1302,87 +1290,6 @@ SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
|
||||
return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoStore, HiStore);
|
||||
}
|
||||
|
||||
|
||||
SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDLoc DL(Op);
|
||||
LoadSDNode *Load = cast<LoadSDNode>(Op);
|
||||
ISD::LoadExtType ExtType = Load->getExtensionType();
|
||||
EVT VT = Op.getValueType();
|
||||
EVT MemVT = Load->getMemoryVT();
|
||||
|
||||
if (ExtType == ISD::NON_EXTLOAD && VT.getSizeInBits() < 32) {
|
||||
assert(VT == MVT::i1 && "Only i1 non-extloads expected");
|
||||
// FIXME: Copied from PPC
|
||||
// First, load into 32 bits, then truncate to 1 bit.
|
||||
|
||||
SDValue Chain = Load->getChain();
|
||||
SDValue BasePtr = Load->getBasePtr();
|
||||
MachineMemOperand *MMO = Load->getMemOperand();
|
||||
|
||||
SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain,
|
||||
BasePtr, MVT::i8, MMO);
|
||||
|
||||
SDValue Ops[] = {
|
||||
DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD),
|
||||
NewLD.getValue(1)
|
||||
};
|
||||
|
||||
return DAG.getMergeValues(Ops, DL);
|
||||
}
|
||||
|
||||
if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS ||
|
||||
Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS ||
|
||||
ExtType == ISD::NON_EXTLOAD || Load->getMemoryVT().bitsGE(MVT::i32))
|
||||
return SDValue();
|
||||
|
||||
// <SI && AS=PRIVATE && EXTLOAD && size < 32bit,
|
||||
// register (2-)byte extract.
|
||||
|
||||
// Get Register holding the target.
|
||||
SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
|
||||
DAG.getConstant(2, DL, MVT::i32));
|
||||
// Load the Register.
|
||||
SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
|
||||
Load->getChain(), Ptr,
|
||||
DAG.getTargetConstant(0, DL, MVT::i32),
|
||||
Op.getOperand(2));
|
||||
|
||||
// Get offset within the register.
|
||||
SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
|
||||
Load->getBasePtr(),
|
||||
DAG.getConstant(0x3, DL, MVT::i32));
|
||||
|
||||
// Bit offset of target byte (byteIdx * 8).
|
||||
SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
|
||||
DAG.getConstant(3, DL, MVT::i32));
|
||||
|
||||
// Shift to the right.
|
||||
Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Ret, ShiftAmt);
|
||||
|
||||
// Eliminate the upper bits by setting them to ...
|
||||
EVT MemEltVT = MemVT.getScalarType();
|
||||
|
||||
// ... copies of the sign bit (sign extension).
|
||||
if (ExtType == ISD::SEXTLOAD) {
|
||||
SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
|
||||
|
||||
SDValue Ops[] = {
|
||||
DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode),
|
||||
Load->getChain()
|
||||
};
|
||||
|
||||
return DAG.getMergeValues(Ops, DL);
|
||||
}
|
||||
|
||||
// ... or zeros.
|
||||
SDValue Ops[] = {
|
||||
DAG.getZeroExtendInReg(Ret, DL, MemEltVT),
|
||||
Load->getChain()
|
||||
};
|
||||
|
||||
return DAG.getMergeValues(Ops, DL);
|
||||
}
|
||||
|
||||
SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDLoc DL(Op);
|
||||
if (SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG))
|
||||
|
@ -102,7 +102,6 @@ protected:
|
||||
/// \brief Split a vector store into 2 stores of half the vector.
|
||||
SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
@ -1413,17 +1413,77 @@ ConstantAddressBlock(unsigned AddressSpace) {
|
||||
}
|
||||
}
|
||||
|
||||
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
|
||||
{
|
||||
EVT VT = Op.getValueType();
|
||||
SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
SDLoc DL(Op);
|
||||
LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
|
||||
SDValue Chain = Op.getOperand(0);
|
||||
SDValue Ptr = Op.getOperand(1);
|
||||
SDValue LoweredLoad;
|
||||
LoadSDNode *Load = cast<LoadSDNode>(Op);
|
||||
ISD::LoadExtType ExtType = Load->getExtensionType();
|
||||
EVT MemVT = Load->getMemoryVT();
|
||||
|
||||
if (SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG))
|
||||
return Ret;
|
||||
// <SI && AS=PRIVATE && EXTLOAD && size < 32bit,
|
||||
// register (2-)byte extract.
|
||||
|
||||
// Get Register holding the target.
|
||||
SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
|
||||
DAG.getConstant(2, DL, MVT::i32));
|
||||
// Load the Register.
|
||||
SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
|
||||
Load->getChain(),
|
||||
Ptr,
|
||||
DAG.getTargetConstant(0, DL, MVT::i32),
|
||||
Op.getOperand(2));
|
||||
|
||||
// Get offset within the register.
|
||||
SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
|
||||
Load->getBasePtr(),
|
||||
DAG.getConstant(0x3, DL, MVT::i32));
|
||||
|
||||
// Bit offset of target byte (byteIdx * 8).
|
||||
SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
|
||||
DAG.getConstant(3, DL, MVT::i32));
|
||||
|
||||
// Shift to the right.
|
||||
Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Ret, ShiftAmt);
|
||||
|
||||
// Eliminate the upper bits by setting them to ...
|
||||
EVT MemEltVT = MemVT.getScalarType();
|
||||
|
||||
// ... copies of the sign bit (sign extension).
|
||||
if (ExtType == ISD::SEXTLOAD) {
|
||||
SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
|
||||
|
||||
SDValue Ops[] = {
|
||||
DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode),
|
||||
Load->getChain()
|
||||
};
|
||||
|
||||
return DAG.getMergeValues(Ops, DL);
|
||||
}
|
||||
|
||||
// ... or zeros.
|
||||
SDValue Ops[] = {
|
||||
DAG.getZeroExtendInReg(Ret, DL, MemEltVT),
|
||||
Load->getChain()
|
||||
};
|
||||
|
||||
return DAG.getMergeValues(Ops, DL);
|
||||
}
|
||||
|
||||
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
|
||||
LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
|
||||
unsigned AS = LoadNode->getAddressSpace();
|
||||
EVT MemVT = LoadNode->getMemoryVT();
|
||||
ISD::LoadExtType ExtType = LoadNode->getExtensionType();
|
||||
|
||||
if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
|
||||
ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
|
||||
return lowerPrivateExtLoad(Op, DAG);
|
||||
}
|
||||
|
||||
SDLoc DL(Op);
|
||||
EVT VT = Op.getValueType();
|
||||
SDValue Chain = LoadNode->getChain();
|
||||
SDValue Ptr = LoadNode->getBasePtr();
|
||||
|
||||
// Lower loads of global variables in the constant address space.
|
||||
if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
|
||||
@ -1498,6 +1558,8 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
|
||||
return DAG.getMergeValues(MergedValues, DL);
|
||||
}
|
||||
|
||||
SDValue LoweredLoad;
|
||||
|
||||
// For most operations returning SDValue() will result in the node being
|
||||
// expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
|
||||
// need to manually expand loads that may be legal in some address spaces and
|
||||
|
@ -61,6 +61,8 @@ private:
|
||||
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
SDValue lowerPrivateExtLoad(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
@ -1599,6 +1599,28 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
|
||||
SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDLoc DL(Op);
|
||||
LoadSDNode *Load = cast<LoadSDNode>(Op);
|
||||
ISD::LoadExtType ExtType = Load->getExtensionType();
|
||||
EVT VT = Load->getMemoryVT();
|
||||
|
||||
if (ExtType == ISD::NON_EXTLOAD && VT.getSizeInBits() < 32) {
|
||||
assert(VT == MVT::i1 && "Only i1 non-extloads expected");
|
||||
// FIXME: Copied from PPC
|
||||
// First, load into 32 bits, then truncate to 1 bit.
|
||||
|
||||
SDValue Chain = Load->getChain();
|
||||
SDValue BasePtr = Load->getBasePtr();
|
||||
MachineMemOperand *MMO = Load->getMemOperand();
|
||||
|
||||
SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain,
|
||||
BasePtr, MVT::i8, MMO);
|
||||
|
||||
SDValue Ops[] = {
|
||||
DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD),
|
||||
NewLD.getValue(1)
|
||||
};
|
||||
|
||||
return DAG.getMergeValues(Ops, DL);
|
||||
}
|
||||
|
||||
if (Op.getValueType().isVector()) {
|
||||
assert(Op.getValueType().getVectorElementType() == MVT::i32 &&
|
||||
@ -1631,7 +1653,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
|
||||
}
|
||||
}
|
||||
|
||||
return AMDGPUTargetLowering::LowerLOAD(Op, DAG);
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
|
||||
|
Loading…
Reference in New Issue
Block a user