1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

AMDGPU: Use unique PSVs for buffer resources

Also fixes using the wrong memory type for some
intrinsics when custom lowering them.

llvm-svn: 321557
This commit is contained in:
Matt Arsenault 2017-12-29 17:18:21 +00:00
parent b41fe789b4
commit 4f8f93ccba
4 changed files with 93 additions and 45 deletions

View File

@ -462,7 +462,7 @@ class AMDGPUBufferLoad : Intrinsic <
llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i32_ty, // offset(SGPR/VGPR/imm)
llvm_i1_ty, // glc(imm) llvm_i1_ty, // glc(imm)
llvm_i1_ty], // slc(imm) llvm_i1_ty], // slc(imm)
[IntrReadMem]>; [IntrReadMem], "", [SDNPMemOperand]>;
def int_amdgcn_buffer_load_format : AMDGPUBufferLoad; def int_amdgcn_buffer_load_format : AMDGPUBufferLoad;
def int_amdgcn_buffer_load : AMDGPUBufferLoad; def int_amdgcn_buffer_load : AMDGPUBufferLoad;
@ -474,7 +474,7 @@ class AMDGPUBufferStore : Intrinsic <
llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i32_ty, // offset(SGPR/VGPR/imm)
llvm_i1_ty, // glc(imm) llvm_i1_ty, // glc(imm)
llvm_i1_ty], // slc(imm) llvm_i1_ty], // slc(imm)
[IntrWriteMem]>; [IntrWriteMem], "", [SDNPMemOperand]>;
def int_amdgcn_buffer_store_format : AMDGPUBufferStore; def int_amdgcn_buffer_store_format : AMDGPUBufferStore;
def int_amdgcn_buffer_store : AMDGPUBufferStore; def int_amdgcn_buffer_store : AMDGPUBufferStore;
@ -489,7 +489,7 @@ def int_amdgcn_tbuffer_load : Intrinsic <
llvm_i32_ty, // nfmt(imm) llvm_i32_ty, // nfmt(imm)
llvm_i1_ty, // glc(imm) llvm_i1_ty, // glc(imm)
llvm_i1_ty], // slc(imm) llvm_i1_ty], // slc(imm)
[]>; [IntrReadMem], "", [SDNPMemOperand]>;
def int_amdgcn_tbuffer_store : Intrinsic < def int_amdgcn_tbuffer_store : Intrinsic <
[], [],
@ -503,7 +503,7 @@ def int_amdgcn_tbuffer_store : Intrinsic <
llvm_i32_ty, // nfmt(imm) llvm_i32_ty, // nfmt(imm)
llvm_i1_ty, // glc(imm) llvm_i1_ty, // glc(imm)
llvm_i1_ty], // slc(imm) llvm_i1_ty], // slc(imm)
[]>; [IntrWriteMem], "", [SDNPMemOperand]>;
class AMDGPUBufferAtomic : Intrinsic < class AMDGPUBufferAtomic : Intrinsic <
[llvm_i32_ty], [llvm_i32_ty],
@ -512,7 +512,7 @@ class AMDGPUBufferAtomic : Intrinsic <
llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // vindex(VGPR)
llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i32_ty, // offset(SGPR/VGPR/imm)
llvm_i1_ty], // slc(imm) llvm_i1_ty], // slc(imm)
[]>; [], "", [SDNPMemOperand]>;
def int_amdgcn_buffer_atomic_swap : AMDGPUBufferAtomic; def int_amdgcn_buffer_atomic_swap : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_add : AMDGPUBufferAtomic; def int_amdgcn_buffer_atomic_add : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_sub : AMDGPUBufferAtomic; def int_amdgcn_buffer_atomic_sub : AMDGPUBufferAtomic;
@ -531,7 +531,7 @@ def int_amdgcn_buffer_atomic_cmpswap : Intrinsic<
llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // vindex(VGPR)
llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i32_ty, // offset(SGPR/VGPR/imm)
llvm_i1_ty], // slc(imm) llvm_i1_ty], // slc(imm)
[]>; [], "", [SDNPMemOperand]>;
// Uses that do not set the done bit should set IntrWriteMem on the // Uses that do not set the done bit should set IntrWriteMem on the
// call site. // call site.

View File

@ -726,6 +726,70 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.flags |= MachineMemOperand::MOVolatile; Info.flags |= MachineMemOperand::MOVolatile;
return true; return true;
} }
// Buffer load intrinsics (typed and untyped): fill in Info so the call is
// treated as a chained memory-reading intrinsic.
case Intrinsic::amdgcn_tbuffer_load:
case Intrinsic::amdgcn_buffer_load:
case Intrinsic::amdgcn_buffer_load_format: {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
Info.opc = ISD::INTRINSIC_W_CHAIN;
// The pseudo source value is looked up by call argument 0.
// NOTE(review): presumably argument 0 is the buffer resource descriptor
// for all three intrinsics -- confirm against the intrinsic definitions.
Info.ptrVal = MFI->getBufferPSV(
*MF.getSubtarget<SISubtarget>().getInstrInfo(),
CI.getArgOperand(0));
// The memory type is derived from the type the call returns.
Info.memVT = MVT::getVT(CI.getType());
Info.flags = MachineMemOperand::MOLoad |
MachineMemOperand::MODereferenceable;
// There is a constant offset component, but there are additional register
// offsets which could break AA if we set the offset to anything non-0.
// For that reason no offset is recorded in this case.
return true;
}
// Buffer store intrinsics (typed and untyped): fill in Info so the call is
// treated as a memory-writing intrinsic that produces no value.
case Intrinsic::amdgcn_tbuffer_store:
case Intrinsic::amdgcn_buffer_store:
case Intrinsic::amdgcn_buffer_store_format: {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
Info.opc = ISD::INTRINSIC_VOID;
// The pseudo source value is looked up by call argument 1.
// NOTE(review): presumably argument 1 is the buffer resource descriptor
// and argument 0 the data being stored -- confirm against the intrinsic
// definitions.
Info.ptrVal = MFI->getBufferPSV(
*MF.getSubtarget<SISubtarget>().getInstrInfo(),
CI.getArgOperand(1));
// The memory type is derived from the type of call argument 0.
Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType());
Info.flags = MachineMemOperand::MOStore |
MachineMemOperand::MODereferenceable;
return true;
}
// Buffer atomic read-modify-write intrinsics: all ten share one description
// of the memory access.
case Intrinsic::amdgcn_buffer_atomic_swap:
case Intrinsic::amdgcn_buffer_atomic_add:
case Intrinsic::amdgcn_buffer_atomic_sub:
case Intrinsic::amdgcn_buffer_atomic_smin:
case Intrinsic::amdgcn_buffer_atomic_umin:
case Intrinsic::amdgcn_buffer_atomic_smax:
case Intrinsic::amdgcn_buffer_atomic_umax:
case Intrinsic::amdgcn_buffer_atomic_and:
case Intrinsic::amdgcn_buffer_atomic_or:
case Intrinsic::amdgcn_buffer_atomic_xor: {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
Info.opc = ISD::INTRINSIC_W_CHAIN;
// The pseudo source value is looked up by call argument 1.
// NOTE(review): presumably argument 1 is the buffer resource descriptor
// (argument 0 being the data operand) -- confirm against the intrinsic
// definitions.
Info.ptrVal = MFI->getBufferPSV(
*MF.getSubtarget<SISubtarget>().getInstrInfo(),
CI.getArgOperand(1));
// The memory type is derived from the type the call returns.
Info.memVT = MVT::getVT(CI.getType());
// The access is flagged as both a load and a store, and as volatile.
Info.flags = MachineMemOperand::MOLoad |
MachineMemOperand::MOStore |
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOVolatile;
return true;
}
// Buffer atomic compare-and-swap: same flags and memory type rule as the
// other buffer atomics, but the PSV key is taken from a different argument.
case Intrinsic::amdgcn_buffer_atomic_cmpswap: {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
Info.opc = ISD::INTRINSIC_W_CHAIN;
// The pseudo source value is looked up by call argument 2.
// NOTE(review): presumably arguments 0 and 1 are the swap and compare
// values and argument 2 the buffer resource descriptor -- confirm against
// the intrinsic definition.
Info.ptrVal = MFI->getBufferPSV(
*MF.getSubtarget<SISubtarget>().getInstrInfo(),
CI.getArgOperand(2));
// The memory type is derived from the type the call returns.
Info.memVT = MVT::getVT(CI.getType());
// The access is flagged as both a load and a store, and as volatile.
Info.flags = MachineMemOperand::MOLoad |
MachineMemOperand::MOStore |
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOVolatile;
return true;
}
default: default:
return false; return false;
} }
@ -4396,7 +4460,6 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SelectionDAG &DAG) const { SelectionDAG &DAG) const {
unsigned IntrID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); unsigned IntrID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
SDLoc DL(Op); SDLoc DL(Op);
MachineFunction &MF = DAG.getMachineFunction();
switch (IntrID) { switch (IntrID) {
case Intrinsic::amdgcn_atomic_inc: case Intrinsic::amdgcn_atomic_inc:
@ -4423,21 +4486,18 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(5), // glc Op.getOperand(5), // glc
Op.getOperand(6) // slc Op.getOperand(6) // slc
}; };
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
unsigned Opc = (IntrID == Intrinsic::amdgcn_buffer_load) ? unsigned Opc = (IntrID == Intrinsic::amdgcn_buffer_load) ?
AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT; AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT;
EVT VT = Op.getValueType(); EVT VT = Op.getValueType();
EVT IntVT = VT.changeTypeToInteger(); EVT IntVT = VT.changeTypeToInteger();
MachineMemOperand *MMO = MF.getMachineMemOperand( auto *M = cast<MemSDNode>(Op);
MachinePointerInfo(MFI->getBufferPSV()), return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT,
MachineMemOperand::MOLoad, M->getMemOperand());
VT.getStoreSize(), VT.getStoreSize());
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT, MMO);
} }
case Intrinsic::amdgcn_tbuffer_load: { case Intrinsic::amdgcn_tbuffer_load: {
MemSDNode *M = cast<MemSDNode>(Op);
SDValue Ops[] = { SDValue Ops[] = {
Op.getOperand(0), // Chain Op.getOperand(0), // Chain
Op.getOperand(2), // rsrc Op.getOperand(2), // rsrc
@ -4451,14 +4511,10 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(10) // slc Op.getOperand(10) // slc
}; };
EVT VT = Op.getOperand(2).getValueType(); EVT VT = Op.getValueType();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo(),
MachineMemOperand::MOLoad,
VT.getStoreSize(), VT.getStoreSize());
return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL, return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
Op->getVTList(), Ops, VT, MMO); Op->getVTList(), Ops, VT, M->getMemOperand());
} }
case Intrinsic::amdgcn_buffer_atomic_swap: case Intrinsic::amdgcn_buffer_atomic_swap:
case Intrinsic::amdgcn_buffer_atomic_add: case Intrinsic::amdgcn_buffer_atomic_add:
@ -4478,14 +4534,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(5), // offset Op.getOperand(5), // offset
Op.getOperand(6) // slc Op.getOperand(6) // slc
}; };
EVT VT = Op.getOperand(3).getValueType(); EVT VT = Op.getValueType();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo(), auto *M = cast<MemSDNode>(Op);
MachineMemOperand::MOLoad |
MachineMemOperand::MOStore |
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOVolatile,
VT.getStoreSize(), 4);
unsigned Opcode = 0; unsigned Opcode = 0;
switch (IntrID) { switch (IntrID) {
@ -4523,7 +4574,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
llvm_unreachable("unhandled atomic opcode"); llvm_unreachable("unhandled atomic opcode");
} }
return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT, MMO); return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT,
M->getMemOperand());
} }
case Intrinsic::amdgcn_buffer_atomic_cmpswap: { case Intrinsic::amdgcn_buffer_atomic_cmpswap: {
@ -4536,17 +4588,11 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(6), // offset Op.getOperand(6), // offset
Op.getOperand(7) // slc Op.getOperand(7) // slc
}; };
EVT VT = Op.getOperand(4).getValueType(); EVT VT = Op.getValueType();
MachineMemOperand *MMO = MF.getMachineMemOperand( auto *M = cast<MemSDNode>(Op);
MachinePointerInfo(),
MachineMemOperand::MOLoad |
MachineMemOperand::MOStore |
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOVolatile,
VT.getStoreSize(), 4);
return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL, return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL,
Op->getVTList(), Ops, VT, MMO); Op->getVTList(), Ops, VT, M->getMemOperand());
} }
// Basic sample. // Basic sample.

View File

@ -28,7 +28,6 @@ using namespace llvm;
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
: AMDGPUMachineFunction(MF), : AMDGPUMachineFunction(MF),
BufferPSV(*(MF.getSubtarget().getInstrInfo())),
PrivateSegmentBuffer(false), PrivateSegmentBuffer(false),
DispatchPtr(false), DispatchPtr(false),
QueuePtr(false), QueuePtr(false),

View File

@ -137,12 +137,11 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
// Stack object indices for work item IDs. // Stack object indices for work item IDs.
std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}}; std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}};
AMDGPUBufferPseudoSourceValue BufferPSV; DenseMap<const Value *,
std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
DenseMap<const Value *, DenseMap<const Value *,
std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs; std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;
private: private:
unsigned LDSWaveSpillSize = 0; unsigned LDSWaveSpillSize = 0;
unsigned NumUserSGPRs = 0; unsigned NumUserSGPRs = 0;
@ -634,9 +633,13 @@ public:
return LDSWaveSpillSize; return LDSWaveSpillSize;
} }
// FIXME: These should be unique const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII,
const AMDGPUBufferPseudoSourceValue *getBufferPSV() const { const Value *BufferRsrc) {
return &BufferPSV; assert(BufferRsrc);
auto PSV = BufferPSVs.try_emplace(
BufferRsrc,
llvm::make_unique<AMDGPUBufferPseudoSourceValue>(TII));
return PSV.first->second.get();
} }
const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII, const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII,