AMDGPU: Pass function directly instead of MachineFunction

These functions just query the underlying IR function,
so pass it directly.

llvm-svn: 333442
Matt Arsenault 2018-05-29 17:42:50 +00:00
parent 16caab0970
commit 27a95e7d2c
8 changed files with 37 additions and 36 deletions
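
The refactoring is mechanical: helpers that only consult IR-level properties (the calling convention, function attributes) now take the Function directly, and codegen callers that hold a MachineFunction unwrap it once with MF.getFunction(). Below is a minimal compilable sketch of the pattern, using simplified stand-in types rather than the real LLVM classes:

// Simplified stand-ins for llvm::Function and llvm::MachineFunction,
// only to illustrate the parameter narrowing done in this commit.
#include <iostream>

enum class CallingConv { C, AMDGPU_KERNEL, AMDGPU_PS };

struct Function {
  CallingConv CC;
  CallingConv getCallingConv() const { return CC; }
};

struct MachineFunction {
  Function &F;
  const Function &getFunction() const { return F; }
};

static bool isShader(CallingConv CC) { return CC == CallingConv::AMDGPU_PS; }

// Before: the query took const MachineFunction &MF and immediately did
// MF.getFunction().getCallingConv(). After: it takes the IR function.
static bool isMesaKernel(const Function &F) {
  return !isShader(F.getCallingConv());
}

int main() {
  Function Kernel{CallingConv::AMDGPU_KERNEL};
  MachineFunction MF{Kernel};
  std::cout << isMesaKernel(Kernel) << '\n';           // IR-level caller
  std::cout << isMesaKernel(MF.getFunction()) << '\n'; // codegen caller
}

Narrowing the parameter also lets such queries be reused from IR-level code, where no MachineFunction exists yet.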

@@ -192,7 +192,7 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
   const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
   amd_kernel_code_t KernelCode;
-  if (STM.isAmdCodeObjectV2(*MF)) {
+  if (STM.isAmdCodeObjectV2(MF->getFunction())) {
     getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF);

     getTargetStreamer()->EmitAMDKernelCodeT(KernelCode);
   }
@@ -208,7 +208,7 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
 void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
   const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
   const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
-  if (MFI->isEntryFunction() && STM.isAmdCodeObjectV2(*MF)) {
+  if (MFI->isEntryFunction() && STM.isAmdCodeObjectV2(MF->getFunction())) {
     SmallString<128> SymbolName;
     getNameWithPrefix(SymbolName, &MF->getFunction()),
     getTargetStreamer()->EmitAMDGPUSymbolType(
@@ -1125,7 +1125,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
   // FIXME: Should use getKernArgSize
   Out.kernarg_segment_byte_size =
-    STM.getKernArgSegmentSize(MF, MFI->getABIArgOffset());
+    STM.getKernArgSegmentSize(MF.getFunction(), MFI->getABIArgOffset());
   Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR;
   Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR;
   Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize;
@@ -1154,7 +1154,7 @@ AMDGPU::HSAMD::Kernel::CodeProps::Metadata AMDGPUAsmPrinter::getHSACodeProps(
   HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
   HSACodeProps.mKernargSegmentSize =
-    STM.getKernArgSegmentSize(MF, MFI.getABIArgOffset());
+    STM.getKernArgSegmentSize(MF.getFunction(), MFI.getABIArgOffset());
   HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
   HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
   HSACodeProps.mKernargSegmentAlign =

@@ -221,7 +221,7 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
     CCValAssign &VA = ArgLocs[i];
     lowerParameter(MIRBuilder, Arg->getType(),
                    VA.getLocMemOffset() +
-                   Subtarget->getExplicitKernelArgOffset(MF), VRegs[i]);
+                   Subtarget->getExplicitKernelArgOffset(F), VRegs[i]);
   }

   return true;

@@ -412,9 +412,9 @@ bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
   return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
 }

-unsigned SISubtarget::getKernArgSegmentSize(const MachineFunction &MF,
+unsigned SISubtarget::getKernArgSegmentSize(const Function &F,
                                             unsigned ExplicitArgBytes) const {
-  unsigned ImplicitBytes = getImplicitArgNumBytes(MF);
+  unsigned ImplicitBytes = getImplicitArgNumBytes(F);
   if (ImplicitBytes == 0)
     return ExplicitArgBytes;

@@ -490,17 +490,17 @@ public:
     return HasUnpackedD16VMem;
   }

-  bool isMesaKernel(const MachineFunction &MF) const {
-    return isMesa3DOS() && !AMDGPU::isShader(MF.getFunction().getCallingConv());
+  bool isMesaKernel(const Function &F) const {
+    return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv());
   }

   // Covers VS/PS/CS graphics shaders
-  bool isMesaGfxShader(const MachineFunction &MF) const {
-    return isMesa3DOS() && AMDGPU::isShader(MF.getFunction().getCallingConv());
+  bool isMesaGfxShader(const Function &F) const {
+    return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
   }

-  bool isAmdCodeObjectV2(const MachineFunction &MF) const {
-    return isAmdHsaOS() || isMesaKernel(MF);
+  bool isAmdCodeObjectV2(const Function &F) const {
+    return isAmdHsaOS() || isMesaKernel(F);
   }

   bool hasMad64_32() const {
@@ -549,8 +549,8 @@ public:
   /// Returns the offset in bytes from the start of the input buffer
   /// of the first explicit kernel argument.
-  unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const {
-    return isAmdCodeObjectV2(MF) ? 0 : 36;
+  unsigned getExplicitKernelArgOffset(const Function &F) const {
+    return isAmdCodeObjectV2(F) ? 0 : 36;
   }

   unsigned getAlignmentForImplicitArgPtr() const {
@@ -559,11 +559,10 @@ public:
   /// \returns Number of bytes of arguments that are passed to a shader or
   /// kernel in addition to the explicit ones declared for the function.
-  unsigned getImplicitArgNumBytes(const MachineFunction &MF) const {
-    if (isMesaKernel(MF))
+  unsigned getImplicitArgNumBytes(const Function &F) const {
+    if (isMesaKernel(F))
       return 16;
-    return AMDGPU::getIntegerAttribute(
-      MF.getFunction(), "amdgpu-implicitarg-num-bytes", 0);
+    return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 0);
   }

   // Scratch is allocated in 256 dword per wave blocks for the entire
@@ -860,7 +859,7 @@ public:
     return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;
   }

-  unsigned getKernArgSegmentSize(const MachineFunction &MF,
+  unsigned getKernArgSegmentSize(const Function &F,
                                  unsigned ExplictArgBytes) const;

   /// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs

@@ -1596,7 +1596,8 @@ SDValue R600TargetLowering::LowerFormalArguments(
     unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
     unsigned PartOffset = VA.getLocMemOffset();

-    unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) + VA.getLocMemOffset();
+    unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF.getFunction()) +
+                      VA.getLocMemOffset();

     MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
     SDValue Arg = DAG.getLoad(

@@ -237,6 +237,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
   const SIInstrInfo *TII = ST.getInstrInfo();
   const SIRegisterInfo *TRI = &TII->getRegisterInfo();
   MachineRegisterInfo &MRI = MF.getRegInfo();
+  const Function &F = MF.getFunction();

   // We need to do the replacement of the private segment buffer and wave offset
   // register even if there are no stack objects. There could be stores to undef
@@ -288,7 +289,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
     AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);

   unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
-  if (ST.isAmdCodeObjectV2(MF)) {
+  if (ST.isAmdCodeObjectV2(F)) {
     PreloadedPrivateBufferReg = MFI->getPreloadedReg(
       AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
   }
@@ -307,7 +308,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
   }

   if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) {
-    assert(ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF));
+    assert(ST.isAmdCodeObjectV2(F) || ST.isMesaGfxShader(F));
     MRI.addLiveIn(PreloadedPrivateBufferReg);
     MBB.addLiveIn(PreloadedPrivateBufferReg);
   }
@@ -332,7 +333,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
   bool CopyBuffer = ResourceRegUsed &&
     PreloadedPrivateBufferReg != AMDGPU::NoRegister &&
-    ST.isAmdCodeObjectV2(MF) &&
+    ST.isAmdCodeObjectV2(F) &&
     ScratchRsrcReg != PreloadedPrivateBufferReg;

   // This needs to be careful of the copying order to avoid overwriting one of
@@ -370,6 +371,7 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const SISubtarget &ST,
   const SIInstrInfo *TII = ST.getInstrInfo();
   const SIRegisterInfo *TRI = &TII->getRegisterInfo();
+  const Function &Fn = MF.getFunction();
   DebugLoc DL;

   if (ST.isAmdPalOS()) {
@@ -420,8 +422,7 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const SISubtarget &ST,
         MachineMemOperand::MOInvariant |
         MachineMemOperand::MODereferenceable,
         0, 0);
-      unsigned Offset
-        = MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
+      unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
       BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
         .addReg(Rsrc01)
         .addImm(Offset) // offset
@@ -430,9 +431,9 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const SISubtarget &ST,
       .addMemOperand(MMO);
     return;
   }

-  if (ST.isMesaGfxShader(MF)
+  if (ST.isMesaGfxShader(Fn)
       || (PreloadedPrivateBufferReg == AMDGPU::NoRegister)) {
-    assert(!ST.isAmdCodeObjectV2(MF));
+    assert(!ST.isAmdCodeObjectV2(Fn));
     const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

     unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);

@@ -1449,7 +1449,7 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM,
   bool RequiresStackAccess = HasStackObjects || MFI.hasCalls();

   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
-  if (ST.isAmdCodeObjectV2(MF)) {
+  if (ST.isAmdCodeObjectV2(MF.getFunction())) {
     if (RequiresStackAccess) {
       // If we have stack objects, we unquestionably need the private buffer
       // resource. For the Code Object V2 ABI, this will be the first 4 user
@@ -1561,12 +1561,12 @@ SDValue SITargetLowering::LowerFormalArguments(
   const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();

   MachineFunction &MF = DAG.getMachineFunction();
+  const Function &Fn = MF.getFunction();
   FunctionType *FType = MF.getFunction().getFunctionType();
   SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();

   if (Subtarget->isAmdHsaOS() && AMDGPU::isShader(CallConv)) {
-    const Function &Fn = MF.getFunction();
     DiagnosticInfoUnsupported NoGraphicsHSA(
       Fn, "unsupported non-compute shaders with HSA", DL.getDebugLoc());
     DAG.getContext()->diagnose(NoGraphicsHSA);
@@ -1677,7 +1677,7 @@ SDValue SITargetLowering::LowerFormalArguments(
         VT = Ins[i].VT;
       EVT MemVT = VA.getLocVT();

-      const uint64_t Offset = Subtarget->getExplicitKernelArgOffset(MF) +
+      const uint64_t Offset = Subtarget->getExplicitKernelArgOffset(Fn) +
         VA.getLocMemOffset();
       Info->setABIArgOffset(Offset + MemVT.getStoreSize());
@@ -1797,7 +1797,7 @@ SDValue SITargetLowering::LowerFormalArguments(
     auto &ArgUsageInfo =
       DAG.getPass()->getAnalysis<AMDGPUArgumentUsageInfo>();
-    ArgUsageInfo.setFuncArgInfo(MF.getFunction(), Info->getArgInfo());
+    ArgUsageInfo.setFuncArgInfo(Fn, Info->getArgInfo());

   unsigned StackArgSize = CCInfo.getNextStackOffset();
   Info->setBytesInStackArgArea(StackArgSize);
@@ -4340,14 +4340,14 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   switch (IntrinsicID) {
   case Intrinsic::amdgcn_implicit_buffer_ptr: {
-    if (getSubtarget()->isAmdCodeObjectV2(MF))
+    if (getSubtarget()->isAmdCodeObjectV2(MF.getFunction()))
       return emitNonHSAIntrinsicError(DAG, DL, VT);
     return getPreloadedValue(DAG, *MFI, VT,
                              AMDGPUFunctionArgInfo::IMPLICIT_BUFFER_PTR);
   }
   case Intrinsic::amdgcn_dispatch_ptr:
   case Intrinsic::amdgcn_queue_ptr: {
-    if (!Subtarget->isAmdCodeObjectV2(MF)) {
+    if (!Subtarget->isAmdCodeObjectV2(MF.getFunction())) {
       DiagnosticInfoUnsupported BadIntrin(
           MF.getFunction(), "unsupported hsa intrinsic without hsa target",
           DL.getDebugLoc());

@@ -134,7 +134,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
     }
   }

-  bool IsCOV2 = ST.isAmdCodeObjectV2(MF);
+  bool IsCOV2 = ST.isAmdCodeObjectV2(F);
   if (IsCOV2) {
     if (HasStackObjects || MaySpill)
       PrivateSegmentBuffer = true;
@@ -147,7 +147,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
     if (F.hasFnAttribute("amdgpu-dispatch-id"))
       DispatchID = true;
-  } else if (ST.isMesaGfxShader(MF)) {
+  } else if (ST.isMesaGfxShader(F)) {
     if (HasStackObjects || MaySpill)
       ImplicitBufferPtr = true;
   }
}