1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-18 10:32:48 +02:00

[AMDGPU] Refactor HSAMetadataStream::emitKernel (NFC)

Move all metadata construction into AMDGPUHSAMetadataStreamer.

Differential Revision: https://reviews.llvm.org/D48176

llvm-svn: 336707
This commit is contained in:
Scott Linder 2018-07-10 17:31:32 +00:00
parent c84207f8fa
commit cf8cea2fa7
5 changed files with 148 additions and 122 deletions

View File

@ -207,9 +207,7 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
return;
HSAMetadataStream.emitKernel(MF->getFunction(),
getHSACodeProps(*MF, CurrentProgramInfo),
getHSADebugProps(*MF, CurrentProgramInfo));
HSAMetadataStream.emitKernel(*MF, CurrentProgramInfo);
}
void AMDGPUAsmPrinter::EmitFunctionBodyEnd() {
@ -1197,57 +1195,6 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
}
}
/// Build the HSA code-properties metadata for the function in \p MF.
///
/// \param MF          Machine function whose properties are being described.
/// \param ProgramInfo Resource usage for \p MF; every program-info derived
///                    field below is read from this argument.
/// \returns Default-constructed metadata when the function does not use the
///          AMDGPU_KERNEL calling convention; otherwise metadata filled in
///          from the subtarget, the machine function info and \p ProgramInfo.
AMDGPU::HSAMD::Kernel::CodeProps::Metadata AMDGPUAsmPrinter::getHSACodeProps(
    const MachineFunction &MF,
    const SIProgramInfo &ProgramInfo) const {
  const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
  HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
  const Function &F = MF.getFunction();

  // Avoid asserting on erroneous cases.
  if (F.getCallingConv() != CallingConv::AMDGPU_KERNEL)
    return HSACodeProps;

  HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F);
  HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
  HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
  // The reported kernarg alignment is never less than 4.
  HSACodeProps.mKernargSegmentAlign =
      std::max(uint32_t(4), MFI.getMaxKernArgAlign());
  HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
  // Take the register counts from the ProgramInfo argument, like every other
  // program-info field here, rather than from the CurrentProgramInfo member;
  // otherwise the result silently ignores part of what the caller passed in.
  HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
  HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
  HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
  HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
  HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
  HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
  HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();

  return HSACodeProps;
}
/// Build the HSA debug-properties metadata for the function in \p MF.
///
/// \param MF          Machine function whose subtarget is queried.
/// \param ProgramInfo Supplies the debugger SGPR numbers.
/// \returns Default-constructed metadata when the subtarget reports no
///          debugger support; otherwise metadata carrying the debugger ABI
///          version and, if a debugger prologue is emitted, the SGPR numbers.
AMDGPU::HSAMD::Kernel::DebugProps::Metadata AMDGPUAsmPrinter::getHSADebugProps(
    const MachineFunction &MF,
    const SIProgramInfo &ProgramInfo) const {
  HSAMD::Kernel::DebugProps::Metadata Props;
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();

  if (ST.debuggerSupported()) {
    // The debugger ABI version is recorded as the two entries 1 and 0.
    Props.mDebuggerABIVersion.push_back(1);
    Props.mDebuggerABIVersion.push_back(0);

    // The SGPR numbers are only copied over when a debugger prologue is
    // emitted.
    if (ST.debuggerEmitPrologue()) {
      Props.mPrivateSegmentBufferSGPR =
          ProgramInfo.DebuggerPrivateSegmentBufferSGPR;
      Props.mWavefrontPrivateSegmentOffsetSGPR =
          ProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
    }
  }

  return Props;
}
bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant,
const char *ExtraCode, raw_ostream &O) {

View File

@ -18,6 +18,7 @@
#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUHSAMetadataStreamer.h"
#include "SIProgramInfo.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
@ -52,60 +53,6 @@ private:
int32_t getTotalNumSGPRs(const SISubtarget &ST) const;
};
// Track resource usage for kernels / entry functions.
//
// Plain data record: every field has an in-class initializer, so a
// default-constructed SIProgramInfo starts with all counters at zero, all
// flags false and both debugger SGPR numbers at the "not used / not known"
// sentinel.
struct SIProgramInfo {
// Fields set in PGM_RSRC1 pm4 packet.
uint32_t VGPRBlocks = 0;
uint32_t SGPRBlocks = 0;
uint32_t Priority = 0;
uint32_t FloatMode = 0;
uint32_t Priv = 0;
uint32_t DX10Clamp = 0;
uint32_t DebugMode = 0;
uint32_t IEEEMode = 0;
// Copied into the HSA code props as mPrivateSegmentFixedSize.
uint64_t ScratchSize = 0;
uint64_t ComputePGMRSrc1 = 0;
// Fields set in PGM_RSRC2 pm4 packet.
uint32_t LDSBlocks = 0;
uint32_t ScratchBlocks = 0;
uint64_t ComputePGMRSrc2 = 0;
// Copied into the HSA code props as mNumVGPRs / mNumSGPRs.
uint32_t NumVGPR = 0;
uint32_t NumSGPR = 0;
// Copied into the HSA code props as mGroupSegmentFixedSize.
uint32_t LDSSize = 0;
bool FlatUsed = false;
// Number of SGPRs that meets number of waves per execution unit request.
uint32_t NumSGPRsForWavesPerEU = 0;
// Number of VGPRs that meets number of waves per execution unit request.
uint32_t NumVGPRsForWavesPerEU = 0;
// Fixed SGPR number used to hold wave scratch offset for entire kernel
// execution, or std::numeric_limits<uint16_t>::max() if the register is not
// used or not known.
uint16_t DebuggerWavefrontPrivateSegmentOffsetSGPR =
std::numeric_limits<uint16_t>::max();
// Fixed SGPR number of the first 4 SGPRs used to hold scratch V# for entire
// kernel execution, or std::numeric_limits<uint16_t>::max() if the register
// is not used or not known.
uint16_t DebuggerPrivateSegmentBufferSGPR =
std::numeric_limits<uint16_t>::max();
// Whether there is recursion, dynamic allocas, indirect calls or some other
// reason there may be statically unknown stack usage.
// Copied into the HSA code props as mIsDynamicCallStack.
bool DynamicCallStack = false;
// Bonus information for debugging.
bool VCCUsed = false;
// Defaulted: the in-class initializers above supply every initial value.
SIProgramInfo() = default;
};
SIProgramInfo CurrentProgramInfo;
DenseMap<const Function *, SIFunctionResourceInfo> CallGraphResourceInfo;
@ -123,13 +70,6 @@ private:
unsigned &NumSGPR,
unsigned &NumVGPR) const;
/// Build the HSA code-properties metadata for \p MF from its subtarget, its
/// machine function info and \p ProgramInfo. Returns default-constructed
/// metadata when the function is not an AMDGPU_KERNEL.
AMDGPU::HSAMD::Kernel::CodeProps::Metadata getHSACodeProps(
const MachineFunction &MF,
const SIProgramInfo &ProgramInfo) const;
/// Build the HSA debug-properties metadata for \p MF. Returns
/// default-constructed metadata when the subtarget reports no debugger
/// support; the debugger SGPR numbers come from \p ProgramInfo.
AMDGPU::HSAMD::Kernel::DebugProps::Metadata getHSADebugProps(
const MachineFunction &MF,
const SIProgramInfo &ProgramInfo) const;
/// Emit register usage information so that the GPU driver
/// can correctly setup the GPU state.
void EmitProgramInfoSI(const MachineFunction &MF,

View File

@ -14,6 +14,10 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUHSAMetadataStreamer.h"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "SIProgramInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Constants.h"
@ -196,6 +200,57 @@ std::vector<uint32_t> MetadataStreamer::getWorkGroupDimensions(
return Dims;
}
/// Collect the HSA code properties of the function in \p MF.
///
/// \param MF          Machine function being described.
/// \param ProgramInfo Resource usage for \p MF.
/// \returns Default-constructed metadata for anything that is not an
///          AMDGPU_KERNEL; otherwise metadata populated from the subtarget,
///          the machine function info and \p ProgramInfo.
Kernel::CodeProps::Metadata MetadataStreamer::getHSACodeProps(
    const MachineFunction &MF,
    const SIProgramInfo &ProgramInfo) const {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const SIMachineFunctionInfo &FuncInfo =
      *MF.getInfo<SIMachineFunctionInfo>();
  HSAMD::Kernel::CodeProps::Metadata Props;
  const Function &Fn = MF.getFunction();

  // Non-kernel functions get empty properties instead of tripping asserts.
  const bool IsKernel = Fn.getCallingConv() == CallingConv::AMDGPU_KERNEL;
  if (!IsKernel)
    return Props;

  // Segment sizes and alignment; the kernarg alignment is at least 4.
  const uint32_t MinKernargAlign = 4;
  Props.mKernargSegmentSize = ST.getKernArgSegmentSize(Fn);
  Props.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
  Props.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
  Props.mKernargSegmentAlign =
      std::max(MinKernargAlign, FuncInfo.getMaxKernArgAlign());

  // Wavefront size and register usage.
  Props.mWavefrontSize = ST.getWavefrontSize();
  Props.mNumSGPRs = ProgramInfo.NumSGPR;
  Props.mNumVGPRs = ProgramInfo.NumVGPR;

  // Remaining limits, flags and spill counts.
  Props.mMaxFlatWorkGroupSize = FuncInfo.getMaxFlatWorkGroupSize();
  Props.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
  Props.mIsXNACKEnabled = ST.isXNACKEnabled();
  Props.mNumSpilledSGPRs = FuncInfo.getNumSpilledSGPRs();
  Props.mNumSpilledVGPRs = FuncInfo.getNumSpilledVGPRs();

  return Props;
}
/// Collect the HSA debug properties of the function in \p MF.
///
/// \param MF          Machine function whose subtarget is queried.
/// \param ProgramInfo Supplies the debugger SGPR numbers.
/// \returns Default-constructed metadata when the subtarget reports no
///          debugger support; otherwise the debugger ABI version plus, when
///          a debugger prologue is emitted, the two debugger SGPR numbers.
Kernel::DebugProps::Metadata MetadataStreamer::getHSADebugProps(
    const MachineFunction &MF,
    const SIProgramInfo &ProgramInfo) const {
  HSAMD::Kernel::DebugProps::Metadata Props;
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();

  if (ST.debuggerSupported()) {
    // The ABI version is stored as two consecutive entries: 1, then 0.
    auto &ABIVersion = Props.mDebuggerABIVersion;
    ABIVersion.push_back(1);
    ABIVersion.push_back(0);

    // SGPR numbers are forwarded only when a debugger prologue is emitted.
    if (ST.debuggerEmitPrologue()) {
      Props.mPrivateSegmentBufferSGPR =
          ProgramInfo.DebuggerPrivateSegmentBufferSGPR;
      Props.mWavefrontPrivateSegmentOffsetSGPR =
          ProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
    }
  }

  return Props;
}
void MetadataStreamer::emitVersion() {
auto &Version = HSAMetadata.mVersion;
@ -408,10 +463,11 @@ void MetadataStreamer::end() {
verify(HSAMetadataString);
}
void MetadataStreamer::emitKernel(
const Function &Func,
const Kernel::CodeProps::Metadata &CodeProps,
const Kernel::DebugProps::Metadata &DebugProps) {
void MetadataStreamer::emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) {
auto &Func = MF.getFunction();
auto CodeProps = getHSACodeProps(MF, ProgramInfo);
auto DebugProps = getHSADebugProps(MF, ProgramInfo);
if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
return;

View File

@ -28,6 +28,7 @@ class DataLayout;
class Function;
class MDNode;
class Module;
// SIProgramInfo is defined as a struct; forward-declare it with the same
// class-key so the declaration and the definition agree.
struct SIProgramInfo;
class Type;
namespace AMDGPU {
@ -55,6 +56,13 @@ private:
std::vector<uint32_t> getWorkGroupDimensions(MDNode *Node) const;
/// Build the code-properties metadata for \p MF from its subtarget, its
/// machine function info and \p ProgramInfo. Returns default-constructed
/// metadata when the function is not an AMDGPU_KERNEL.
Kernel::CodeProps::Metadata getHSACodeProps(
const MachineFunction &MF,
const SIProgramInfo &ProgramInfo) const;
/// Build the debug-properties metadata for \p MF. Returns
/// default-constructed metadata when the subtarget reports no debugger
/// support; the debugger SGPR numbers come from \p ProgramInfo.
Kernel::DebugProps::Metadata getHSADebugProps(
const MachineFunction &MF,
const SIProgramInfo &ProgramInfo) const;
void emitVersion();
void emitPrintf(const Module &Mod);
@ -87,9 +95,7 @@ public:
void end();
/// Kernel-metadata entry point that takes already-built \p CodeProps and
/// \p DebugProps for \p Func.
void emitKernel(const Function &Func,
const Kernel::CodeProps::Metadata &CodeProps,
const Kernel::DebugProps::Metadata &DebugProps);
/// Kernel-metadata entry point that derives the code and debug properties
/// from \p MF and \p ProgramInfo itself (via getHSACodeProps and
/// getHSADebugProps), then returns early unless the function uses the
/// AMDGPU_KERNEL calling convention.
void emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo);
};
} // end namespace HSAMD

View File

@ -0,0 +1,77 @@
//===--- SIProgramInfo.h ----------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Defines struct to track resource usage for kernels and entry functions.
///
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_AMDGPU_SIPROGRAMINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIPROGRAMINFO_H

// Keep this header self-contained: the struct below uses the fixed-width
// integer typedefs and std::numeric_limits directly, so it must not rely on
// whatever the including file happened to pull in first.
#include <cstdint>
#include <limits>

namespace llvm {

/// Track resource usage for kernels / entry functions.
///
/// Plain data record: every field has an in-class initializer, so a
/// default-constructed SIProgramInfo starts with all counters at zero, all
/// flags false and both debugger SGPR numbers at the "not used / not known"
/// sentinel.
struct SIProgramInfo {
  // Fields set in PGM_RSRC1 pm4 packet.
  uint32_t VGPRBlocks = 0;
  uint32_t SGPRBlocks = 0;
  uint32_t Priority = 0;
  uint32_t FloatMode = 0;
  uint32_t Priv = 0;
  uint32_t DX10Clamp = 0;
  uint32_t DebugMode = 0;
  uint32_t IEEEMode = 0;
  uint64_t ScratchSize = 0;

  uint64_t ComputePGMRSrc1 = 0;

  // Fields set in PGM_RSRC2 pm4 packet.
  uint32_t LDSBlocks = 0;
  uint32_t ScratchBlocks = 0;

  uint64_t ComputePGMRSrc2 = 0;

  uint32_t NumVGPR = 0;
  uint32_t NumSGPR = 0;
  uint32_t LDSSize = 0;
  bool FlatUsed = false;

  // Number of SGPRs that meets number of waves per execution unit request.
  uint32_t NumSGPRsForWavesPerEU = 0;

  // Number of VGPRs that meets number of waves per execution unit request.
  uint32_t NumVGPRsForWavesPerEU = 0;

  // Fixed SGPR number used to hold wave scratch offset for entire kernel
  // execution, or std::numeric_limits<uint16_t>::max() if the register is not
  // used or not known.
  uint16_t DebuggerWavefrontPrivateSegmentOffsetSGPR =
      std::numeric_limits<uint16_t>::max();

  // Fixed SGPR number of the first 4 SGPRs used to hold scratch V# for entire
  // kernel execution, or std::numeric_limits<uint16_t>::max() if the register
  // is not used or not known.
  uint16_t DebuggerPrivateSegmentBufferSGPR =
      std::numeric_limits<uint16_t>::max();

  // Whether there is recursion, dynamic allocas, indirect calls or some other
  // reason there may be statically unknown stack usage.
  bool DynamicCallStack = false;

  // Bonus information for debugging.
  bool VCCUsed = false;

  SIProgramInfo() = default;
};

} // namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_SIPROGRAMINFO_H