[AMDGPU] Refactor HSAMetadataStream::emitKernel (NFC)

Move all metadata construction into AMDGPUHSAMetadataStreamer. Differential Revision: https://reviews.llvm.org/D48176 llvm-svn: 336707
2024-10-18 10:32:48 +02:00 · 2018-07-10 17:31:32 +00:00 · 2018-07-10 17:31:32 +00:00 · cf8cea2fa7
commit cf8cea2fa7
parent c84207f8fa
5 changed files with 148 additions and 122 deletions
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@ -207,9 +207,7 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
  if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
    return;

-  HSAMetadataStream.emitKernel(MF->getFunction(),
-                               getHSACodeProps(*MF, CurrentProgramInfo),
-                               getHSADebugProps(*MF, CurrentProgramInfo));
+  HSAMetadataStream.emitKernel(*MF, CurrentProgramInfo);
 }

 void AMDGPUAsmPrinter::EmitFunctionBodyEnd() {
@ -1197,57 +1195,6 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
  }
 }

-AMDGPU::HSAMD::Kernel::CodeProps::Metadata AMDGPUAsmPrinter::getHSACodeProps(
-    const MachineFunction &MF,
-    const SIProgramInfo &ProgramInfo) const {
-  const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
-  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
-  HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
-  const Function &F = MF.getFunction();
-
-  // Avoid asserting on erroneous cases.
-  if (F.getCallingConv() != CallingConv::AMDGPU_KERNEL)
-    return HSACodeProps;
-
-  HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F);
-  HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
-  HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
-  HSACodeProps.mKernargSegmentAlign =
-      std::max(uint32_t(4), MFI.getMaxKernArgAlign());
-  HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
-  HSACodeProps.mNumSGPRs = CurrentProgramInfo.NumSGPR;
-  HSACodeProps.mNumVGPRs = CurrentProgramInfo.NumVGPR;
-  HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
-  HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
-  HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
-  HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
-  HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
-
-  return HSACodeProps;
-}
-
-AMDGPU::HSAMD::Kernel::DebugProps::Metadata AMDGPUAsmPrinter::getHSADebugProps(
-    const MachineFunction &MF,
-    const SIProgramInfo &ProgramInfo) const {
-  const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
-  HSAMD::Kernel::DebugProps::Metadata HSADebugProps;
-
-  if (!STM.debuggerSupported())
-    return HSADebugProps;
-
-  HSADebugProps.mDebuggerABIVersion.push_back(1);
-  HSADebugProps.mDebuggerABIVersion.push_back(0);
-
-  if (STM.debuggerEmitPrologue()) {
-    HSADebugProps.mPrivateSegmentBufferSGPR =
-        ProgramInfo.DebuggerPrivateSegmentBufferSGPR;
-    HSADebugProps.mWavefrontPrivateSegmentOffsetSGPR =
-        ProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
-  }
-
-  return HSADebugProps;
-}
-
 bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
                                       unsigned AsmVariant,
                                       const char *ExtraCode, raw_ostream &O) {
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@ -18,6 +18,7 @@
 #include "AMDGPU.h"
 #include "AMDKernelCodeT.h"
 #include "MCTargetDesc/AMDGPUHSAMetadataStreamer.h"
+#include "SIProgramInfo.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/Support/AMDHSAKernelDescriptor.h"
@ -52,60 +53,6 @@ private:
    int32_t getTotalNumSGPRs(const SISubtarget &ST) const;
  };

-  // Track resource usage for kernels / entry functions.
-  struct SIProgramInfo {
-    // Fields set in PGM_RSRC1 pm4 packet.
-    uint32_t VGPRBlocks = 0;
-    uint32_t SGPRBlocks = 0;
-    uint32_t Priority = 0;
-    uint32_t FloatMode = 0;
-    uint32_t Priv = 0;
-    uint32_t DX10Clamp = 0;
-    uint32_t DebugMode = 0;
-    uint32_t IEEEMode = 0;
-    uint64_t ScratchSize = 0;
-
-    uint64_t ComputePGMRSrc1 = 0;
-
-    // Fields set in PGM_RSRC2 pm4 packet.
-    uint32_t LDSBlocks = 0;
-    uint32_t ScratchBlocks = 0;
-
-    uint64_t ComputePGMRSrc2 = 0;
-
-    uint32_t NumVGPR = 0;
-    uint32_t NumSGPR = 0;
-    uint32_t LDSSize = 0;
-    bool FlatUsed = false;
-
-    // Number of SGPRs that meets number of waves per execution unit request.
-    uint32_t NumSGPRsForWavesPerEU = 0;
-
-    // Number of VGPRs that meets number of waves per execution unit request.
-    uint32_t NumVGPRsForWavesPerEU = 0;
-
-    // Fixed SGPR number used to hold wave scratch offset for entire kernel
-    // execution, or std::numeric_limits<uint16_t>::max() if the register is not
-    // used or not known.
-    uint16_t DebuggerWavefrontPrivateSegmentOffsetSGPR =
-        std::numeric_limits<uint16_t>::max();
-
-    // Fixed SGPR number of the first 4 SGPRs used to hold scratch V# for entire
-    // kernel execution, or std::numeric_limits<uint16_t>::max() if the register
-    // is not used or not known.
-    uint16_t DebuggerPrivateSegmentBufferSGPR =
-        std::numeric_limits<uint16_t>::max();
-
-    // Whether there is recursion, dynamic allocas, indirect calls or some other
-    // reason there may be statically unknown stack usage.
-    bool DynamicCallStack = false;
-
-    // Bonus information for debugging.
-    bool VCCUsed = false;
-
-    SIProgramInfo() = default;
-  };
-
  SIProgramInfo CurrentProgramInfo;
  DenseMap<const Function *, SIFunctionResourceInfo> CallGraphResourceInfo;

@ -123,13 +70,6 @@ private:
                              unsigned &NumSGPR,
                              unsigned &NumVGPR) const;

-  AMDGPU::HSAMD::Kernel::CodeProps::Metadata getHSACodeProps(
-      const MachineFunction &MF,
-      const SIProgramInfo &ProgramInfo) const;
-  AMDGPU::HSAMD::Kernel::DebugProps::Metadata getHSADebugProps(
-      const MachineFunction &MF,
-      const SIProgramInfo &ProgramInfo) const;
-
  /// Emit register usage information so that the GPU driver
  /// can correctly setup the GPU state.
  void EmitProgramInfoSI(const MachineFunction &MF,
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp
@ -14,6 +14,10 @@
 //===----------------------------------------------------------------------===//

 #include "AMDGPUHSAMetadataStreamer.h"
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIMachineFunctionInfo.h"
+#include "SIProgramInfo.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/IR/Constants.h"
@ -196,6 +200,57 @@ std::vector<uint32_t> MetadataStreamer::getWorkGroupDimensions(
  return Dims;
 }

+Kernel::CodeProps::Metadata MetadataStreamer::getHSACodeProps(
+    const MachineFunction &MF,
+    const SIProgramInfo &ProgramInfo) const {
+  const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
+  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+  HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
+  const Function &F = MF.getFunction();
+
+  // Avoid asserting on erroneous cases.
+  if (F.getCallingConv() != CallingConv::AMDGPU_KERNEL)
+    return HSACodeProps;
+
+  HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F);
+  HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
+  HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
+  HSACodeProps.mKernargSegmentAlign =
+      std::max(uint32_t(4), MFI.getMaxKernArgAlign());
+  HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
+  HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
+  HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
+  HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
+  HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
+  HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
+  HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
+  HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
+
+  return HSACodeProps;
+}
+
+Kernel::DebugProps::Metadata MetadataStreamer::getHSADebugProps(
+    const MachineFunction &MF,
+    const SIProgramInfo &ProgramInfo) const {
+  const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
+  HSAMD::Kernel::DebugProps::Metadata HSADebugProps;
+
+  if (!STM.debuggerSupported())
+    return HSADebugProps;
+
+  HSADebugProps.mDebuggerABIVersion.push_back(1);
+  HSADebugProps.mDebuggerABIVersion.push_back(0);
+
+  if (STM.debuggerEmitPrologue()) {
+    HSADebugProps.mPrivateSegmentBufferSGPR =
+        ProgramInfo.DebuggerPrivateSegmentBufferSGPR;
+    HSADebugProps.mWavefrontPrivateSegmentOffsetSGPR =
+        ProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
+  }
+
+  return HSADebugProps;
+}
+
 void MetadataStreamer::emitVersion() {
  auto &Version = HSAMetadata.mVersion;

@ -408,10 +463,11 @@ void MetadataStreamer::end() {
    verify(HSAMetadataString);
 }

-void MetadataStreamer::emitKernel(
-    const Function &Func,
-    const Kernel::CodeProps::Metadata &CodeProps,
-    const Kernel::DebugProps::Metadata &DebugProps) {
+void MetadataStreamer::emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) {
+  auto &Func = MF.getFunction();
+  auto CodeProps = getHSACodeProps(MF, ProgramInfo);
+  auto DebugProps = getHSADebugProps(MF, ProgramInfo);
+
  if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
    return;

--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.h
@ -28,6 +28,7 @@ class DataLayout;
 class Function;
 class MDNode;
 class Module;
+class SIProgramInfo;
 class Type;

 namespace AMDGPU {
@ -55,6 +56,13 @@ private:

  std::vector<uint32_t> getWorkGroupDimensions(MDNode *Node) const;

+  Kernel::CodeProps::Metadata getHSACodeProps(
+      const MachineFunction &MF,
+      const SIProgramInfo &ProgramInfo) const;
+  Kernel::DebugProps::Metadata getHSADebugProps(
+      const MachineFunction &MF,
+      const SIProgramInfo &ProgramInfo) const;
+
  void emitVersion();

  void emitPrintf(const Module &Mod);
@ -87,9 +95,7 @@ public:

  void end();

-  void emitKernel(const Function &Func,
-                  const Kernel::CodeProps::Metadata &CodeProps,
-                  const Kernel::DebugProps::Metadata &DebugProps);
+  void emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo);
 };

 } // end namespace HSAMD
--- a/lib/Target/AMDGPU/SIProgramInfo.h
+++ b/lib/Target/AMDGPU/SIProgramInfo.h
@ -0,0 +1,77 @@
+//===--- SIProgramInfo.h ----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Defines struct to track resource usage for kernels and entry functions.
+///
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_SIPROGRAMINFO_H
+#define LLVM_LIB_TARGET_AMDGPU_SIPROGRAMINFO_H
+
+namespace llvm {
+
+/// Track resource usage for kernels / entry functions.
+struct SIProgramInfo {
+    // Fields set in PGM_RSRC1 pm4 packet.
+    uint32_t VGPRBlocks = 0;
+    uint32_t SGPRBlocks = 0;
+    uint32_t Priority = 0;
+    uint32_t FloatMode = 0;
+    uint32_t Priv = 0;
+    uint32_t DX10Clamp = 0;
+    uint32_t DebugMode = 0;
+    uint32_t IEEEMode = 0;
+    uint64_t ScratchSize = 0;
+
+    uint64_t ComputePGMRSrc1 = 0;
+
+    // Fields set in PGM_RSRC2 pm4 packet.
+    uint32_t LDSBlocks = 0;
+    uint32_t ScratchBlocks = 0;
+
+    uint64_t ComputePGMRSrc2 = 0;
+
+    uint32_t NumVGPR = 0;
+    uint32_t NumSGPR = 0;
+    uint32_t LDSSize = 0;
+    bool FlatUsed = false;
+
+    // Number of SGPRs that meets number of waves per execution unit request.
+    uint32_t NumSGPRsForWavesPerEU = 0;
+
+    // Number of VGPRs that meets number of waves per execution unit request.
+    uint32_t NumVGPRsForWavesPerEU = 0;
+
+    // Fixed SGPR number used to hold wave scratch offset for entire kernel
+    // execution, or std::numeric_limits<uint16_t>::max() if the register is not
+    // used or not known.
+    uint16_t DebuggerWavefrontPrivateSegmentOffsetSGPR =
+        std::numeric_limits<uint16_t>::max();
+
+    // Fixed SGPR number of the first 4 SGPRs used to hold scratch V# for entire
+    // kernel execution, or std::numeric_limits<uint16_t>::max() if the register
+    // is not used or not known.
+    uint16_t DebuggerPrivateSegmentBufferSGPR =
+        std::numeric_limits<uint16_t>::max();
+
+    // Whether there is recursion, dynamic allocas, indirect calls or some other
+    // reason there may be statically unknown stack usage.
+    bool DynamicCallStack = false;
+
+    // Bonus information for debugging.
+    bool VCCUsed = false;
+
+    SIProgramInfo() = default;
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_SIPROGRAMINFO_H