mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-18 10:32:48 +02:00
[AMDGPU] Set optional PAL metadata
Set informational fields in the .shader_functions table. Also correct the documentation: .scratch_memory_size and .lds_size are integers. Differential Revision: https://reviews.llvm.org/D105116
This commit is contained in:
parent
562db8dad2
commit
c9afbee37c
@ -11251,10 +11251,8 @@ within a map that has been added by the same *vendor-name*.
|
|||||||
".api_shader_hash" sequence of Input shader hash, typically passed in from the client. The value
|
".api_shader_hash" sequence of Input shader hash, typically passed in from the client. The value
|
||||||
2 integers is implementation defined, and can not be relied on between
|
2 integers is implementation defined, and can not be relied on between
|
||||||
different builds of the compiler.
|
different builds of the compiler.
|
||||||
".scratch_memory_size" sequence of Size in bytes of scratch memory used by the shader.
|
".scratch_memory_size" integer Size in bytes of scratch memory used by the shader.
|
||||||
2 integers
|
".lds_size" integer Size in bytes of LDS memory.
|
||||||
".lds_size" sequence of Size in bytes of LDS memory.
|
|
||||||
2 integers
|
|
||||||
".vgpr_count" integer Number of VGPRs used by the shader.
|
".vgpr_count" integer Number of VGPRs used by the shader.
|
||||||
".sgpr_count" integer Number of SGPRs used by the shader.
|
".sgpr_count" integer Number of SGPRs used by the shader.
|
||||||
".stack_frame_size_in_bytes" integer Amount of stack size used by the shader.
|
".stack_frame_size_in_bytes" integer Amount of stack size used by the shader.
|
||||||
|
@ -1395,10 +1395,16 @@ void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
|
|||||||
auto *MD = getTargetStreamer()->getPALMetadata();
|
auto *MD = getTargetStreamer()->getPALMetadata();
|
||||||
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||||
MD->setFunctionScratchSize(MF, MFI.getStackSize());
|
MD->setFunctionScratchSize(MF, MFI.getStackSize());
|
||||||
|
|
||||||
// Set compute registers
|
// Set compute registers
|
||||||
MD->setRsrc1(CallingConv::AMDGPU_CS,
|
MD->setRsrc1(CallingConv::AMDGPU_CS,
|
||||||
CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS));
|
CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS));
|
||||||
MD->setRsrc2(CallingConv::AMDGPU_CS, CurrentProgramInfo.ComputePGMRSrc2);
|
MD->setRsrc2(CallingConv::AMDGPU_CS, CurrentProgramInfo.ComputePGMRSrc2);
|
||||||
|
|
||||||
|
// Set optional info
|
||||||
|
MD->setFunctionLdsSize(MF, CurrentProgramInfo.LDSSize);
|
||||||
|
MD->setFunctionNumUsedVgprs(MF, CurrentProgramInfo.NumVGPRsForWavesPerEU);
|
||||||
|
MD->setFunctionNumUsedSgprs(MF, CurrentProgramInfo.NumSGPRsForWavesPerEU);
|
||||||
}
|
}
|
||||||
|
|
||||||
// This is supposed to be log2(Size)
|
// This is supposed to be log2(Size)
|
||||||
|
@ -243,6 +243,27 @@ void AMDGPUPALMetadata::setFunctionScratchSize(const MachineFunction &MF,
|
|||||||
Node[".stack_frame_size_in_bytes"] = MsgPackDoc.getNode(Val);
|
Node[".stack_frame_size_in_bytes"] = MsgPackDoc.getNode(Val);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Set the amount of LDS used in bytes in the metadata.
|
||||||
|
void AMDGPUPALMetadata::setFunctionLdsSize(const MachineFunction &MF,
|
||||||
|
unsigned Val) {
|
||||||
|
auto Node = getShaderFunction(MF.getFunction().getName());
|
||||||
|
Node[".lds_size"] = MsgPackDoc.getNode(Val);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set the number of used vgprs in the metadata.
|
||||||
|
void AMDGPUPALMetadata::setFunctionNumUsedVgprs(const MachineFunction &MF,
|
||||||
|
unsigned Val) {
|
||||||
|
auto Node = getShaderFunction(MF.getFunction().getName());
|
||||||
|
Node[".vgpr_count"] = MsgPackDoc.getNode(Val);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set the number of used sgprs in the metadata.
|
||||||
|
void AMDGPUPALMetadata::setFunctionNumUsedSgprs(const MachineFunction &MF,
|
||||||
|
unsigned Val) {
|
||||||
|
auto Node = getShaderFunction(MF.getFunction().getName());
|
||||||
|
Node[".sgpr_count"] = MsgPackDoc.getNode(Val);
|
||||||
|
}
|
||||||
|
|
||||||
// Set the hardware register bit in PAL metadata to enable wave32 on the
|
// Set the hardware register bit in PAL metadata to enable wave32 on the
|
||||||
// shader of the given calling convention.
|
// shader of the given calling convention.
|
||||||
void AMDGPUPALMetadata::setWave32(unsigned CC) {
|
void AMDGPUPALMetadata::setWave32(unsigned CC) {
|
||||||
|
@ -80,6 +80,21 @@ public:
|
|||||||
// Set the stack frame size of a function in the metadata.
|
// Set the stack frame size of a function in the metadata.
|
||||||
void setFunctionScratchSize(const MachineFunction &MF, unsigned Val);
|
void setFunctionScratchSize(const MachineFunction &MF, unsigned Val);
|
||||||
|
|
||||||
|
// Set the amount of LDS used in bytes in the metadata. This is an optional
|
||||||
|
// advisory record for logging etc; wave dispatch actually uses the rsrc1
|
||||||
|
// register for the shader stage to determine the amount of LDS to allocate.
|
||||||
|
void setFunctionLdsSize(const MachineFunction &MF, unsigned Val);
|
||||||
|
|
||||||
|
// Set the number of used vgprs in the metadata. This is an optional advisory
|
||||||
|
// record for logging etc; wave dispatch actually uses the rsrc1 register for
|
||||||
|
// the shader stage to determine the number of vgprs to allocate.
|
||||||
|
void setFunctionNumUsedVgprs(const MachineFunction &MF, unsigned Val);
|
||||||
|
|
||||||
|
// Set the number of used sgprs in the metadata. This is an optional advisory
|
||||||
|
// record for logging etc; wave dispatch actually uses the rsrc1 register for
|
||||||
|
// the shader stage to determine the number of sgprs to allocate.
|
||||||
|
void setFunctionNumUsedSgprs(const MachineFunction &MF, unsigned Val);
|
||||||
|
|
||||||
// Set the hardware register bit in PAL metadata to enable wave32 on the
|
// Set the hardware register bit in PAL metadata to enable wave32 on the
|
||||||
// shader of the given calling convention.
|
// shader of the given calling convention.
|
||||||
void setWave32(unsigned CC);
|
void setWave32(unsigned CC);
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
|
; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX8 -enable-var-scope %s
|
||||||
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
|
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX9 -enable-var-scope %s
|
||||||
; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL -enable-var-scope %s
|
; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL,GFX9 -enable-var-scope %s
|
||||||
|
|
||||||
; Make sure this interacts well with -amdgpu-fixed-function-abi
|
; Make sure this interacts well with -amdgpu-fixed-function-abi
|
||||||
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -amdgpu-fixed-function-abi -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
|
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -amdgpu-fixed-function-abi -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX9 -enable-var-scope %s
|
||||||
|
|
||||||
declare float @extern_func(float) #0
|
declare float @extern_func(float) #0
|
||||||
declare float @extern_func_many_args(<64 x float>) #0
|
declare float @extern_func_many_args(<64 x float>) #0
|
||||||
@ -147,40 +147,92 @@ attributes #0 = { nounwind }
|
|||||||
|
|
||||||
; GCN: amdpal.pipelines:
|
; GCN: amdpal.pipelines:
|
||||||
; GCN-NEXT: - .registers:
|
; GCN-NEXT: - .registers:
|
||||||
; SDAG-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf03cf{{$}}
|
; GCN-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf03cf{{$}}
|
||||||
; SDAG-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
|
; GCN-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
|
||||||
; GISEL-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf03cf{{$}}
|
|
||||||
; GISEL-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
|
|
||||||
; GCN-NEXT: .shader_functions:
|
; GCN-NEXT: .shader_functions:
|
||||||
; GCN-NEXT: dynamic_stack:
|
; GCN-NEXT: dynamic_stack:
|
||||||
|
; GCN-NEXT: .lds_size: 0{{$}}
|
||||||
|
; GCN-NEXT: .sgpr_count: 0x24{{$}}
|
||||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
|
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
|
||||||
|
; SDAG-NEXT: .vgpr_count: 0x2{{$}}
|
||||||
|
; GISEL-NEXT: .vgpr_count: 0x3{{$}}
|
||||||
; GCN-NEXT: dynamic_stack_loop:
|
; GCN-NEXT: dynamic_stack_loop:
|
||||||
|
; GCN-NEXT: .lds_size: 0{{$}}
|
||||||
|
; SDAG-NEXT: .sgpr_count: 0x22{{$}}
|
||||||
|
; GISEL-NEXT: .sgpr_count: 0x24{{$}}
|
||||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
|
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
|
||||||
|
; SDAG-NEXT: .vgpr_count: 0x3{{$}}
|
||||||
|
; GISEL-NEXT: .vgpr_count: 0x4{{$}}
|
||||||
; GCN-NEXT: multiple_stack:
|
; GCN-NEXT: multiple_stack:
|
||||||
|
; GCN-NEXT: .lds_size: 0{{$}}
|
||||||
|
; GCN-NEXT: .sgpr_count: 0x21{{$}}
|
||||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0x24{{$}}
|
; GCN-NEXT: .stack_frame_size_in_bytes: 0x24{{$}}
|
||||||
|
; GCN-NEXT: .vgpr_count: 0x3{{$}}
|
||||||
; GCN-NEXT: no_stack:
|
; GCN-NEXT: no_stack:
|
||||||
|
; GCN-NEXT: .lds_size: 0{{$}}
|
||||||
|
; GCN-NEXT: .sgpr_count: 0x20{{$}}
|
||||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
|
; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
|
||||||
|
; GCN-NEXT: .vgpr_count: 0x1{{$}}
|
||||||
; GCN-NEXT: no_stack_call:
|
; GCN-NEXT: no_stack_call:
|
||||||
|
; GCN-NEXT: .lds_size: 0{{$}}
|
||||||
|
; GCN-NEXT: .sgpr_count: 0x20{{$}}
|
||||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
|
; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
|
||||||
|
; GCN-NEXT: .vgpr_count: 0x1{{$}}
|
||||||
; GCN-NEXT: no_stack_extern_call:
|
; GCN-NEXT: no_stack_extern_call:
|
||||||
|
; GCN-NEXT: .lds_size: 0{{$}}
|
||||||
|
; GFX8-NEXT: .sgpr_count: 0x68{{$}}
|
||||||
|
; GFX9-NEXT: .sgpr_count: 0x66{{$}}
|
||||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
|
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
|
||||||
|
; GCN-NEXT: .vgpr_count: 0x40{{$}}
|
||||||
; GCN-NEXT: no_stack_extern_call_many_args:
|
; GCN-NEXT: no_stack_extern_call_many_args:
|
||||||
; SDAG-NEXT: .stack_frame_size_in_bytes: 0x90{{$}}
|
; GCN-NEXT: .lds_size: 0{{$}}
|
||||||
; GISEL-NEXT: .stack_frame_size_in_bytes: 0x90{{$}}
|
; GFX8-NEXT: .sgpr_count: 0x68{{$}}
|
||||||
|
; GFX9-NEXT: .sgpr_count: 0x66{{$}}
|
||||||
|
; GCN-NEXT: .stack_frame_size_in_bytes: 0x90{{$}}
|
||||||
|
; GCN-NEXT: .vgpr_count: 0x40{{$}}
|
||||||
; GCN-NEXT: no_stack_indirect_call:
|
; GCN-NEXT: no_stack_indirect_call:
|
||||||
|
; GCN-NEXT: .lds_size: 0{{$}}
|
||||||
|
; GFX8-NEXT: .sgpr_count: 0x68{{$}}
|
||||||
|
; GFX9-NEXT: .sgpr_count: 0x66{{$}}
|
||||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
|
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
|
||||||
|
; GCN-NEXT: .vgpr_count: 0x40{{$}}
|
||||||
; GCN-NEXT: simple_lds:
|
; GCN-NEXT: simple_lds:
|
||||||
|
; GCN-NEXT: .lds_size: 0x100{{$}}
|
||||||
|
; GCN-NEXT: .sgpr_count: 0x20{{$}}
|
||||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
|
; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
|
||||||
|
; GCN-NEXT: .vgpr_count: 0x1{{$}}
|
||||||
; GCN-NEXT: simple_lds_recurse:
|
; GCN-NEXT: simple_lds_recurse:
|
||||||
|
; GCN-NEXT: .lds_size: 0x100{{$}}
|
||||||
|
; GFX8-NEXT: .sgpr_count: 0x68{{$}}
|
||||||
|
; GFX9-NEXT: .sgpr_count: 0x66{{$}}
|
||||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
|
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
|
||||||
|
; GCN-NEXT: .vgpr_count: 0x40{{$}}
|
||||||
; GCN-NEXT: simple_stack:
|
; GCN-NEXT: simple_stack:
|
||||||
|
; GCN-NEXT: .lds_size: 0{{$}}
|
||||||
|
; GCN-NEXT: .sgpr_count: 0x21{{$}}
|
||||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0x14{{$}}
|
; GCN-NEXT: .stack_frame_size_in_bytes: 0x14{{$}}
|
||||||
|
; GCN-NEXT: .vgpr_count: 0x2{{$}}
|
||||||
; GCN-NEXT: simple_stack_call:
|
; GCN-NEXT: simple_stack_call:
|
||||||
|
; GCN-NEXT: .lds_size: 0{{$}}
|
||||||
|
; GCN-NEXT: .sgpr_count: 0x22{{$}}
|
||||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
|
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
|
||||||
|
; GCN-NEXT: .vgpr_count: 0x3{{$}}
|
||||||
; GCN-NEXT: simple_stack_extern_call:
|
; GCN-NEXT: simple_stack_extern_call:
|
||||||
|
; GCN-NEXT: .lds_size: 0{{$}}
|
||||||
|
; GFX8-NEXT: .sgpr_count: 0x68{{$}}
|
||||||
|
; GFX9-NEXT: .sgpr_count: 0x66{{$}}
|
||||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
|
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
|
||||||
|
; GCN-NEXT: .vgpr_count: 0x40{{$}}
|
||||||
; GCN-NEXT: simple_stack_indirect_call:
|
; GCN-NEXT: simple_stack_indirect_call:
|
||||||
|
; GCN-NEXT: .lds_size: 0{{$}}
|
||||||
|
; GFX8-NEXT: .sgpr_count: 0x68{{$}}
|
||||||
|
; GFX9-NEXT: .sgpr_count: 0x66{{$}}
|
||||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
|
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
|
||||||
|
; GCN-NEXT: .vgpr_count: 0x40{{$}}
|
||||||
; GCN-NEXT: simple_stack_recurse:
|
; GCN-NEXT: simple_stack_recurse:
|
||||||
|
; GCN-NEXT: .lds_size: 0{{$}}
|
||||||
|
; GFX8-NEXT: .sgpr_count: 0x68{{$}}
|
||||||
|
; GFX9-NEXT: .sgpr_count: 0x66{{$}}
|
||||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
|
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
|
||||||
|
; GCN-NEXT: .vgpr_count: 0x40{{$}}
|
||||||
; GCN-NEXT: ...
|
; GCN-NEXT: ...
|
||||||
|
Loading…
Reference in New Issue
Block a user