mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 12:43:36 +01:00
[AMDGPU][IndirectCalls] Fix register usage propagation for indirect/external calls
This patch computes the maximum number of SGPRs and VGPRs used by the module in the presence of indirect calls and uses that as the register requirement for functions/kernels which make indirect calls. This patch also refactors the code in AMDGPUSubtarget.cpp, adding "base" variants of getMaxNumSGPRs that are shared by the MachineFunction version and the new Function version. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D103636
This commit is contained in:
parent
09c0e58fa9
commit
1d0f3b309b
@ -627,6 +627,21 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Per-module setup hook.
///
/// Before delegating to the base AsmPrinter, this records an upper bound on
/// the number of SGPRs and VGPRs over all non-entry (non-kernel) functions of
/// \p M in NonKernelMaxSGPRs / NonKernelMaxVGPRs.
///
/// \returns whatever AsmPrinter::doInitialization(M) returns.
bool AMDGPUAsmPrinter::doInitialization(Module &M) {
  // Start from zero so the bounds are recomputed from scratch for this module.
  NonKernelMaxSGPRs = 0;
  NonKernelMaxVGPRs = 0;

  for (const Function &F : M) {
    // Only non-kernel functions contribute to the bound.
    if (AMDGPU::isEntryFunctionCC(F.getCallingConv()))
      continue;

    // The subtarget is queried per function, so each function's limits are
    // taken from its own subtarget.
    const GCNSubtarget &STM = TM.getSubtarget<GCNSubtarget>(F);
    NonKernelMaxSGPRs = std::max(NonKernelMaxSGPRs, STM.getMaxNumSGPRs(F));
    NonKernelMaxVGPRs = std::max(NonKernelMaxVGPRs, STM.getMaxNumVGPRs(F));
  }

  return AsmPrinter::doInitialization(M);
}
|
||||
|
||||
// TODO: Fold this into emitFunctionBodyStart.
|
||||
void AMDGPUAsmPrinter::initializeTargetID(const Module &M) {
|
||||
// In the beginning all features are either 'Any' or 'NotSupported',
|
||||
@ -1020,14 +1035,20 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
|
||||
AMDGPU::isEntryFunctionCC(Callee->getCallingConv()))
|
||||
report_fatal_error("invalid call to entry function");
|
||||
|
||||
// If this is a call to an external function, we can't do much. Make
|
||||
// conservative guesses.
|
||||
|
||||
// 48 SGPRs - vcc, - flat_scr, -xnack
|
||||
int MaxSGPRGuess =
|
||||
47 - IsaInfo::getNumExtraSGPRs(&ST, true, ST.hasFlatAddressSpace());
|
||||
MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess);
|
||||
MaxVGPR = std::max(MaxVGPR, 23);
|
||||
unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
|
||||
TM.getMCSubtargetInfo(), false, ST.hasFlatAddressSpace());
|
||||
// If this is a call to an external function, we put the
|
||||
// max values computed in doInitialization().
|
||||
// Subtract extra SGPRs in case of indirect calls.
|
||||
// For indirect calls, we take the max for the module
|
||||
// and use that as the register budget for functions
|
||||
// which make indirect calls. This max value
|
||||
// includes extra SGPRs too (e.g. flatscratch and vcc),
|
||||
// which are getting added later.
|
||||
// Subtract them here so that they don't get added twice.
|
||||
MaxSGPR = NonKernelMaxSGPRs - ExtraSGPRs - 1;
|
||||
MaxVGPR = NonKernelMaxVGPRs - 1;
|
||||
// TODO: handle AGPRs
|
||||
MaxAGPR = std::max(MaxAGPR, 23);
|
||||
|
||||
CalleeFrameSize = std::max(CalleeFrameSize,
|
||||
|
@ -58,6 +58,8 @@ private:
|
||||
|
||||
void initializeTargetID(const Module &M);
|
||||
|
||||
bool doInitialization(Module &M) override;
|
||||
|
||||
SIProgramInfo CurrentProgramInfo;
|
||||
DenseMap<const Function *, SIFunctionResourceInfo> CallGraphResourceInfo;
|
||||
|
||||
@ -101,6 +103,11 @@ public:
|
||||
explicit AMDGPUAsmPrinter(TargetMachine &TM,
|
||||
std::unique_ptr<MCStreamer> Streamer);
|
||||
|
||||
// To memoize max SGPR usage of non-kernel functions of the module.
|
||||
unsigned NonKernelMaxSGPRs = 0;
|
||||
// To memoize max VGPR usage of non-kernel functions of the module.
|
||||
unsigned NonKernelMaxVGPRs = 0;
|
||||
|
||||
StringRef getPassName() const override;
|
||||
|
||||
const MCSubtargetInfo* getGlobalSTI() const;
|
||||
|
@ -698,12 +698,12 @@ unsigned GCNSubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
|
||||
return std::min(std::max(getTotalNumVGPRs() / RoundedRegs, 1u), MaxWaves);
|
||||
}
|
||||
|
||||
unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
|
||||
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
|
||||
unsigned
|
||||
GCNSubtarget::getBaseReservedNumSGPRs(const bool HasFlatScratchInit) const {
|
||||
if (getGeneration() >= AMDGPUSubtarget::GFX10)
|
||||
return 2; // VCC. FLAT_SCRATCH and XNACK are no longer in SGPRs.
|
||||
|
||||
if (MFI.hasFlatScratchInit()) {
|
||||
if (HasFlatScratchInit) {
|
||||
if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
|
||||
return 6; // FLAT_SCRATCH, XNACK, VCC (in that order).
|
||||
if (getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)
|
||||
@ -715,6 +715,26 @@ unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
|
||||
return 2; // VCC.
|
||||
}
|
||||
|
||||
unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
|
||||
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
|
||||
return getBaseReservedNumSGPRs(MFI.hasFlatScratchInit());
|
||||
}
|
||||
|
||||
unsigned GCNSubtarget::getReservedNumSGPRs(const Function &F) const {
|
||||
// The logic to detect if the function has
|
||||
// flat scratch init is same as how MachineFunctionInfo derives.
|
||||
bool FunctionHasFlatScratchInit = false;
|
||||
bool HasCalls = F.hasFnAttribute("amdgpu-calls");
|
||||
bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");
|
||||
if (hasFlatAddressSpace() && AMDGPU::isEntryFunctionCC(F.getCallingConv()) &&
|
||||
(isAmdHsaOrMesa(F) || enableFlatScratch()) &&
|
||||
!flatScratchIsArchitected()) {
|
||||
if (HasCalls || HasStackObjects || enableFlatScratch())
|
||||
FunctionHasFlatScratchInit = true;
|
||||
}
|
||||
return getBaseReservedNumSGPRs(FunctionHasFlatScratchInit);
|
||||
}
|
||||
|
||||
unsigned GCNSubtarget::computeOccupancy(const Function &F, unsigned LDSSize,
|
||||
unsigned NumSGPRs,
|
||||
unsigned NumVGPRs) const {
|
||||
@ -728,13 +748,11 @@ unsigned GCNSubtarget::computeOccupancy(const Function &F, unsigned LDSSize,
|
||||
return Occupancy;
|
||||
}
|
||||
|
||||
unsigned GCNSubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
|
||||
const Function &F = MF.getFunction();
|
||||
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
|
||||
|
||||
unsigned GCNSubtarget::getBaseMaxNumSGPRs(
|
||||
const Function &F, std::pair<unsigned, unsigned> WavesPerEU,
|
||||
unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const {
|
||||
// Compute maximum number of SGPRs function can use using default/requested
|
||||
// minimum number of waves per execution unit.
|
||||
std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
|
||||
unsigned MaxNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, false);
|
||||
unsigned MaxAddressableNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, true);
|
||||
|
||||
@ -745,7 +763,7 @@ unsigned GCNSubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
|
||||
F, "amdgpu-num-sgpr", MaxNumSGPRs);
|
||||
|
||||
// Make sure requested value does not violate subtarget's specifications.
|
||||
if (Requested && (Requested <= getReservedNumSGPRs(MF)))
|
||||
if (Requested && (Requested <= ReservedNumSGPRs))
|
||||
Requested = 0;
|
||||
|
||||
// If more SGPRs are required to support the input user/system SGPRs,
|
||||
@ -755,7 +773,7 @@ unsigned GCNSubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
|
||||
// of reserved special registers in total. Theoretically you could re-use
|
||||
// the last input registers for these special registers, but this would
|
||||
// require a lot of complexity to deal with the weird aliasing.
|
||||
unsigned InputNumSGPRs = MFI.getNumPreloadedSGPRs();
|
||||
unsigned InputNumSGPRs = PreloadedSGPRs;
|
||||
if (Requested && Requested < InputNumSGPRs)
|
||||
Requested = InputNumSGPRs;
|
||||
|
||||
@ -774,17 +792,43 @@ unsigned GCNSubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
|
||||
if (hasSGPRInitBug())
|
||||
MaxNumSGPRs = AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
|
||||
|
||||
return std::min(MaxNumSGPRs - getReservedNumSGPRs(MF),
|
||||
MaxAddressableNumSGPRs);
|
||||
return std::min(MaxNumSGPRs - ReservedNumSGPRs, MaxAddressableNumSGPRs);
|
||||
}
|
||||
|
||||
unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
|
||||
unsigned GCNSubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
|
||||
const Function &F = MF.getFunction();
|
||||
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
|
||||
return getBaseMaxNumSGPRs(F, MFI.getWavesPerEU(), MFI.getNumPreloadedSGPRs(),
|
||||
getReservedNumSGPRs(MF));
|
||||
}
|
||||
|
||||
/// \returns The largest number of SGPRs that can be preloaded for a
/// function: the sum of every user SGPR and every system SGPR listed below.
///
/// Used as a conservative stand-in for the preloaded SGPR count when only an
/// IR Function (and no machine-function info) is available.
static unsigned getMaxNumPreloadedSGPRs() {
  // Max number of user SGPRs.
  constexpr unsigned MaxUserSGPRs = 4 + // private segment buffer
                                    2 + // dispatch ptr
                                    2 + // queue ptr
                                    2 + // kernel segment ptr
                                        // NOTE(review): presumably the
                                        // kernarg segment pointer - confirm.
                                    2 + // dispatch ID
                                    2 + // flat scratch init
                                    2;  // implicit buffer ptr

  // Max number of system SGPRs.
  constexpr unsigned MaxSystemSGPRs = 1 + // WorkGroupIDX
                                      1 + // WorkGroupIDY
                                      1 + // WorkGroupIDZ
                                      1 + // WorkGroupInfo
                                      1;  // private segment wave byte offset

  return MaxUserSGPRs + MaxSystemSGPRs;
}
|
||||
|
||||
unsigned GCNSubtarget::getMaxNumSGPRs(const Function &F) const {
|
||||
return getBaseMaxNumSGPRs(F, getWavesPerEU(F), getMaxNumPreloadedSGPRs(),
|
||||
getReservedNumSGPRs(F));
|
||||
}
|
||||
|
||||
unsigned GCNSubtarget::getBaseMaxNumVGPRs(
|
||||
const Function &F, std::pair<unsigned, unsigned> WavesPerEU) const {
|
||||
// Compute maximum number of VGPRs function can use using default/requested
|
||||
// minimum number of waves per execution unit.
|
||||
std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
|
||||
unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first);
|
||||
|
||||
// Check if maximum number of VGPRs was explicitly requested using
|
||||
@ -811,6 +855,16 @@ unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
|
||||
return MaxNumVGPRs;
|
||||
}
|
||||
|
||||
unsigned GCNSubtarget::getMaxNumVGPRs(const Function &F) const {
|
||||
return getBaseMaxNumVGPRs(F, getWavesPerEU(F));
|
||||
}
|
||||
|
||||
unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
|
||||
const Function &F = MF.getFunction();
|
||||
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
|
||||
return getBaseMaxNumVGPRs(F, MFI.getWavesPerEU());
|
||||
}
|
||||
|
||||
void GCNSubtarget::adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use,
|
||||
int UseOpIdx, SDep &Dep) const {
|
||||
if (Dep.getKind() != SDep::Kind::Data || !Dep.getReg() ||
|
||||
|
@ -1033,9 +1033,24 @@ public:
|
||||
return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
|
||||
}
|
||||
|
||||
/// \returns Reserved number of SGPRs for given function \p MF.
|
||||
/// \returns Reserved number of SGPRs. This is common
|
||||
/// utility function called by MachineFunction and
|
||||
/// Function variants of getReservedNumSGPRs.
|
||||
unsigned getBaseReservedNumSGPRs(const bool HasFlatScratchInit) const;
|
||||
/// \returns Reserved number of SGPRs for given machine function \p MF.
|
||||
unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
|
||||
|
||||
/// \returns Reserved number of SGPRs for given function \p F.
|
||||
unsigned getReservedNumSGPRs(const Function &F) const;
|
||||
|
||||
/// \returns max num SGPRs. This is the common utility
|
||||
/// function called by MachineFunction and Function
|
||||
/// variants of getMaxNumSGPRs.
|
||||
unsigned getBaseMaxNumSGPRs(const Function &F,
|
||||
std::pair<unsigned, unsigned> WavesPerEU,
|
||||
unsigned PreloadedSGPRs,
|
||||
unsigned ReservedNumSGPRs) const;
|
||||
|
||||
/// \returns Maximum number of SGPRs that meets number of waves per execution
|
||||
/// unit requirement for function \p MF, or number of SGPRs explicitly
|
||||
/// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
|
||||
@ -1046,6 +1061,16 @@ public:
|
||||
/// unit requirement.
|
||||
unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
|
||||
|
||||
/// \returns Maximum number of SGPRs that meets number of waves per execution
|
||||
/// unit requirement for function \p F, or number of SGPRs explicitly
|
||||
/// requested using "amdgpu-num-sgpr" attribute attached to function \p F.
|
||||
///
|
||||
/// \returns Value that meets number of waves per execution unit requirement
|
||||
/// if explicitly requested value cannot be converted to integer, violates
|
||||
/// subtarget's specifications, or does not meet number of waves per execution
|
||||
/// unit requirement.
|
||||
unsigned getMaxNumSGPRs(const Function &F) const;
|
||||
|
||||
/// \returns VGPR allocation granularity supported by the subtarget.
|
||||
unsigned getVGPRAllocGranule() const {
|
||||
return AMDGPU::IsaInfo::getVGPRAllocGranule(this);
|
||||
@ -1078,6 +1103,20 @@ public:
|
||||
return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
|
||||
}
|
||||
|
||||
/// \returns max num VGPRs. This is the common utility function
|
||||
/// called by MachineFunction and Function variants of getMaxNumVGPRs.
|
||||
unsigned getBaseMaxNumVGPRs(const Function &F,
|
||||
std::pair<unsigned, unsigned> WavesPerEU) const;
|
||||
/// \returns Maximum number of VGPRs that meets number of waves per execution
|
||||
/// unit requirement for function \p F, or number of VGPRs explicitly
|
||||
/// requested using "amdgpu-num-vgpr" attribute attached to function \p F.
|
||||
///
|
||||
/// \returns Value that meets number of waves per execution unit requirement
|
||||
/// if explicitly requested value cannot be converted to integer, violates
|
||||
/// subtarget's specifications, or does not meet number of waves per execution
|
||||
/// unit requirement.
|
||||
unsigned getMaxNumVGPRs(const Function &F) const;
|
||||
|
||||
/// \returns Maximum number of VGPRs that meets number of waves per execution
|
||||
/// unit requirement for function \p MF, or number of VGPRs explicitly
|
||||
/// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
|
||||
|
@ -154,21 +154,22 @@ bb:
|
||||
declare void @undef_func()
|
||||
|
||||
; GCN-LABEL: {{^}}kernel_call_undef_func:
|
||||
; GFX908: .amdhsa_next_free_vgpr 24
|
||||
; GFX90A: .amdhsa_next_free_vgpr 48
|
||||
; GFX90A: .amdhsa_accum_offset 24
|
||||
; GCN: NumVgprs: 24
|
||||
; GFX908: .amdhsa_next_free_vgpr 128
|
||||
; GFX90A: .amdhsa_next_free_vgpr 280
|
||||
; GFX90A: .amdhsa_accum_offset 256
|
||||
; GFX908: NumVgprs: 128
|
||||
; GFX90A: NumVgprs: 256
|
||||
; GCN: NumAgprs: 24
|
||||
; GFX908: TotalNumVgprs: 24
|
||||
; GFX90A: TotalNumVgprs: 48
|
||||
; GFX908: VGPRBlocks: 5
|
||||
; GFX90A: VGPRBlocks: 5
|
||||
; GFX908: NumVGPRsForWavesPerEU: 24
|
||||
; GFX90A: NumVGPRsForWavesPerEU: 48
|
||||
; GFX90A: AccumOffset: 24
|
||||
; GFX908: Occupancy: 10
|
||||
; GFX90A: Occupancy: 8
|
||||
; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 5
|
||||
; GFX908: TotalNumVgprs: 128
|
||||
; GFX90A: TotalNumVgprs: 280
|
||||
; GFX908: VGPRBlocks: 31
|
||||
; GFX90A: VGPRBlocks: 34
|
||||
; GFX908: NumVGPRsForWavesPerEU: 128
|
||||
; GFX90A: NumVGPRsForWavesPerEU: 280
|
||||
; GFX90A: AccumOffset: 256
|
||||
; GFX908: Occupancy: 2
|
||||
; GFX90A: Occupancy: 1
|
||||
; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 63
|
||||
define amdgpu_kernel void @kernel_call_undef_func() #0 {
|
||||
bb:
|
||||
call void @undef_func()
|
||||
|
@ -147,9 +147,9 @@ attributes #0 = { nounwind }
|
||||
|
||||
; GCN: amdpal.pipelines:
|
||||
; GCN-NEXT: - .registers:
|
||||
; SDAG-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ca{{$}}
|
||||
; SDAG-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf03cf{{$}}
|
||||
; SDAG-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
|
||||
; GISEL-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ce{{$}}
|
||||
; GISEL-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf03cf{{$}}
|
||||
; GISEL-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
|
||||
; GCN-NEXT: .shader_functions:
|
||||
; GCN-NEXT: dynamic_stack:
|
||||
|
@ -227,10 +227,10 @@ define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
|
||||
; Make sure there's no assert when a sgpr96 is used.
|
||||
; GCN-LABEL: {{^}}count_use_sgpr96_external_call
|
||||
; GCN: ; sgpr96 s[{{[0-9]+}}:{{[0-9]+}}]
|
||||
; CI: NumSgprs: 48
|
||||
; VI-NOBUG: NumSgprs: 48
|
||||
; CI: NumSgprs: 102
|
||||
; VI-NOBUG: NumSgprs: 102
|
||||
; VI-BUG: NumSgprs: 96
|
||||
; GCN: NumVgprs: 24
|
||||
; GCN: NumVgprs: 64
|
||||
define amdgpu_kernel void @count_use_sgpr96_external_call() {
|
||||
entry:
|
||||
tail call void asm sideeffect "; sgpr96 $0", "s"(<3 x i32> <i32 10, i32 11, i32 12>) #1
|
||||
@ -241,10 +241,10 @@ entry:
|
||||
; Make sure there's no assert when a sgpr160 is used.
|
||||
; GCN-LABEL: {{^}}count_use_sgpr160_external_call
|
||||
; GCN: ; sgpr160 s[{{[0-9]+}}:{{[0-9]+}}]
|
||||
; CI: NumSgprs: 48
|
||||
; VI-NOBUG: NumSgprs: 48
|
||||
; CI: NumSgprs: 102
|
||||
; VI-NOBUG: NumSgprs: 102
|
||||
; VI-BUG: NumSgprs: 96
|
||||
; GCN: NumVgprs: 24
|
||||
; GCN: NumVgprs: 64
|
||||
define amdgpu_kernel void @count_use_sgpr160_external_call() {
|
||||
entry:
|
||||
tail call void asm sideeffect "; sgpr160 $0", "s"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) #1
|
||||
@ -255,10 +255,10 @@ entry:
|
||||
; Make sure there's no assert when a vgpr160 is used.
|
||||
; GCN-LABEL: {{^}}count_use_vgpr160_external_call
|
||||
; GCN: ; vgpr160 v[{{[0-9]+}}:{{[0-9]+}}]
|
||||
; CI: NumSgprs: 48
|
||||
; VI-NOBUG: NumSgprs: 48
|
||||
; CI: NumSgprs: 102
|
||||
; VI-NOBUG: NumSgprs: 102
|
||||
; VI-BUG: NumSgprs: 96
|
||||
; GCN: NumVgprs: 24
|
||||
; GCN: NumVgprs: 64
|
||||
define amdgpu_kernel void @count_use_vgpr160_external_call() {
|
||||
entry:
|
||||
tail call void asm sideeffect "; vgpr160 $0", "v"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) #1
|
||||
|
@ -15,8 +15,8 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr() {
|
||||
; GCN-NEXT: amd_machine_version_stepping = 0
|
||||
; GCN-NEXT: kernel_code_entry_byte_offset = 256
|
||||
; GCN-NEXT: kernel_code_prefetch_byte_size = 0
|
||||
; GCN-NEXT: granulated_workitem_vgpr_count = 7
|
||||
; GCN-NEXT: granulated_wavefront_sgpr_count = 5
|
||||
; GCN-NEXT: granulated_workitem_vgpr_count = 15
|
||||
; GCN-NEXT: granulated_wavefront_sgpr_count = 12
|
||||
; GCN-NEXT: priority = 0
|
||||
; GCN-NEXT: float_mode = 240
|
||||
; GCN-NEXT: priv = 0
|
||||
@ -59,8 +59,8 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr() {
|
||||
; GCN-NEXT: gds_segment_byte_size = 0
|
||||
; GCN-NEXT: kernarg_segment_byte_size = 0
|
||||
; GCN-NEXT: workgroup_fbarrier_count = 0
|
||||
; GCN-NEXT: wavefront_sgpr_count = 48
|
||||
; GCN-NEXT: workitem_vgpr_count = 32
|
||||
; GCN-NEXT: wavefront_sgpr_count = 102
|
||||
; GCN-NEXT: workitem_vgpr_count = 64
|
||||
; GCN-NEXT: reserved_vgpr_first = 0
|
||||
; GCN-NEXT: reserved_vgpr_count = 0
|
||||
; GCN-NEXT: reserved_sgpr_first = 0
|
||||
@ -111,8 +111,8 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr_arg() {
|
||||
; GCN-NEXT: amd_machine_version_stepping = 0
|
||||
; GCN-NEXT: kernel_code_entry_byte_offset = 256
|
||||
; GCN-NEXT: kernel_code_prefetch_byte_size = 0
|
||||
; GCN-NEXT: granulated_workitem_vgpr_count = 7
|
||||
; GCN-NEXT: granulated_wavefront_sgpr_count = 5
|
||||
; GCN-NEXT: granulated_workitem_vgpr_count = 15
|
||||
; GCN-NEXT: granulated_wavefront_sgpr_count = 12
|
||||
; GCN-NEXT: priority = 0
|
||||
; GCN-NEXT: float_mode = 240
|
||||
; GCN-NEXT: priv = 0
|
||||
@ -155,8 +155,8 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr_arg() {
|
||||
; GCN-NEXT: gds_segment_byte_size = 0
|
||||
; GCN-NEXT: kernarg_segment_byte_size = 0
|
||||
; GCN-NEXT: workgroup_fbarrier_count = 0
|
||||
; GCN-NEXT: wavefront_sgpr_count = 48
|
||||
; GCN-NEXT: workitem_vgpr_count = 32
|
||||
; GCN-NEXT: wavefront_sgpr_count = 102
|
||||
; GCN-NEXT: workitem_vgpr_count = 64
|
||||
; GCN-NEXT: reserved_vgpr_first = 0
|
||||
; GCN-NEXT: reserved_vgpr_count = 0
|
||||
; GCN-NEXT: reserved_sgpr_first = 0
|
||||
|
Loading…
Reference in New Issue
Block a user