mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 05:01:59 +01:00
AMDGPU: Recalculate SGPRs when trap handler is supported
Differential Revision: https://reviews.llvm.org/D29911
llvm-svn: 332523
This commit is contained in:
parent
f4a05c6e9f
commit
deea84a15a
@ -358,9 +358,11 @@ unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
|
|||||||
|
|
||||||
if (WavesPerEU >= getMaxWavesPerEU(Features))
|
if (WavesPerEU >= getMaxWavesPerEU(Features))
|
||||||
return 0;
|
return 0;
|
||||||
unsigned MinNumSGPRs =
|
|
||||||
alignDown(getTotalNumSGPRs(Features) / (WavesPerEU + 1),
|
unsigned MinNumSGPRs = getTotalNumSGPRs(Features) / (WavesPerEU + 1);
|
||||||
getSGPRAllocGranule(Features)) + 1;
|
if (Features.test(FeatureTrapHandler))
|
||||||
|
MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
|
||||||
|
MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(Features)) + 1;
|
||||||
return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
|
return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -369,11 +371,13 @@ unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
|
|||||||
assert(WavesPerEU != 0);
|
assert(WavesPerEU != 0);
|
||||||
|
|
||||||
IsaVersion Version = getIsaVersion(Features);
|
IsaVersion Version = getIsaVersion(Features);
|
||||||
unsigned MaxNumSGPRs = alignDown(getTotalNumSGPRs(Features) / WavesPerEU,
|
|
||||||
getSGPRAllocGranule(Features));
|
|
||||||
unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
|
unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
|
||||||
if (Version.Major >= 8 && !Addressable)
|
if (Version.Major >= 8 && !Addressable)
|
||||||
AddressableNumSGPRs = 112;
|
AddressableNumSGPRs = 112;
|
||||||
|
unsigned MaxNumSGPRs = getTotalNumSGPRs(Features) / WavesPerEU;
|
||||||
|
if (Features.test(FeatureTrapHandler))
|
||||||
|
MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
|
||||||
|
MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(Features));
|
||||||
return std::min(MaxNumSGPRs, AddressableNumSGPRs);
|
return std::min(MaxNumSGPRs, AddressableNumSGPRs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -42,7 +42,8 @@ namespace IsaInfo {
|
|||||||
enum {
|
enum {
|
||||||
// The closed Vulkan driver sets 96, which limits the wave count to 8 but
|
// The closed Vulkan driver sets 96, which limits the wave count to 8 but
|
||||||
// doesn't spill SGPRs as much as when 80 is set.
|
// doesn't spill SGPRs as much as when 80 is set.
|
||||||
FIXED_NUM_SGPRS_FOR_INIT_BUG = 96
|
FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
|
||||||
|
TRAP_NUM_SGPRS = 16
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Instruction set architecture version.
|
/// Instruction set architecture version.
|
||||||
|
70
test/CodeGen/AMDGPU/amdhsa-trap-num-sgprs.ll
Normal file
70
test/CodeGen/AMDGPU/amdhsa-trap-num-sgprs.ll
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -mattr=+trap-handler < %s | FileCheck %s --check-prefixes=GCN,TRAP-HANDLER-ENABLE
|
||||||
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -mattr=-trap-handler < %s | FileCheck %s --check-prefixes=GCN,TRAP-HANDLER-DISABLE
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}amdhsa_trap_num_sgprs
|
||||||
|
; TRAP-HANDLER-ENABLE: NumSgprs: 60
|
||||||
|
; TRAP-HANDLER-DISABLE: NumSgprs: 76
|
||||||
|
define amdgpu_kernel void @amdhsa_trap_num_sgprs(
|
||||||
|
i32 addrspace(1)* %out0, i32 %in0,
|
||||||
|
i32 addrspace(1)* %out1, i32 %in1,
|
||||||
|
i32 addrspace(1)* %out2, i32 %in2,
|
||||||
|
i32 addrspace(1)* %out3, i32 %in3,
|
||||||
|
i32 addrspace(1)* %out4, i32 %in4,
|
||||||
|
i32 addrspace(1)* %out5, i32 %in5,
|
||||||
|
i32 addrspace(1)* %out6, i32 %in6,
|
||||||
|
i32 addrspace(1)* %out7, i32 %in7,
|
||||||
|
i32 addrspace(1)* %out8, i32 %in8,
|
||||||
|
i32 addrspace(1)* %out9, i32 %in9,
|
||||||
|
i32 addrspace(1)* %out10, i32 %in10,
|
||||||
|
i32 addrspace(1)* %out11, i32 %in11,
|
||||||
|
i32 addrspace(1)* %out12, i32 %in12,
|
||||||
|
i32 addrspace(1)* %out13, i32 %in13,
|
||||||
|
i32 addrspace(1)* %out14, i32 %in14,
|
||||||
|
i32 addrspace(1)* %out15, i32 %in15,
|
||||||
|
i32 addrspace(1)* %out16, i32 %in16,
|
||||||
|
i32 addrspace(1)* %out17, i32 %in17,
|
||||||
|
i32 addrspace(1)* %out18, i32 %in18,
|
||||||
|
i32 addrspace(1)* %out19, i32 %in19,
|
||||||
|
i32 addrspace(1)* %out20, i32 %in20,
|
||||||
|
i32 addrspace(1)* %out21, i32 %in21,
|
||||||
|
i32 addrspace(1)* %out22, i32 %in22,
|
||||||
|
i32 addrspace(1)* %out23, i32 %in23,
|
||||||
|
i32 addrspace(1)* %out24, i32 %in24,
|
||||||
|
i32 addrspace(1)* %out25, i32 %in25,
|
||||||
|
i32 addrspace(1)* %out26, i32 %in26,
|
||||||
|
i32 addrspace(1)* %out27, i32 %in27,
|
||||||
|
i32 addrspace(1)* %out28, i32 %in28,
|
||||||
|
i32 addrspace(1)* %out29, i32 %in29) {
|
||||||
|
entry:
|
||||||
|
store i32 %in0, i32 addrspace(1)* %out0
|
||||||
|
store i32 %in1, i32 addrspace(1)* %out1
|
||||||
|
store i32 %in2, i32 addrspace(1)* %out2
|
||||||
|
store i32 %in3, i32 addrspace(1)* %out3
|
||||||
|
store i32 %in4, i32 addrspace(1)* %out4
|
||||||
|
store i32 %in5, i32 addrspace(1)* %out5
|
||||||
|
store i32 %in6, i32 addrspace(1)* %out6
|
||||||
|
store i32 %in7, i32 addrspace(1)* %out7
|
||||||
|
store i32 %in8, i32 addrspace(1)* %out8
|
||||||
|
store i32 %in9, i32 addrspace(1)* %out9
|
||||||
|
store i32 %in10, i32 addrspace(1)* %out10
|
||||||
|
store i32 %in11, i32 addrspace(1)* %out11
|
||||||
|
store i32 %in12, i32 addrspace(1)* %out12
|
||||||
|
store i32 %in13, i32 addrspace(1)* %out13
|
||||||
|
store i32 %in14, i32 addrspace(1)* %out14
|
||||||
|
store i32 %in15, i32 addrspace(1)* %out15
|
||||||
|
store i32 %in16, i32 addrspace(1)* %out16
|
||||||
|
store i32 %in17, i32 addrspace(1)* %out17
|
||||||
|
store i32 %in18, i32 addrspace(1)* %out18
|
||||||
|
store i32 %in19, i32 addrspace(1)* %out19
|
||||||
|
store i32 %in20, i32 addrspace(1)* %out20
|
||||||
|
store i32 %in21, i32 addrspace(1)* %out21
|
||||||
|
store i32 %in22, i32 addrspace(1)* %out22
|
||||||
|
store i32 %in23, i32 addrspace(1)* %out23
|
||||||
|
store i32 %in24, i32 addrspace(1)* %out24
|
||||||
|
store i32 %in25, i32 addrspace(1)* %out25
|
||||||
|
store i32 %in26, i32 addrspace(1)* %out26
|
||||||
|
store i32 %in27, i32 addrspace(1)* %out27
|
||||||
|
store i32 %in28, i32 addrspace(1)* %out28
|
||||||
|
store i32 %in29, i32 addrspace(1)* %out29
|
||||||
|
ret void
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user