1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

[AMDGPU] Fix MaxWorkGroupsPerCU for large workgroups

This patch corrects the maximum number of workgroups per CU when
workgroups are large (more than 128). This value feeds into the
occupancy calculation with respect to LDS size.

Differential Revision: https://reviews.llvm.org/D29974

llvm-svn: 295134
This commit is contained in:
Stanislav Mekhanoshin 2017-02-15 01:03:59 +00:00
parent 14aaabfcef
commit 479d45f82d
2 changed files with 9 additions and 3 deletions

View File

@ -151,7 +151,11 @@ unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
unsigned FlatWorkGroupSize) {
if (!Features.test(FeatureGCN))
return 8;
return getWavesPerWorkGroup(Features, FlatWorkGroupSize) == 1 ? 40 : 16;
unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
if (N == 1)
return 40;
N = 40 / N;
return std::min(N, 16u);
}
unsigned getMaxWavesPerCU(const FeatureBitset &Features) {

View File

@ -69,7 +69,8 @@ entry:
}
; ALL-LABEL: @occupancy_0(
; ALL: alloca [5 x i32]
; CI-NOT: alloca [5 x i32]
; SI: alloca [5 x i32]
define void @occupancy_0(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #3 {
entry:
%stack = alloca [5 x i32], align 4
@ -91,7 +92,8 @@ entry:
}
; ALL-LABEL: @occupancy_max(
; ALL: alloca [5 x i32]
; CI-NOT: alloca [5 x i32]
; SI: alloca [5 x i32]
define void @occupancy_max(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #4 {
entry:
%stack = alloca [5 x i32], align 4