1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 19:23:23 +01:00

AMDGPU: Reduce reported maximum group size to 1024

While some targets allow encoding 2048, this was never tested or
supported.
This commit is contained in:
Matt Arsenault 2019-08-27 13:03:36 -04:00 committed by Matt Arsenault
parent a76fef4322
commit 5cfd953988
4 changed files with 24 additions and 22 deletions

View File

@ -312,7 +312,8 @@ unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
}
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
return 2048;
// Some subtargets allow encoding 2048, but this isn't tested or supported.
return 1024;
}
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,

View File

@ -34,13 +34,13 @@ entry:
}
attributes #2 = {"amdgpu-flat-work-group-size"="128,128"}
; CHECK-LABEL: {{^}}min_1024_max_2048
; CHECK: SGPRBlocks: 1
; CHECK: VGPRBlocks: 7
; CHECK: NumSGPRsForWavesPerEU: 12
; CHECK: NumVGPRsForWavesPerEU: 32
; CHECK-LABEL: {{^}}min_1024_max_1024
; CHECK: SGPRBlocks: 0
; CHECK: VGPRBlocks: 10
; CHECK: NumSGPRsForWavesPerEU: 2{{$}}
; CHECK: NumVGPRsForWavesPerEU: 43
@var = addrspace(1) global float 0.0
define amdgpu_kernel void @min_1024_max_2048() #3 {
define amdgpu_kernel void @min_1024_max_1024() #3 {
%val0 = load volatile float, float addrspace(1)* @var
%val1 = load volatile float, float addrspace(1)* @var
%val2 = load volatile float, float addrspace(1)* @var
@ -127,7 +127,7 @@ define amdgpu_kernel void @min_1024_max_2048() #3 {
ret void
}
attributes #3 = {"amdgpu-flat-work-group-size"="1024,2048"}
attributes #3 = {"amdgpu-flat-work-group-size"="1024,1024"}
; CHECK: amdhsa.kernels:
; CHECK: .max_flat_workgroup_size: 64
@ -136,8 +136,8 @@ attributes #3 = {"amdgpu-flat-work-group-size"="1024,2048"}
; CHECK: .name: min_64_max_128
; CHECK: .max_flat_workgroup_size: 128
; CHECK: .name: min_128_max_128
; CHECK: .max_flat_workgroup_size: 2048
; CHECK: .name: min_1024_max_2048
; CHECK: .max_flat_workgroup_size: 1024
; CHECK: .name: min_1024_max_1024
; CHECK: amdhsa.version:
; CHECK: - 1
; CHECK: - 0

View File

@ -34,13 +34,13 @@ entry:
}
attributes #2 = {"amdgpu-flat-work-group-size"="128,128"}
; CHECK-LABEL: {{^}}min_1024_max_2048
; CHECK: SGPRBlocks: 1
; CHECK: VGPRBlocks: 7
; CHECK: NumSGPRsForWavesPerEU: 12
; CHECK: NumVGPRsForWavesPerEU: 32
; CHECK-LABEL: {{^}}min_1024_max_1024
; CHECK: SGPRBlocks: 0
; CHECK: VGPRBlocks: 10
; CHECK: NumSGPRsForWavesPerEU: 2{{$}}
; CHECK: NumVGPRsForWavesPerEU: 43
@var = addrspace(1) global float 0.0
define amdgpu_kernel void @min_1024_max_2048() #3 {
define amdgpu_kernel void @min_1024_max_1024() #3 {
%val0 = load volatile float, float addrspace(1)* @var
%val1 = load volatile float, float addrspace(1)* @var
%val2 = load volatile float, float addrspace(1)* @var
@ -127,7 +127,7 @@ define amdgpu_kernel void @min_1024_max_2048() #3 {
ret void
}
attributes #3 = {"amdgpu-flat-work-group-size"="1024,2048"}
attributes #3 = {"amdgpu-flat-work-group-size"="1024,1024"}
; HSAMD: NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata)
; HSAMD: Version: [ 1, 0 ]
@ -138,5 +138,5 @@ attributes #3 = {"amdgpu-flat-work-group-size"="1024,2048"}
; HSAMD: MaxFlatWorkGroupSize: 128
; HSAMD: - Name: min_128_max_128
; HSAMD: MaxFlatWorkGroupSize: 128
; HSAMD: - Name: min_1024_max_2048
; HSAMD: MaxFlatWorkGroupSize: 2048
; HSAMD: - Name: min_1024_max_1024
; HSAMD: MaxFlatWorkGroupSize: 1024

View File

@ -47,8 +47,9 @@ entry:
ret void
}
; SICI: @promote_alloca_size_1600.stack = internal unnamed_addr addrspace(3) global [1600 x [5 x i32]] undef, align 4
; GFX10: alloca [5 x i32]
; SI-NOT: @promote_alloca_size_1600.stack
; CI: @promote_alloca_size_1600.stack = internal unnamed_addr addrspace(3) global [1024 x [5 x i32]] undef, align 4
; GFX10: @promote_alloca_size_1600.stack = internal unnamed_addr addrspace(3) global [1024 x [5 x i32]] undef, align 4
define amdgpu_kernel void @promote_alloca_size_1600(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #2 {
entry:
@ -274,7 +275,7 @@ entry:
attributes #0 = { nounwind "amdgpu-flat-work-group-size"="63,63" }
attributes #1 = { nounwind "amdgpu-waves-per-eu"="1,3" "amdgpu-flat-work-group-size"="256,256" }
attributes #2 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1600,1600" }
attributes #2 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1024,1024" }
attributes #3 = { nounwind "amdgpu-waves-per-eu"="1,10" }
attributes #4 = { nounwind "amdgpu-waves-per-eu"="1,10" }
attributes #5 = { nounwind "amdgpu-waves-per-eu"="1,6" "amdgpu-flat-work-group-size"="64,64" }