mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
AMDGPU: Reduce reported maximum group size to 1024
While some targets allow encoding a group size of 2048, that size was never tested or supported.
This commit is contained in:
parent
a76fef4322
commit
5cfd953988
@ -312,7 +312,8 @@ unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
|
||||
}
|
||||
|
||||
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
|
||||
return 2048;
|
||||
// Some subtargets allow encoding 2048, but this isn't tested or supported.
|
||||
return 1024;
|
||||
}
|
||||
|
||||
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
|
||||
|
@ -34,13 +34,13 @@ entry:
|
||||
}
|
||||
attributes #2 = {"amdgpu-flat-work-group-size"="128,128"}
|
||||
|
||||
; CHECK-LABEL: {{^}}min_1024_max_2048
|
||||
; CHECK: SGPRBlocks: 1
|
||||
; CHECK: VGPRBlocks: 7
|
||||
; CHECK: NumSGPRsForWavesPerEU: 12
|
||||
; CHECK: NumVGPRsForWavesPerEU: 32
|
||||
; CHECK-LABEL: {{^}}min_1024_max_1024
|
||||
; CHECK: SGPRBlocks: 0
|
||||
; CHECK: VGPRBlocks: 10
|
||||
; CHECK: NumSGPRsForWavesPerEU: 2{{$}}
|
||||
; CHECK: NumVGPRsForWavesPerEU: 43
|
||||
@var = addrspace(1) global float 0.0
|
||||
define amdgpu_kernel void @min_1024_max_2048() #3 {
|
||||
define amdgpu_kernel void @min_1024_max_1024() #3 {
|
||||
%val0 = load volatile float, float addrspace(1)* @var
|
||||
%val1 = load volatile float, float addrspace(1)* @var
|
||||
%val2 = load volatile float, float addrspace(1)* @var
|
||||
@ -127,7 +127,7 @@ define amdgpu_kernel void @min_1024_max_2048() #3 {
|
||||
|
||||
ret void
|
||||
}
|
||||
attributes #3 = {"amdgpu-flat-work-group-size"="1024,2048"}
|
||||
attributes #3 = {"amdgpu-flat-work-group-size"="1024,1024"}
|
||||
|
||||
; CHECK: amdhsa.kernels:
|
||||
; CHECK: .max_flat_workgroup_size: 64
|
||||
@ -136,8 +136,8 @@ attributes #3 = {"amdgpu-flat-work-group-size"="1024,2048"}
|
||||
; CHECK: .name: min_64_max_128
|
||||
; CHECK: .max_flat_workgroup_size: 128
|
||||
; CHECK: .name: min_128_max_128
|
||||
; CHECK: .max_flat_workgroup_size: 2048
|
||||
; CHECK: .name: min_1024_max_2048
|
||||
; CHECK: .max_flat_workgroup_size: 1024
|
||||
; CHECK: .name: min_1024_max_1024
|
||||
; CHECK: amdhsa.version:
|
||||
; CHECK: - 1
|
||||
; CHECK: - 0
|
||||
|
@ -34,13 +34,13 @@ entry:
|
||||
}
|
||||
attributes #2 = {"amdgpu-flat-work-group-size"="128,128"}
|
||||
|
||||
; CHECK-LABEL: {{^}}min_1024_max_2048
|
||||
; CHECK: SGPRBlocks: 1
|
||||
; CHECK: VGPRBlocks: 7
|
||||
; CHECK: NumSGPRsForWavesPerEU: 12
|
||||
; CHECK: NumVGPRsForWavesPerEU: 32
|
||||
; CHECK-LABEL: {{^}}min_1024_max_1024
|
||||
; CHECK: SGPRBlocks: 0
|
||||
; CHECK: VGPRBlocks: 10
|
||||
; CHECK: NumSGPRsForWavesPerEU: 2{{$}}
|
||||
; CHECK: NumVGPRsForWavesPerEU: 43
|
||||
@var = addrspace(1) global float 0.0
|
||||
define amdgpu_kernel void @min_1024_max_2048() #3 {
|
||||
define amdgpu_kernel void @min_1024_max_1024() #3 {
|
||||
%val0 = load volatile float, float addrspace(1)* @var
|
||||
%val1 = load volatile float, float addrspace(1)* @var
|
||||
%val2 = load volatile float, float addrspace(1)* @var
|
||||
@ -127,7 +127,7 @@ define amdgpu_kernel void @min_1024_max_2048() #3 {
|
||||
|
||||
ret void
|
||||
}
|
||||
attributes #3 = {"amdgpu-flat-work-group-size"="1024,2048"}
|
||||
attributes #3 = {"amdgpu-flat-work-group-size"="1024,1024"}
|
||||
|
||||
; HSAMD: NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata)
|
||||
; HSAMD: Version: [ 1, 0 ]
|
||||
@ -138,5 +138,5 @@ attributes #3 = {"amdgpu-flat-work-group-size"="1024,2048"}
|
||||
; HSAMD: MaxFlatWorkGroupSize: 128
|
||||
; HSAMD: - Name: min_128_max_128
|
||||
; HSAMD: MaxFlatWorkGroupSize: 128
|
||||
; HSAMD: - Name: min_1024_max_2048
|
||||
; HSAMD: MaxFlatWorkGroupSize: 2048
|
||||
; HSAMD: - Name: min_1024_max_1024
|
||||
; HSAMD: MaxFlatWorkGroupSize: 1024
|
||||
|
@ -47,8 +47,9 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; SICI: @promote_alloca_size_1600.stack = internal unnamed_addr addrspace(3) global [1600 x [5 x i32]] undef, align 4
|
||||
; GFX10: alloca [5 x i32]
|
||||
; SI-NOT: @promote_alloca_size_1600.stack
|
||||
; CI: @promote_alloca_size_1600.stack = internal unnamed_addr addrspace(3) global [1024 x [5 x i32]] undef, align 4
|
||||
; GFX10: @promote_alloca_size_1600.stack = internal unnamed_addr addrspace(3) global [1024 x [5 x i32]] undef, align 4
|
||||
|
||||
define amdgpu_kernel void @promote_alloca_size_1600(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #2 {
|
||||
entry:
|
||||
@ -274,7 +275,7 @@ entry:
|
||||
|
||||
attributes #0 = { nounwind "amdgpu-flat-work-group-size"="63,63" }
|
||||
attributes #1 = { nounwind "amdgpu-waves-per-eu"="1,3" "amdgpu-flat-work-group-size"="256,256" }
|
||||
attributes #2 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1600,1600" }
|
||||
attributes #2 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1024,1024" }
|
||||
attributes #3 = { nounwind "amdgpu-waves-per-eu"="1,10" }
|
||||
attributes #4 = { nounwind "amdgpu-waves-per-eu"="1,10" }
|
||||
attributes #5 = { nounwind "amdgpu-waves-per-eu"="1,6" "amdgpu-flat-work-group-size"="64,64" }
|
||||
|
Loading…
Reference in New Issue
Block a user