From 5cfd953988063d9003a503cdd371cecd3f226b94 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 27 Aug 2019 13:03:36 -0400 Subject: [PATCH] AMDGPU Reduce reported maximum group size to 1024 While some targets allow encoding 2048, this was never tested or supported. --- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 3 ++- .../attr-amdgpu-flat-work-group-size-v3.ll | 18 +++++++++--------- .../AMDGPU/attr-amdgpu-flat-work-group-size.ll | 18 +++++++++--------- .../AMDGPU/large-work-group-promote-alloca.ll | 7 ++++--- 4 files changed, 24 insertions(+), 22 deletions(-) diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 7d27738bf6a..c72f93eb739 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -312,7 +312,8 @@ unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) { } unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) { - return 2048; + // Some subtargets allow encoding 2048, but this isn't tested or supported. + return 1024; } unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, diff --git a/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll b/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll index e57ce963e3c..d1191992956 100644 --- a/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll +++ b/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll @@ -34,13 +34,13 @@ entry: } attributes #2 = {"amdgpu-flat-work-group-size"="128,128"} -; CHECK-LABEL: {{^}}min_1024_max_2048 -; CHECK: SGPRBlocks: 1 -; CHECK: VGPRBlocks: 7 -; CHECK: NumSGPRsForWavesPerEU: 12 -; CHECK: NumVGPRsForWavesPerEU: 32 +; CHECK-LABEL: {{^}}min_1024_max_1024 +; CHECK: SGPRBlocks: 0 +; CHECK: VGPRBlocks: 10 +; CHECK: NumSGPRsForWavesPerEU: 2{{$}} +; CHECK: NumVGPRsForWavesPerEU: 43 @var = addrspace(1) global float 0.0 -define amdgpu_kernel void @min_1024_max_2048() #3 { +define amdgpu_kernel void @min_1024_max_1024() #3 { %val0 = load volatile float, float addrspace(1)* @var %val1 = load volatile float, float addrspace(1)* @var %val2 = load volatile float, float addrspace(1)* @var @@ -127,7 +127,7 @@ define amdgpu_kernel void @min_1024_max_2048() #3 { ret void } -attributes #3 = {"amdgpu-flat-work-group-size"="1024,2048"} +attributes #3 = {"amdgpu-flat-work-group-size"="1024,1024"} ; CHECK: amdhsa.kernels: ; CHECK: .max_flat_workgroup_size: 64 @@ -136,8 +136,8 @@ attributes #3 = {"amdgpu-flat-work-group-size"="1024,2048"} ; CHECK: .name: min_64_max_128 ; CHECK: .max_flat_workgroup_size: 128 ; CHECK: .name: min_128_max_128 -; CHECK: .max_flat_workgroup_size: 2048 -; CHECK: .name: min_1024_max_2048 +; CHECK: .max_flat_workgroup_size: 1024 +; CHECK: .name: min_1024_max_1024 ; CHECK: amdhsa.version: ; CHECK: - 1 ; CHECK: - 0 diff --git a/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll b/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll index d51e2d6e938..f372fcb4266 100644 --- a/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll +++ b/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll @@ -34,13 +34,13 @@ entry: } attributes #2 = {"amdgpu-flat-work-group-size"="128,128"} -; CHECK-LABEL: {{^}}min_1024_max_2048 -; CHECK: SGPRBlocks: 1 -; CHECK: VGPRBlocks: 7 -; CHECK: NumSGPRsForWavesPerEU: 12 -; CHECK: NumVGPRsForWavesPerEU: 32 +; CHECK-LABEL: {{^}}min_1024_max_1024 +; CHECK: SGPRBlocks: 0 +; CHECK: VGPRBlocks: 10 +; CHECK: NumSGPRsForWavesPerEU: 2{{$}} +; CHECK: NumVGPRsForWavesPerEU: 43 @var = addrspace(1) global float 0.0 -define amdgpu_kernel void @min_1024_max_2048() #3 { +define amdgpu_kernel void @min_1024_max_1024() #3 { %val0 = load volatile float, float addrspace(1)* @var %val1 = load volatile float, float addrspace(1)* @var %val2 = load volatile float, float addrspace(1)* @var @@ -127,7 +127,7 @@ define amdgpu_kernel void @min_1024_max_2048() #3 { ret void } -attributes #3 = {"amdgpu-flat-work-group-size"="1024,2048"} +attributes #3 = {"amdgpu-flat-work-group-size"="1024,1024"} ; HSAMD: NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata) ; HSAMD: Version: [ 1, 0 ] @@ -138,5 +138,5 @@ attributes #3 = {"amdgpu-flat-work-group-size"="1024,2048"} ; HSAMD: MaxFlatWorkGroupSize: 128 ; HSAMD: - Name: min_128_max_128 ; HSAMD: MaxFlatWorkGroupSize: 128 -; HSAMD: - Name: min_1024_max_2048 -; HSAMD: MaxFlatWorkGroupSize: 2048 +; HSAMD: - Name: min_1024_max_1024 +; HSAMD: MaxFlatWorkGroupSize: 1024 diff --git a/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll b/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll index 0702f4091c2..778e9ed9a8f 100644 --- a/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll +++ b/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll @@ -47,8 +47,9 @@ entry: ret void } -; SICI: @promote_alloca_size_1600.stack = internal unnamed_addr addrspace(3) global [1600 x [5 x i32]] undef, align 4 -; GFX10: alloca [5 x i32] +; SI-NOT: @promote_alloca_size_1600.stack +; CI: @promote_alloca_size_1600.stack = internal unnamed_addr addrspace(3) global [1024 x [5 x i32]] undef, align 4 +; GFX10: @promote_alloca_size_1600.stack = internal unnamed_addr addrspace(3) global [1024 x [5 x i32]] undef, align 4 define amdgpu_kernel void @promote_alloca_size_1600(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #2 { entry: @@ -274,7 +275,7 @@ entry: attributes #0 = { nounwind "amdgpu-flat-work-group-size"="63,63" } attributes #1 = { nounwind "amdgpu-waves-per-eu"="1,3" "amdgpu-flat-work-group-size"="256,256" } -attributes #2 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1600,1600" } +attributes #2 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1024,1024" } attributes #3 = { nounwind "amdgpu-waves-per-eu"="1,10" } attributes #4 = { nounwind "amdgpu-waves-per-eu"="1,10" } attributes #5 = { nounwind "amdgpu-waves-per-eu"="1,6" "amdgpu-flat-work-group-size"="64,64" }