From 413b267e1e98e503ba89fdd62ebd72df77257857 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 7 May 2020 14:58:05 -0400 Subject: [PATCH] Verifier: Disallow byval and similar for AMDGPU calling conventions These imply stack-like semantics, which doesn't make any sense for entry points. --- lib/IR/Verifier.cpp | 11 +++ .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 5 +- .../DivergenceAnalysis/AMDGPU/kernel-args.ll | 14 ++-- .../AMDGPU/kernel-args.ll | 12 ++-- test/Verifier/amdgpu-cc.ll | 72 ++++++++++++++++++- 5 files changed, 94 insertions(+), 20 deletions(-) diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index edd822bfaa6..f638ed7040b 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -2313,6 +2313,17 @@ void Verifier::visitFunction(const Function &F) { case CallingConv::AMDGPU_CS: Assert(!F.hasStructRetAttr(), "Calling convention does not allow sret", &F); + if (F.getCallingConv() != CallingConv::SPIR_KERNEL) { + for (unsigned i = 0, e = F.arg_size(); i != e; ++i) { + Assert(!Attrs.hasParamAttribute(i, Attribute::ByVal), + "Calling convention disallows byval", &F); + Assert(!Attrs.hasParamAttribute(i, Attribute::Preallocated), + "Calling convention disallows preallocated", &F); + Assert(!Attrs.hasParamAttribute(i, Attribute::InAlloca), + "Calling convention disallows inalloca", &F); + } + } + LLVM_FALLTHROUGH; case CallingConv::Fast: case CallingConv::Cold: diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 9ca851c4d74..7998b0cb9f6 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -723,10 +723,9 @@ static bool isArgPassedInSGPR(const Argument *A) { case CallingConv::AMDGPU_GS: case CallingConv::AMDGPU_PS: case CallingConv::AMDGPU_CS: - // For non-compute shaders, SGPR inputs are marked with either inreg or byval. + // For non-compute shaders, SGPR inputs are marked with inreg. 
// Everything else is in VGPRs. - return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) || - F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal); + return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg); default: // TODO: Should calls support inreg for SGPR inputs? return false; diff --git a/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll b/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll index d7e050de1f3..85712e63d5f 100644 --- a/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll +++ b/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll @@ -1,16 +1,14 @@ ; RUN: opt %s -mtriple amdgcn-- -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s ; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'test_amdgpu_ps': -; CHECK: DIVERGENT: -; CHECK-NOT: %arg0 -; CHECK-NOT: %arg1 -; CHECK-NOT: %arg2 -; CHECK: <2 x i32> %arg3 +; CHECK: DIVERGENT: [4 x <16 x i8>] addrspace(4)* %arg0 +; CHECK-NOT: DIVERGENT +; CHECK: DIVERGENT: <2 x i32> %arg3 ; CHECK: DIVERGENT: <3 x i32> %arg4 ; CHECK: DIVERGENT: float %arg5 ; CHECK: DIVERGENT: i32 %arg6 -define amdgpu_ps void @test_amdgpu_ps([4 x <16 x i8>] addrspace(2)* byval %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 { +define amdgpu_ps void @test_amdgpu_ps([4 x <16 x i8>] addrspace(4)* byref([4 x <16 x i8>]) %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 { ret void } @@ -22,7 +20,7 @@ define amdgpu_ps void @test_amdgpu_ps([4 x <16 x i8>] addrspace(2)* byval %arg0, ; CHECK-NOT: %arg4 ; CHECK-NOT: %arg5 ; CHECK-NOT: %arg6 -define amdgpu_kernel void @test_amdgpu_kernel([4 x <16 x i8>] addrspace(2)* byval %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 { +define amdgpu_kernel void @test_amdgpu_kernel([4 x <16 x i8>] addrspace(4)* byref([4 x <16 x i8>]) 
%arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 { ret void } @@ -34,7 +32,7 @@ define amdgpu_kernel void @test_amdgpu_kernel([4 x <16 x i8>] addrspace(2)* byva ; CHECK: DIVERGENT: ; CHECK: DIVERGENT: ; CHECK: DIVERGENT: -define void @test_c([4 x <16 x i8>] addrspace(2)* byval %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 { +define void @test_c([4 x <16 x i8>] addrspace(5)* byval %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 { ret void } diff --git a/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/kernel-args.ll b/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/kernel-args.ll index ddf8e8e1f9b..7276634f225 100644 --- a/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/kernel-args.ll +++ b/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/kernel-args.ll @@ -1,16 +1,14 @@ ; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence | FileCheck %s ; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'test_amdgpu_ps': -; CHECK: DIVERGENT: -; CHECK-NOT: %arg0 -; CHECK-NOT: %arg1 -; CHECK-NOT: %arg2 -; CHECK: <2 x i32> %arg3 +; CHECK: DIVERGENT: [4 x <16 x i8>] addrspace(4)* %arg0 +; CHECK-NOT: DIVERGENT +; CHECK: DIVERGENT: <2 x i32> %arg3 ; CHECK: DIVERGENT: <3 x i32> %arg4 ; CHECK: DIVERGENT: float %arg5 ; CHECK: DIVERGENT: i32 %arg6 -define amdgpu_ps void @test_amdgpu_ps([4 x <16 x i8>] addrspace(4)* byval %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 { +define amdgpu_ps void @test_amdgpu_ps([4 x <16 x i8>] addrspace(4)* byref([4 x <16 x i8>]) %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 { ret void } @@ -22,7 +20,7 @@ define amdgpu_ps void @test_amdgpu_ps([4 x <16 x i8>] addrspace(4)* byval %arg0, ; CHECK-NOT: %arg4 ; CHECK-NOT: %arg5 ; 
CHECK-NOT: %arg6 -define amdgpu_kernel void @test_amdgpu_kernel([4 x <16 x i8>] addrspace(4)* byval %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 { +define amdgpu_kernel void @test_amdgpu_kernel([4 x <16 x i8>] addrspace(4)* byref([4 x <16 x i8>]) %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 { ret void } diff --git a/test/Verifier/amdgpu-cc.ll b/test/Verifier/amdgpu-cc.ll index 68c7f309b6e..25b8d9088ac 100644 --- a/test/Verifier/amdgpu-cc.ll +++ b/test/Verifier/amdgpu-cc.ll @@ -1,5 +1,7 @@ ; RUN: not llvm-as < %s 2>&1 | FileCheck %s +target datalayout = "A5" + ; CHECK: Calling convention requires void return type ; CHECK-NEXT: i32 ()* @nonvoid_cc_amdgpu_kernel define amdgpu_kernel i32 @nonvoid_cc_amdgpu_kernel() { @@ -13,8 +15,14 @@ define amdgpu_kernel void @varargs_amdgpu_kernel(...) { } ; CHECK: Calling convention does not allow sret -; CHECK-NEXT: void (i32*)* @sret_cc_amdgpu_kernel -define amdgpu_kernel void @sret_cc_amdgpu_kernel(i32* sret %ptr) { +; CHECK-NEXT: void (i32*)* @sret_cc_amdgpu_kernel_as0 +define amdgpu_kernel void @sret_cc_amdgpu_kernel_as0(i32* sret %ptr) { + ret void +} + +; CHECK: Calling convention does not allow sret +; CHECK-NEXT: void (i32 addrspace(5)*)* @sret_cc_amdgpu_kernel +define amdgpu_kernel void @sret_cc_amdgpu_kernel(i32 addrspace(5)* sret %ptr) { ret void } @@ -53,3 +61,63 @@ define spir_kernel i32 @nonvoid_cc_spir_kernel() { define spir_kernel void @varargs_spir_kernel(...) 
{ ret void } + +; CHECK: Calling convention disallows byval +; CHECK-NEXT: void (i32 addrspace(5)*)* @byval_cc_amdgpu_kernel +define amdgpu_kernel void @byval_cc_amdgpu_kernel(i32 addrspace(5)* byval %ptr) { + ret void +} + +; CHECK: Calling convention disallows byval +; CHECK-NEXT: void (i32 addrspace(1)*)* @byval_as1_cc_amdgpu_kernel +define amdgpu_kernel void @byval_as1_cc_amdgpu_kernel(i32 addrspace(1)* byval %ptr) { + ret void +} + +; CHECK: Calling convention disallows byval +; CHECK-NEXT: void (i32*)* @byval_as0_cc_amdgpu_kernel +define amdgpu_kernel void @byval_as0_cc_amdgpu_kernel(i32* byval %ptr) { + ret void +} + +; CHECK: Calling convention disallows byval +; CHECK-NEXT: void (i32 addrspace(5)*)* @byval_cc_amdgpu_vs +define amdgpu_vs void @byval_cc_amdgpu_vs(i32 addrspace(5)* byval %ptr) { + ret void +} + +; CHECK: Calling convention disallows byval +; CHECK-NEXT: void (i32 addrspace(5)*)* @byval_cc_amdgpu_hs +define amdgpu_hs void @byval_cc_amdgpu_hs(i32 addrspace(5)* byval %ptr) { + ret void +} + +; CHECK: Calling convention disallows byval +; CHECK-NEXT: void (i32 addrspace(5)*)* @byval_cc_amdgpu_gs +define amdgpu_gs void @byval_cc_amdgpu_gs(i32 addrspace(5)* byval %ptr) { + ret void +} + +; CHECK: Calling convention disallows byval +; CHECK-NEXT: void (i32 addrspace(5)*)* @byval_cc_amdgpu_ps +define amdgpu_ps void @byval_cc_amdgpu_ps(i32 addrspace(5)* byval %ptr) { + ret void +} + +; CHECK: Calling convention disallows byval +; CHECK-NEXT: void (i32 addrspace(5)*)* @byval_cc_amdgpu_cs +define amdgpu_cs void @byval_cc_amdgpu_cs(i32 addrspace(5)* byval %ptr) { + ret void +} + +; CHECK: Calling convention disallows preallocated +; CHECK-NEXT: void (i32*)* @preallocated_as0_cc_amdgpu_kernel +define amdgpu_kernel void @preallocated_as0_cc_amdgpu_kernel(i32* preallocated(i32) %ptr) { + ret void +} + +; CHECK: Calling convention disallows inalloca +; CHECK-NEXT: void (i32*)* @inalloca_as0_cc_amdgpu_kernel +define amdgpu_kernel void 
@inalloca_as0_cc_amdgpu_kernel(i32* inalloca %ptr) { + ret void +}