
[AMDGPU] Support for "uniform-work-group-size" attribute

Updated the annotate-kernel-features pass to support propagation of the uniform-work-group-size attribute from a kernel to the functions it calls. Once this pass is run, all kernels, even those that initially did not have the attribute, will indicate whether or not they have a uniform work-group size through the value of the attribute.

Differential Revision: https://reviews.llvm.org/D50200

llvm-svn: 348971
Aakanksha Patil 2018-12-12 20:49:17 +00:00
parent 93b0314296
commit 57ff848c4d
9 changed files with 263 additions and 31 deletions
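To illustrate the effect of the pass, here is a minimal, hypothetical IR sketch (function names invented for this illustration; they do not appear in the patch): a kernel that carries "uniform-work-group-size"="true" propagates the attribute to the function it calls, while a kernel without the attribute would get "false" on itself and its callees.

; Hypothetical input, before -amdgpu-annotate-kernel-features:
define void @callee() {
  ret void
}
define amdgpu_kernel void @kern() #0 {
  call void @callee()
  ret void
}
attributes #0 = { "uniform-work-group-size"="true" }

; Expected result after the pass: @callee picks up the kernel's value,
; i.e. it ends up in an attribute group containing
; "uniform-work-group-size"="true", matching the tests added below.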


@ -46,8 +46,11 @@ namespace {
class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
private:
const TargetMachine *TM = nullptr;
SmallVector<CallGraphNode*, 8> NodeList;
bool addFeatureAttributes(Function &F);
bool processUniformWorkGroupAttribute();
bool propagateUniformWorkGroupAttribute(Function &Caller, Function &Callee);
public:
static char ID;
@ -186,7 +189,6 @@ static bool handleAttr(Function &Parent, const Function &Callee,
Parent.addFnAttr(Name);
return true;
}
return false;
}
@ -213,6 +215,56 @@ static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
handleAttr(Parent, Callee, AttrName);
}
bool AMDGPUAnnotateKernelFeatures::processUniformWorkGroupAttribute() {
bool Changed = false;
for (auto *Node : reverse(NodeList)) {
Function *Caller = Node->getFunction();
for (auto I : *Node) {
Function *Callee = std::get<1>(I)->getFunction();
if (Callee)
Changed = propagateUniformWorkGroupAttribute(*Caller, *Callee);
}
}
return Changed;
}
bool AMDGPUAnnotateKernelFeatures::propagateUniformWorkGroupAttribute(
Function &Caller, Function &Callee) {
// Check for externally defined function
if (!Callee.hasExactDefinition()) {
Callee.addFnAttr("uniform-work-group-size", "false");
if (!Caller.hasFnAttribute("uniform-work-group-size"))
Caller.addFnAttr("uniform-work-group-size", "false");
return true;
}
// Check if the Caller has the attribute
if (Caller.hasFnAttribute("uniform-work-group-size")) {
// Check if the value of the attribute is true
if (Caller.getFnAttribute("uniform-work-group-size")
.getValueAsString().equals("true")) {
// Propagate the attribute to the Callee, if it does not have it
if (!Callee.hasFnAttribute("uniform-work-group-size")) {
Callee.addFnAttr("uniform-work-group-size", "true");
return true;
}
} else {
Callee.addFnAttr("uniform-work-group-size", "false");
return true;
}
} else {
// If the attribute is absent, set it as false
Caller.addFnAttr("uniform-work-group-size", "false");
Callee.addFnAttr("uniform-work-group-size", "false");
return true;
}
return false;
}
bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
bool HasFlat = ST.hasFlatAddressSpace();
@ -293,15 +345,19 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
}
bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
Module &M = SCC.getCallGraph().getModule();
Triple TT(M.getTargetTriple());
bool Changed = false;
for (CallGraphNode *I : SCC) {
Function *F = I->getFunction();
// Build a list of CallGraphNodes, from the most uses to the least
if (I->getNumReferences())
NodeList.push_back(I);
else
processUniformWorkGroupAttribute();
Function *F = I->getFunction();
// Add feature attributes
if (!F || F->isDeclaration())
continue;
Changed |= addFeatureAttributes(*F);
}


@ -683,6 +683,9 @@ void AMDGPUPassConfig::addIRPasses() {
}
void AMDGPUPassConfig::addCodeGenPrepare() {
if (TM->getTargetTriple().getArch() == Triple::amdgcn)
addPass(createAMDGPUAnnotateKernelFeaturesPass());
if (TM->getTargetTriple().getArch() == Triple::amdgcn &&
EnableLowerKernelArguments)
addPass(createAMDGPULowerKernelArgumentsPass());
@ -770,7 +773,6 @@ bool GCNPassConfig::addPreISel() {
// FIXME: We need to run a pass to propagate the attributes when calls are
// supported.
addPass(createAMDGPUAnnotateKernelFeaturesPass());
// Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
// regions formed by them.


@ -244,52 +244,52 @@ define amdgpu_kernel void @kern_use_implicitarg_ptr() #1 {
ret void
}
; HSA: define void @use_implicitarg_ptr() #15 {
; HSA: define void @use_implicitarg_ptr() #16 {
define void @use_implicitarg_ptr() #1 {
%implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
store volatile i8 addrspace(4)* %implicitarg.ptr, i8 addrspace(4)* addrspace(1)* undef
ret void
}
; HSA: define void @func_indirect_use_implicitarg_ptr() #15 {
; HSA: define void @func_indirect_use_implicitarg_ptr() #16 {
define void @func_indirect_use_implicitarg_ptr() #1 {
call void @use_implicitarg_ptr()
ret void
}
; HSA: declare void @external.func() #16
; HSA: declare void @external.func() #17
declare void @external.func() #3
; HSA: define internal void @defined.func() #16 {
; HSA: define internal void @defined.func() #17 {
define internal void @defined.func() #3 {
ret void
}
; HSA: define void @func_call_external() #16 {
; HSA: define void @func_call_external() #17 {
define void @func_call_external() #3 {
call void @external.func()
ret void
}
; HSA: define void @func_call_defined() #16 {
; HSA: define void @func_call_defined() #17 {
define void @func_call_defined() #3 {
call void @defined.func()
ret void
}
; HSA: define void @func_call_asm() #16 {
; HSA: define void @func_call_asm() #18 {
define void @func_call_asm() #3 {
call void asm sideeffect "", ""() #3
ret void
}
; HSA: define amdgpu_kernel void @kern_call_external() #17 {
; HSA: define amdgpu_kernel void @kern_call_external() #19 {
define amdgpu_kernel void @kern_call_external() #3 {
call void @external.func()
ret void
}
; HSA: define amdgpu_kernel void @func_kern_defined() #17 {
; HSA: define amdgpu_kernel void @func_kern_defined() #19 {
define amdgpu_kernel void @func_kern_defined() #3 {
call void @defined.func()
ret void
@ -301,20 +301,22 @@ attributes #2 = { nounwind "target-cpu"="gfx900" }
attributes #3 = { nounwind }
; HSA: attributes #0 = { nounwind readnone speculatable }
; HSA: attributes #1 = { nounwind "amdgpu-work-item-id-x" "target-cpu"="fiji" }
; HSA: attributes #2 = { nounwind "amdgpu-work-item-id-y" "target-cpu"="fiji" }
; HSA: attributes #3 = { nounwind "amdgpu-work-item-id-z" "target-cpu"="fiji" }
; HSA: attributes #4 = { nounwind "amdgpu-work-group-id-x" "target-cpu"="fiji" }
; HSA: attributes #5 = { nounwind "amdgpu-work-group-id-y" "target-cpu"="fiji" }
; HSA: attributes #6 = { nounwind "amdgpu-work-group-id-z" "target-cpu"="fiji" }
; HSA: attributes #7 = { nounwind "amdgpu-dispatch-ptr" "target-cpu"="fiji" }
; HSA: attributes #8 = { nounwind "amdgpu-queue-ptr" "target-cpu"="fiji" }
; HSA: attributes #9 = { nounwind "amdgpu-dispatch-id" "target-cpu"="fiji" }
; HSA: attributes #1 = { nounwind "amdgpu-work-item-id-x" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; HSA: attributes #2 = { nounwind "amdgpu-work-item-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; HSA: attributes #3 = { nounwind "amdgpu-work-item-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; HSA: attributes #4 = { nounwind "amdgpu-work-group-id-x" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; HSA: attributes #5 = { nounwind "amdgpu-work-group-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; HSA: attributes #6 = { nounwind "amdgpu-work-group-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; HSA: attributes #7 = { nounwind "amdgpu-dispatch-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; HSA: attributes #8 = { nounwind "amdgpu-queue-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; HSA: attributes #9 = { nounwind "amdgpu-dispatch-id" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; HSA: attributes #10 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "target-cpu"="fiji" }
; HSA: attributes #11 = { nounwind "target-cpu"="fiji" }
; HSA: attributes #12 = { nounwind "target-cpu"="gfx900" }
; HSA: attributes #13 = { nounwind "amdgpu-queue-ptr" "target-cpu"="gfx900" }
; HSA: attributes #14 = { nounwind "amdgpu-kernarg-segment-ptr" "target-cpu"="fiji" }
; HSA: attributes #11 = { nounwind "target-cpu"="fiji" "uniform-work-group-size"="false" }
; HSA: attributes #12 = { nounwind "target-cpu"="gfx900" "uniform-work-group-size"="false" }
; HSA: attributes #13 = { nounwind "amdgpu-queue-ptr" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
; HSA: attributes #14 = { nounwind "amdgpu-kernarg-segment-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; HSA: attributes #15 = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" }
; HSA: attributes #16 = { nounwind }
; HSA: attributes #17 = { nounwind "amdgpu-flat-scratch" }
; HSA: attributes #16 = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; HSA: attributes #17 = { nounwind "uniform-work-group-size"="false" }
; HSA: attributes #18 = { nounwind }
; HSA: attributes #19 = { nounwind "amdgpu-flat-scratch" "uniform-work-group-size"="false" }


@ -0,0 +1,18 @@
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
; If the kernel does not have the uniform-work-group-size attribute, set it to false on both the caller and the callee
; CHECK: define void @foo() #[[FOO:[0-9]+]] {
define void @foo() #0 {
ret void
}
; CHECK: define amdgpu_kernel void @kernel1() #[[FOO]] {
define amdgpu_kernel void @kernel1() #1 {
call void @foo()
ret void
}
attributes #0 = { "uniform-work-group-size"="true" }
; CHECK: attributes #[[FOO]] = { "uniform-work-group-size"="false" }


@ -0,0 +1,24 @@
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
; Test to verify if the attribute gets propagated across nested function calls
; CHECK: define void @func1() #[[FUNC:[0-9]+]] {
define void @func1() #0 {
ret void
}
; CHECK: define void @func2() #[[FUNC]] {
define void @func2() #1 {
call void @func1()
ret void
}
; CHECK: define amdgpu_kernel void @kernel3() #[[FUNC:[0-9]+]] {
define amdgpu_kernel void @kernel3() #2 {
call void @func2()
ret void
}
attributes #2 = { "uniform-work-group-size"="true" }
; CHECK: attributes #[[FUNC]] = { "uniform-work-group-size"="true" }


@ -0,0 +1,25 @@
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
; Two kernels with different values of the uniform-work-group-size attribute call the same function
; CHECK: define void @func() #[[FUNC:[0-9]+]] {
define void @func() #0 {
ret void
}
; CHECK: define amdgpu_kernel void @kernel1() #[[KERNEL1:[0-9]+]] {
define amdgpu_kernel void @kernel1() #1 {
call void @func()
ret void
}
; CHECK: define amdgpu_kernel void @kernel2() #[[FUNC]] {
define amdgpu_kernel void @kernel2() #2 {
call void @func()
ret void
}
attributes #1 = { "uniform-work-group-size"="true" }
; CHECK: attributes #[[FUNC]] = { "uniform-work-group-size"="false" }
; CHECK: attributes #[[KERNEL1]] = { "uniform-work-group-size"="true" }


@ -0,0 +1,33 @@
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
; Propagate the uniform-work-group-size attribute from the kernel to the callee if the callee does not have it
; CHECK: define void @func() #[[FUNC:[0-9]+]] {
define void @func() #0 {
ret void
}
; CHECK: define amdgpu_kernel void @kernel1() #[[KERNEL1:[0-9]+]] {
define amdgpu_kernel void @kernel1() #1 {
call void @func()
ret void
}
; A weak_odr function is not an exact definition, so it is treated like an external declaration
; CHECK: define weak_odr void @weak_func() #[[FUNC]] {
define weak_odr void @weak_func() #0 {
ret void
}
; CHECK: define amdgpu_kernel void @kernel2() #[[KERNEL2:[0-9]+]] {
define amdgpu_kernel void @kernel2() #2 {
call void @weak_func()
ret void
}
attributes #0 = { nounwind }
attributes #1 = { "uniform-work-group-size"="false" }
attributes #2 = { "uniform-work-group-size"="true" }
; CHECK: attributes #[[FUNC]] = { nounwind "uniform-work-group-size"="false" }
; CHECK: attributes #[[KERNEL1]] = { "uniform-work-group-size"="false" }
; CHECK: attributes #[[KERNEL2]] = { "uniform-work-group-size"="true" }


@ -0,0 +1,37 @@
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
; Test to ensure recursive functions exhibit proper behaviour
; Test to generate fibonacci numbers
; CHECK: define i32 @fib(i32 %n) #[[FIB:[0-9]+]] {
define i32 @fib(i32 %n) #0 {
%cmp1 = icmp eq i32 %n, 0
br i1 %cmp1, label %exit, label %cont1
cont1:
%cmp2 = icmp eq i32 %n, 1
br i1 %cmp2, label %exit, label %cont2
cont2:
%nm1 = sub i32 %n, 1
%fibm1 = call i32 @fib(i32 %nm1)
%nm2 = sub i32 %n, 2
%fibm2 = call i32 @fib(i32 %nm2)
%retval = add i32 %fibm1, %fibm2
ret i32 %retval
exit:
ret i32 1
}
; CHECK: define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #[[FIB]] {
define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #1 {
%r = call i32 @fib(i32 5)
store i32 %r, i32 addrspace(1)* %m
ret void
}
attributes #1 = { "uniform-work-group-size"="true" }
; CHECK: attributes #[[FIB]] = { "uniform-work-group-size"="true" }


@ -0,0 +1,35 @@
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
; CHECK: define void @func1() #[[FUNC:[0-9]+]] {
define void @func1() #0 {
ret void
}
; CHECK: define void @func4() #[[FUNC]] {
define void @func4() #1 {
ret void
}
; CHECK: define void @func2() #[[FUNC]] {
define void @func2() #1 {
call void @func4()
call void @func1()
ret void
}
; CHECK: define void @func3() #[[FUNC]] {
define void @func3() #1 {
call void @func1()
ret void
}
; CHECK: define amdgpu_kernel void @kernel3() #[[FUNC]] {
define amdgpu_kernel void @kernel3() #2 {
call void @func2()
call void @func3()
ret void
}
attributes #2 = { "uniform-work-group-size"="true" }
; CHECK: attributes #[[FUNC]] = { "uniform-work-group-size"="true" }