mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 10:42:39 +01:00
Revert r348971: [AMDGPU] Support for "uniform-work-group-size" attribute
This patch breaks RADV (and probably RadeonSI as well) llvm-svn: 349084
This commit is contained in:
parent
c25740dde7
commit
ee2b4d8ed1
@ -46,11 +46,8 @@ namespace {
|
||||
class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
|
||||
private:
|
||||
const TargetMachine *TM = nullptr;
|
||||
SmallVector<CallGraphNode*, 8> NodeList;
|
||||
|
||||
bool addFeatureAttributes(Function &F);
|
||||
bool processUniformWorkGroupAttribute();
|
||||
bool propagateUniformWorkGroupAttribute(Function &Caller, Function &Callee);
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
@ -189,6 +186,7 @@ static bool handleAttr(Function &Parent, const Function &Callee,
|
||||
Parent.addFnAttr(Name);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -215,56 +213,6 @@ static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
|
||||
handleAttr(Parent, Callee, AttrName);
|
||||
}
|
||||
|
||||
bool AMDGPUAnnotateKernelFeatures::processUniformWorkGroupAttribute() {
|
||||
bool Changed = false;
|
||||
|
||||
for (auto *Node : reverse(NodeList)) {
|
||||
Function *Caller = Node->getFunction();
|
||||
|
||||
for (auto I : *Node) {
|
||||
Function *Callee = std::get<1>(I)->getFunction();
|
||||
if (Callee)
|
||||
Changed = propagateUniformWorkGroupAttribute(*Caller, *Callee);
|
||||
}
|
||||
}
|
||||
|
||||
return Changed;
|
||||
}
|
||||
|
||||
/// Update the "uniform-work-group-size" attribute for one call edge.
///
/// Rules, applied in this order:
///  - Callee has no exact definition: Callee is set to "false", and Caller is
///    set to "false" as well if it does not carry the attribute yet.
///  - Caller lacks the attribute: both Caller and Callee are set to "false".
///  - Caller's value is anything other than "true": Callee is set to "false".
///  - Caller's value is "true": Callee receives "true" only if it does not
///    already carry the attribute.
///
/// \returns false only when Caller is "true" and Callee already has the
/// attribute; every other path returns true.
bool AMDGPUAnnotateKernelFeatures::propagateUniformWorkGroupAttribute(
    Function &Caller, Function &Callee) {
  static const char AttrKey[] = "uniform-work-group-size";

  // Callee without an exact definition: force "false" on it, and give the
  // caller "false" too unless it already has a value.
  if (!Callee.hasExactDefinition()) {
    Callee.addFnAttr(AttrKey, "false");
    if (!Caller.hasFnAttribute(AttrKey))
      Caller.addFnAttr(AttrKey, "false");
    return true;
  }

  // Caller carries no value at all: mark both ends of the edge "false".
  if (!Caller.hasFnAttribute(AttrKey)) {
    Caller.addFnAttr(AttrKey, "false");
    Callee.addFnAttr(AttrKey, "false");
    return true;
  }

  const bool CallerIsTrue =
      Caller.getFnAttribute(AttrKey).getValueAsString().equals("true");

  // Caller has the attribute with a value other than "true": the callee is
  // set to "false".
  if (!CallerIsTrue) {
    Callee.addFnAttr(AttrKey, "false");
    return true;
  }

  // Caller is "true": leave a callee that already has the attribute alone.
  if (Callee.hasFnAttribute(AttrKey))
    return false;

  Callee.addFnAttr(AttrKey, "true");
  return true;
}
|
||||
|
||||
bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
|
||||
const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
|
||||
bool HasFlat = ST.hasFlatAddressSpace();
|
||||
@ -345,19 +293,15 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
|
||||
}
|
||||
|
||||
bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
|
||||
bool Changed = false;
|
||||
|
||||
for (CallGraphNode *I : SCC) {
|
||||
// Build a list of CallGraphNodes from most number of uses to least
|
||||
if (I->getNumReferences())
|
||||
NodeList.push_back(I);
|
||||
else
|
||||
processUniformWorkGroupAttribute();
|
||||
Module &M = SCC.getCallGraph().getModule();
|
||||
Triple TT(M.getTargetTriple());
|
||||
|
||||
Function *F = I->getFunction();
|
||||
// Add feature attributes
|
||||
bool Changed = false;
|
||||
for (CallGraphNode *I : SCC) {
|
||||
Function *F = I->getFunction();
|
||||
if (!F || F->isDeclaration())
|
||||
continue;
|
||||
|
||||
Changed |= addFeatureAttributes(*F);
|
||||
}
|
||||
|
||||
|
@ -683,9 +683,6 @@ void AMDGPUPassConfig::addIRPasses() {
|
||||
}
|
||||
|
||||
void AMDGPUPassConfig::addCodeGenPrepare() {
|
||||
if (TM->getTargetTriple().getArch() == Triple::amdgcn)
|
||||
addPass(createAMDGPUAnnotateKernelFeaturesPass());
|
||||
|
||||
if (TM->getTargetTriple().getArch() == Triple::amdgcn &&
|
||||
EnableLowerKernelArguments)
|
||||
addPass(createAMDGPULowerKernelArgumentsPass());
|
||||
@ -773,6 +770,7 @@ bool GCNPassConfig::addPreISel() {
|
||||
|
||||
// FIXME: We need to run a pass to propagate the attributes when calls are
|
||||
// supported.
|
||||
addPass(createAMDGPUAnnotateKernelFeaturesPass());
|
||||
|
||||
// Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
|
||||
// regions formed by them.
|
||||
|
@ -244,52 +244,52 @@ define amdgpu_kernel void @kern_use_implicitarg_ptr() #1 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; HSA: define void @use_implicitarg_ptr() #16 {
|
||||
; HSA: define void @use_implicitarg_ptr() #15 {
|
||||
define void @use_implicitarg_ptr() #1 {
|
||||
%implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
store volatile i8 addrspace(4)* %implicitarg.ptr, i8 addrspace(4)* addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; HSA: define void @func_indirect_use_implicitarg_ptr() #16 {
|
||||
; HSA: define void @func_indirect_use_implicitarg_ptr() #15 {
|
||||
define void @func_indirect_use_implicitarg_ptr() #1 {
|
||||
call void @use_implicitarg_ptr()
|
||||
ret void
|
||||
}
|
||||
|
||||
; HSA: declare void @external.func() #17
|
||||
; HSA: declare void @external.func() #16
|
||||
declare void @external.func() #3
|
||||
|
||||
; HSA: define internal void @defined.func() #17 {
|
||||
; HSA: define internal void @defined.func() #16 {
|
||||
define internal void @defined.func() #3 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; HSA: define void @func_call_external() #17 {
|
||||
; HSA: define void @func_call_external() #16 {
|
||||
define void @func_call_external() #3 {
|
||||
call void @external.func()
|
||||
ret void
|
||||
}
|
||||
|
||||
; HSA: define void @func_call_defined() #17 {
|
||||
; HSA: define void @func_call_defined() #16 {
|
||||
define void @func_call_defined() #3 {
|
||||
call void @defined.func()
|
||||
ret void
|
||||
}
|
||||
|
||||
; HSA: define void @func_call_asm() #18 {
|
||||
; HSA: define void @func_call_asm() #16 {
|
||||
define void @func_call_asm() #3 {
|
||||
call void asm sideeffect "", ""() #3
|
||||
ret void
|
||||
}
|
||||
|
||||
; HSA: define amdgpu_kernel void @kern_call_external() #19 {
|
||||
; HSA: define amdgpu_kernel void @kern_call_external() #17 {
|
||||
define amdgpu_kernel void @kern_call_external() #3 {
|
||||
call void @external.func()
|
||||
ret void
|
||||
}
|
||||
|
||||
; HSA: define amdgpu_kernel void @func_kern_defined() #19 {
|
||||
; HSA: define amdgpu_kernel void @func_kern_defined() #17 {
|
||||
define amdgpu_kernel void @func_kern_defined() #3 {
|
||||
call void @defined.func()
|
||||
ret void
|
||||
@ -301,22 +301,20 @@ attributes #2 = { nounwind "target-cpu"="gfx900" }
|
||||
attributes #3 = { nounwind }
|
||||
|
||||
; HSA: attributes #0 = { nounwind readnone speculatable }
|
||||
; HSA: attributes #1 = { nounwind "amdgpu-work-item-id-x" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; HSA: attributes #2 = { nounwind "amdgpu-work-item-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; HSA: attributes #3 = { nounwind "amdgpu-work-item-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; HSA: attributes #4 = { nounwind "amdgpu-work-group-id-x" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; HSA: attributes #5 = { nounwind "amdgpu-work-group-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; HSA: attributes #6 = { nounwind "amdgpu-work-group-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; HSA: attributes #7 = { nounwind "amdgpu-dispatch-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; HSA: attributes #8 = { nounwind "amdgpu-queue-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; HSA: attributes #9 = { nounwind "amdgpu-dispatch-id" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; HSA: attributes #1 = { nounwind "amdgpu-work-item-id-x" "target-cpu"="fiji" }
|
||||
; HSA: attributes #2 = { nounwind "amdgpu-work-item-id-y" "target-cpu"="fiji" }
|
||||
; HSA: attributes #3 = { nounwind "amdgpu-work-item-id-z" "target-cpu"="fiji" }
|
||||
; HSA: attributes #4 = { nounwind "amdgpu-work-group-id-x" "target-cpu"="fiji" }
|
||||
; HSA: attributes #5 = { nounwind "amdgpu-work-group-id-y" "target-cpu"="fiji" }
|
||||
; HSA: attributes #6 = { nounwind "amdgpu-work-group-id-z" "target-cpu"="fiji" }
|
||||
; HSA: attributes #7 = { nounwind "amdgpu-dispatch-ptr" "target-cpu"="fiji" }
|
||||
; HSA: attributes #8 = { nounwind "amdgpu-queue-ptr" "target-cpu"="fiji" }
|
||||
; HSA: attributes #9 = { nounwind "amdgpu-dispatch-id" "target-cpu"="fiji" }
|
||||
; HSA: attributes #10 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "target-cpu"="fiji" }
|
||||
; HSA: attributes #11 = { nounwind "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; HSA: attributes #12 = { nounwind "target-cpu"="gfx900" "uniform-work-group-size"="false" }
|
||||
; HSA: attributes #13 = { nounwind "amdgpu-queue-ptr" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
|
||||
; HSA: attributes #14 = { nounwind "amdgpu-kernarg-segment-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; HSA: attributes #11 = { nounwind "target-cpu"="fiji" }
|
||||
; HSA: attributes #12 = { nounwind "target-cpu"="gfx900" }
|
||||
; HSA: attributes #13 = { nounwind "amdgpu-queue-ptr" "target-cpu"="gfx900" }
|
||||
; HSA: attributes #14 = { nounwind "amdgpu-kernarg-segment-ptr" "target-cpu"="fiji" }
|
||||
; HSA: attributes #15 = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" }
|
||||
; HSA: attributes #16 = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; HSA: attributes #17 = { nounwind "uniform-work-group-size"="false" }
|
||||
; HSA: attributes #18 = { nounwind }
|
||||
; HSA: attributes #19 = { nounwind "amdgpu-flat-scratch" "uniform-work-group-size"="false" }
|
||||
; HSA: attributes #16 = { nounwind }
|
||||
; HSA: attributes #17 = { nounwind "amdgpu-flat-scratch" }
|
||||
|
@ -1,18 +0,0 @@
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
|
||||
|
||||
; If the kernel does not have the "uniform-work-group-size" attribute, set it to "false" on both the caller and the callee
|
||||
|
||||
; CHECK: define void @foo() #[[FOO:[0-9]+]] {
|
||||
define void @foo() #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: define amdgpu_kernel void @kernel1() #[[FOO]] {
|
||||
define amdgpu_kernel void @kernel1() #1 {
|
||||
call void @foo()
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "uniform-work-group-size"="true" }
|
||||
|
||||
; CHECK: attributes #[[FOO]] = { "uniform-work-group-size"="false" }
|
@ -1,24 +0,0 @@
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
|
||||
|
||||
; Test to verify if the attribute gets propagated across nested function calls
|
||||
|
||||
; CHECK: define void @func1() #[[FUNC:[0-9]+]] {
|
||||
define void @func1() #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: define void @func2() #[[FUNC]] {
|
||||
define void @func2() #1 {
|
||||
call void @func1()
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: define amdgpu_kernel void @kernel3() #[[FUNC:[0-9]+]] {
|
||||
define amdgpu_kernel void @kernel3() #2 {
|
||||
call void @func2()
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #2 = { "uniform-work-group-size"="true" }
|
||||
|
||||
; CHECK: attributes #[[FUNC]] = { "uniform-work-group-size"="true" }
|
@ -1,25 +0,0 @@
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
|
||||
|
||||
; Two kernels with different values of the "uniform-work-group-size" attribute call the same function
|
||||
|
||||
; CHECK: define void @func() #[[FUNC:[0-9]+]] {
|
||||
define void @func() #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: define amdgpu_kernel void @kernel1() #[[KERNEL1:[0-9]+]] {
|
||||
define amdgpu_kernel void @kernel1() #1 {
|
||||
call void @func()
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: define amdgpu_kernel void @kernel2() #[[FUNC]] {
|
||||
define amdgpu_kernel void @kernel2() #2 {
|
||||
call void @func()
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #1 = { "uniform-work-group-size"="true" }
|
||||
|
||||
; CHECK: attributes #[[FUNC]] = { "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[KERNEL1]] = { "uniform-work-group-size"="true" }
|
@ -1,33 +0,0 @@
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
|
||||
|
||||
; Propagate the "uniform-work-group-size" attribute from the kernel to the callee if the callee does not have it
|
||||
; CHECK: define void @func() #[[FUNC:[0-9]+]] {
|
||||
define void @func() #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: define amdgpu_kernel void @kernel1() #[[KERNEL1:[0-9]+]] {
|
||||
define amdgpu_kernel void @kernel1() #1 {
|
||||
call void @func()
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function defined with weak_odr linkage (not an exact definition, so it is expected to be marked "false")
|
||||
; CHECK: define weak_odr void @weak_func() #[[FUNC]] {
|
||||
define weak_odr void @weak_func() #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: define amdgpu_kernel void @kernel2() #[[KERNEL2:[0-9]+]] {
|
||||
define amdgpu_kernel void @kernel2() #2 {
|
||||
call void @weak_func()
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { "uniform-work-group-size"="false" }
|
||||
attributes #2 = { "uniform-work-group-size"="true" }
|
||||
|
||||
; CHECK: attributes #[[FUNC]] = { nounwind "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[KERNEL1]] = { "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[KERNEL2]] = { "uniform-work-group-size"="true" }
|
@ -1,37 +0,0 @@
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
|
||||
|
||||
; Test to ensure recursive functions exhibit proper behaviour
|
||||
; Test to generate fibonacci numbers
|
||||
|
||||
; CHECK: define i32 @fib(i32 %n) #[[FIB:[0-9]+]] {
|
||||
define i32 @fib(i32 %n) #0 {
|
||||
%cmp1 = icmp eq i32 %n, 0
|
||||
br i1 %cmp1, label %exit, label %cont1
|
||||
|
||||
cont1:
|
||||
%cmp2 = icmp eq i32 %n, 1
|
||||
br i1 %cmp2, label %exit, label %cont2
|
||||
|
||||
cont2:
|
||||
%nm1 = sub i32 %n, 1
|
||||
%fibm1 = call i32 @fib(i32 %nm1)
|
||||
%nm2 = sub i32 %n, 2
|
||||
%fibm2 = call i32 @fib(i32 %nm2)
|
||||
%retval = add i32 %fibm1, %fibm2
|
||||
|
||||
ret i32 %retval
|
||||
|
||||
exit:
|
||||
ret i32 1
|
||||
}
|
||||
|
||||
; CHECK: define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #[[FIB]] {
|
||||
define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #1 {
|
||||
%r = call i32 @fib(i32 5)
|
||||
store i32 %r, i32 addrspace(1)* %m
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #1 = { "uniform-work-group-size"="true" }
|
||||
|
||||
; CHECK: attributes #[[FIB]] = { "uniform-work-group-size"="true" }
|
@ -1,35 +0,0 @@
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
|
||||
|
||||
; CHECK: define void @func1() #[[FUNC:[0-9]+]] {
|
||||
define void @func1() #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: define void @func4() #[[FUNC]] {
|
||||
define void @func4() #1 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: define void @func2() #[[FUNC]] {
|
||||
define void @func2() #1 {
|
||||
call void @func4()
|
||||
call void @func1()
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: define void @func3() #[[FUNC]] {
|
||||
define void @func3() #1 {
|
||||
call void @func1()
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: define amdgpu_kernel void @kernel3() #[[FUNC]] {
|
||||
define amdgpu_kernel void @kernel3() #2 {
|
||||
call void @func2()
|
||||
call void @func3()
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #2 = { "uniform-work-group-size"="true" }
|
||||
|
||||
; CHECK: attributes #[[FUNC]] = { "uniform-work-group-size"="true" }
|
Loading…
Reference in New Issue
Block a user