mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
AMDGPU: Fix AMDGPUUnifyDivergentExitNodes with no normal returns
Summary: In a few places the code assumed that if the function had only one exit, that exit was a normal return. This assumption is invalid: the single exit could be an infinite loop, in which case we still need to insert the usual fake edge so that the null export happens. This fixes shaders that end with an infinite loop that discards.
Reviewers: arsenm, nhaehnle, critson
Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D71192
This commit is contained in:
parent
b3f994414f
commit
f758df6c01
@ -195,8 +195,6 @@ static BasicBlock *unifyReturnBlockSet(Function &F,
|
||||
|
||||
bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
|
||||
auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
|
||||
if (PDT.getRoots().size() <= 1)
|
||||
return false;
|
||||
|
||||
LegacyDivergenceAnalysis &DA = getAnalysis<LegacyDivergenceAnalysis>();
|
||||
|
||||
@ -321,7 +319,7 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
|
||||
if (ReturningBlocks.empty())
|
||||
return false; // No blocks return
|
||||
|
||||
if (ReturningBlocks.size() == 1)
|
||||
if (ReturningBlocks.size() == 1 && !InsertExport)
|
||||
return false; // Already has a single return block
|
||||
|
||||
const TargetTransformInfo &TTI
|
||||
|
@ -45,6 +45,22 @@ end:
|
||||
ret void
|
||||
}
|
||||
|
||||
; test the case where there's only a kill in an infinite loop
|
||||
; CHECK-LABEL: only_kill
|
||||
; CHECK: exp null off, off, off, off done vm
|
||||
; CHECK-NEXT: s_endpgm
|
||||
; SIInsertSkips inserts an extra null export here, but it should be harmless.
|
||||
; CHECK: exp null off, off, off, off done vm
|
||||
; CHECK-NEXT: s_endpgm
|
||||
define amdgpu_ps void @only_kill() #0 {
|
||||
main_body:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
call void @llvm.amdgcn.kill(i1 false) #3
|
||||
br label %loop
|
||||
}
|
||||
|
||||
; In case there's an epilog, we shouldn't have to do this.
|
||||
; CHECK-LABEL: return_nonvoid
|
||||
; CHECK-NOT: exp null off, off, off, off done vm
|
||||
|
Loading…
Reference in New Issue
Block a user