mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-19 02:52:53 +02:00
[SDA] Bug fix: Use IPD outside the loop as divergence bound
Summary: The immediate post-dominator of the loop header may itself be part of the divergent loop. Since this /was/ the divergence propagation bound, the SDA would not detect joins of divergent paths outside the loop. The bound is now the nearest post-dominator of the loop header that lies outside the loop. Reviewers: nhaehnle Reviewed By: nhaehnle Subscribers: mmasten, arsenm, jvesely, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59042 llvm-svn: 358681
This commit is contained in:
parent
dff6412af4
commit
b3249a7b08
@ -218,14 +218,9 @@ struct DivergencePropagator {
|
|||||||
template <typename SuccessorIterable>
|
template <typename SuccessorIterable>
|
||||||
std::unique_ptr<ConstBlockSet>
|
std::unique_ptr<ConstBlockSet>
|
||||||
computeJoinPoints(const BasicBlock &RootBlock,
|
computeJoinPoints(const BasicBlock &RootBlock,
|
||||||
SuccessorIterable NodeSuccessors, const Loop *ParentLoop) {
|
SuccessorIterable NodeSuccessors, const Loop *ParentLoop, const BasicBlock * PdBoundBlock) {
|
||||||
assert(JoinBlocks);
|
assert(JoinBlocks);
|
||||||
|
|
||||||
// immediate post dominator (no join block beyond that block)
|
|
||||||
const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(&RootBlock));
|
|
||||||
const auto *IpdNode = PdNode->getIDom();
|
|
||||||
const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
|
|
||||||
|
|
||||||
// bootstrap with branch targets
|
// bootstrap with branch targets
|
||||||
for (const auto *SuccBlock : NodeSuccessors) {
|
for (const auto *SuccBlock : NodeSuccessors) {
|
||||||
DefMap.emplace(SuccBlock, SuccBlock);
|
DefMap.emplace(SuccBlock, SuccBlock);
|
||||||
@ -340,13 +335,23 @@ const ConstBlockSet &SyncDependenceAnalysis::join_blocks(const Loop &Loop) {
|
|||||||
|
|
||||||
// already available in cache?
|
// already available in cache?
|
||||||
auto ItCached = CachedLoopExitJoins.find(&Loop);
|
auto ItCached = CachedLoopExitJoins.find(&Loop);
|
||||||
if (ItCached != CachedLoopExitJoins.end())
|
if (ItCached != CachedLoopExitJoins.end()) {
|
||||||
return *ItCached->second;
|
return *ItCached->second;
|
||||||
|
}
|
||||||
|
|
||||||
|
// don't propagate beyond the nearest post-dominator of the loop header that lies outside the loop
|
||||||
|
const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(Loop.getHeader()));
|
||||||
|
const auto *IpdNode = PdNode->getIDom();
|
||||||
|
const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
|
||||||
|
while (PdBoundBlock && Loop.contains(PdBoundBlock)) {
|
||||||
|
IpdNode = IpdNode->getIDom();
|
||||||
|
PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
// compute all join points
|
// compute all join points
|
||||||
DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
|
DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
|
||||||
auto JoinBlocks = Propagator.computeJoinPoints<const LoopExitVec &>(
|
auto JoinBlocks = Propagator.computeJoinPoints<const LoopExitVec &>(
|
||||||
*Loop.getHeader(), LoopExits, Loop.getParentLoop());
|
*Loop.getHeader(), LoopExits, Loop.getParentLoop(), PdBoundBlock);
|
||||||
|
|
||||||
auto ItInserted = CachedLoopExitJoins.emplace(&Loop, std::move(JoinBlocks));
|
auto ItInserted = CachedLoopExitJoins.emplace(&Loop, std::move(JoinBlocks));
|
||||||
assert(ItInserted.second);
|
assert(ItInserted.second);
|
||||||
@ -365,11 +370,16 @@ SyncDependenceAnalysis::join_blocks(const Instruction &Term) {
|
|||||||
if (ItCached != CachedBranchJoins.end())
|
if (ItCached != CachedBranchJoins.end())
|
||||||
return *ItCached->second;
|
return *ItCached->second;
|
||||||
|
|
||||||
|
// don't propagate beyond the immediate post-dominator of the branch
|
||||||
|
const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(Term.getParent()));
|
||||||
|
const auto *IpdNode = PdNode->getIDom();
|
||||||
|
const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
|
||||||
|
|
||||||
// compute all join points
|
// compute all join points
|
||||||
DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
|
DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
|
||||||
const auto &TermBlock = *Term.getParent();
|
const auto &TermBlock = *Term.getParent();
|
||||||
auto JoinBlocks = Propagator.computeJoinPoints<succ_const_range>(
|
auto JoinBlocks = Propagator.computeJoinPoints<succ_const_range>(
|
||||||
TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock));
|
TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock), PdBoundBlock);
|
||||||
|
|
||||||
auto ItInserted = CachedBranchJoins.emplace(&Term, std::move(JoinBlocks));
|
auto ItInserted = CachedBranchJoins.emplace(&Term, std::move(JoinBlocks));
|
||||||
assert(ItInserted.second);
|
assert(ItInserted.second);
|
||||||
|
@ -21,6 +21,43 @@ merge:
|
|||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define amdgpu_kernel void @hidden_loop_ipd(i32 %n, i32 %a, i32 %b) #0 {
|
||||||
|
; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_loop_ipd'
|
||||||
|
entry:
|
||||||
|
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||||
|
%cond.var = icmp slt i32 %tid, 0
|
||||||
|
; CHECK: DIVERGENT: %cond.var = icmp
|
||||||
|
%cond.uni = icmp slt i32 %n, 0
|
||||||
|
; CHECK-NOT: DIVERGENT: %cond.uni = icmp
|
||||||
|
br label %for.header
|
||||||
|
for.header:
|
||||||
|
br i1 %cond.var, label %A, label %B
|
||||||
|
A:
|
||||||
|
br label %C
|
||||||
|
B:
|
||||||
|
br label %C
|
||||||
|
C:
|
||||||
|
br i1 %cond.uni, label %E, label %D
|
||||||
|
D:
|
||||||
|
br i1 %cond.var, label %for.header, label %F
|
||||||
|
|
||||||
|
E:
|
||||||
|
%e.lcssa.uni = phi i32 [ 0, %C ]
|
||||||
|
; CHECK-NOT: DIVERGENT: %e.lcssa.uni = phi i32
|
||||||
|
br label %G
|
||||||
|
|
||||||
|
F:
|
||||||
|
%f.lcssa.uni = phi i32 [ 1, %D ]
|
||||||
|
; CHECK-NOT: DIVERGENT: %f.lcssa.uni = phi i32
|
||||||
|
br label %G
|
||||||
|
|
||||||
|
G:
|
||||||
|
%g.join.var = phi i32 [ %e.lcssa.uni, %E ], [ %f.lcssa.uni, %F ]
|
||||||
|
; CHECK: DIVERGENT: %g.join.var = phi i32
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||||
|
|
||||||
attributes #0 = { nounwind readnone }
|
attributes #0 = { nounwind readnone }
|
||||||
|
Loading…
Reference in New Issue
Block a user