1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-18 18:42:46 +02:00

[Loop Peeling] Fix idom detection algorithm.

We'd like to determine the idom of the exit block after peeling one iteration.
Let Exit be the exit block.
Let ExitingSet be the set of predecessors of the Exit block. They are exiting blocks.
Let Latch' and ExitingSet' be the copies after peeling.
We'd like to find idom'(Exit) - the idom of Exit after peeling.
It is evident that idom'(Exit) will be the nearest common dominator of ExitingSet and ExitingSet'.
idom(Exit) is the nearest common dominator of ExitingSet.
idom(Exit)' is the nearest common dominator of ExitingSet'.
Taking into account that we have a single Latch, Latch' will dominate Header and idom(Exit).
So idom'(Exit) is the nearest common dominator of idom(Exit)' and Latch'.
All these basic blocks are in the same loop, so what we find is
(nearest common dominator of idom(Exit) and Latch)'.

Reviewers: reames, fhahn
Reviewed By: reames
Subscribers: hiraditya, zzheng, llvm-commits
Differential Revision: https://reviews.llvm.org/D65292

llvm-svn: 367044
This commit is contained in:
Serguei Katkov 2019-07-25 19:31:50 +00:00
parent 4bd434d1b0
commit 2f8bbfaa0b
2 changed files with 66 additions and 1 deletions

View File

@ -575,11 +575,30 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
DenseMap<BasicBlock *, BasicBlock *> ExitIDom;
if (DT) {
// We'd like to determine the idom of the exit block after peeling one
// iteration.
// Let Exit be the exit block.
// Let ExitingSet be the set of predecessors of the Exit block. They are
// exiting blocks.
// Let Latch' and ExitingSet' be the copies after peeling.
// We'd like to find idom'(Exit) - the idom of Exit after peeling.
// It is evident that idom'(Exit) will be the nearest common dominator
// of ExitingSet and ExitingSet'.
// idom(Exit) is the nearest common dominator of ExitingSet.
// idom(Exit)' is the nearest common dominator of ExitingSet'.
// Taking into account that we have a single Latch, Latch' will dominate
// Header and idom(Exit).
// So idom'(Exit) is the nearest common dominator of idom(Exit)' and Latch'.
// All these basic blocks are in the same loop, so what we find is
// (nearest common dominator of idom(Exit) and Latch)'.
// In the loop below we remember the nearest common dominator of idom(Exit)
// and Latch to update the idom of Exit later.
assert(L->hasDedicatedExits() && "No dedicated exits?");
for (auto Edge : ExitEdges) {
if (ExitIDom.count(Edge.second))
continue;
BasicBlock *BB = DT->getNode(Edge.second)->getIDom()->getBlock();
BasicBlock *BB = DT->findNearestCommonDominator(
DT->getNode(Edge.second)->getIDom()->getBlock(), Latch);
assert(L->contains(BB) && "IDom is not in a loop");
ExitIDom[Edge.second] = BB;
}

View File

@ -0,0 +1,46 @@
; REQUIRES: asserts
; RUN: opt < %s -S -debug-only=loop-unroll -loop-unroll -unroll-runtime -unroll-peel-multi-deopt-exit 2>&1 | FileCheck %s
; RUN: opt < %s -S -debug-only=loop-unroll -unroll-peel-multi-deopt-exit -passes='require<profile-summary>,function(require<opt-remark-emit>,unroll)' 2>&1 | FileCheck %s
; Regression test for setting the correct idom for exit blocks.
; CHECK: Loop Unroll: F[basic]
; CHECK: PEELING loop %for.body with iteration count 1!
; Single-loop function for the exit-block idom regression test. The loop
; headed by %for.body has two exits: the latch exit to %for.end and a side
; exit from %right to %side_exit, which ends in a deoptimize call.
; %c2 is not referenced anywhere in the body.
define i32 @basic(i32* %p, i32 %k, i1 %c1, i1 %c2) #0 !prof !3 {
entry:
br label %for.body
; Loop header: stores the current counter through the advancing pointer,
; computes the next counter and the latch condition, then splits on %c1.
for.body:
%i.05 = phi i32 [ 0, %entry ], [ %inc, %latch ]
%p.addr.04 = phi i32* [ %p, %entry ], [ %incdec.ptr, %latch ]
%incdec.ptr = getelementptr inbounds i32, i32* %p.addr.04, i32 1
store i32 %i.05, i32* %p.addr.04, align 4
%inc = add nsw i32 %i.05, 1
%cmp = icmp slt i32 %inc, %k
br i1 %c1, label %left, label %right
; %left always continues to the latch.
left:
br label %latch
; %right is the only predecessor of %side_exit; it also feeds the latch.
right:
br i1 %c1, label %latch, label %side_exit, !prof !2
; Single latch, reached from both %left and %right; it either takes the
; back edge to %for.body or leaves the loop through %for.end.
latch:
br i1 %cmp, label %for.body, label %for.end, !prof !1
; Exit block reached from the latch.
for.end:
ret i32 %inc
; Side exit block: returns the result of the deoptimize intrinsic, with %inc
; passed in the "deopt" operand bundle.
side_exit:
%rval = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %inc) ]
ret i32 %rval
}
; Intrinsic called from the %side_exit block of @basic.
declare i32 @llvm.experimental.deoptimize.i32(...)
; Attribute group referenced as #0 on @basic.
attributes #0 = { nounwind }
; !1: branch weights (1, 1) attached to the conditional branch in %latch.
!1 = !{!"branch_weights", i32 1, i32 1}
; !2: branch weights (1, 0) attached to the conditional branch in %right.
!2 = !{!"branch_weights", i32 1, i32 0}
; !3: function entry count of 1, attached to @basic via !prof.
!3 = !{!"function_entry_count", i64 1}