1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

[LoopFusion] Move instructions from FC1.GuardBlock to FC0.GuardBlock and

from FC0.ExitBlock to FC1.ExitBlock when proven safe.

Summary:
Currently LoopFusion give up when the second loop nest guard
block or the first loop nest exit block is not empty. For example:

if (0 < N) {
  for (int i = 0; i < N; ++i) {}
  x+=1;
}
y+=1;
if (0 < N) {
  for (int i = 0; i < N; ++i) {}
}
The above example should be safe to fuse.
This PR moves instructions in FC1 guard block (e.g. y+=1;) to
FC0 guard block, or instructions in FC0 exit block (e.g. x+=1;) to
FC1 exit block, which then LoopFusion is able to fuse them.
Reviewer: kbarton, jdoerfert, Meinersbur, dmgreen, fhahn, hfinkel,
bmahjour, etiotto
Reviewed By: jdoerfert
Subscribers: hiraditya, llvm-commits
Tag: LLVM
Differential Revision: https://reviews.llvm.org/D73641
This commit is contained in:
Whitney Tsang 2020-01-30 03:57:50 +00:00
parent 402abf7a14
commit 7242e653d2
3 changed files with 241 additions and 52 deletions

View File

@ -91,8 +91,10 @@ STATISTIC(
"Loop has a non-empty preheader with instructions that cannot be moved");
STATISTIC(FusionNotBeneficial, "Fusion is not beneficial");
STATISTIC(NonIdenticalGuards, "Candidates have different guards");
STATISTIC(NonEmptyExitBlock, "Candidate has a non-empty exit block");
STATISTIC(NonEmptyGuardBlock, "Candidate has a non-empty guard block");
STATISTIC(NonEmptyExitBlock, "Candidate has a non-empty exit block with "
"instructions that cannot be moved");
STATISTIC(NonEmptyGuardBlock, "Candidate has a non-empty guard block with "
"instructions that cannot be moved");
STATISTIC(NotRotated, "Candidate is not rotated");
enum FusionDependenceAnalysisChoice {
@ -750,25 +752,30 @@ private:
continue;
}
// The following two checks look for empty blocks in FC0 and FC1. If
// any of these blocks are non-empty, we do not fuse. This is done
// because we currently do not have the safety checks to determine if
// it is safe to move the blocks past other blocks in the loop. Once
// these checks are added, these conditions can be relaxed.
if (FC0->GuardBranch && !isEmptyExitBlock(*FC0)) {
LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty exit "
"block. Not fusing.\n");
reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
NonEmptyExitBlock);
continue;
}
if (FC0->GuardBranch) {
assert(FC1->GuardBranch && "Expecting valid FC1 guard branch");
if (FC1->GuardBranch && !isEmptyGuardBlock(*FC1)) {
LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty guard "
"block. Not fusing.\n");
reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
NonEmptyGuardBlock);
continue;
if (!isSafeToMoveBefore(*FC0->ExitBlock,
*FC1->ExitBlock->getFirstNonPHIOrDbg(), DT,
PDT, DI)) {
LLVM_DEBUG(dbgs() << "Fusion candidate contains unsafe "
"instructions in exit block. Not fusing.\n");
reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
NonEmptyExitBlock);
continue;
}
if (!isSafeToMoveBefore(
*FC1->GuardBranch->getParent(),
*FC0->GuardBranch->getParent()->getTerminator(), DT, PDT,
DI)) {
LLVM_DEBUG(dbgs()
<< "Fusion candidate contains unsafe "
"instructions in guard block. Not fusing.\n");
reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
NonEmptyGuardBlock);
continue;
}
}
// Check the dependencies across the loops and do not fuse if it would
@ -1079,38 +1086,6 @@ private:
return (FC1.GuardBranch->getSuccessor(1) == FC1.Preheader);
}
/// Check that the guard for \p FC *only* contains the cmp/branch for the
/// guard.
/// Once we are able to handle intervening code, any code in the guard block
/// for FC1 will need to be treated as intervening code and checked whether
/// it can safely move around the loops.
bool isEmptyGuardBlock(const FusionCandidate &FC) const {
assert(FC.GuardBranch && "Expecting a fusion candidate with guard branch.");
if (auto *CmpInst = dyn_cast<Instruction>(FC.GuardBranch->getCondition())) {
auto *GuardBlock = FC.GuardBranch->getParent();
// If the generation of the cmp value is in GuardBlock, then the size of
// the guard block should be 2 (cmp + branch). If the generation of the
// cmp value is in a different block, then the size of the guard block
// should only be 1.
if (CmpInst->getParent() == GuardBlock)
return GuardBlock->size() == 2;
else
return GuardBlock->size() == 1;
}
return false;
}
bool isEmptyPreheader(const FusionCandidate &FC) const {
assert(FC.Preheader && "Expecting a valid preheader");
return FC.Preheader->size() == 1;
}
bool isEmptyExitBlock(const FusionCandidate &FC) const {
assert(FC.ExitBlock && "Expecting a valid exit block");
return FC.ExitBlock->size() == 1;
}
/// Simplify the condition of the latch branch of \p FC to true, when both of
/// its successors are the same.
void simplifyLatchBranch(const FusionCandidate &FC) const {
@ -1390,6 +1365,14 @@ private:
BasicBlock *FC0NonLoopBlock = FC0.getNonLoopBlock();
BasicBlock *FC1NonLoopBlock = FC1.getNonLoopBlock();
// Move instructions from the exit block of FC0 to the beginning of the exit
// block of FC1.
moveInstructionsToTheBeginning(*FC0.ExitBlock, *FC1.ExitBlock, DT, PDT, DI);
// Move instructions from the guard block of FC1 to the end of the guard
// block of FC0.
moveInstructionsToTheEnd(*FC1GuardBlock, *FC0GuardBlock, DT, PDT, DI);
assert(FC0NonLoopBlock == FC1GuardBlock && "Loops are not adjacent");
SmallVector<DominatorTree::UpdateType, 8> TreeUpdates;

View File

@ -213,6 +213,93 @@ for.second:
for.end:
ret void
}
; CHECK: remark: diagnostics_missed.c:67:3: [unsafe_exitblock]: for.first.preheader and for.second.preheader: Candidate has a non-empty exit block with instructions that cannot be moved
define void @unsafe_exitblock(i32* noalias %A, i32* noalias %B, i64 %N) {
for.first.guard:
%cmp3 = icmp slt i64 0, %N
br i1 %cmp3, label %for.first.preheader, label %for.second.guard
for.first.preheader:
br label %for.first, !dbg !83
for.first:
%i.04 = phi i64 [ %inc, %for.first ], [ 0, %for.first.preheader ]
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.04
store i32 0, i32* %arrayidx, align 4
%inc = add nsw i64 %i.04, 1
%cmp = icmp slt i64 %inc, %N
br i1 %cmp, label %for.first, label %for.first.exit
for.first.exit:
call void @bar()
br label %for.second.guard
for.second.guard:
%cmp21 = icmp slt i64 0, %N
br i1 %cmp21, label %for.second.preheader, label %for.end
for.second.preheader:
br label %for.second
for.second:
%j.02 = phi i64 [ %inc6, %for.second ], [ 0, %for.second.preheader ]
%arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %j.02
store i32 0, i32* %arrayidx4, align 4
%inc6 = add nsw i64 %j.02, 1
%cmp2 = icmp slt i64 %inc6, %N
br i1 %cmp2, label %for.second, label %for.second.exit
for.second.exit:
br label %for.end
for.end:
ret void
}
; CHECK: remark: diagnostics_missed.c:72:3: [unsafe_guardblock]: for.first.preheader and for.second.preheader: Candidate has a non-empty guard block with instructions that cannot be moved
define void @unsafe_guardblock(i32* noalias %A, i32* noalias %B, i64 %N) {
for.first.guard:
%cmp3 = icmp slt i64 0, %N
br i1 %cmp3, label %for.first.preheader, label %for.second.guard
for.first.preheader:
br label %for.first, !dbg !86
for.first:
%i.04 = phi i64 [ %inc, %for.first ], [ 0, %for.first.preheader ]
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.04
store i32 0, i32* %arrayidx, align 4
%inc = add nsw i64 %i.04, 1
%cmp = icmp slt i64 %inc, %N
br i1 %cmp, label %for.first, label %for.first.exit
for.first.exit:
br label %for.second.guard
for.second.guard:
call void @bar()
%cmp21 = icmp slt i64 0, %N
br i1 %cmp21, label %for.second.preheader, label %for.end
for.second.preheader:
br label %for.second
for.second:
%j.02 = phi i64 [ %inc6, %for.second ], [ 0, %for.second.preheader ]
%arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %j.02
store i32 0, i32* %arrayidx4, align 4
%inc6 = add nsw i64 %j.02, 1
%cmp2 = icmp slt i64 %inc6, %N
br i1 %cmp2, label %for.second, label %for.second.exit
for.second.exit:
br label %for.end
for.end:
ret void
}
declare void @bar()
attributes #0 = { nounwind readnone speculatable willreturn }
@ -301,3 +388,9 @@ attributes #0 = { nounwind readnone speculatable willreturn }
!78 = !{}
!79 = distinct !DILexicalBlock(scope: !77, file: !3, line: 3, column: 5)
!80 = !DILocation(line: 62, column: 3, scope: !79)
!81 = distinct !DISubprogram(name: "unsafe_exitblock", scope: !3, file: !3, line: 65, type: !15, scopeLine: 60, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !78)
!82 = distinct !DILexicalBlock(scope: !81, file: !3, line: 3, column: 5)
!83 = !DILocation(line: 67, column: 3, scope: !82)
!84 = distinct !DISubprogram(name: "unsafe_guardblock", scope: !3, file: !3, line: 70, type: !15, scopeLine: 60, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !78)
!85 = distinct !DILexicalBlock(scope: !84, file: !3, line: 3, column: 5)
!86 = !DILocation(line: 72, column: 3, scope: !85)

View File

@ -119,3 +119,116 @@ for.second.exit:
for.end:
ret void
}
; Test that `%add` is moved in for.second.exit, and the two loops for.first
; and for.second are fused.
; CHECK: void @moveinsts_exitblock
; CHECK-LABEL: for.first.guard:
; CHECK: br i1 %cmp.guard, label %for.first.preheader, label %for.end
; CHECK-LABEL: for.first.preheader:
; CHECK-NEXT: br label %for.first
; CHECK-LABEL: for.first:
; CHECK: br i1 %cmp.j, label %for.first, label %for.second.exit
; CHECK-LABEL: for.second.exit:
; CHECK-NEXT: %add = add nsw i32 %x, 1
; CHECK-NEXT: br label %for.end
; CHECK-LABEL: for.end:
; CHECK-NEXT: ret void
define void @moveinsts_exitblock(i32* noalias %A, i32* noalias %B, i64 %N, i32 %x) {
for.first.guard:
%cmp.guard = icmp slt i64 0, %N
br i1 %cmp.guard, label %for.first.preheader, label %for.second.guard
for.first.preheader:
br label %for.first
for.first:
%i.04 = phi i64 [ %inc, %for.first ], [ 0, %for.first.preheader ]
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.04
store i32 0, i32* %arrayidx, align 4
%inc = add nsw i64 %i.04, 1
%cmp = icmp slt i64 %inc, %N
br i1 %cmp, label %for.first, label %for.first.exit
for.first.exit:
%add = add nsw i32 %x, 1
br label %for.second.guard
for.second.guard:
br i1 %cmp.guard, label %for.second.preheader, label %for.end
for.second.preheader:
br label %for.second
for.second:
%j.02 = phi i64 [ %inc6, %for.second ], [ 0, %for.second.preheader ]
%arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %j.02
store i32 0, i32* %arrayidx4, align 4
%inc6 = add nsw i64 %j.02, 1
%cmp.j = icmp slt i64 %inc6, %N
br i1 %cmp.j, label %for.second, label %for.second.exit
for.second.exit:
br label %for.end
for.end:
ret void
}
; Test that `%add` is moved in for.first.guard, and the two loops for.first
; and for.second are fused.
; CHECK: void @moveinsts_guardblock
; CHECK-LABEL: for.first.guard:
; CHECK-NEXT: %cmp.guard = icmp slt i64 0, %N
; CHECK-NEXT: %add = add nsw i32 %x, 1
; CHECK: br i1 %cmp.guard, label %for.first.preheader, label %for.end
; CHECK-LABEL: for.first.preheader:
; CHECK-NEXT: br label %for.first
; CHECK-LABEL: for.first:
; CHECK: br i1 %cmp.j, label %for.first, label %for.second.exit
; CHECK-LABEL: for.second.exit:
; CHECK-NEXT: br label %for.end
; CHECK-LABEL: for.end:
; CHECK-NEXT: ret void
define void @moveinsts_guardblock(i32* noalias %A, i32* noalias %B, i64 %N, i32 %x) {
for.first.guard:
%cmp.guard = icmp slt i64 0, %N
br i1 %cmp.guard, label %for.first.preheader, label %for.second.guard
for.first.preheader:
br label %for.first
for.first:
%i.04 = phi i64 [ %inc, %for.first ], [ 0, %for.first.preheader ]
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.04
store i32 0, i32* %arrayidx, align 4
%inc = add nsw i64 %i.04, 1
%cmp = icmp slt i64 %inc, %N
br i1 %cmp, label %for.first, label %for.first.exit
for.first.exit:
br label %for.second.guard
for.second.guard:
%add = add nsw i32 %x, 1
br i1 %cmp.guard, label %for.second.preheader, label %for.end
for.second.preheader:
br label %for.second
for.second:
%j.02 = phi i64 [ %inc6, %for.second ], [ 0, %for.second.preheader ]
%arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %j.02
store i32 0, i32* %arrayidx4, align 4
%inc6 = add nsw i64 %j.02, 1
%cmp.j = icmp slt i64 %inc6, %N
br i1 %cmp.j, label %for.second, label %for.second.exit
for.second.exit:
br label %for.end
for.end:
ret void
}