mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[LoopFusion] Move instructions from FC1.GuardBlock to FC0.GuardBlock and
from FC0.ExitBlock to FC1.ExitBlock when proven safe. Summary: Currently LoopFusion give up when the second loop nest guard block or the first loop nest exit block is not empty. For example: if (0 < N) { for (int i = 0; i < N; ++i) {} x+=1; } y+=1; if (0 < N) { for (int i = 0; i < N; ++i) {} } The above example should be safe to fuse. This PR moves instructions in FC1 guard block (e.g. y+=1;) to FC0 guard block, or instructions in FC0 exit block (e.g. x+=1;) to FC1 exit block, which then LoopFusion is able to fuse them. Reviewer: kbarton, jdoerfert, Meinersbur, dmgreen, fhahn, hfinkel, bmahjour, etiotto Reviewed By: jdoerfert Subscribers: hiraditya, llvm-commits Tag: LLVM Differential Revision: https://reviews.llvm.org/D73641
This commit is contained in:
parent
402abf7a14
commit
7242e653d2
@ -91,8 +91,10 @@ STATISTIC(
|
||||
"Loop has a non-empty preheader with instructions that cannot be moved");
|
||||
STATISTIC(FusionNotBeneficial, "Fusion is not beneficial");
|
||||
STATISTIC(NonIdenticalGuards, "Candidates have different guards");
|
||||
STATISTIC(NonEmptyExitBlock, "Candidate has a non-empty exit block");
|
||||
STATISTIC(NonEmptyGuardBlock, "Candidate has a non-empty guard block");
|
||||
STATISTIC(NonEmptyExitBlock, "Candidate has a non-empty exit block with "
|
||||
"instructions that cannot be moved");
|
||||
STATISTIC(NonEmptyGuardBlock, "Candidate has a non-empty guard block with "
|
||||
"instructions that cannot be moved");
|
||||
STATISTIC(NotRotated, "Candidate is not rotated");
|
||||
|
||||
enum FusionDependenceAnalysisChoice {
|
||||
@ -750,25 +752,30 @@ private:
|
||||
continue;
|
||||
}
|
||||
|
||||
// The following two checks look for empty blocks in FC0 and FC1. If
|
||||
// any of these blocks are non-empty, we do not fuse. This is done
|
||||
// because we currently do not have the safety checks to determine if
|
||||
// it is safe to move the blocks past other blocks in the loop. Once
|
||||
// these checks are added, these conditions can be relaxed.
|
||||
if (FC0->GuardBranch && !isEmptyExitBlock(*FC0)) {
|
||||
LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty exit "
|
||||
"block. Not fusing.\n");
|
||||
reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
|
||||
NonEmptyExitBlock);
|
||||
continue;
|
||||
}
|
||||
if (FC0->GuardBranch) {
|
||||
assert(FC1->GuardBranch && "Expecting valid FC1 guard branch");
|
||||
|
||||
if (FC1->GuardBranch && !isEmptyGuardBlock(*FC1)) {
|
||||
LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty guard "
|
||||
"block. Not fusing.\n");
|
||||
reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
|
||||
NonEmptyGuardBlock);
|
||||
continue;
|
||||
if (!isSafeToMoveBefore(*FC0->ExitBlock,
|
||||
*FC1->ExitBlock->getFirstNonPHIOrDbg(), DT,
|
||||
PDT, DI)) {
|
||||
LLVM_DEBUG(dbgs() << "Fusion candidate contains unsafe "
|
||||
"instructions in exit block. Not fusing.\n");
|
||||
reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
|
||||
NonEmptyExitBlock);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!isSafeToMoveBefore(
|
||||
*FC1->GuardBranch->getParent(),
|
||||
*FC0->GuardBranch->getParent()->getTerminator(), DT, PDT,
|
||||
DI)) {
|
||||
LLVM_DEBUG(dbgs()
|
||||
<< "Fusion candidate contains unsafe "
|
||||
"instructions in guard block. Not fusing.\n");
|
||||
reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
|
||||
NonEmptyGuardBlock);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Check the dependencies across the loops and do not fuse if it would
|
||||
@ -1079,38 +1086,6 @@ private:
|
||||
return (FC1.GuardBranch->getSuccessor(1) == FC1.Preheader);
|
||||
}
|
||||
|
||||
/// Check that the guard for \p FC *only* contains the cmp/branch for the
|
||||
/// guard.
|
||||
/// Once we are able to handle intervening code, any code in the guard block
|
||||
/// for FC1 will need to be treated as intervening code and checked whether
|
||||
/// it can safely move around the loops.
|
||||
bool isEmptyGuardBlock(const FusionCandidate &FC) const {
|
||||
assert(FC.GuardBranch && "Expecting a fusion candidate with guard branch.");
|
||||
if (auto *CmpInst = dyn_cast<Instruction>(FC.GuardBranch->getCondition())) {
|
||||
auto *GuardBlock = FC.GuardBranch->getParent();
|
||||
// If the generation of the cmp value is in GuardBlock, then the size of
|
||||
// the guard block should be 2 (cmp + branch). If the generation of the
|
||||
// cmp value is in a different block, then the size of the guard block
|
||||
// should only be 1.
|
||||
if (CmpInst->getParent() == GuardBlock)
|
||||
return GuardBlock->size() == 2;
|
||||
else
|
||||
return GuardBlock->size() == 1;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool isEmptyPreheader(const FusionCandidate &FC) const {
|
||||
assert(FC.Preheader && "Expecting a valid preheader");
|
||||
return FC.Preheader->size() == 1;
|
||||
}
|
||||
|
||||
bool isEmptyExitBlock(const FusionCandidate &FC) const {
|
||||
assert(FC.ExitBlock && "Expecting a valid exit block");
|
||||
return FC.ExitBlock->size() == 1;
|
||||
}
|
||||
|
||||
/// Simplify the condition of the latch branch of \p FC to true, when both of
|
||||
/// its successors are the same.
|
||||
void simplifyLatchBranch(const FusionCandidate &FC) const {
|
||||
@ -1390,6 +1365,14 @@ private:
|
||||
BasicBlock *FC0NonLoopBlock = FC0.getNonLoopBlock();
|
||||
BasicBlock *FC1NonLoopBlock = FC1.getNonLoopBlock();
|
||||
|
||||
// Move instructions from the exit block of FC0 to the beginning of the exit
|
||||
// block of FC1.
|
||||
moveInstructionsToTheBeginning(*FC0.ExitBlock, *FC1.ExitBlock, DT, PDT, DI);
|
||||
|
||||
// Move instructions from the guard block of FC1 to the end of the guard
|
||||
// block of FC0.
|
||||
moveInstructionsToTheEnd(*FC1GuardBlock, *FC0GuardBlock, DT, PDT, DI);
|
||||
|
||||
assert(FC0NonLoopBlock == FC1GuardBlock && "Loops are not adjacent");
|
||||
|
||||
SmallVector<DominatorTree::UpdateType, 8> TreeUpdates;
|
||||
|
@ -213,6 +213,93 @@ for.second:
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: remark: diagnostics_missed.c:67:3: [unsafe_exitblock]: for.first.preheader and for.second.preheader: Candidate has a non-empty exit block with instructions that cannot be moved
|
||||
define void @unsafe_exitblock(i32* noalias %A, i32* noalias %B, i64 %N) {
|
||||
for.first.guard:
|
||||
%cmp3 = icmp slt i64 0, %N
|
||||
br i1 %cmp3, label %for.first.preheader, label %for.second.guard
|
||||
|
||||
for.first.preheader:
|
||||
br label %for.first, !dbg !83
|
||||
|
||||
for.first:
|
||||
%i.04 = phi i64 [ %inc, %for.first ], [ 0, %for.first.preheader ]
|
||||
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.04
|
||||
store i32 0, i32* %arrayidx, align 4
|
||||
%inc = add nsw i64 %i.04, 1
|
||||
%cmp = icmp slt i64 %inc, %N
|
||||
br i1 %cmp, label %for.first, label %for.first.exit
|
||||
|
||||
for.first.exit:
|
||||
call void @bar()
|
||||
br label %for.second.guard
|
||||
|
||||
for.second.guard:
|
||||
%cmp21 = icmp slt i64 0, %N
|
||||
br i1 %cmp21, label %for.second.preheader, label %for.end
|
||||
|
||||
for.second.preheader:
|
||||
br label %for.second
|
||||
|
||||
for.second:
|
||||
%j.02 = phi i64 [ %inc6, %for.second ], [ 0, %for.second.preheader ]
|
||||
%arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %j.02
|
||||
store i32 0, i32* %arrayidx4, align 4
|
||||
%inc6 = add nsw i64 %j.02, 1
|
||||
%cmp2 = icmp slt i64 %inc6, %N
|
||||
br i1 %cmp2, label %for.second, label %for.second.exit
|
||||
|
||||
for.second.exit:
|
||||
br label %for.end
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: remark: diagnostics_missed.c:72:3: [unsafe_guardblock]: for.first.preheader and for.second.preheader: Candidate has a non-empty guard block with instructions that cannot be moved
|
||||
define void @unsafe_guardblock(i32* noalias %A, i32* noalias %B, i64 %N) {
|
||||
for.first.guard:
|
||||
%cmp3 = icmp slt i64 0, %N
|
||||
br i1 %cmp3, label %for.first.preheader, label %for.second.guard
|
||||
|
||||
for.first.preheader:
|
||||
br label %for.first, !dbg !86
|
||||
|
||||
for.first:
|
||||
%i.04 = phi i64 [ %inc, %for.first ], [ 0, %for.first.preheader ]
|
||||
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.04
|
||||
store i32 0, i32* %arrayidx, align 4
|
||||
%inc = add nsw i64 %i.04, 1
|
||||
%cmp = icmp slt i64 %inc, %N
|
||||
br i1 %cmp, label %for.first, label %for.first.exit
|
||||
|
||||
for.first.exit:
|
||||
br label %for.second.guard
|
||||
|
||||
for.second.guard:
|
||||
call void @bar()
|
||||
%cmp21 = icmp slt i64 0, %N
|
||||
br i1 %cmp21, label %for.second.preheader, label %for.end
|
||||
|
||||
for.second.preheader:
|
||||
br label %for.second
|
||||
|
||||
for.second:
|
||||
%j.02 = phi i64 [ %inc6, %for.second ], [ 0, %for.second.preheader ]
|
||||
%arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %j.02
|
||||
store i32 0, i32* %arrayidx4, align 4
|
||||
%inc6 = add nsw i64 %j.02, 1
|
||||
%cmp2 = icmp slt i64 %inc6, %N
|
||||
br i1 %cmp2, label %for.second, label %for.second.exit
|
||||
|
||||
for.second.exit:
|
||||
br label %for.end
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @bar()
|
||||
|
||||
attributes #0 = { nounwind readnone speculatable willreturn }
|
||||
@ -301,3 +388,9 @@ attributes #0 = { nounwind readnone speculatable willreturn }
|
||||
!78 = !{}
|
||||
!79 = distinct !DILexicalBlock(scope: !77, file: !3, line: 3, column: 5)
|
||||
!80 = !DILocation(line: 62, column: 3, scope: !79)
|
||||
!81 = distinct !DISubprogram(name: "unsafe_exitblock", scope: !3, file: !3, line: 65, type: !15, scopeLine: 60, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !78)
|
||||
!82 = distinct !DILexicalBlock(scope: !81, file: !3, line: 3, column: 5)
|
||||
!83 = !DILocation(line: 67, column: 3, scope: !82)
|
||||
!84 = distinct !DISubprogram(name: "unsafe_guardblock", scope: !3, file: !3, line: 70, type: !15, scopeLine: 60, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !78)
|
||||
!85 = distinct !DILexicalBlock(scope: !84, file: !3, line: 3, column: 5)
|
||||
!86 = !DILocation(line: 72, column: 3, scope: !85)
|
||||
|
@ -119,3 +119,116 @@ for.second.exit:
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test that `%add` is moved in for.second.exit, and the two loops for.first
|
||||
; and for.second are fused.
|
||||
|
||||
; CHECK: void @moveinsts_exitblock
|
||||
; CHECK-LABEL: for.first.guard:
|
||||
; CHECK: br i1 %cmp.guard, label %for.first.preheader, label %for.end
|
||||
; CHECK-LABEL: for.first.preheader:
|
||||
; CHECK-NEXT: br label %for.first
|
||||
; CHECK-LABEL: for.first:
|
||||
; CHECK: br i1 %cmp.j, label %for.first, label %for.second.exit
|
||||
; CHECK-LABEL: for.second.exit:
|
||||
; CHECK-NEXT: %add = add nsw i32 %x, 1
|
||||
; CHECK-NEXT: br label %for.end
|
||||
; CHECK-LABEL: for.end:
|
||||
; CHECK-NEXT: ret void
|
||||
define void @moveinsts_exitblock(i32* noalias %A, i32* noalias %B, i64 %N, i32 %x) {
|
||||
for.first.guard:
|
||||
%cmp.guard = icmp slt i64 0, %N
|
||||
br i1 %cmp.guard, label %for.first.preheader, label %for.second.guard
|
||||
|
||||
for.first.preheader:
|
||||
br label %for.first
|
||||
|
||||
for.first:
|
||||
%i.04 = phi i64 [ %inc, %for.first ], [ 0, %for.first.preheader ]
|
||||
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.04
|
||||
store i32 0, i32* %arrayidx, align 4
|
||||
%inc = add nsw i64 %i.04, 1
|
||||
%cmp = icmp slt i64 %inc, %N
|
||||
br i1 %cmp, label %for.first, label %for.first.exit
|
||||
|
||||
for.first.exit:
|
||||
%add = add nsw i32 %x, 1
|
||||
br label %for.second.guard
|
||||
|
||||
for.second.guard:
|
||||
br i1 %cmp.guard, label %for.second.preheader, label %for.end
|
||||
|
||||
for.second.preheader:
|
||||
br label %for.second
|
||||
|
||||
for.second:
|
||||
%j.02 = phi i64 [ %inc6, %for.second ], [ 0, %for.second.preheader ]
|
||||
%arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %j.02
|
||||
store i32 0, i32* %arrayidx4, align 4
|
||||
%inc6 = add nsw i64 %j.02, 1
|
||||
%cmp.j = icmp slt i64 %inc6, %N
|
||||
br i1 %cmp.j, label %for.second, label %for.second.exit
|
||||
|
||||
for.second.exit:
|
||||
br label %for.end
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test that `%add` is moved in for.first.guard, and the two loops for.first
|
||||
; and for.second are fused.
|
||||
|
||||
; CHECK: void @moveinsts_guardblock
|
||||
; CHECK-LABEL: for.first.guard:
|
||||
; CHECK-NEXT: %cmp.guard = icmp slt i64 0, %N
|
||||
; CHECK-NEXT: %add = add nsw i32 %x, 1
|
||||
; CHECK: br i1 %cmp.guard, label %for.first.preheader, label %for.end
|
||||
; CHECK-LABEL: for.first.preheader:
|
||||
; CHECK-NEXT: br label %for.first
|
||||
; CHECK-LABEL: for.first:
|
||||
; CHECK: br i1 %cmp.j, label %for.first, label %for.second.exit
|
||||
; CHECK-LABEL: for.second.exit:
|
||||
; CHECK-NEXT: br label %for.end
|
||||
; CHECK-LABEL: for.end:
|
||||
; CHECK-NEXT: ret void
|
||||
define void @moveinsts_guardblock(i32* noalias %A, i32* noalias %B, i64 %N, i32 %x) {
|
||||
for.first.guard:
|
||||
%cmp.guard = icmp slt i64 0, %N
|
||||
br i1 %cmp.guard, label %for.first.preheader, label %for.second.guard
|
||||
|
||||
for.first.preheader:
|
||||
br label %for.first
|
||||
|
||||
for.first:
|
||||
%i.04 = phi i64 [ %inc, %for.first ], [ 0, %for.first.preheader ]
|
||||
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.04
|
||||
store i32 0, i32* %arrayidx, align 4
|
||||
%inc = add nsw i64 %i.04, 1
|
||||
%cmp = icmp slt i64 %inc, %N
|
||||
br i1 %cmp, label %for.first, label %for.first.exit
|
||||
|
||||
for.first.exit:
|
||||
br label %for.second.guard
|
||||
|
||||
for.second.guard:
|
||||
%add = add nsw i32 %x, 1
|
||||
br i1 %cmp.guard, label %for.second.preheader, label %for.end
|
||||
|
||||
for.second.preheader:
|
||||
br label %for.second
|
||||
|
||||
for.second:
|
||||
%j.02 = phi i64 [ %inc6, %for.second ], [ 0, %for.second.preheader ]
|
||||
%arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %j.02
|
||||
store i32 0, i32* %arrayidx4, align 4
|
||||
%inc6 = add nsw i64 %j.02, 1
|
||||
%cmp.j = icmp slt i64 %inc6, %N
|
||||
br i1 %cmp.j, label %for.second, label %for.second.exit
|
||||
|
||||
for.second.exit:
|
||||
br label %for.end
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user