1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 02:52:53 +02:00

[LV] Fix-up external IV users after updating dominator tree

This patch delays the fix-up step for external induction variable users until
after the dominator tree has been properly updated. This should fix PR30742.
The SCEVExpander in InductionDescriptor::transform can generate code in the
wrong location if the dominator tree is not up-to-date. We should work towards
keeping the dominator tree up-to-date throughout the transformation.

Reference: https://llvm.org/bugs/show_bug.cgi?id=30742
Differential Revision: https://reviews.llvm.org/D28168

llvm-svn: 291462
This commit is contained in:
Matthew Simpson 2017-01-09 19:05:29 +00:00
parent 4872e0b42c
commit d47c4174f6
2 changed files with 65 additions and 7 deletions

View File

@ -783,6 +783,10 @@ protected:
// Similarly, we create a new latch condition when setting up the structure
// of the new loop, so the old one can become dead.
SmallPtrSet<Instruction *, 4> DeadInstructions;
// Holds the end values for each induction variable. We save the end values
// so we can later fix-up the external users of the induction variables.
DenseMap<PHINode *, Value *> IVEndValues;
};
class InnerLoopUnroller : public InnerLoopVectorizer {
@ -3417,7 +3421,7 @@ void InnerLoopVectorizer::createEmptyLoop() {
// Create phi nodes to merge from the backedge-taken check block.
PHINode *BCResumeVal = PHINode::Create(
OrigPhi->getType(), 3, "bc.resume.val", ScalarPH->getTerminator());
Value *EndValue;
Value *&EndValue = IVEndValues[OrigPhi];
if (OrigPhi == OldInduction) {
// We know what the end value is.
EndValue = CountRoundDown;
@ -3436,9 +3440,6 @@ void InnerLoopVectorizer::createEmptyLoop() {
// or the value at the end of the vectorized loop.
BCResumeVal->addIncoming(EndValue, MiddleBlock);
// Fix up external users of the induction variable.
fixupIVUsers(OrigPhi, II, CountRoundDown, EndValue, MiddleBlock);
// Fix the scalar body counter (PHI node).
unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH);
@ -4109,11 +4110,23 @@ void InnerLoopVectorizer::vectorizeLoop() {
Phi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst);
} // end of for each Phi in PHIsToFix.
fixLCSSAPHIs();
// Make sure DomTree is updated.
// Update the dominator tree.
//
// FIXME: After creating the structure of the new loop, the dominator tree is
// no longer up-to-date, and it remains that way until we update it
// here. An out-of-date dominator tree is problematic for SCEV,
// because SCEVExpander uses it to guide code generation. The
// vectorizer uses SCEVExpanders in several places. Instead, we should
// keep the dominator tree up-to-date as we go.
updateAnalysis();
// Fix-up external users of the induction variables.
for (auto &Entry : *Legal->getInductionVars())
fixupIVUsers(Entry.first, Entry.second,
getOrCreateVectorTripCount(LI->getLoopFor(LoopVectorBody)),
IVEndValues[Entry.first], LoopMiddleBlock);
fixLCSSAPHIs();
predicateInstructions();
// Remove redundant induction instructions.

View File

@ -133,3 +133,48 @@ for.end:
store i32 %phi2, i32* %p
ret i32 %phi
}
; CHECK-LABEL: @PR30742
; CHECK: min.iters.checked
; CHECK: %[[N_MOD_VF:.+]] = urem i32 %[[T5:.+]], 2
; CHECK: %[[N_VEC:.+]] = sub i32 %[[T5]], %[[N_MOD_VF]]
; CHECK: middle.block
; CHECK: %[[CMP:.+]] = icmp eq i32 %[[T5]], %[[N_VEC]]
; CHECK: %[[T15:.+]] = add i32 %tmp03, -7
; CHECK: %[[T16:.+]] = shl i32 %[[N_MOD_VF]], 3
; CHECK: %[[T17:.+]] = add i32 %[[T15]], %[[T16]]
; CHECK: %[[T18:.+]] = shl i32 {{.*}}, 3
; CHECK: %ind.escape = sub i32 %[[T17]], %[[T18]]
; CHECK: br i1 %[[CMP]], label %BB3, label %scalar.ph
; Regression input for PR30742 (external users of an induction variable).
;
; The function takes no arguments and has no return instruction: control
; cycles BB1 -> BB2 -> BB3 -> BB4 -> BB1. BB2 and BB4 are single-block loops,
; each counting an i32 down in steps of 8 until the decremented value is no
; longer greater than zero. The start values are derived from a load through
; an undef pointer and a subtraction of undef, so they are unknown at compile
; time.
define void @PR30742() {
BB0:
br label %BB1
BB1:
; Compute the start value of the first counting loop: max(%tmp01, 1) - 7.
%tmp00 = load i32, i32* undef, align 16
%tmp01 = sub i32 %tmp00, undef
%tmp02 = icmp slt i32 %tmp01, 1
%tmp03 = select i1 %tmp02, i32 1, i32 %tmp01
%tmp04 = add nsw i32 %tmp03, -7
br label %BB2
BB2:
; First loop: %tmp05 is the induction phi, stepped by -8 on each iteration.
%tmp05 = phi i32 [ %tmp04, %BB1 ], [ %tmp06, %BB2 ]
%tmp06 = add i32 %tmp05, -8
%tmp07 = icmp sgt i32 %tmp06, 0
br i1 %tmp07, label %BB2, label %BB3
BB3:
; %tmp08 reads the induction phi %tmp05 from outside its loop; this is the
; external use of the induction variable. %tmp08 itself has no users.
%tmp08 = phi i32 [ %tmp05, %BB2 ]
; Recompute the same clamped start value (from %tmp00) for the second loop.
%tmp09 = sub i32 %tmp00, undef
%tmp10 = icmp slt i32 %tmp09, 1
%tmp11 = select i1 %tmp10, i32 1, i32 %tmp09
%tmp12 = add nsw i32 %tmp11, -7
br label %BB4
BB4:
; Second loop: same shape as BB2. On exit it branches back to BB1.
%tmp13 = phi i32 [ %tmp12, %BB3 ], [ %tmp14, %BB4 ]
%tmp14 = add i32 %tmp13, -8
%tmp15 = icmp sgt i32 %tmp14, 0
br i1 %tmp15, label %BB4, label %BB1
}