[SCEV] Guard movement of insertion point for loop-invariants

This reinstates r347934, along with a tweak to address a problem with PHI node ordering that that commit created (or exposed). (That commit was reverted at r348426, due to the PHI node issue.) Original commit message: r320789 suppressed moving the insertion point of SCEV expressions with dev/rem operations to the loop header in non-loop-invariant situations. This, and similar, hoisting is also unsafe in the loop-invariant case, since there may be a guard against a zero denominator. This is an adjustment to the fix of r320789 to suppress the movement even in the loop-invariant case. This fixes PR30806. Differential Revision: https://reviews.llvm.org/D57428 llvm-svn: 356392
2024-10-19 02:52:53 +02:00 · 2019-03-18 18:52:35 +00:00 · 2019-03-18 18:52:35 +00:00 · d2b89688b2
commit d2b89688b2
parent badc888cad
3 changed files with 178 additions and 41 deletions
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@ -1731,49 +1731,55 @@ Value *SCEVExpander::expand(const SCEV *S) {
  // Compute an insertion point for this SCEV object. Hoist the instructions
  // as far out in the loop nest as possible.
  Instruction *InsertPt = &*Builder.GetInsertPoint();
-  for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
-       L = L->getParentLoop())
-    if (SE.isLoopInvariant(S, L)) {
-      if (!L) break;
-      if (BasicBlock *Preheader = L->getLoopPreheader())
-        InsertPt = Preheader->getTerminator();
-      else {
-        // LSR sets the insertion point for AddRec start/step values to the
-        // block start to simplify value reuse, even though it's an invalid
-        // position. SCEVExpander must correct for this in all cases.
-        InsertPt = &*L->getHeader()->getFirstInsertionPt();
+
+  // We can move insertion point only if there is no div or rem operations
+  // otherwise we are risky to move it over the check for zero denominator.
+  auto SafeToHoist = [](const SCEV *S) {
+    return !SCEVExprContains(S, [](const SCEV *S) {
+              if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
+                if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
+                  // Division by non-zero constants can be hoisted.
+                  return SC->getValue()->isZero();
+                // All other divisions should not be moved as they may be
+                // divisions by zero and should be kept within the
+                // conditions of the surrounding loops that guard their
+                // execution (see PR35406).
+                return true;
+              }
+              return false;
+            });
+  };
+  if (SafeToHoist(S)) {
+    for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
+         L = L->getParentLoop()) {
+      if (SE.isLoopInvariant(S, L)) {
+        if (!L) break;
+        if (BasicBlock *Preheader = L->getLoopPreheader())
+          InsertPt = Preheader->getTerminator();
+        else
+          // LSR sets the insertion point for AddRec start/step values to the
+          // block start to simplify value reuse, even though it's an invalid
+          // position. SCEVExpander must correct for this in all cases.
+          InsertPt = &*L->getHeader()->getFirstInsertionPt();
+      } else {
+        // If the SCEV is computable at this level, insert it into the header
+        // after the PHIs (and after any other instructions that we've inserted
+        // there) so that it is guaranteed to dominate any user inside the loop.
+        if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
+          InsertPt = &*L->getHeader()->getFirstInsertionPt();
+        while (InsertPt->getIterator() != Builder.GetInsertPoint() &&
+               (isInsertedInstruction(InsertPt) ||
+                isa<DbgInfoIntrinsic>(InsertPt)))
+          InsertPt = &*std::next(InsertPt->getIterator());
+        break;
      }
-    } else {
-      // We can move insertion point only if there is no div or rem operations
-      // otherwise we are risky to move it over the check for zero denominator.
-      auto SafeToHoist = [](const SCEV *S) {
-        return !SCEVExprContains(S, [](const SCEV *S) {
-                  if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
-                    if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
-                      // Division by non-zero constants can be hoisted.
-                      return SC->getValue()->isZero();
-                    // All other divisions should not be moved as they may be
-                    // divisions by zero and should be kept within the
-                    // conditions of the surrounding loops that guard their
-                    // execution (see PR35406).
-                    return true;
-                  }
-                  return false;
-                });
-      };
-      // If the SCEV is computable at this level, insert it into the header
-      // after the PHIs (and after any other instructions that we've inserted
-      // there) so that it is guaranteed to dominate any user inside the loop.
-      if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L) &&
-          SafeToHoist(S))
-        InsertPt = &*L->getHeader()->getFirstInsertionPt();
-      while (InsertPt->getIterator() != Builder.GetInsertPoint() &&
-             (isInsertedInstruction(InsertPt) ||
-              isa<DbgInfoIntrinsic>(InsertPt))) {
-        InsertPt = &*std::next(InsertPt->getIterator());
-      }
-      break;
    }
+  }
+
+  // IndVarSimplify sometimes sets the insertion point at the block start, even
+  // when there are PHIs at that point.  We must correct for this.
+  if (isa<PHINode>(*InsertPt)) 
+    InsertPt = &*InsertPt->getParent()->getFirstInsertionPt();

  // Check to see if we already expanded this here.
  auto I = InsertedExpressions.find(std::make_pair(S, InsertPt));
--- a/test/Transforms/LoopVectorize/pr30806-phi-scev.ll
+++ b/test/Transforms/LoopVectorize/pr30806-phi-scev.ll
@ -0,0 +1,66 @@
+; RUN: opt -S -indvars < %s | FileCheck %s
+
+; Produced from the test-case:
+;
+; extern void foo(char *, unsigned , unsigned *);
+; extern void bar(int *, long);
+; extern char *processBuf(char *);
+;
+; extern unsigned theSize;
+;
+; void foo(char *buf, unsigned denominator, unsigned *flag) {
+;   int incr = (int) (theSize / denominator);
+;   int inx = 0;
+;   while (*flag) {
+;     int itmp = inx + incr;
+;     int i = (int) theSize;
+;     bar(&i, (long) itmp);
+;     buf = processBuf(buf);
+;     inx = itmp;
+;   }
+; }
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@theSize = external local_unnamed_addr global i32, align 4
+
+define void @foo(i8* %buf, i32 %denominator, i32* %flag) local_unnamed_addr {
+entry:
+  %i = alloca i32, align 4
+  %0 = load i32, i32* @theSize, align 4
+  %div = udiv i32 %0, %denominator
+  %1 = load i32, i32* %flag, align 4
+  %tobool5 = icmp eq i32 %1, 0
+  br i1 %tobool5, label %while.end, label %while.body.lr.ph
+
+while.body.lr.ph:                                 ; preds = %entry
+  %2 = bitcast i32* %i to i8*
+  br label %while.body
+
+while.body:                                       ; preds = %while.body.lr.ph, %while.body
+; Check that there are two PHIs followed by a 'sext' in the same block, and that
+; the test does not crash.
+; CHECK:        phi
+; CHECK-NEXT:   phi
+; CHECK-NEXT:   sext
+  %buf.addr.07 = phi i8* [ %buf, %while.body.lr.ph ], [ %call, %while.body ]
+  %inx.06 = phi i32 [ 0, %while.body.lr.ph ], [ %add, %while.body ]
+  %add = add nsw i32 %inx.06, %div
+  %3 = load i32, i32* @theSize, align 4
+  store i32 %3, i32* %i, align 4
+  %conv = sext i32 %add to i64
+  call void @bar(i32* nonnull %i, i64 %conv)
+  %call = call i8* @processBuf(i8* %buf.addr.07)
+  %4 = load i32, i32* %flag, align 4
+  %tobool = icmp eq i32 %4, 0
+  br i1 %tobool, label %while.end.loopexit, label %while.body
+
+while.end.loopexit:                               ; preds = %while.body
+  br label %while.end
+
+while.end:                                        ; preds = %while.end.loopexit, %entry
+  ret void
+}
+
+declare void @bar(i32*, i64) local_unnamed_addr
+declare i8* @processBuf(i8*) local_unnamed_addr
--- a/test/Transforms/LoopVectorize/pr30806.ll
+++ b/test/Transforms/LoopVectorize/pr30806.ll
@ -0,0 +1,65 @@
+; RUN: opt -loop-vectorize -S < %s 2>&1 | FileCheck %s
+
+; Produced from test-case:
+;
+; void testGuardedInnerLoop(uint32_t *ptr, uint32_t denom, uint32_t numer, uint32_t outer_lim)
+; {
+;   for(uint32_t outer_i = 0; outer_i < outer_lim; ++outer_i) {
+;     if (denom > 0) {
+;       const uint32_t lim = numer / denom;
+;
+;       for (uint32_t i = 0; i < lim; ++i)
+;         ptr[i] = 1;
+;     }
+;   }
+; }
+
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @testGuardedInnerLoop(i32* %ptr, i32 %denom, i32 %numer, i32 %outer_lim) {
+entry:
+  %cmp1 = icmp eq i32 %outer_lim, 0
+  br i1 %cmp1, label %exit, label %loop1.preheader
+
+; Verify that a 'udiv' does not appear between the 'loop1.preheader' label, and
+; whatever label comes next.
+loop1.preheader:
+; CHECK-LABEL: loop1.preheader:
+; CHECK-NOT: udiv
+; CHECK-LABEL: :
+  br label %loop1
+
+loop1:
+  %outer_i = phi i32 [ %inc1, %loop2.exit ], [ 0, %loop1.preheader ]
+  %0 = add i32 %denom, -1
+  %1 = icmp ult i32 %0, %numer
+  br i1 %1, label %loop2.preheader, label %loop2.exit
+
+; Verify that a 'udiv' does appear between the 'loop2.preheader' label, and
+; whatever label comes next.
+loop2.preheader:
+; CHECK-LABEL: loop2.preheader:
+; CHECK: udiv
+; CHECK-LABEL: :
+  %lim = udiv i32 %numer, %denom
+  %2 = zext i32 %lim to i64
+  br label %loop2
+
+loop2:
+  %indvar.loop2 = phi i64 [ 0, %loop2.preheader ], [ %indvar.loop2.next, %loop2 ]
+  %arrayidx = getelementptr inbounds i32, i32* %ptr, i64 %indvar.loop2
+  store i32 1, i32* %arrayidx, align 4
+  %indvar.loop2.next = add nuw nsw i64 %indvar.loop2, 1
+  %cmp2 = icmp ult i64 %indvar.loop2.next, %2
+  br i1 %cmp2, label %loop2, label %loop2.exit
+
+loop2.exit:
+  %inc1 = add nuw i32 %outer_i, 1
+  %exitcond = icmp eq i32 %inc1, %outer_lim
+  br i1 %exitcond, label %exit, label %loop1
+
+exit:
+  ret void
+}