From a68e72a0c84f813ba3009abe02919223a995a28d Mon Sep 17 00:00:00 2001
From: Silviu Baranga
Date: Tue, 12 Sep 2017 07:48:22 +0000
Subject: [PATCH] [LAA] Allow more run-time alias checks by coercing pointer
 expressions to AddRecExprs

Summary:
LAA can only emit run-time alias checks for pointers with affine AddRec
SCEV expressions. However, non-AddRecExprs can now be converted to
affine AddRecExprs using SCEV predicates. This change tries to add the
minimal set of SCEV predicates in order to enable run-time alias
checking. (A short illustrative sketch of the coercion step follows the
patch.)

Reviewers: anemet, mzolotukhin, mkuper, sanjoy, hfinkel

Reviewed By: hfinkel

Subscribers: mssimpso, Ayal, dorit, roman.shirokiy, mzolotukhin, llvm-commits

Differential Revision: https://reviews.llvm.org/D17080

llvm-svn: 313012
---
 lib/Analysis/LoopAccessAnalysis.cpp            | 123 ++++++++++++++----
 .../memcheck-wrapping-pointers.ll              | 107 +++++++++++++++
 2 files changed, 203 insertions(+), 27 deletions(-)
 create mode 100644 test/Analysis/LoopAccessAnalysis/memcheck-wrapping-pointers.ll

diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp
index 4ba12583ff8..9cda14db294 100644
--- a/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/lib/Analysis/LoopAccessAnalysis.cpp
@@ -522,6 +522,21 @@ public:
     Accesses.insert(MemAccessInfo(Ptr, true));
   }
 
+  /// \brief Check if we can emit a run-time no-alias check for \p Access.
+  ///
+  /// Returns true if we can emit a run-time no-alias check for \p Access.
+  /// If we can check this access, this also adds it to a dependence set and
+  /// adds a run-time check for it to \p RtCheck. If \p Assume is true,
+  /// we will attempt to use additional run-time checks in order to get
+  /// the bounds of the pointer.
+  bool createCheckForAccess(RuntimePointerChecking &RtCheck,
+                            MemAccessInfo Access,
+                            const ValueToValueMap &Strides,
+                            DenseMap<Value *, unsigned> &DepSetId,
+                            Loop *TheLoop, unsigned &RunningDepId,
+                            unsigned ASId, bool ShouldCheckStride,
+                            bool Assume);
+
   /// \brief Check whether we can check the pointers at runtime for
   /// non-intersection.
   ///
@@ -597,9 +612,11 @@ private:
 } // end anonymous namespace
 
 /// \brief Check whether a pointer can participate in a runtime bounds check.
+/// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr
+/// by adding run-time checks (overflow checks) if necessary.
 static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
                                 const ValueToValueMap &Strides, Value *Ptr,
-                                Loop *L) {
+                                Loop *L, bool Assume) {
   const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
 
   // The bounds for loop-invariant pointer is trivial.
@@ -607,6 +624,10 @@ static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
     return true;
 
   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
+
+  if (!AR && Assume)
+    AR = PSE.getAsAddRec(Ptr);
+
   if (!AR)
     return false;
 
@@ -621,9 +642,53 @@ static bool isNoWrap(PredicatedScalarEvolution &PSE,
     return true;
 
   int64_t Stride = getPtrStride(PSE, Ptr, L, Strides);
-  return Stride == 1;
+  if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
+    return true;
+
+  return false;
 }
 
+bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
+                                          MemAccessInfo Access,
+                                          const ValueToValueMap &StridesMap,
+                                          DenseMap<Value *, unsigned> &DepSetId,
+                                          Loop *TheLoop, unsigned &RunningDepId,
+                                          unsigned ASId, bool ShouldCheckWrap,
+                                          bool Assume) {
+  Value *Ptr = Access.getPointer();
+
+  if (!hasComputableBounds(PSE, StridesMap, Ptr, TheLoop, Assume))
+    return false;
+
+  // When we run after a failing dependency check we have to make sure
+  // we don't have wrapping pointers.
+  if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, TheLoop)) {
+    auto *Expr = PSE.getSCEV(Ptr);
+    if (!Assume || !isa<SCEVAddRecExpr>(Expr))
+      return false;
+    PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
+  }
+
+  // The id of the dependence set.
+  unsigned DepId;
+
+  if (isDependencyCheckNeeded()) {
+    Value *Leader = DepCands.getLeaderValue(Access).getPointer();
+    unsigned &LeaderId = DepSetId[Leader];
+    if (!LeaderId)
+      LeaderId = RunningDepId++;
+    DepId = LeaderId;
+  } else
+    // Each access has its own dependence set.
+    DepId = RunningDepId++;
+
+  bool IsWrite = Access.getInt();
+  RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
+  DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
+
+  return true;
+}
+
 bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
                                      ScalarEvolution *SE, Loop *TheLoop,
                                      const ValueToValueMap &StridesMap,
@@ -643,12 +708,15 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
   for (auto &AS : AST) {
     int NumReadPtrChecks = 0;
     int NumWritePtrChecks = 0;
+    bool CanDoAliasSetRT = true;
 
     // We assign consecutive id to access from different dependence sets.
     // Accesses within the same set don't need a runtime check.
     unsigned RunningDepId = 1;
     DenseMap<Value *, unsigned> DepSetId;
 
+    SmallVector<MemAccessInfo, 4> Retries;
+
     for (auto A : AS) {
       Value *Ptr = A.getValue();
       bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
@@ -659,29 +727,11 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
       else
         ++NumReadPtrChecks;
 
-      if (hasComputableBounds(PSE, StridesMap, Ptr, TheLoop) &&
-          // When we run after a failing dependency check we have to make sure
-          // we don't have wrapping pointers.
-          (!ShouldCheckWrap || isNoWrap(PSE, StridesMap, Ptr, TheLoop))) {
-        // The id of the dependence set.
-        unsigned DepId;
-
-        if (IsDepCheckNeeded) {
-          Value *Leader = DepCands.getLeaderValue(Access).getPointer();
-          unsigned &LeaderId = DepSetId[Leader];
-          if (!LeaderId)
-            LeaderId = RunningDepId++;
-          DepId = LeaderId;
-        } else
-          // Each access has its own dependence set.
-          DepId = RunningDepId++;
-
-        RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
-
-        DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
-      } else {
+      if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop,
+                                RunningDepId, ASId, ShouldCheckWrap, false)) {
         DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Ptr << '\n');
-        CanDoRT = false;
+        Retries.push_back(Access);
+        CanDoAliasSetRT = false;
       }
     }
 
@@ -693,10 +743,29 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
     // For example CanDoRT=false, NeedRTCheck=false means that we have a pointer
     // for which we couldn't find the bounds but we don't actually need to emit
    // any checks so it does not matter.
-    if (!(IsDepCheckNeeded && CanDoRT && RunningDepId == 2))
-      NeedRTCheck |= (NumWritePtrChecks >= 2 || (NumReadPtrChecks >= 1 &&
-                                                 NumWritePtrChecks >= 1));
+    bool NeedsAliasSetRTCheck = false;
+    if (!(IsDepCheckNeeded && CanDoAliasSetRT && RunningDepId == 2))
+      NeedsAliasSetRTCheck = (NumWritePtrChecks >= 2 ||
+                             (NumReadPtrChecks >= 1 && NumWritePtrChecks >= 1));
+    // We need to perform run-time alias checks, but some pointers had bounds
+    // that couldn't be checked.
+    if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) {
+      // Reset the CanDoAliasSetRT flag and retry all accesses that have failed.
+      // We know that we need these checks, so we can now be more aggressive
+      // and add further checks if required (overflow checks).
+      CanDoAliasSetRT = true;
+      for (auto Access : Retries)
+        if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId,
+                                  TheLoop, RunningDepId, ASId,
+                                  ShouldCheckWrap, /*Assume=*/true)) {
+          CanDoAliasSetRT = false;
+          break;
+        }
+    }
+
+    CanDoRT &= CanDoAliasSetRT;
+    NeedRTCheck |= NeedsAliasSetRTCheck;
 
     ++ASId;
   }
 
diff --git a/test/Analysis/LoopAccessAnalysis/memcheck-wrapping-pointers.ll b/test/Analysis/LoopAccessAnalysis/memcheck-wrapping-pointers.ll
new file mode 100644
index 00000000000..0263e93a6ca
--- /dev/null
+++ b/test/Analysis/LoopAccessAnalysis/memcheck-wrapping-pointers.ll
@@ -0,0 +1,107 @@
+; RUN: opt -basicaa -loop-accesses -analyze < %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+; i and i + 1 can overflow in the following kernel:
+; void test1(unsigned long long x, int *a, int *b) {
+;   for (unsigned i = 0; i < x; ++i)
+;     b[i] = a[i+1] + 1;
+; }
+;
+; If accesses to a and b can alias, we need to emit a run-time alias check
+; between accesses to a and b. However, when i and i + 1 can wrap, their
+; SCEV expression is not an AddRec. We need to create SCEV predicates and
+; coerce the expressions to AddRecs in order to be able to emit the run-time
+; alias check.
+;
+; The accesses at b[i] and a[i+1] correspond to the addresses %arrayidx and
+; %arrayidx4 in the test.
+; The SCEV expressions for these are:
+; ((4 * (zext i32 {1,+,1}<%for.body> to i64)) + %a)
+; ((4 * (zext i32 {0,+,1}<%for.body> to i64)) + %b)
+;
+; The transformed expressions are:
+; i64 {(4 + %a),+,4}<%for.body>
+; i64 {(4 + %b),+,4}<%for.body>
+
+; CHECK-LABEL: test1
+; CHECK: Memory dependences are safe with run-time checks
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Check 0:
+; CHECK-NEXT: Comparing group
+; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
+; CHECK-NEXT: Against group
+; CHECK-NEXT: %arrayidx4 = getelementptr inbounds i32, i32* %b, i64 %conv11
+; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT: Group
+; CHECK-NEXT: (Low: (4 + %a) High: (4 + (4 * (1 umax %x)) + %a))
+; CHECK-NEXT: Member: {(4 + %a),+,4}<%for.body>
+; CHECK-NEXT: Group
+; CHECK-NEXT: (Low: %b High: ((4 * (1 umax %x)) + %b))
+; CHECK-NEXT: Member: {%b,+,4}<%for.body>
+; CHECK: Store to invariant address was not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: <nusw>
+; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: <nusw>
+; CHECK: Expressions re-written:
+; CHECK-NEXT: [PSE] %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom:
+; CHECK-NEXT: ((4 * (zext i32 {1,+,1}<%for.body> to i64)) + %a)
+; CHECK-NEXT: --> {(4 + %a),+,4}<%for.body>
+; CHECK-NEXT: [PSE] %arrayidx4 = getelementptr inbounds i32, i32* %b, i64 %conv11:
+; CHECK-NEXT: ((4 * (zext i32 {0,+,1}<%for.body> to i64)) + %b)
+; CHECK-NEXT: --> {%b,+,4}<%for.body>
+define void @test1(i64 %x, i32* %a, i32* %b) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %conv11 = phi i64 [ %conv, %for.body ], [ 0, %entry ]
+  %i.010 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %add = add i32 %i.010, 1
+  %idxprom = zext i32 %add to i64
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
+  %ld = load i32, i32* %arrayidx, align 4
+  %add2 = add nsw i32 %ld, 1
+  %arrayidx4 = getelementptr inbounds i32, i32* %b, i64 %conv11
+  store i32 %add2, i32* %arrayidx4, align 4
+  %conv = zext i32 %add to i64
+  %cmp = icmp ult i64 %conv, %x
+  br i1 %cmp, label %for.body, label %exit
+
+exit:
+  ret void
+}
+
+; i can overflow in the following kernel:
+; void test2(unsigned long long x, int *a) {
+;   for (unsigned i = 0; i < x; ++i)
+;     a[i] = a[i] + 1;
+; }
+;
+; We need to check that i doesn't wrap, but we don't need a run-time alias
+; check. We also need an extra no-wrap check to get the backedge taken count.
+
+; CHECK-LABEL: test2
+; CHECK: Memory dependences are safe
+; CHECK: SCEV assumptions:
+; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: <nusw>
+; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: <nusw>
+define void @test2(i64 %x, i32* %a) {
+entry:
+  br label %for.body
+
+for.body:
+  %conv11 = phi i64 [ %conv, %for.body ], [ 0, %entry ]
+  %i.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %conv11
+  %ld = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %ld, 1
+  store i32 %add, i32* %arrayidx, align 4
+  %inc = add i32 %i.010, 1
+  %conv = zext i32 %inc to i64
+  %cmp = icmp ult i64 %conv, %x
+  br i1 %cmp, label %for.body, label %exit
+
+exit:
+  ret void
+}
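
Not part of the patch: the following is a minimal, hedged sketch of the coercion step that the new createCheckForAccess path relies on when Assume is true. It uses the PredicatedScalarEvolution interface that appears in the patch (getSCEV, getAsAddRec, setNoOverflow); the standalone helper name coerceToAddRec is illustrative only and does not exist in LLVM.

// Illustrative only -- not part of the patch above. Rough sketch of coercing
// a pointer whose SCEV is not an AddRec into one by adding SCEV predicates,
// mirroring hasComputableBounds/createCheckForAccess with Assume == true.
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/Value.h"

using namespace llvm;

static const SCEVAddRecExpr *coerceToAddRec(PredicatedScalarEvolution &PSE,
                                            Value *Ptr) {
  // If the plain SCEV form is already an AddRec, no extra predicate is needed.
  if (const auto *AR = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Ptr)))
    return AR;

  // Otherwise ask PSE to rewrite the expression as an AddRec under additional
  // predicates; these predicates later become run-time checks.
  const SCEVAddRecExpr *AR = PSE.getAsAddRec(Ptr);
  if (!AR)
    return nullptr; // Cannot express Ptr as an AddRec even with predicates.

  // Record the no-unsigned-self-wrap assumption so a run-time overflow check
  // is emitted for it.
  PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
  return AR;
}

In the patch itself these two steps are split: hasComputableBounds tries PSE.getAsAddRec when Assume is set, and createCheckForAccess adds the IncrementNUSW predicate only when the pointer is not already known to be non-wrapping.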