From 0f3bf80439282510096550e46163df620cb97a03 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Sat, 5 Dec 2020 17:26:33 +0100
Subject: [PATCH] [BasicAA] Migrate "same base pointer" logic to decomposed
 GEPs

BasicAA has some special bit of logic for "same base pointer" GEPs
that performs a structural comparison: It only looks at two GEPs
with the same base (as opposed to two GEP chains with a MustAlias
base) and compares their indexes in a limited way. I generalized
part of this code in D91027, and this patch merges the remainder
into the normal decomposed GEP logic.

What this code ultimately wants to do is to determine that
gep %base, %idx1 and gep %base, %idx2 don't alias if %idx1 != %idx2,
and the access size fits within the stride.

We can express this in terms of a decomposed GEP expression with
two indexes scale*%idx1 + -scale*%idx2 where %idx1 != %idx2, and
some appropriate checks for sizes and offsets.

This makes the reasoning slightly more powerful, and more
importantly brings all the GEP logic under a common umbrella.

Differential Revision: https://reviews.llvm.org/D92723
---
 include/llvm/Analysis/BasicAliasAnalysis.h |   6 -
 lib/Analysis/BasicAliasAnalysis.cpp        | 149 ++++-----------------
 test/Analysis/BasicAA/fallback-mayalias.ll |  13 +-
 test/Analysis/BasicAA/sequential-gep.ll    |  10 +-
 4 files changed, 36 insertions(+), 142 deletions(-)

diff --git a/include/llvm/Analysis/BasicAliasAnalysis.h b/include/llvm/Analysis/BasicAliasAnalysis.h
index eedecd2a438..d9a17495169 100644
--- a/include/llvm/Analysis/BasicAliasAnalysis.h
+++ b/include/llvm/Analysis/BasicAliasAnalysis.h
@@ -202,12 +202,6 @@ private:
       const DecomposedGEP &DecompGEP, const DecomposedGEP &DecompObject,
       LocationSize ObjectAccessSize);
 
-  AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
-                                       LocationSize MaybeV1Size,
-                                       const GEPOperator *GEP2,
-                                       LocationSize MaybeV2Size,
-                                       const DataLayout &DL);
-
   /// A Heuristic for aliasGEP that searches for a constant offset
   /// between the variables.
   ///
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 5e611a9e193..7e48b7303be 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -1030,114 +1030,6 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call1,
   return AAResultBase::getModRefInfo(Call1, Call2, AAQI);
 }
 
-/// Provide ad-hoc rules to disambiguate accesses through two GEP operators,
-/// both having the exact same pointer operand.
-AliasResult BasicAAResult::aliasSameBasePointerGEPs(const GEPOperator *GEP1,
-                                                    LocationSize MaybeV1Size,
-                                                    const GEPOperator *GEP2,
-                                                    LocationSize MaybeV2Size,
-                                                    const DataLayout &DL) {
-  assert(GEP1->getPointerOperand()->stripPointerCastsAndInvariantGroups() ==
-             GEP2->getPointerOperand()->stripPointerCastsAndInvariantGroups() &&
-         GEP1->getPointerOperandType() == GEP2->getPointerOperandType() &&
-         "Expected GEPs with the same pointer operand");
-
-  // Try to determine whether GEP1 and GEP2 index through arrays, into structs,
-  // such that the struct field accesses provably cannot alias.
-  // We also need at least two indices (the pointer, and the struct field).
-  if (GEP1->getNumIndices() != GEP2->getNumIndices() ||
-      GEP1->getNumIndices() < 2)
-    return MayAlias;
-
-  // If we don't know the size of the accesses through both GEPs, we can't
-  // determine whether the struct fields accessed can't alias.
-  if (!MaybeV1Size.hasValue() || !MaybeV2Size.hasValue())
-    return MayAlias;
-
-  const uint64_t V1Size = MaybeV1Size.getValue();
-  const uint64_t V2Size = MaybeV2Size.getValue();
-
-  ConstantInt *C1 =
-      dyn_cast<ConstantInt>(GEP1->getOperand(GEP1->getNumOperands() - 1));
-  ConstantInt *C2 =
-      dyn_cast<ConstantInt>(GEP2->getOperand(GEP2->getNumOperands() - 1));
-
-  // If the last (struct) indices are constants and are equal, the other indices
-  // might be also be dynamically equal, so the GEPs can alias.
-  if (C1 && C2) {
-    unsigned BitWidth = std::max(C1->getBitWidth(), C2->getBitWidth());
-    if (C1->getValue().sextOrSelf(BitWidth) ==
-        C2->getValue().sextOrSelf(BitWidth))
-      return MayAlias;
-  }
-
-  // Find the last-indexed type of the GEP, i.e., the type you'd get if
-  // you stripped the last index.
-  // On the way, look at each indexed type.  If there's something other
-  // than an array, different indices can lead to different final types.
-  SmallVector<Value *, 8> IntermediateIndices;
-
-  // Insert the first index; we don't need to check the type indexed
-  // through it as it only drops the pointer indirection.
-  assert(GEP1->getNumIndices() > 1 && "Not enough GEP indices to examine");
-  IntermediateIndices.push_back(GEP1->getOperand(1));
-
-  // Insert all the remaining indices but the last one.
-  // Also, check that they all index through arrays.
-  for (unsigned i = 1, e = GEP1->getNumIndices() - 1; i != e; ++i) {
-    if (!isa<ArrayType>(GetElementPtrInst::getIndexedType(
-            GEP1->getSourceElementType(), IntermediateIndices)))
-      return MayAlias;
-    IntermediateIndices.push_back(GEP1->getOperand(i + 1));
-  }
-
-  auto *Ty = GetElementPtrInst::getIndexedType(
-    GEP1->getSourceElementType(), IntermediateIndices);
-  if (isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
-    // We know that:
-    // - both GEPs begin indexing from the exact same pointer;
-    // - the last indices in both GEPs are constants, indexing into a sequential
-    //   type (array or vector);
-    // - both GEPs only index through arrays prior to that.
-    //
-    // Because array indices greater than the number of elements are valid in
-    // GEPs, unless we know the intermediate indices are identical between
-    // GEP1 and GEP2 we cannot guarantee that the last indexed arrays don't
-    // partially overlap. We also need to check that the loaded size matches
-    // the element size, otherwise we could still have overlap.
-    Type *LastElementTy = GetElementPtrInst::getTypeAtIndex(Ty, (uint64_t)0);
-    const uint64_t ElementSize =
-        DL.getTypeStoreSize(LastElementTy).getFixedSize();
-    if (V1Size != ElementSize || V2Size != ElementSize)
-      return MayAlias;
-
-    for (unsigned i = 0, e = GEP1->getNumIndices() - 1; i != e; ++i)
-      if (GEP1->getOperand(i + 1) != GEP2->getOperand(i + 1))
-        return MayAlias;
-
-    // Now we know that the array/pointer that GEP1 indexes into and that
-    // that GEP2 indexes into must either precisely overlap or be disjoint.
-    // Because they cannot partially overlap and because fields in an array
-    // cannot overlap, if we can prove the final indices are different between
-    // GEP1 and GEP2, we can conclude GEP1 and GEP2 don't alias.
-
-    // If the last indices are constants, we've already checked they don't
-    // equal each other so we can exit early.
-    if (C1 && C2)
-      return NoAlias;
-    {
-      // If we're not potentially reasoning about values from different
-      // iterations, see if we can prove them inequal.
-      Value *GEP1LastIdx = GEP1->getOperand(GEP1->getNumOperands() - 1);
-      Value *GEP2LastIdx = GEP2->getOperand(GEP2->getNumOperands() - 1);
-      if (VisitedPhiBBs.empty() &&
-          isKnownNonEqual(GEP1LastIdx, GEP2LastIdx, DL))
-        return NoAlias;
-    }
-  }
-  return MayAlias;
-}
-
 // If a we have (a) a GEP and (b) a pointer based on an alloca, and the
 // beginning of the object the GEP points would have a negative offset with
 // repsect to the alloca, that means the GEP can not alias pointer (b).
@@ -1254,21 +1146,6 @@ AliasResult BasicAAResult::aliasGEP(
       return BaseAlias;
     }
 
-    // Otherwise, we have a MustAlias.  Since the base pointers alias each other
-    // exactly, see if the computed offset from the common pointer tells us
-    // about the relation of the resulting pointer.
-    // If we know the two GEPs are based off of the exact same pointer (and not
-    // just the same underlying object), see if that tells us anything about
-    // the resulting pointers.
-    if (GEP1->getPointerOperand()->stripPointerCastsAndInvariantGroups() ==
-            GEP2->getPointerOperand()->stripPointerCastsAndInvariantGroups() &&
-        GEP1->getPointerOperandType() == GEP2->getPointerOperandType()) {
-      AliasResult R = aliasSameBasePointerGEPs(GEP1, V1Size, GEP2, V2Size, DL);
-      // If we couldn't find anything interesting, don't abandon just yet.
-      if (R != MayAlias)
-        return R;
-    }
-
     // Subtract the GEP2 pointer from the GEP1 pointer to find out their
     // symbolic difference.
     DecompGEP1.Offset -= DecompGEP2.Offset;
@@ -1396,6 +1273,32 @@ AliasResult BasicAAResult::aliasGEP(
         (-DecompGEP1.Offset).uge(V1Size.getValue()))
       return NoAlias;
 
+    // Try to determine whether the variable part of the GEP is non-zero, in
+    // which case we can add/subtract a minimum scale from the offset.
+    // TODO: Currently this handles the case of Scale*V0-Scale*V1 where V0!=V1.
+    // We could also handle Scale*V0 where V0!=0.
+    if (V1Size.hasValue() && V2Size.hasValue() &&
+        DecompGEP1.VarIndices.size() == 2) {
+      const VariableGEPIndex &Var0 = DecompGEP1.VarIndices[0];
+      const VariableGEPIndex &Var1 = DecompGEP1.VarIndices[1];
+      // Check that VisitedPhiBBs is empty, to avoid reasoning about inequality
+      // of values across loop iterations.
+      if (Var0.Scale == -Var1.Scale && Var0.ZExtBits == Var1.ZExtBits &&
+          Var0.SExtBits == Var1.SExtBits && VisitedPhiBBs.empty() &&
+          isKnownNonEqual(Var0.V, Var1.V, DL)) {
+        // If the indexes are not equal, the actual offset will have at least
+        // Scale or -Scale added to it.
+        APInt Scale = Var0.Scale.abs();
+        APInt OffsetLo = DecompGEP1.Offset - Scale;
+        APInt OffsetHi = DecompGEP1.Offset + Scale;
+        // Check that an access at OffsetLo or lower, and an access at OffsetHi
+        // or higher both do not alias.
+        if (OffsetLo.isNegative() && (-OffsetLo).uge(V1Size.getValue()) &&
+            OffsetHi.isNonNegative() && OffsetHi.uge(V2Size.getValue()))
+          return NoAlias;
+      }
+    }
+
     if (constantOffsetHeuristic(DecompGEP1.VarIndices, V1Size, V2Size,
                                 DecompGEP1.Offset, &AC, DT))
       return NoAlias;
diff --git a/test/Analysis/BasicAA/fallback-mayalias.ll b/test/Analysis/BasicAA/fallback-mayalias.ll
index 7e0a85c876a..9315e03fc5c 100644
--- a/test/Analysis/BasicAA/fallback-mayalias.ll
+++ b/test/Analysis/BasicAA/fallback-mayalias.ll
@@ -7,17 +7,16 @@
 
 define void @fallback_mayalias(float* noalias nocapture %C, i64 %i, i64 %j) local_unnamed_addr {
 entry:
-  %shl = shl i64 %i, 3
-  %mul = shl nsw i64 %j, 4
-  %addA = add nsw i64 %mul, %shl
-  %orB = or i64 %shl, 1
-  %addB = add nsw i64 %mul, %orB
+  %cmp = icmp ne i64 %i, %j
+  call void @llvm.assume(i1 %cmp)
 
-  %arrayidxA = getelementptr inbounds float, float* %C, i64 %addA
+  %arrayidxA = getelementptr inbounds float, float* %C, i64 %i
   store float undef, float* %arrayidxA, align 4
 
-  %arrayidxB = getelementptr inbounds float, float* %C, i64 %addB
+  %arrayidxB = getelementptr inbounds float, float* %C, i64 %j
   store float undef, float* %arrayidxB, align 4
 
   ret void
 }
+
+declare void @llvm.assume(i1)
diff --git a/test/Analysis/BasicAA/sequential-gep.ll b/test/Analysis/BasicAA/sequential-gep.ll
index 0b53958d6f6..bc455a04adc 100644
--- a/test/Analysis/BasicAA/sequential-gep.ll
+++ b/test/Analysis/BasicAA/sequential-gep.ll
@@ -52,8 +52,7 @@ define void @t5([8 x i32]* %p, i32 %addend, i32* %q) {
 }
 
 ; CHECK-LABEL: Function: add_non_zero_simple
-; CHECK: MayAlias: i32* %gep1, i32* %gep2
-; TODO: This could be NoAlias.
+; CHECK: NoAlias: i32* %gep1, i32* %gep2
 define void @add_non_zero_simple(i32* %p, i32 %addend, i32* %q) {
   %knownnonzero = load i32, i32* %q, !range !0
   %add = add i32 %addend, %knownnonzero
@@ -74,15 +73,14 @@ define void @add_non_zero_different_scales(i32* %p, i32 %addend, i32* %q) {
 }
 
 ; CHECK-LABEL: Function: add_non_zero_different_sizes
-; CHECK: MayAlias: i16* %gep1.16, i32* %gep2
-; CHECK: MayAlias: i16* %gep2.16, i32* %gep1
-; CHECK: MayAlias: i16* %gep1.16, i16* %gep2.16
+; CHECK: NoAlias: i16* %gep1.16, i32* %gep2
+; CHECK: NoAlias: i16* %gep2.16, i32* %gep1
+; CHECK: NoAlias: i16* %gep1.16, i16* %gep2.16
 ; CHECK: MayAlias: i32* %gep2, i64* %gep1.64
 ; CHECK: MayAlias: i16* %gep2.16, i64* %gep1.64
 ; CHECK: MayAlias: i32* %gep1, i64* %gep2.64
 ; CHECK: MayAlias: i16* %gep1.16, i64* %gep2.64
 ; CHECK: MayAlias: i64* %gep1.64, i64* %gep2.64
-; TODO: The first three could be NoAlias.
 define void @add_non_zero_different_sizes(i32* %p, i32 %addend, i32* %q) {
   %knownnonzero = load i32, i32* %q, !range !0
   %add = add i32 %addend, %knownnonzero