[LoopAccessAnalysis] Fix an integer overflow

We were inappropriately using 32-bit types to account for quantities that can be far larger. Fixed in PR28443. llvm-svn: 274737
2025-01-31 20:51:52 +01:00 · 2016-07-07 06:24:36 +00:00 · 2016-07-07 06:24:36 +00:00 · 5d1b0fc11f
commit 5d1b0fc11f
parent 004e56339e
3 changed files with 66 additions and 30 deletions
--- a/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/include/llvm/Analysis/LoopAccessAnalysis.h
@ -228,7 +228,7 @@ public:

  /// \brief The maximum number of bytes of a vector register we can vectorize
  /// the accesses safely with.
-  unsigned getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
+  uint64_t getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }

  /// \brief In same cases when the dependency check fails we can still
  /// vectorize the loop with a dynamic array access check.
@ -284,7 +284,7 @@ private:
  unsigned AccessIdx;

  // We can access this many bytes in parallel safely.
-  unsigned MaxSafeDepDistBytes;
+  uint64_t MaxSafeDepDistBytes;

  /// \brief If we see a non-constant dependence distance we can still try to
  /// vectorize this loop with runtime checks.
@ -324,7 +324,7 @@ private:
  ///
  /// \return false if we shouldn't vectorize at all or avoid larger
  /// vectorization factors by limiting MaxSafeDepDistBytes.
-  bool couldPreventStoreLoadForward(unsigned Distance, unsigned TypeByteSize);
+  bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize);
 };

 /// \brief Holds information about the memory runtime legality checks to verify
@ -575,7 +575,7 @@ public:
  /// Returns true if the value V is uniform within the loop.
  bool isUniform(Value *V) const;

-  unsigned getMaxSafeDepDistBytes() const { return MaxSafeDepDistBytes; }
+  uint64_t getMaxSafeDepDistBytes() const { return MaxSafeDepDistBytes; }
  unsigned getNumStores() const { return NumStores; }
  unsigned getNumLoads() const { return NumLoads;}

@ -672,7 +672,7 @@ private:
  unsigned NumLoads;
  unsigned NumStores;

-  unsigned MaxSafeDepDistBytes;
+  uint64_t MaxSafeDepDistBytes;

  /// \brief Cache the result of analyzeLoop.
  bool CanVecMem;
@ -719,9 +719,9 @@ const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
 /// to \p PtrToStride and therefore add further predicates to \p PSE.
 /// The \p Assume parameter indicates if we are allowed to make additional
 /// run-time assumptions.
-int getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
-                 const ValueToValueMap &StridesMap = ValueToValueMap(),
-                 bool Assume = false);
+int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
+                     const ValueToValueMap &StridesMap = ValueToValueMap(),
+                     bool Assume = false);

 /// \brief Returns true if the memory operations \p A and \p B are consecutive.
 /// This is a simple API that does not depend on the analysis pass. 
--- a/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/lib/Analysis/LoopAccessAnalysis.cpp
@ -575,7 +575,7 @@ static bool isNoWrap(PredicatedScalarEvolution &PSE,
  if (PSE.getSE()->isLoopInvariant(PtrScev, L))
    return true;

-  int Stride = getPtrStride(PSE, Ptr, L, Strides);
+  int64_t Stride = getPtrStride(PSE, Ptr, L, Strides);
  return Stride == 1;
 }

@ -866,9 +866,9 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
 }

 /// \brief Check whether the access through \p Ptr has a constant stride.
-int llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
-                       const Loop *Lp, const ValueToValueMap &StridesMap,
-                       bool Assume) {
+int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
+                           const Loop *Lp, const ValueToValueMap &StridesMap,
+                           bool Assume) {
  Type *Ty = Ptr->getType();
  assert(Ty->isPointerTy() && "Unexpected non-ptr");

@ -1097,8 +1097,8 @@ bool MemoryDepChecker::Dependence::isForward() const {
  llvm_unreachable("unexpected DepType!");
 }

-bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,
-                                                    unsigned TypeByteSize) {
+bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
+                                                    uint64_t TypeByteSize) {
  // If loads occur at a distance that is not a multiple of a feasible vector
  // factor store-load forwarding does not take place.
  // Positive dependences might cause troubles because vectorizing them might
@ -1111,13 +1111,13 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,

  // After this many iterations store-to-load forwarding conflicts should not
  // cause any slowdowns.
-  const unsigned NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
+  const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
  // Maximum vector factor.
-  unsigned MaxVFWithoutSLForwardIssues = std::min(
+  uint64_t MaxVFWithoutSLForwardIssues = std::min(
      VectorizerParams::MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes);

  // Compute the smallest VF at which the store and load would be misaligned.
-  for (unsigned VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
+  for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
       VF *= 2) {
    // If the number of vector iteration between the store and the load are
    // small we could incur conflicts.
@ -1145,8 +1145,8 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,
 /// bytes.
 ///
 /// \returns true if they are independent.
-static bool areStridedAccessesIndependent(unsigned Distance, unsigned Stride,
-                                          unsigned TypeByteSize) {
+static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
+                                          uint64_t TypeByteSize) {
  assert(Stride > 1 && "The stride must be greater than 1");
  assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
  assert(Distance > 0 && "The distance must be non-zero");
@ -1155,7 +1155,7 @@ static bool areStridedAccessesIndependent(unsigned Distance, unsigned Stride,
  if (Distance % TypeByteSize)
    return false;

-  unsigned ScaledDist = Distance / TypeByteSize;
+  uint64_t ScaledDist = Distance / TypeByteSize;

  // No dependence if the scaled distance is not multiple of the stride.
  // E.g.
@ -1196,8 +1196,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
      BPtr->getType()->getPointerAddressSpace())
    return Dependence::Unknown;

-  int StrideAPtr = getPtrStride(PSE, APtr, InnermostLoop, Strides, true);
-  int StrideBPtr = getPtrStride(PSE, BPtr, InnermostLoop, Strides, true);
+  int64_t StrideAPtr = getPtrStride(PSE, APtr, InnermostLoop, Strides, true);
+  int64_t StrideBPtr = getPtrStride(PSE, BPtr, InnermostLoop, Strides, true);

  const SCEV *Src = PSE.getSCEV(APtr);
  const SCEV *Sink = PSE.getSCEV(BPtr);
@ -1237,11 +1237,11 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
  Type *ATy = APtr->getType()->getPointerElementType();
  Type *BTy = BPtr->getType()->getPointerElementType();
  auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
-  unsigned TypeByteSize = DL.getTypeAllocSize(ATy);
+  uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);

  const APInt &Val = C->getAPInt();
  int64_t Distance = Val.getSExtValue();
-  unsigned Stride = std::abs(StrideAPtr);
+  uint64_t Stride = std::abs(StrideAPtr);

  // Attempt to prove strided accesses independent.
  if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy &&
@ -1315,9 +1315,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
  // If MinNumIter is 4 (Say if a user forces the vectorization factor to be 4),
  // the minimum distance needed is 28, which is greater than distance. It is
  // not safe to do vectorization.
-  unsigned MinDistanceNeeded =
+  uint64_t MinDistanceNeeded =
      TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
-  if (MinDistanceNeeded > Distance) {
+  if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) {
    DEBUG(dbgs() << "LAA: Failure because of positive distance " << Distance
                 << '\n');
    return Dependence::Backward;
@ -1347,7 +1347,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
  // is 8, which is less than 2 and forbidden vectorization, But actually
  // both A and B could be vectorized by 2 iterations.
  MaxSafeDepDistBytes =
-      Distance < MaxSafeDepDistBytes ? Distance : MaxSafeDepDistBytes;
+      std::min(static_cast<uint64_t>(Distance), MaxSafeDepDistBytes);

  bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
  if (IsTrueDataDependence && EnableForwardingConflictDetection &&
@ -1365,7 +1365,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
                                   MemAccessInfoSet &CheckDeps,
                                   const ValueToValueMap &Strides) {

-  MaxSafeDepDistBytes = -1U;
+  MaxSafeDepDistBytes = -1;
  while (!CheckDeps.empty()) {
    MemAccessInfo CurAccess = *CheckDeps.begin();

@ -1926,7 +1926,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
      PtrRtChecking(llvm::make_unique<RuntimePointerChecking>(SE)),
      DepChecker(llvm::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
      DL(&DL), TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
-      MaxSafeDepDistBytes(-1U), CanVecMem(false),
+      MaxSafeDepDistBytes(-1), CanVecMem(false),
      StoreToLoopInvariantAddress(false) {
  if (canAnalyzeLoop())
    analyzeLoop();
@ -1935,7 +1935,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
 void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
  if (CanVecMem) {
    OS.indent(Depth) << "Memory dependences are safe";
-    if (MaxSafeDepDistBytes != -1U)
+    if (MaxSafeDepDistBytes != -1ULL)
      OS << " with a maximum dependence distance of " << MaxSafeDepDistBytes
         << " bytes";
    if (PtrRtChecking->Need)
--- a/test/Transforms/LoopDistribute/pr28443.ll
+++ b/test/Transforms/LoopDistribute/pr28443.ll
@ -0,0 +1,36 @@
+; RUN: opt -basicaa -loop-distribute -verify-loop-info -verify-dom-info -S \
+; RUN:   < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @fn1(i64 %a, i64* %b) {
+entry:
+  br label %for.body
+
+for.body:
+  %add75.epil = phi i64 [ %add7.epil, %for.body ], [ %a, %entry ]
+  %add1.epil = add nsw i64 %add75.epil, 268435457
+  %arrayidx.epil = getelementptr inbounds i64, i64* %b, i64 %add1.epil
+  %load = load i64, i64* %arrayidx.epil, align 8
+  %add5.epil = add nsw i64 %add75.epil, 805306369
+  %arrayidx6.epil = getelementptr inbounds i64, i64* %b, i64 %add5.epil
+  store i64 %load, i64* %arrayidx6.epil, align 8
+  %add7.epil = add nsw i64 %add75.epil, 2
+  %epil.iter.cmp = icmp eq i64 %add7.epil, 0
+  br i1 %epil.iter.cmp, label %for.end, label %for.body
+
+  ; CHECK: %[[phi:.*]]  = phi i64
+  ; CHECK: %[[add1:.*]] = add nsw i64 %[[phi]], 268435457
+  ; CHECK: %[[gep1:.*]] = getelementptr inbounds i64, i64* %b, i64 %[[add1]]
+  ; CHECK: %[[load:.*]] = load i64, i64* %[[gep1]], align 8
+  ; CHECK: %[[add2:.*]] = add nsw i64 %[[phi]], 805306369
+  ; CHECK: %[[gep2:.*]] = getelementptr inbounds i64, i64* %b, i64 %[[add2]]
+  ; CHECK: store i64 %[[load]], i64* %[[gep2]], align 8
+  ; CHECK: %[[incr:.*]] = add nsw i64 %[[phi]], 2
+  ; CHECK: %[[cmp:.*]]  = icmp eq i64 %[[incr]], 0
+  ; CHECK: br i1 %[[cmp]]
+
+for.end:
+  ret void
+}