diff --git a/include/llvm/Analysis/LoopAccessAnalysis.h b/include/llvm/Analysis/LoopAccessAnalysis.h index 532254fa6ca..04b307db875 100644 --- a/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/include/llvm/Analysis/LoopAccessAnalysis.h @@ -228,7 +228,7 @@ public: /// \brief The maximum number of bytes of a vector register we can vectorize /// the accesses safely with. - unsigned getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; } + uint64_t getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; } /// \brief In same cases when the dependency check fails we can still /// vectorize the loop with a dynamic array access check. @@ -284,7 +284,7 @@ private: unsigned AccessIdx; // We can access this many bytes in parallel safely. - unsigned MaxSafeDepDistBytes; + uint64_t MaxSafeDepDistBytes; /// \brief If we see a non-constant dependence distance we can still try to /// vectorize this loop with runtime checks. @@ -324,7 +324,7 @@ private: /// /// \return false if we shouldn't vectorize at all or avoid larger /// vectorization factors by limiting MaxSafeDepDistBytes. - bool couldPreventStoreLoadForward(unsigned Distance, unsigned TypeByteSize); + bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize); }; /// \brief Holds information about the memory runtime legality checks to verify @@ -575,7 +575,7 @@ public: /// Returns true if the value V is uniform within the loop. bool isUniform(Value *V) const; - unsigned getMaxSafeDepDistBytes() const { return MaxSafeDepDistBytes; } + uint64_t getMaxSafeDepDistBytes() const { return MaxSafeDepDistBytes; } unsigned getNumStores() const { return NumStores; } unsigned getNumLoads() const { return NumLoads;} @@ -672,7 +672,7 @@ private: unsigned NumLoads; unsigned NumStores; - unsigned MaxSafeDepDistBytes; + uint64_t MaxSafeDepDistBytes; /// \brief Cache the result of analyzeLoop. 
bool CanVecMem; @@ -719,9 +719,9 @@ const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, /// to \p PtrToStride and therefore add further predicates to \p PSE. /// The \p Assume parameter indicates if we are allowed to make additional /// run-time assumptions. -int getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, - const ValueToValueMap &StridesMap = ValueToValueMap(), - bool Assume = false); +int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, + const ValueToValueMap &StridesMap = ValueToValueMap(), + bool Assume = false); /// \brief Returns true if the memory operations \p A and \p B are consecutive. /// This is a simple API that does not depend on the analysis pass. diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp index ead137b9435..0725df05b43 100644 --- a/lib/Analysis/LoopAccessAnalysis.cpp +++ b/lib/Analysis/LoopAccessAnalysis.cpp @@ -575,7 +575,7 @@ static bool isNoWrap(PredicatedScalarEvolution &PSE, if (PSE.getSE()->isLoopInvariant(PtrScev, L)) return true; - int Stride = getPtrStride(PSE, Ptr, L, Strides); + int64_t Stride = getPtrStride(PSE, Ptr, L, Strides); return Stride == 1; } @@ -866,9 +866,9 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR, } /// \brief Check whether the access through \p Ptr has a constant stride. 
-int llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, - const Loop *Lp, const ValueToValueMap &StridesMap, - bool Assume) { +int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, + const Loop *Lp, const ValueToValueMap &StridesMap, + bool Assume) { Type *Ty = Ptr->getType(); assert(Ty->isPointerTy() && "Unexpected non-ptr"); @@ -1097,8 +1097,8 @@ bool MemoryDepChecker::Dependence::isForward() const { llvm_unreachable("unexpected DepType!"); } -bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance, - unsigned TypeByteSize) { +bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance, + uint64_t TypeByteSize) { // If loads occur at a distance that is not a multiple of a feasible vector // factor store-load forwarding does not take place. // Positive dependences might cause troubles because vectorizing them might @@ -1111,13 +1111,13 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance, // After this many iterations store-to-load forwarding conflicts should not // cause any slowdowns. - const unsigned NumItersForStoreLoadThroughMemory = 8 * TypeByteSize; + const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize; // Maximum vector factor. - unsigned MaxVFWithoutSLForwardIssues = std::min( + uint64_t MaxVFWithoutSLForwardIssues = std::min( VectorizerParams::MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes); // Compute the smallest VF at which the store and load would be misaligned. - for (unsigned VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues; + for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues; VF *= 2) { // If the number of vector iteration between the store and the load are // small we could incur conflicts. @@ -1145,8 +1145,8 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance, /// bytes. /// /// \returns true if they are independent. 
-static bool areStridedAccessesIndependent(unsigned Distance, unsigned Stride, - unsigned TypeByteSize) { +static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride, + uint64_t TypeByteSize) { assert(Stride > 1 && "The stride must be greater than 1"); assert(TypeByteSize > 0 && "The type size in byte must be non-zero"); assert(Distance > 0 && "The distance must be non-zero"); @@ -1155,7 +1155,7 @@ static bool areStridedAccessesIndependent(unsigned Distance, unsigned Stride, if (Distance % TypeByteSize) return false; - unsigned ScaledDist = Distance / TypeByteSize; + uint64_t ScaledDist = Distance / TypeByteSize; // No dependence if the scaled distance is not multiple of the stride. // E.g. @@ -1196,8 +1196,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, BPtr->getType()->getPointerAddressSpace()) return Dependence::Unknown; - int StrideAPtr = getPtrStride(PSE, APtr, InnermostLoop, Strides, true); - int StrideBPtr = getPtrStride(PSE, BPtr, InnermostLoop, Strides, true); + int64_t StrideAPtr = getPtrStride(PSE, APtr, InnermostLoop, Strides, true); + int64_t StrideBPtr = getPtrStride(PSE, BPtr, InnermostLoop, Strides, true); const SCEV *Src = PSE.getSCEV(APtr); const SCEV *Sink = PSE.getSCEV(BPtr); @@ -1237,11 +1237,11 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, Type *ATy = APtr->getType()->getPointerElementType(); Type *BTy = BPtr->getType()->getPointerElementType(); auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout(); - unsigned TypeByteSize = DL.getTypeAllocSize(ATy); + uint64_t TypeByteSize = DL.getTypeAllocSize(ATy); const APInt &Val = C->getAPInt(); int64_t Distance = Val.getSExtValue(); - unsigned Stride = std::abs(StrideAPtr); + uint64_t Stride = std::abs(StrideAPtr); // Attempt to prove strided accesses independent. 
if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy && @@ -1315,9 +1315,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // If MinNumIter is 4 (Say if a user forces the vectorization factor to be 4), // the minimum distance needed is 28, which is greater than distance. It is // not safe to do vectorization. - unsigned MinDistanceNeeded = + uint64_t MinDistanceNeeded = TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize; - if (MinDistanceNeeded > Distance) { + if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) { DEBUG(dbgs() << "LAA: Failure because of positive distance " << Distance << '\n'); return Dependence::Backward; @@ -1347,7 +1347,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // is 8, which is less than 2 and forbidden vectorization, But actually // both A and B could be vectorized by 2 iterations. MaxSafeDepDistBytes = - Distance < MaxSafeDepDistBytes ? Distance : MaxSafeDepDistBytes; + std::min(static_cast<uint64_t>(Distance), MaxSafeDepDistBytes); bool IsTrueDataDependence = (!AIsWrite && BIsWrite); if (IsTrueDataDependence && EnableForwardingConflictDetection && @@ -1365,7 +1365,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets, MemAccessInfoSet &CheckDeps, const ValueToValueMap &Strides) { - MaxSafeDepDistBytes = -1U; + MaxSafeDepDistBytes = -1; while (!CheckDeps.empty()) { MemAccessInfo CurAccess = *CheckDeps.begin(); @@ -1926,7 +1926,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, PtrRtChecking(llvm::make_unique<RuntimePointerChecking>(SE)), DepChecker(llvm::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L), DL(&DL), TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0), - MaxSafeDepDistBytes(-1U), CanVecMem(false), + MaxSafeDepDistBytes(-1), CanVecMem(false), StoreToLoopInvariantAddress(false) { if (canAnalyzeLoop()) analyzeLoop(); @@ -1935,7 +1935,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { if (CanVecMem) { 
OS.indent(Depth) << "Memory dependences are safe"; - if (MaxSafeDepDistBytes != -1U) + if (MaxSafeDepDistBytes != -1ULL) OS << " with a maximum dependence distance of " << MaxSafeDepDistBytes << " bytes"; if (PtrRtChecking->Need) diff --git a/test/Transforms/LoopDistribute/pr28443.ll b/test/Transforms/LoopDistribute/pr28443.ll new file mode 100644 index 00000000000..0b8839c8477 --- /dev/null +++ b/test/Transforms/LoopDistribute/pr28443.ll @@ -0,0 +1,36 @@ +; RUN: opt -basicaa -loop-distribute -verify-loop-info -verify-dom-info -S \ +; RUN: < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @fn1(i64 %a, i64* %b) { +entry: + br label %for.body + +for.body: + %add75.epil = phi i64 [ %add7.epil, %for.body ], [ %a, %entry ] + %add1.epil = add nsw i64 %add75.epil, 268435457 + %arrayidx.epil = getelementptr inbounds i64, i64* %b, i64 %add1.epil + %load = load i64, i64* %arrayidx.epil, align 8 + %add5.epil = add nsw i64 %add75.epil, 805306369 + %arrayidx6.epil = getelementptr inbounds i64, i64* %b, i64 %add5.epil + store i64 %load, i64* %arrayidx6.epil, align 8 + %add7.epil = add nsw i64 %add75.epil, 2 + %epil.iter.cmp = icmp eq i64 %add7.epil, 0 + br i1 %epil.iter.cmp, label %for.end, label %for.body + + ; CHECK: %[[phi:.*]] = phi i64 + ; CHECK: %[[add1:.*]] = add nsw i64 %[[phi]], 268435457 + ; CHECK: %[[gep1:.*]] = getelementptr inbounds i64, i64* %b, i64 %[[add1]] + ; CHECK: %[[load:.*]] = load i64, i64* %[[gep1]], align 8 + ; CHECK: %[[add2:.*]] = add nsw i64 %[[phi]], 805306369 + ; CHECK: %[[gep2:.*]] = getelementptr inbounds i64, i64* %b, i64 %[[add2]] + ; CHECK: store i64 %[[load]], i64* %[[gep2]], align 8 + ; CHECK: %[[incr:.*]] = add nsw i64 %[[phi]], 2 + ; CHECK: %[[cmp:.*]] = icmp eq i64 %[[incr]], 0 + ; CHECK: br i1 %[[cmp]] + +for.end: + ret void +}