1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 10:42:39 +01:00

[LoopAccessAnalysis] Fix an integer overflow

We were inappropriately using 32-bit types to account for quantities
that can be far larger.

Fixed in PR28443.

llvm-svn: 274737
This commit is contained in:
David Majnemer 2016-07-07 06:24:36 +00:00
parent 004e56339e
commit 5d1b0fc11f
3 changed files with 66 additions and 30 deletions

View File

@ -228,7 +228,7 @@ public:
/// \brief The maximum number of bytes of a vector register we can vectorize
/// the accesses safely with.
unsigned getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
uint64_t getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
/// \brief In some cases when the dependency check fails we can still
/// vectorize the loop with a dynamic array access check.
@ -284,7 +284,7 @@ private:
unsigned AccessIdx;
// We can access this many bytes in parallel safely.
unsigned MaxSafeDepDistBytes;
uint64_t MaxSafeDepDistBytes;
/// \brief If we see a non-constant dependence distance we can still try to
/// vectorize this loop with runtime checks.
@ -324,7 +324,7 @@ private:
///
/// \return false if we shouldn't vectorize at all or avoid larger
/// vectorization factors by limiting MaxSafeDepDistBytes.
bool couldPreventStoreLoadForward(unsigned Distance, unsigned TypeByteSize);
bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize);
};
/// \brief Holds information about the memory runtime legality checks to verify
@ -575,7 +575,7 @@ public:
/// Returns true if the value V is uniform within the loop.
bool isUniform(Value *V) const;
unsigned getMaxSafeDepDistBytes() const { return MaxSafeDepDistBytes; }
uint64_t getMaxSafeDepDistBytes() const { return MaxSafeDepDistBytes; }
unsigned getNumStores() const { return NumStores; }
unsigned getNumLoads() const { return NumLoads;}
@ -672,7 +672,7 @@ private:
unsigned NumLoads;
unsigned NumStores;
unsigned MaxSafeDepDistBytes;
uint64_t MaxSafeDepDistBytes;
/// \brief Cache the result of analyzeLoop.
bool CanVecMem;
@ -719,9 +719,9 @@ const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
/// to \p PtrToStride and therefore add further predicates to \p PSE.
/// The \p Assume parameter indicates if we are allowed to make additional
/// run-time assumptions.
int getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
const ValueToValueMap &StridesMap = ValueToValueMap(),
bool Assume = false);
int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
const ValueToValueMap &StridesMap = ValueToValueMap(),
bool Assume = false);
/// \brief Returns true if the memory operations \p A and \p B are consecutive.
/// This is a simple API that does not depend on the analysis pass.

View File

@ -575,7 +575,7 @@ static bool isNoWrap(PredicatedScalarEvolution &PSE,
if (PSE.getSE()->isLoopInvariant(PtrScev, L))
return true;
int Stride = getPtrStride(PSE, Ptr, L, Strides);
int64_t Stride = getPtrStride(PSE, Ptr, L, Strides);
return Stride == 1;
}
@ -866,9 +866,9 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
}
/// \brief Check whether the access through \p Ptr has a constant stride.
int llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
const Loop *Lp, const ValueToValueMap &StridesMap,
bool Assume) {
int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
const Loop *Lp, const ValueToValueMap &StridesMap,
bool Assume) {
Type *Ty = Ptr->getType();
assert(Ty->isPointerTy() && "Unexpected non-ptr");
@ -1097,8 +1097,8 @@ bool MemoryDepChecker::Dependence::isForward() const {
llvm_unreachable("unexpected DepType!");
}
bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,
unsigned TypeByteSize) {
bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
uint64_t TypeByteSize) {
// If loads occur at a distance that is not a multiple of a feasible vector
// factor store-load forwarding does not take place.
// Positive dependences might cause troubles because vectorizing them might
@ -1111,13 +1111,13 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,
// After this many iterations store-to-load forwarding conflicts should not
// cause any slowdowns.
const unsigned NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
// Maximum vector factor.
unsigned MaxVFWithoutSLForwardIssues = std::min(
uint64_t MaxVFWithoutSLForwardIssues = std::min(
VectorizerParams::MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes);
// Compute the smallest VF at which the store and load would be misaligned.
for (unsigned VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
VF *= 2) {
// If the number of vector iteration between the store and the load are
// small we could incur conflicts.
@ -1145,8 +1145,8 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,
/// bytes.
///
/// \returns true if they are independent.
static bool areStridedAccessesIndependent(unsigned Distance, unsigned Stride,
unsigned TypeByteSize) {
static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
uint64_t TypeByteSize) {
assert(Stride > 1 && "The stride must be greater than 1");
assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
assert(Distance > 0 && "The distance must be non-zero");
@ -1155,7 +1155,7 @@ static bool areStridedAccessesIndependent(unsigned Distance, unsigned Stride,
if (Distance % TypeByteSize)
return false;
unsigned ScaledDist = Distance / TypeByteSize;
uint64_t ScaledDist = Distance / TypeByteSize;
// No dependence if the scaled distance is not multiple of the stride.
// E.g.
@ -1196,8 +1196,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
BPtr->getType()->getPointerAddressSpace())
return Dependence::Unknown;
int StrideAPtr = getPtrStride(PSE, APtr, InnermostLoop, Strides, true);
int StrideBPtr = getPtrStride(PSE, BPtr, InnermostLoop, Strides, true);
int64_t StrideAPtr = getPtrStride(PSE, APtr, InnermostLoop, Strides, true);
int64_t StrideBPtr = getPtrStride(PSE, BPtr, InnermostLoop, Strides, true);
const SCEV *Src = PSE.getSCEV(APtr);
const SCEV *Sink = PSE.getSCEV(BPtr);
@ -1237,11 +1237,11 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
Type *ATy = APtr->getType()->getPointerElementType();
Type *BTy = BPtr->getType()->getPointerElementType();
auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
unsigned TypeByteSize = DL.getTypeAllocSize(ATy);
uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
const APInt &Val = C->getAPInt();
int64_t Distance = Val.getSExtValue();
unsigned Stride = std::abs(StrideAPtr);
uint64_t Stride = std::abs(StrideAPtr);
// Attempt to prove strided accesses independent.
if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy &&
@ -1315,9 +1315,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
// If MinNumIter is 4 (Say if a user forces the vectorization factor to be 4),
// the minimum distance needed is 28, which is greater than distance. It is
// not safe to do vectorization.
unsigned MinDistanceNeeded =
uint64_t MinDistanceNeeded =
TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
if (MinDistanceNeeded > Distance) {
if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) {
DEBUG(dbgs() << "LAA: Failure because of positive distance " << Distance
<< '\n');
return Dependence::Backward;
@ -1347,7 +1347,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
// is 8, which is less than 2 and forbidden vectorization, But actually
// both A and B could be vectorized by 2 iterations.
MaxSafeDepDistBytes =
Distance < MaxSafeDepDistBytes ? Distance : MaxSafeDepDistBytes;
std::min(static_cast<uint64_t>(Distance), MaxSafeDepDistBytes);
bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
if (IsTrueDataDependence && EnableForwardingConflictDetection &&
@ -1365,7 +1365,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
MemAccessInfoSet &CheckDeps,
const ValueToValueMap &Strides) {
MaxSafeDepDistBytes = -1U;
MaxSafeDepDistBytes = -1;
while (!CheckDeps.empty()) {
MemAccessInfo CurAccess = *CheckDeps.begin();
@ -1926,7 +1926,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
PtrRtChecking(llvm::make_unique<RuntimePointerChecking>(SE)),
DepChecker(llvm::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
DL(&DL), TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
MaxSafeDepDistBytes(-1U), CanVecMem(false),
MaxSafeDepDistBytes(-1), CanVecMem(false),
StoreToLoopInvariantAddress(false) {
if (canAnalyzeLoop())
analyzeLoop();
@ -1935,7 +1935,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
if (CanVecMem) {
OS.indent(Depth) << "Memory dependences are safe";
if (MaxSafeDepDistBytes != -1U)
if (MaxSafeDepDistBytes != -1ULL)
OS << " with a maximum dependence distance of " << MaxSafeDepDistBytes
<< " bytes";
if (PtrRtChecking->Need)

View File

@ -0,0 +1,36 @@
; RUN: opt -basicaa -loop-distribute -verify-loop-info -verify-dom-info -S \
; RUN: < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Regression test for PR28443: LoopAccessAnalysis previously tracked dependence
; distances in 32-bit quantities. The two GEP offsets below differ by
; 805306369 - 268435457 = 536870912 elements; with i64 (8-byte) elements the
; byte distance is 2^32, which wraps to 0 in a 32-bit accumulator.
; NOTE(review): the CHECK lines appear to verify that -loop-distribute leaves
; the loop body intact (i.e. the bogus zero distance no longer triggers a
; transformation) -- confirm against the pass's expected output.
define void @fn1(i64 %a, i64* %b) {
entry:
br label %for.body
for.body:
; Induction phi: starts at %a, advances by 2 each iteration (see %add7.epil).
%add75.epil = phi i64 [ %add7.epil, %for.body ], [ %a, %entry ]
; Load address: %b + (%add75.epil + 268435457) (0x10000001 elements).
%add1.epil = add nsw i64 %add75.epil, 268435457
%arrayidx.epil = getelementptr inbounds i64, i64* %b, i64 %add1.epil
%load = load i64, i64* %arrayidx.epil, align 8
; Store address: %b + (%add75.epil + 805306369) (0x30000001 elements) --
; exactly 2^32 bytes beyond the load address for 8-byte elements.
%add5.epil = add nsw i64 %add75.epil, 805306369
%arrayidx6.epil = getelementptr inbounds i64, i64* %b, i64 %add5.epil
store i64 %load, i64* %arrayidx6.epil, align 8
%add7.epil = add nsw i64 %add75.epil, 2
; Loop exits when the induction variable wraps to exactly 0.
%epil.iter.cmp = icmp eq i64 %add7.epil, 0
br i1 %epil.iter.cmp, label %for.end, label %for.body
; CHECK: %[[phi:.*]] = phi i64
; CHECK: %[[add1:.*]] = add nsw i64 %[[phi]], 268435457
; CHECK: %[[gep1:.*]] = getelementptr inbounds i64, i64* %b, i64 %[[add1]]
; CHECK: %[[load:.*]] = load i64, i64* %[[gep1]], align 8
; CHECK: %[[add2:.*]] = add nsw i64 %[[phi]], 805306369
; CHECK: %[[gep2:.*]] = getelementptr inbounds i64, i64* %b, i64 %[[add2]]
; CHECK: store i64 %[[load]], i64* %[[gep2]], align 8
; CHECK: %[[incr:.*]] = add nsw i64 %[[phi]], 2
; CHECK: %[[cmp:.*]] = icmp eq i64 %[[incr]], 0
; CHECK: br i1 %[[cmp]]
for.end:
ret void
}