mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[DA] Delinearization of fixed-size multi-dimensional arrays
Summary:
Currently the dependence analysis in LLVM is unable to compute accurate dependence vectors for multi-dimensional fixed size arrays. This is mainly because the delinearization algorithm in scalar evolution relies on parametric terms to be present in the access functions. In the case of fixed size arrays such parametric terms are not present, but we can use the indexes from GEP instructions to recover the subscripts for each dimension of the arrays. This patch adds this ability under the existing option `-da-disable-delinearization-checks`.

Authored By: bmahjour
Reviewer: Meinersbur, sebpop, fhahn, dmgreen, grosser, etiotto, bollu
Reviewed By: Meinersbur
Subscribers: hiraditya, arphaman, Whitney, ppc-slack, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D72178
This commit is contained in:
parent
a5ed19f7c7
commit
4af1e9e981
@ -924,10 +924,28 @@ template <typename T> class ArrayRef;
|
|||||||
void updateDirection(Dependence::DVEntry &Level,
|
void updateDirection(Dependence::DVEntry &Level,
|
||||||
const Constraint &CurConstraint) const;
|
const Constraint &CurConstraint) const;
|
||||||
|
|
||||||
|
/// Given a linear access function, tries to recover subscripts
|
||||||
|
/// for each dimension of the array element access.
|
||||||
bool tryDelinearize(Instruction *Src, Instruction *Dst,
|
bool tryDelinearize(Instruction *Src, Instruction *Dst,
|
||||||
SmallVectorImpl<Subscript> &Pair);
|
SmallVectorImpl<Subscript> &Pair);
|
||||||
|
|
||||||
private:
|
/// Tries to delinearize access function for a fixed size multi-dimensional
|
||||||
|
/// array, by deriving subscripts from GEP instructions. Returns true upon
|
||||||
|
/// success and false otherwise.
|
||||||
|
bool tryDelinearizeFixedSize(Instruction *Src, Instruction *Dst,
|
||||||
|
const SCEV *SrcAccessFn,
|
||||||
|
const SCEV *DstAccessFn,
|
||||||
|
SmallVectorImpl<const SCEV *> &SrcSubscripts,
|
||||||
|
SmallVectorImpl<const SCEV *> &DstSubscripts);
|
||||||
|
|
||||||
|
/// Tries to delinearize access function for a multi-dimensional array with
|
||||||
|
/// symbolic runtime sizes.
|
||||||
|
/// Returns true upon success and false otherwise.
|
||||||
|
bool tryDelinearizeParametricSize(
|
||||||
|
Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
|
||||||
|
const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
|
||||||
|
SmallVectorImpl<const SCEV *> &DstSubscripts);
|
||||||
|
|
||||||
/// checkSubscript - Helper function for checkSrcSubscript and
|
/// checkSubscript - Helper function for checkSrcSubscript and
|
||||||
/// checkDstSubscript to avoid duplicate code
|
/// checkDstSubscript to avoid duplicate code
|
||||||
bool checkSubscript(const SCEV *Expr, const Loop *LoopNest,
|
bool checkSubscript(const SCEV *Expr, const Loop *LoopNest,
|
||||||
|
@ -3264,16 +3264,10 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
|
|||||||
assert(isLoadOrStore(Dst) && "instruction is not load or store");
|
assert(isLoadOrStore(Dst) && "instruction is not load or store");
|
||||||
Value *SrcPtr = getLoadStorePointerOperand(Src);
|
Value *SrcPtr = getLoadStorePointerOperand(Src);
|
||||||
Value *DstPtr = getLoadStorePointerOperand(Dst);
|
Value *DstPtr = getLoadStorePointerOperand(Dst);
|
||||||
|
|
||||||
Loop *SrcLoop = LI->getLoopFor(Src->getParent());
|
Loop *SrcLoop = LI->getLoopFor(Src->getParent());
|
||||||
Loop *DstLoop = LI->getLoopFor(Dst->getParent());
|
Loop *DstLoop = LI->getLoopFor(Dst->getParent());
|
||||||
|
const SCEV *SrcAccessFn = SE->getSCEVAtScope(SrcPtr, SrcLoop);
|
||||||
// Below code mimics the code in Delinearization.cpp
|
const SCEV *DstAccessFn = SE->getSCEVAtScope(DstPtr, DstLoop);
|
||||||
const SCEV *SrcAccessFn =
|
|
||||||
SE->getSCEVAtScope(SrcPtr, SrcLoop);
|
|
||||||
const SCEV *DstAccessFn =
|
|
||||||
SE->getSCEVAtScope(DstPtr, DstLoop);
|
|
||||||
|
|
||||||
const SCEVUnknown *SrcBase =
|
const SCEVUnknown *SrcBase =
|
||||||
dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
|
dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
|
||||||
const SCEVUnknown *DstBase =
|
const SCEVUnknown *DstBase =
|
||||||
@ -3282,6 +3276,123 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
|
|||||||
if (!SrcBase || !DstBase || SrcBase != DstBase)
|
if (!SrcBase || !DstBase || SrcBase != DstBase)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts;
|
||||||
|
|
||||||
|
if (!tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn,
|
||||||
|
SrcSubscripts, DstSubscripts) &&
|
||||||
|
!tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn,
|
||||||
|
SrcSubscripts, DstSubscripts))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
int Size = SrcSubscripts.size();
|
||||||
|
LLVM_DEBUG({
|
||||||
|
dbgs() << "\nSrcSubscripts: ";
|
||||||
|
for (int I = 0; I < Size; I++)
|
||||||
|
dbgs() << *SrcSubscripts[I];
|
||||||
|
dbgs() << "\nDstSubscripts: ";
|
||||||
|
for (int I = 0; I < Size; I++)
|
||||||
|
dbgs() << *DstSubscripts[I];
|
||||||
|
});
|
||||||
|
|
||||||
|
// The delinearization transforms a single-subscript MIV dependence test into
|
||||||
|
// a multi-subscript SIV dependence test that is easier to compute. So we
|
||||||
|
// resize Pair to contain as many pairs of subscripts as the delinearization
|
||||||
|
// has found, and then initialize the pairs following the delinearization.
|
||||||
|
Pair.resize(Size);
|
||||||
|
for (int I = 0; I < Size; ++I) {
|
||||||
|
Pair[I].Src = SrcSubscripts[I];
|
||||||
|
Pair[I].Dst = DstSubscripts[I];
|
||||||
|
unifySubscriptType(&Pair[I]);
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Recovers one subscript per array dimension for the accesses made by \p Src
/// and \p Dst, using the index operands of the GEP instructions that compute
/// their addresses instead of the linearized access functions.
///
/// \param Src,Dst  The two memory instructions being compared.
/// \param SrcAccessFn,DstAccessFn  Their access functions; only used here to
///        look up the common pointer base.
/// \param SrcSubscripts,DstSubscripts  Output lists. Filled with the recovered
///        subscripts on success; cleared again on every failure path that is
///        reached after the GEPs have been inspected.
/// \returns true if matching subscripts were recovered for both accesses.
bool DependenceInfo::tryDelinearizeFixedSize(
    Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
    const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
    SmallVectorImpl<const SCEV *> &DstSubscripts) {
  // Nothing here proves that a GEP index stays within the extent of its own
  // array dimension, nor that the number of indices equals the number of
  // dimensions. The recovered subscripts are therefore only used when the
  // user has explicitly opted out of those validity checks.
  if (!DisableDelinearizationChecks)
    return false;

  Value *SrcAddr = getLoadStorePointerOperand(Src);
  Value *DstAddr = getLoadStorePointerOperand(Dst);

  const auto *SrcBase = dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
  const auto *DstBase = dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
  assert(SrcBase && DstBase && SrcBase == DstBase &&
         "expected src and dst scev unknowns to be equal");

  // Both addresses must be produced directly by a GEP instruction.
  auto *SrcGEP = dyn_cast<GetElementPtrInst>(SrcAddr);
  auto *DstGEP = dyn_cast<GetElementPtrInst>(DstAddr);
  if (!SrcGEP || !DstGEP)
    return false;

  // Shared failure path: never hand partially filled lists back to the caller.
  auto GiveUp = [&SrcSubscripts, &DstSubscripts]() {
    SrcSubscripts.clear();
    DstSubscripts.clear();
    return false;
  };

  SmallVector<int, 4> SrcDims, DstDims;
  SE->getIndexExpressionsFromGEP(SrcGEP, SrcSubscripts, SrcDims);
  SE->getIndexExpressionsFromGEP(DstGEP, DstSubscripts, DstDims);

  // Proceed only if sizes were found, there is more than one subscript, and
  // both accesses report the same dimension sizes.
  const bool SameShape =
      !SrcDims.empty() && SrcSubscripts.size() > 1 &&
      SrcDims.size() == DstDims.size() &&
      std::equal(SrcDims.begin(), SrcDims.end(), DstDims.begin());
  if (!SameShape)
    return GiveUp();

  // Walk through any chain of bitcasts to the value that was actually cast.
  auto StripBitCasts = [](Value *V) {
    while (auto *Cast = dyn_cast<BitCastInst>(V))
      V = Cast->getOperand(0);
    return V;
  };
  Value *SrcGEPBase = StripBitCasts(SrcGEP->getOperand(0));
  Value *DstGEPBase = StripBitCasts(DstGEP->getOperand(0));

  // If a GEP does not start at the SCEV pointer base, some offset was applied
  // before it and the recovered subscripts would not account for that offset.
  if (SrcGEPBase != SrcBase->getValue() || DstGEPBase != DstBase->getValue())
    return GiveUp();

  assert(SrcSubscripts.size() == DstSubscripts.size() &&
         SrcSubscripts.size() == SrcDims.size() + 1 &&
         "Expected equal number of entries in the list of sizes and "
         "subscripts.");
  LLVM_DEBUG({
    dbgs() << "Delinearized subscripts of fixed-size array\n"
           << "SrcGEP:" << *SrcGEP << "\n"
           << "DstGEP:" << *DstGEP << "\n";
  });
  return true;
}
|
||||||
|
|
||||||
|
bool DependenceInfo::tryDelinearizeParametricSize(
|
||||||
|
Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
|
||||||
|
const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
|
||||||
|
SmallVectorImpl<const SCEV *> &DstSubscripts) {
|
||||||
|
|
||||||
|
Value *SrcPtr = getLoadStorePointerOperand(Src);
|
||||||
|
Value *DstPtr = getLoadStorePointerOperand(Dst);
|
||||||
|
const SCEVUnknown *SrcBase =
|
||||||
|
dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
|
||||||
|
const SCEVUnknown *DstBase =
|
||||||
|
dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
|
||||||
|
assert(SrcBase && DstBase && SrcBase == DstBase &&
|
||||||
|
"expected src and dst scev unknowns to be equal");
|
||||||
|
|
||||||
const SCEV *ElementSize = SE->getElementSize(Src);
|
const SCEV *ElementSize = SE->getElementSize(Src);
|
||||||
if (ElementSize != SE->getElementSize(Dst))
|
if (ElementSize != SE->getElementSize(Dst))
|
||||||
return false;
|
return false;
|
||||||
@ -3304,7 +3415,6 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
|
|||||||
SE->findArrayDimensions(Terms, Sizes, ElementSize);
|
SE->findArrayDimensions(Terms, Sizes, ElementSize);
|
||||||
|
|
||||||
// Third step: compute the access functions for each subscript.
|
// Third step: compute the access functions for each subscript.
|
||||||
SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts;
|
|
||||||
SE->computeAccessFunctions(SrcAR, SrcSubscripts, Sizes);
|
SE->computeAccessFunctions(SrcAR, SrcSubscripts, Sizes);
|
||||||
SE->computeAccessFunctions(DstAR, DstSubscripts, Sizes);
|
SE->computeAccessFunctions(DstAR, DstSubscripts, Sizes);
|
||||||
|
|
||||||
@ -3313,7 +3423,7 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
|
|||||||
SrcSubscripts.size() != DstSubscripts.size())
|
SrcSubscripts.size() != DstSubscripts.size())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
int size = SrcSubscripts.size();
|
size_t Size = SrcSubscripts.size();
|
||||||
|
|
||||||
// Statically check that the array bounds are in-range. The first subscript we
|
// Statically check that the array bounds are in-range. The first subscript we
|
||||||
// don't have a size for and it cannot overflow into another subscript, so is
|
// don't have a size for and it cannot overflow into another subscript, so is
|
||||||
@ -3322,40 +3432,20 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
|
|||||||
// FIXME: It may be better to record these sizes and add them as constraints
|
// FIXME: It may be better to record these sizes and add them as constraints
|
||||||
// to the dependency checks.
|
// to the dependency checks.
|
||||||
if (!DisableDelinearizationChecks)
|
if (!DisableDelinearizationChecks)
|
||||||
for (int i = 1; i < size; ++i) {
|
for (size_t I = 1; I < Size; ++I) {
|
||||||
if (!isKnownNonNegative(SrcSubscripts[i], SrcPtr))
|
if (!isKnownNonNegative(SrcSubscripts[I], SrcPtr))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (!isKnownLessThan(SrcSubscripts[i], Sizes[i - 1]))
|
if (!isKnownLessThan(SrcSubscripts[I], Sizes[I - 1]))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (!isKnownNonNegative(DstSubscripts[i], DstPtr))
|
if (!isKnownNonNegative(DstSubscripts[I], DstPtr))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (!isKnownLessThan(DstSubscripts[i], Sizes[i - 1]))
|
if (!isKnownLessThan(DstSubscripts[I], Sizes[I - 1]))
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
LLVM_DEBUG({
|
|
||||||
dbgs() << "\nSrcSubscripts: ";
|
|
||||||
for (int i = 0; i < size; i++)
|
|
||||||
dbgs() << *SrcSubscripts[i];
|
|
||||||
dbgs() << "\nDstSubscripts: ";
|
|
||||||
for (int i = 0; i < size; i++)
|
|
||||||
dbgs() << *DstSubscripts[i];
|
|
||||||
});
|
|
||||||
|
|
||||||
// The delinearization transforms a single-subscript MIV dependence test into
|
|
||||||
// a multi-subscript SIV dependence test that is easier to compute. So we
|
|
||||||
// resize Pair to contain as many pairs of subscripts as the delinearization
|
|
||||||
// has found, and then initialize the pairs following the delinearization.
|
|
||||||
Pair.resize(size);
|
|
||||||
for (int i = 0; i < size; ++i) {
|
|
||||||
Pair[i].Src = SrcSubscripts[i];
|
|
||||||
Pair[i].Dst = DstSubscripts[i];
|
|
||||||
unifySubscriptType(&Pair[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -0,0 +1,106 @@
|
|||||||
|
; RUN: opt < %s -disable-output "-passes=print<da>" -aa-pipeline=basic-aa 2>&1 \
|
||||||
|
; RUN: -da-disable-delinearization-checks | FileCheck %s
|
||||||
|
|
||||||
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||||
|
target triple = "x86_64-apple-macosx10.6.0"
|
||||||
|
|
||||||
|
;; for (long int i = 0; i < n; i++) {
|
||||||
|
;; for (long int j = 0; j < n; j++) {
|
||||||
|
;; for (long int k = 0; k < n; k++) {
|
||||||
|
;; A[i][j][k] = i;
|
||||||
|
;; }
|
||||||
|
;; for (long int k = 0; k < n; k++) {
|
||||||
|
;; *B++ = A[i + 3][j + 2][k + 1];
|
||||||
|
|
||||||
|
;; Test fixture: a triple loop nest over i, j, k in [0, n). The first inner
;; k-loop stores i into A[i][j][k]; the second inner k-loop loads
;; A[i + 3][j + 2][k + 1] and writes it through the advancing pointer B.
define void @p2(i64 %n, [100 x [100 x i64]]* %A, i64* %B) nounwind uwtable ssp {
entry:
  ; Skip the whole nest when n <= 0.
  %cmp10 = icmp sgt i64 %n, 0
  br i1 %cmp10, label %for.cond1.preheader.preheader, label %for.end26

; CHECK-LABEL: p2
; CHECK: da analyze - none!
; CHECK: da analyze - flow [-3 -2]!
; CHECK: da analyze - confused!
; CHECK: da analyze - none!
; CHECK: da analyze - confused!
; CHECK: da analyze - output [* * *]!

for.cond1.preheader.preheader: ; preds = %entry
  br label %for.cond1.preheader

; Header of the i-loop; %B.addr.012 carries the current B pointer.
for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc24
  %B.addr.012 = phi i64* [ %B.addr.1.lcssa, %for.inc24 ], [ %B, %for.cond1.preheader.preheader ]
  %i.011 = phi i64 [ %inc25, %for.inc24 ], [ 0, %for.cond1.preheader.preheader ]
  %cmp26 = icmp sgt i64 %n, 0
  br i1 %cmp26, label %for.cond4.preheader.preheader, label %for.inc24

for.cond4.preheader.preheader: ; preds = %for.cond1.preheader
  br label %for.cond4.preheader

; Header of the j-loop.
for.cond4.preheader: ; preds = %for.cond4.preheader.preheader, %for.inc21
  %B.addr.18 = phi i64* [ %B.addr.2.lcssa, %for.inc21 ], [ %B.addr.012, %for.cond4.preheader.preheader ]
  %j.07 = phi i64 [ %inc22, %for.inc21 ], [ 0, %for.cond4.preheader.preheader ]
  %cmp51 = icmp sgt i64 %n, 0
  br i1 %cmp51, label %for.body6.preheader, label %for.cond10.loopexit

for.body6.preheader: ; preds = %for.cond4.preheader
  br label %for.body6

; First k-loop: A[i][j][k] = i.
for.body6: ; preds = %for.body6.preheader, %for.body6
  %k.02 = phi i64 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]
  %arrayidx8 = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %A, i64 %i.011, i64 %j.07, i64 %k.02
  store i64 %i.011, i64* %arrayidx8, align 8
  %inc = add nsw i64 %k.02, 1
  %exitcond13 = icmp ne i64 %inc, %n
  br i1 %exitcond13, label %for.body6, label %for.cond10.loopexit.loopexit

for.cond10.loopexit.loopexit: ; preds = %for.body6
  br label %for.cond10.loopexit

for.cond10.loopexit: ; preds = %for.cond10.loopexit.loopexit, %for.cond4.preheader
  %cmp113 = icmp sgt i64 %n, 0
  br i1 %cmp113, label %for.body12.preheader, label %for.inc21

for.body12.preheader: ; preds = %for.cond10.loopexit
  br label %for.body12

; Second k-loop: *B++ = A[i + 3][j + 2][k + 1].
for.body12: ; preds = %for.body12.preheader, %for.body12
  %k9.05 = phi i64 [ %inc19, %for.body12 ], [ 0, %for.body12.preheader ]
  %B.addr.24 = phi i64* [ %incdec.ptr, %for.body12 ], [ %B.addr.18, %for.body12.preheader ]
  %add = add nsw i64 %k9.05, 1
  %add13 = add nsw i64 %j.07, 2
  %add14 = add nsw i64 %i.011, 3
  %arrayidx17 = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %A, i64 %add14, i64 %add13, i64 %add
  %0 = load i64, i64* %arrayidx17, align 8
  %incdec.ptr = getelementptr inbounds i64, i64* %B.addr.24, i64 1
  store i64 %0, i64* %B.addr.24, align 8
  %inc19 = add nsw i64 %k9.05, 1
  %exitcond = icmp ne i64 %inc19, %n
  br i1 %exitcond, label %for.body12, label %for.inc21.loopexit

; After the second k-loop the B pointer has advanced by n elements.
for.inc21.loopexit: ; preds = %for.body12
  %scevgep = getelementptr i64, i64* %B.addr.18, i64 %n
  br label %for.inc21

; Latch of the j-loop.
for.inc21: ; preds = %for.inc21.loopexit, %for.cond10.loopexit
  %B.addr.2.lcssa = phi i64* [ %B.addr.18, %for.cond10.loopexit ], [ %scevgep, %for.inc21.loopexit ]
  %inc22 = add nsw i64 %j.07, 1
  %exitcond14 = icmp ne i64 %inc22, %n
  br i1 %exitcond14, label %for.cond4.preheader, label %for.inc24.loopexit

for.inc24.loopexit: ; preds = %for.inc21
  %B.addr.2.lcssa.lcssa = phi i64* [ %B.addr.2.lcssa, %for.inc21 ]
  br label %for.inc24

; Latch of the i-loop.
for.inc24: ; preds = %for.inc24.loopexit, %for.cond1.preheader
  %B.addr.1.lcssa = phi i64* [ %B.addr.012, %for.cond1.preheader ], [ %B.addr.2.lcssa.lcssa, %for.inc24.loopexit ]
  %inc25 = add nsw i64 %i.011, 1
  %exitcond15 = icmp ne i64 %inc25, %n
  br i1 %exitcond15, label %for.cond1.preheader, label %for.end26.loopexit

for.end26.loopexit: ; preds = %for.inc24
  br label %for.end26

for.end26: ; preds = %for.end26.loopexit, %entry
  ret void
}
|
@ -0,0 +1,120 @@
|
|||||||
|
; RUN: opt < %s -disable-output -passes="print<da>" \
|
||||||
|
; RUN: -da-disable-delinearization-checks 2>&1 | FileCheck %s
|
||||||
|
; RUN: opt < %s -da -analyze -da-disable-delinearization-checks | FileCheck %s
|
||||||
|
|
||||||
|
; CHECK-LABEL: t1
|
||||||
|
; CHECK: da analyze - none!
|
||||||
|
; CHECK: da analyze - consistent anti [1 -2]!
|
||||||
|
; CHECK: da analyze - none!
|
||||||
|
|
||||||
|
;; #define N 1024
|
||||||
|
;; #define M 2048
|
||||||
|
;; void t1(int a[N][M]) {
|
||||||
|
;; for (int i = 0; i < N-1; ++i)
|
||||||
|
;; for (int j = 2; j < M; ++j)
|
||||||
|
;; a[i][j] = a[i+1][j-2];
|
||||||
|
;; }
|
||||||
|
|
||||||
|
;; Test fixture: two nested loops over a two-dimensional array with rows of
;; 2048 i32 elements. Each inner iteration performs a[i][j] = a[i+1][j-2].
define void @t1([2048 x i32]* %a) {
entry:
  br label %for.body

; Outer loop: %indvars.iv4 is i, starting at 0.
for.body: ; preds = %entry, %for.inc11
  %indvars.iv4 = phi i64 [ 0, %entry ], [ %indvars.iv.next5, %for.inc11 ]
  br label %for.body4

; Inner loop: %indvars.iv is j, starting at 2 and leaving when it reaches 2048.
for.body4: ; preds = %for.body, %for.body4
  %indvars.iv = phi i64 [ 2, %for.body ], [ %indvars.iv.next, %for.body4 ]
  ; Load from a[i + 1][j - 2].
  %0 = add nuw nsw i64 %indvars.iv4, 1
  %1 = add nsw i64 %indvars.iv, -2
  %arrayidx6 = getelementptr inbounds [2048 x i32], [2048 x i32]* %a, i64 %0, i64 %1
  %2 = load i32, i32* %arrayidx6, align 4
  ; Store to a[i][j].
  %arrayidx10 = getelementptr inbounds [2048 x i32], [2048 x i32]* %a, i64 %indvars.iv4, i64 %indvars.iv
  store i32 %2, i32* %arrayidx10, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp ne i64 %indvars.iv.next, 2048
  br i1 %exitcond, label %for.body4, label %for.inc11

; Outer latch: leave once i + 1 reaches 1023.
for.inc11: ; preds = %for.body4
  %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1
  %exitcond7 = icmp ne i64 %indvars.iv.next5, 1023
  br i1 %exitcond7, label %for.body, label %for.end13

for.end13: ; preds = %for.inc11
  ret void
}
|
||||||
|
|
||||||
|
|
||||||
|
; CHECK-LABEL: t2
|
||||||
|
; CHECK: da analyze - none!
|
||||||
|
; CHECK: da analyze - consistent anti [1 -2 0 -3 2]!
|
||||||
|
; CHECK: da analyze - none!
|
||||||
|
|
||||||
|
;; #define N 1024
|
||||||
|
;; #define M 2048
|
||||||
|
;; void t2(int a[][N][N][N][M]) {
|
||||||
|
;; for (int i1 = 0; i1 < N-1; ++i1)
|
||||||
|
;; for (int i2 = 2; i2 < N; ++i2)
|
||||||
|
;; for (int i3 = 0; i3 < N; ++i3)
|
||||||
|
;; for (int i4 = 3; i4 < N; ++i4)
|
||||||
|
;; for (int i5 = 0; i5 < M-2; ++i5)
|
||||||
|
;; a[i1][i2][i3][i4][i5] = a[i1+1][i2-2][i3][i4-3][i5+2];
|
||||||
|
;; }
|
||||||
|
|
||||||
|
;; Test fixture: five nested loops (i1..i5) over a five-dimensional array.
;; Each innermost iteration performs
;;   a[i1][i2][i3][i4][i5] = a[i1+1][i2-2][i3][i4-3][i5+2].
define void @t2([1024 x [1024 x [1024 x [2048 x i32]]]]* %a) {
entry:
  br label %for.body

; Loop over i1 (%indvars.iv18), starting at 0.
for.body: ; preds = %entry, %for.inc46
  %indvars.iv18 = phi i64 [ 0, %entry ], [ %indvars.iv.next19, %for.inc46 ]
  br label %for.body4

; Loop over i2 (%indvars.iv14), starting at 2.
for.body4: ; preds = %for.body, %for.inc43
  %indvars.iv14 = phi i64 [ 2, %for.body ], [ %indvars.iv.next15, %for.inc43 ]
  br label %for.body8

; Loop over i3 (%indvars.iv11), starting at 0.
for.body8: ; preds = %for.body4, %for.inc40
  %indvars.iv11 = phi i64 [ 0, %for.body4 ], [ %indvars.iv.next12, %for.inc40 ]
  br label %for.body12

; Loop over i4 (%indvars.iv7), starting at 3.
for.body12: ; preds = %for.body8, %for.inc37
  %indvars.iv7 = phi i64 [ 3, %for.body8 ], [ %indvars.iv.next8, %for.inc37 ]
  br label %for.body16

; Innermost loop over i5 (%indvars.iv), starting at 0.
for.body16: ; preds = %for.body12, %for.body16
  %indvars.iv = phi i64 [ 0, %for.body12 ], [ %indvars.iv.next, %for.body16 ]
  ; Load from a[i1 + 1][i2 - 2][i3][i4 - 3][i5 + 2].
  %0 = add nuw nsw i64 %indvars.iv18, 1
  %1 = add nsw i64 %indvars.iv14, -2
  %2 = add nsw i64 %indvars.iv7, -3
  %3 = add nuw nsw i64 %indvars.iv, 2
  %arrayidx26 = getelementptr inbounds [1024 x [1024 x [1024 x [2048 x i32]]]], [1024 x [1024 x [1024 x [2048 x i32]]]]* %a, i64 %0, i64 %1, i64 %indvars.iv11, i64 %2, i64 %3
  %4 = load i32, i32* %arrayidx26, align 4
  ; Store to a[i1][i2][i3][i4][i5].
  %arrayidx36 = getelementptr inbounds [1024 x [1024 x [1024 x [2048 x i32]]]], [1024 x [1024 x [1024 x [2048 x i32]]]]* %a, i64 %indvars.iv18, i64 %indvars.iv14, i64 %indvars.iv11, i64 %indvars.iv7, i64 %indvars.iv
  store i32 %4, i32* %arrayidx36, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp ne i64 %indvars.iv.next, 2046
  br i1 %exitcond, label %for.body16, label %for.inc37

; Latch of the i4 loop.
for.inc37: ; preds = %for.body16
  %indvars.iv.next8 = add nuw nsw i64 %indvars.iv7, 1
  %exitcond10 = icmp ne i64 %indvars.iv.next8, 1024
  br i1 %exitcond10, label %for.body12, label %for.inc40

; Latch of the i3 loop.
for.inc40: ; preds = %for.inc37
  %indvars.iv.next12 = add nuw nsw i64 %indvars.iv11, 1
  %exitcond13 = icmp ne i64 %indvars.iv.next12, 1024
  br i1 %exitcond13, label %for.body8, label %for.inc43

; Latch of the i2 loop.
for.inc43: ; preds = %for.inc40
  %indvars.iv.next15 = add nuw nsw i64 %indvars.iv14, 1
  %exitcond17 = icmp ne i64 %indvars.iv.next15, 1024
  br i1 %exitcond17, label %for.body4, label %for.inc46

; Latch of the i1 loop.
for.inc46: ; preds = %for.inc43
  %indvars.iv.next19 = add nuw nsw i64 %indvars.iv18, 1
  %exitcond21 = icmp ne i64 %indvars.iv.next19, 1023
  br i1 %exitcond21, label %for.body, label %for.end48

for.end48: ; preds = %for.inc46
  ret void
}
|
@ -2,6 +2,11 @@
|
|||||||
; RUN: -pass-remarks-output=%t -verify-loop-info -verify-dom-info -S | FileCheck -check-prefix=IR %s
|
; RUN: -pass-remarks-output=%t -verify-loop-info -verify-dom-info -S | FileCheck -check-prefix=IR %s
|
||||||
; RUN: FileCheck --input-file=%t %s
|
; RUN: FileCheck --input-file=%t %s
|
||||||
|
|
||||||
|
; RUN: opt < %s -basicaa -loop-interchange -pass-remarks-missed='loop-interchange' \
|
||||||
|
; RUN: -da-disable-delinearization-checks -pass-remarks-output=%t \
|
||||||
|
; RUN: -verify-loop-info -verify-dom-info -S | FileCheck -check-prefix=IR %s
|
||||||
|
; RUN: FileCheck --check-prefix=DELIN --input-file=%t %s
|
||||||
|
|
||||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||||
target triple = "x86_64-unknown-linux-gnu"
|
target triple = "x86_64-unknown-linux-gnu"
|
||||||
|
|
||||||
@ -16,13 +21,14 @@ target triple = "x86_64-unknown-linux-gnu"
|
|||||||
;; for(int j=1;j<N-1;j++)
|
;; for(int j=1;j<N-1;j++)
|
||||||
;; A[j+1][i+1] = A[j+1][i+1] + k;
|
;; A[j+1][i+1] = A[j+1][i+1] + k;
|
||||||
|
|
||||||
; FIXME: Currently fails because of DA changes.
|
|
||||||
; IR-LABEL: @interchange_01
|
; IR-LABEL: @interchange_01
|
||||||
; IR-NOT: split
|
; IR-NOT: split
|
||||||
|
|
||||||
; CHECK: Name: Dependence
|
; CHECK: Name: Dependence
|
||||||
; CHECK-NEXT: Function: interchange_01
|
; CHECK-NEXT: Function: interchange_01
|
||||||
|
|
||||||
|
; DELIN: Name: UnsupportedInsBetweenInduction
|
||||||
|
; DELIN-NEXT: Function: interchange_01
|
||||||
define void @interchange_01(i32 %k, i32 %N) {
|
define void @interchange_01(i32 %k, i32 %N) {
|
||||||
entry:
|
entry:
|
||||||
%sub = add nsw i32 %N, -1
|
%sub = add nsw i32 %N, -1
|
||||||
|
@ -5,6 +5,11 @@
|
|||||||
; RUN: -pass-remarks='loop-interchange' -S
|
; RUN: -pass-remarks='loop-interchange' -S
|
||||||
; RUN: cat %t | FileCheck %s
|
; RUN: cat %t | FileCheck %s
|
||||||
|
|
||||||
|
; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -verify-loop-info \
|
||||||
|
; RUN: -pass-remarks-output=%t -pass-remarks-missed='loop-interchange' \
|
||||||
|
; RUN: -pass-remarks='loop-interchange' -S -da-disable-delinearization-checks
|
||||||
|
; RUN: cat %t | FileCheck --check-prefix=DELIN %s
|
||||||
|
|
||||||
@A = common global [100 x [100 x i32]] zeroinitializer
|
@A = common global [100 x [100 x i32]] zeroinitializer
|
||||||
@B = common global [100 x [100 x i32]] zeroinitializer
|
@B = common global [100 x [100 x i32]] zeroinitializer
|
||||||
@C = common global [100 x i32] zeroinitializer
|
@C = common global [100 x i32] zeroinitializer
|
||||||
@ -61,6 +66,18 @@ for.end19:
|
|||||||
; CHECK-NEXT: - String: Cannot interchange loops due to dependences.
|
; CHECK-NEXT: - String: Cannot interchange loops due to dependences.
|
||||||
; CHECK-NEXT: ...
|
; CHECK-NEXT: ...
|
||||||
|
|
||||||
|
; DELIN: --- !Missed
|
||||||
|
; DELIN-NEXT: Pass: loop-interchange
|
||||||
|
; DELIN-NEXT: Name: InterchangeNotProfitable
|
||||||
|
; DELIN-NEXT: Function: test01
|
||||||
|
; DELIN-NEXT: Args:
|
||||||
|
; DELIN-NEXT: - String: 'Interchanging loops is too costly (cost='
|
||||||
|
; DELIN-NEXT: - Cost: '2'
|
||||||
|
; DELIN-NEXT: - String: ', threshold='
|
||||||
|
; DELIN-NEXT: - Threshold: '0'
|
||||||
|
; DELIN-NEXT: - String: ') and it does not improve parallelism.'
|
||||||
|
; DELIN-NEXT: ...
|
||||||
|
|
||||||
;;--------------------------------------Test case 02------------------------------------
|
;;--------------------------------------Test case 02------------------------------------
|
||||||
;; [FIXME] This loop though valid is currently not interchanged due to the
|
;; [FIXME] This loop though valid is currently not interchanged due to the
|
||||||
;; limitation that we cannot split the inner loop latch due to multiple use of inner induction
|
;; limitation that we cannot split the inner loop latch due to multiple use of inner induction
|
||||||
@ -113,6 +130,14 @@ define void @test02(i32 %k, i32 %N) {
|
|||||||
; CHECK-NEXT: - String: Cannot interchange loops due to dependences.
|
; CHECK-NEXT: - String: Cannot interchange loops due to dependences.
|
||||||
; CHECK-NEXT: ...
|
; CHECK-NEXT: ...
|
||||||
|
|
||||||
|
; DELIN: --- !Missed
|
||||||
|
; DELIN-NEXT: Pass: loop-interchange
|
||||||
|
; DELIN-NEXT: Name: UnsupportedInsBetweenInduction
|
||||||
|
; DELIN-NEXT: Function: test02
|
||||||
|
; DELIN-NEXT: Args:
|
||||||
|
; DELIN-NEXT: - String: Found unsupported instruction between induction variable increment and branch.
|
||||||
|
; DELIN-NEXT: ...
|
||||||
|
|
||||||
;;-----------------------------------Test case 03-------------------------------
|
;;-----------------------------------Test case 03-------------------------------
|
||||||
;; Test to make sure we can handle output dependencies.
|
;; Test to make sure we can handle output dependencies.
|
||||||
;;
|
;;
|
||||||
@ -161,6 +186,14 @@ for.body4: ; preds = %for.body4, %for.con
|
|||||||
; CHECK-NEXT: - String: Cannot interchange loops due to dependences.
|
; CHECK-NEXT: - String: Cannot interchange loops due to dependences.
|
||||||
; CHECK-NEXT: ...
|
; CHECK-NEXT: ...
|
||||||
|
|
||||||
|
; DELIN: --- !Passed
|
||||||
|
; DELIN-NEXT: Pass: loop-interchange
|
||||||
|
; DELIN-NEXT: Name: Interchanged
|
||||||
|
; DELIN-NEXT: Function: test03
|
||||||
|
; DELIN-NEXT: Args:
|
||||||
|
; DELIN-NEXT: - String: Loop interchanged with enclosing loop.
|
||||||
|
; DELIN-NEXT: ...
|
||||||
|
|
||||||
;;--------------------------------------Test case 04-------------------------------------
|
;;--------------------------------------Test case 04-------------------------------------
|
||||||
;; Loops not tightly nested are not interchanged
|
;; Loops not tightly nested are not interchanged
|
||||||
;; for(int j=0;j<N;j++) {
|
;; for(int j=0;j<N;j++) {
|
||||||
@ -215,3 +248,11 @@ for.end17:
|
|||||||
; CHECK-NEXT: Args:
|
; CHECK-NEXT: Args:
|
||||||
; CHECK-NEXT: - String: Cannot interchange loops due to dependences.
|
; CHECK-NEXT: - String: Cannot interchange loops due to dependences.
|
||||||
; CHECK-NEXT: ...
|
; CHECK-NEXT: ...
|
||||||
|
|
||||||
|
; DELIN: --- !Missed
|
||||||
|
; DELIN-NEXT: Pass: loop-interchange
|
||||||
|
; DELIN-NEXT: Name: NotTightlyNested
|
||||||
|
; DELIN-NEXT: Function: test04
|
||||||
|
; DELIN-NEXT: Args:
|
||||||
|
; DELIN-NEXT: - String: Cannot interchange loops because they are not tightly nested.
|
||||||
|
; DELIN-NEXT: ...
|
||||||
|
@ -2,6 +2,11 @@
|
|||||||
; RUN: -pass-remarks=loop-interchange -pass-remarks-missed=loop-interchange
|
; RUN: -pass-remarks=loop-interchange -pass-remarks-missed=loop-interchange
|
||||||
; RUN: FileCheck -input-file %t %s
|
; RUN: FileCheck -input-file %t %s
|
||||||
|
|
||||||
|
; RUN: opt < %s -loop-interchange -pass-remarks-output=%t -verify-dom-info -verify-loop-info \
|
||||||
|
; RUN: -pass-remarks=loop-interchange -pass-remarks-missed=loop-interchange \
|
||||||
|
; RUN: -da-disable-delinearization-checks
|
||||||
|
; RUN: FileCheck --check-prefix=DELIN -input-file %t %s
|
||||||
|
|
||||||
;; We test profitability model in these test cases.
|
;; We test profitability model in these test cases.
|
||||||
|
|
||||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||||
@ -15,10 +20,13 @@ target triple = "x86_64-unknown-linux-gnu"
|
|||||||
;; for(int i=1;i<100;i++)
|
;; for(int i=1;i<100;i++)
|
||||||
;; for(int j=1;j<100;j++)
|
;; for(int j=1;j<100;j++)
|
||||||
;; A[j][i] = A[j - 1][i] + B[j][i];
|
;; A[j][i] = A[j - 1][i] + B[j][i];
|
||||||
;; FIXME: DA misses this case after D35430
|
|
||||||
|
|
||||||
; CHECK: Name: Dependence
|
; CHECK: Name: Dependence
|
||||||
; CHECK-NEXT: Function: interchange_01
|
; CHECK-NEXT: Function: interchange_01
|
||||||
|
|
||||||
|
; DELIN: Name: Interchanged
|
||||||
|
; DELIN-NEXT: Function: interchange_01
|
||||||
|
|
||||||
define void @interchange_01() {
|
define void @interchange_01() {
|
||||||
entry:
|
entry:
|
||||||
br label %for2.preheader
|
br label %for2.preheader
|
||||||
|
Loading…
x
Reference in New Issue
Block a user