1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 12:41:49 +01:00

[DA] Delinearization of fixed-size multi-dimensional arrays

Summary:
Currently, the dependence analysis in LLVM is unable to compute accurate
dependence vectors for fixed-size multi-dimensional arrays.
This is mainly because the delinearization algorithm in scalar evolution
relies on parametric terms being present in the access functions. In the
case of fixed-size arrays, such parametric terms are not present, but we
can use the indices from GEP instructions to recover the subscripts for
each dimension of the arrays. This patch adds this ability under the
existing option `-da-disable-delinearization-checks`.

Authored By: bmahjour

Reviewer: Meinersbur, sebpop, fhahn, dmgreen, grosser, etiotto, bollu

Reviewed By: Meinersbur

Subscribers: hiraditya, arphaman, Whitney, ppc-slack, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D72178
This commit is contained in:
Bardia Mahjour 2020-02-26 12:51:58 -05:00
parent a5ed19f7c7
commit 4af1e9e981
7 changed files with 427 additions and 38 deletions

View File

@ -924,10 +924,28 @@ template <typename T> class ArrayRef;
void updateDirection(Dependence::DVEntry &Level,
const Constraint &CurConstraint) const;
/// Given a linear access function, tries to recover subscripts
/// for each dimension of the array element access.
///
/// \p Src and \p Dst are the two memory instructions being compared; the
/// implementation expects them to be loads or stores. On success, \p Pair is
/// resized to hold one Src/Dst subscript pair per recovered dimension and
/// true is returned. Returns false when the accesses could not be
/// delinearized.
bool tryDelinearize(Instruction *Src, Instruction *Dst,
SmallVectorImpl<Subscript> &Pair);
private:
/// Tries to delinearize access function for a fixed size multi-dimensional
/// array, by deriving subscripts from GEP instructions. Returns true upon
/// success and false otherwise.
///
/// The recovered subscripts are written to \p SrcSubscripts and
/// \p DstSubscripts. The implementation only attempts this when the
/// delinearization checks are disabled, and it requires the pointer operands
/// of both \p Src and \p Dst to be GEPs whose recovered dimension sizes
/// match.
bool tryDelinearizeFixedSize(Instruction *Src, Instruction *Dst,
const SCEV *SrcAccessFn,
const SCEV *DstAccessFn,
SmallVectorImpl<const SCEV *> &SrcSubscripts,
SmallVectorImpl<const SCEV *> &DstSubscripts);
/// Tries to delinearize access function for a multi-dimensional array with
/// symbolic runtime sizes.
/// Returns true upon success and false otherwise.
///
/// The per-dimension access functions are written to \p SrcSubscripts and
/// \p DstSubscripts. The implementation gives up when the element sizes of
/// \p Src and \p Dst differ, and, unless the delinearization checks are
/// disabled, when any subscript after the first cannot be shown to be
/// non-negative and smaller than the size of its dimension.
bool tryDelinearizeParametricSize(
Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
SmallVectorImpl<const SCEV *> &DstSubscripts);
/// checkSubscript - Helper function for checkSrcSubscript and
/// checkDstSubscript to avoid duplicate code
bool checkSubscript(const SCEV *Expr, const Loop *LoopNest,

View File

@ -3264,16 +3264,10 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
assert(isLoadOrStore(Dst) && "instruction is not load or store");
Value *SrcPtr = getLoadStorePointerOperand(Src);
Value *DstPtr = getLoadStorePointerOperand(Dst);
Loop *SrcLoop = LI->getLoopFor(Src->getParent());
Loop *DstLoop = LI->getLoopFor(Dst->getParent());
// Below code mimics the code in Delinearization.cpp
const SCEV *SrcAccessFn =
SE->getSCEVAtScope(SrcPtr, SrcLoop);
const SCEV *DstAccessFn =
SE->getSCEVAtScope(DstPtr, DstLoop);
const SCEV *SrcAccessFn = SE->getSCEVAtScope(SrcPtr, SrcLoop);
const SCEV *DstAccessFn = SE->getSCEVAtScope(DstPtr, DstLoop);
const SCEVUnknown *SrcBase =
dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
const SCEVUnknown *DstBase =
@ -3282,6 +3276,123 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
if (!SrcBase || !DstBase || SrcBase != DstBase)
return false;
SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts;
if (!tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn,
SrcSubscripts, DstSubscripts) &&
!tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn,
SrcSubscripts, DstSubscripts))
return false;
int Size = SrcSubscripts.size();
LLVM_DEBUG({
dbgs() << "\nSrcSubscripts: ";
for (int I = 0; I < Size; I++)
dbgs() << *SrcSubscripts[I];
dbgs() << "\nDstSubscripts: ";
for (int I = 0; I < Size; I++)
dbgs() << *DstSubscripts[I];
});
// The delinearization transforms a single-subscript MIV dependence test into
// a multi-subscript SIV dependence test that is easier to compute. So we
// resize Pair to contain as many pairs of subscripts as the delinearization
// has found, and then initialize the pairs following the delinearization.
Pair.resize(Size);
for (int I = 0; I < Size; ++I) {
Pair[I].Src = SrcSubscripts[I];
Pair[I].Dst = DstSubscripts[I];
unifySubscriptType(&Pair[I]);
}
return true;
}
/// Attempts to recover per-dimension subscripts for accesses into a
/// fixed-size multi-dimensional array by reading the indices directly off the
/// GEP instructions that feed \p Src and \p Dst.
///
/// \returns true when both subscript lists have been populated; false
/// otherwise. Both lists are emptied on every failure path that is taken after
/// the indices have been extracted.
bool DependenceInfo::tryDelinearizeFixedSize(
    Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
    const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
    SmallVectorImpl<const SCEV *> &DstSubscripts) {
  // Subscripts taken from a GEP are not guaranteed to stay within the bounds
  // of their own dimension (some C usage makes that unverifiable at compile
  // time), and the number of subscripts need not match the number of array
  // dimensions. Only proceed when the user has asked us to assume both.
  if (!DisableDelinearizationChecks)
    return false;

  const auto *SrcBase = dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
  const auto *DstBase = dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
  assert(SrcBase && DstBase && SrcBase == DstBase &&
         "expected src and dst scev unknowns to be equal");

  // Both addresses must be produced directly by a GEP.
  auto *SrcGEP = dyn_cast<GetElementPtrInst>(getLoadStorePointerOperand(Src));
  auto *DstGEP = dyn_cast<GetElementPtrInst>(getLoadStorePointerOperand(Dst));
  if (!SrcGEP || !DstGEP)
    return false;

  // Shared failure exit: never leave partially filled output lists behind.
  auto GiveUp = [&SrcSubscripts, &DstSubscripts] {
    SrcSubscripts.clear();
    DstSubscripts.clear();
    return false;
  };

  SmallVector<int, 4> SrcSizes, DstSizes;
  SE->getIndexExpressionsFromGEP(SrcGEP, SrcSubscripts, SrcSizes);
  SE->getIndexExpressionsFromGEP(DstGEP, DstSubscripts, DstSizes);

  // The accesses are comparable only if at least one dimension size was
  // found, more than one subscript was recovered, and both GEPs agree on the
  // number and the values of the dimension sizes.
  const bool SameShape =
      !SrcSizes.empty() && SrcSubscripts.size() > 1 &&
      SrcSizes.size() == DstSizes.size() &&
      std::equal(SrcSizes.begin(), SrcSizes.end(), DstSizes.begin());
  if (!SameShape)
    return GiveUp();

  // Look through any chain of bitcasts to find what each GEP is based on.
  auto SkipBitCasts = [](Value *V) {
    while (auto *Cast = dyn_cast<BitCastInst>(V))
      V = Cast->getOperand(0);
    return V;
  };
  Value *SrcBasePtr = SkipBitCasts(SrcGEP->getOperand(0));
  Value *DstBasePtr = SkipBitCasts(DstGEP->getOperand(0));

  // If a GEP is not applied directly to the base object, index offsets added
  // before this GEP would be missed, so the recovered subscripts are unusable.
  if (SrcBasePtr != SrcBase->getValue() || DstBasePtr != DstBase->getValue())
    return GiveUp();

  assert(SrcSubscripts.size() == DstSubscripts.size() &&
         SrcSubscripts.size() == SrcSizes.size() + 1 &&
         "Expected equal number of entries in the list of sizes and "
         "subscripts.");
  LLVM_DEBUG({
    dbgs() << "Delinearized subscripts of fixed-size array\n"
           << "SrcGEP:" << *SrcGEP << "\n"
           << "DstGEP:" << *DstGEP << "\n";
  });
  return true;
}
bool DependenceInfo::tryDelinearizeParametricSize(
Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
SmallVectorImpl<const SCEV *> &DstSubscripts) {
Value *SrcPtr = getLoadStorePointerOperand(Src);
Value *DstPtr = getLoadStorePointerOperand(Dst);
const SCEVUnknown *SrcBase =
dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
const SCEVUnknown *DstBase =
dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
assert(SrcBase && DstBase && SrcBase == DstBase &&
"expected src and dst scev unknowns to be equal");
const SCEV *ElementSize = SE->getElementSize(Src);
if (ElementSize != SE->getElementSize(Dst))
return false;
@ -3304,7 +3415,6 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
SE->findArrayDimensions(Terms, Sizes, ElementSize);
// Third step: compute the access functions for each subscript.
SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts;
SE->computeAccessFunctions(SrcAR, SrcSubscripts, Sizes);
SE->computeAccessFunctions(DstAR, DstSubscripts, Sizes);
@ -3313,7 +3423,7 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
SrcSubscripts.size() != DstSubscripts.size())
return false;
int size = SrcSubscripts.size();
size_t Size = SrcSubscripts.size();
// Statically check that the array bounds are in-range. The first subscript we
// don't have a size for and it cannot overflow into another subscript, so is
@ -3322,40 +3432,20 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
// FIXME: It may be better to record these sizes and add them as constraints
// to the dependency checks.
if (!DisableDelinearizationChecks)
for (int i = 1; i < size; ++i) {
if (!isKnownNonNegative(SrcSubscripts[i], SrcPtr))
for (size_t I = 1; I < Size; ++I) {
if (!isKnownNonNegative(SrcSubscripts[I], SrcPtr))
return false;
if (!isKnownLessThan(SrcSubscripts[i], Sizes[i - 1]))
if (!isKnownLessThan(SrcSubscripts[I], Sizes[I - 1]))
return false;
if (!isKnownNonNegative(DstSubscripts[i], DstPtr))
if (!isKnownNonNegative(DstSubscripts[I], DstPtr))
return false;
if (!isKnownLessThan(DstSubscripts[i], Sizes[i - 1]))
if (!isKnownLessThan(DstSubscripts[I], Sizes[I - 1]))
return false;
}
LLVM_DEBUG({
dbgs() << "\nSrcSubscripts: ";
for (int i = 0; i < size; i++)
dbgs() << *SrcSubscripts[i];
dbgs() << "\nDstSubscripts: ";
for (int i = 0; i < size; i++)
dbgs() << *DstSubscripts[i];
});
// The delinearization transforms a single-subscript MIV dependence test into
// a multi-subscript SIV dependence test that is easier to compute. So we
// resize Pair to contain as many pairs of subscripts as the delinearization
// has found, and then initialize the pairs following the delinearization.
Pair.resize(size);
for (int i = 0; i < size; ++i) {
Pair[i].Src = SrcSubscripts[i];
Pair[i].Dst = DstSubscripts[i];
unifySubscriptType(&Pair[i]);
}
return true;
}

View File

@ -0,0 +1,106 @@
; RUN: opt < %s -disable-output "-passes=print<da>" -aa-pipeline=basic-aa 2>&1 \
; RUN: -da-disable-delinearization-checks | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.6.0"
;; for (long int i = 0; i < n; i++) {
;;   for (long int j = 0; j < n; j++) {
;;     for (long int k = 0; k < n; k++) {
;;       A[i][j][k] = i;
;;     }
;;     for (long int k = 0; k < n; k++) {
;;       *B++ = A[i + 3][j + 2][k + 1];
;;     }
;;   }
;; }
;; Loop nest over i and j containing two sibling innermost k loops. The first
;; stores to %A[i][j][k]; the second loads %A[i+3][j+2][k+1] and stores the
;; loaded value through a pointer that starts at %B and advances by one
;; element per iteration. Both accesses to %A are formed by a single GEP on a
;; pointer to [100 x [100 x i64]].
define void @p2(i64 %n, [100 x [100 x i64]]* %A, i64* %B) nounwind uwtable ssp {
entry:
%cmp10 = icmp sgt i64 %n, 0
br i1 %cmp10, label %for.cond1.preheader.preheader, label %for.end26
; CHECK-LABEL: p2
; CHECK: da analyze - none!
; CHECK: da analyze - flow [-3 -2]!
; CHECK: da analyze - confused!
; CHECK: da analyze - none!
; CHECK: da analyze - confused!
; CHECK: da analyze - output [* * *]!
for.cond1.preheader.preheader: ; preds = %entry
br label %for.cond1.preheader
;; Header of the i loop; also carries the running B pointer.
for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc24
%B.addr.012 = phi i64* [ %B.addr.1.lcssa, %for.inc24 ], [ %B, %for.cond1.preheader.preheader ]
%i.011 = phi i64 [ %inc25, %for.inc24 ], [ 0, %for.cond1.preheader.preheader ]
%cmp26 = icmp sgt i64 %n, 0
br i1 %cmp26, label %for.cond4.preheader.preheader, label %for.inc24
for.cond4.preheader.preheader: ; preds = %for.cond1.preheader
br label %for.cond4.preheader
;; Header of the j loop.
for.cond4.preheader: ; preds = %for.cond4.preheader.preheader, %for.inc21
%B.addr.18 = phi i64* [ %B.addr.2.lcssa, %for.inc21 ], [ %B.addr.012, %for.cond4.preheader.preheader ]
%j.07 = phi i64 [ %inc22, %for.inc21 ], [ 0, %for.cond4.preheader.preheader ]
%cmp51 = icmp sgt i64 %n, 0
br i1 %cmp51, label %for.body6.preheader, label %for.cond10.loopexit
for.body6.preheader: ; preds = %for.cond4.preheader
br label %for.body6
;; First k loop: stores the value of i to A[i][j][k].
for.body6: ; preds = %for.body6.preheader, %for.body6
%k.02 = phi i64 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]
%arrayidx8 = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %A, i64 %i.011, i64 %j.07, i64 %k.02
store i64 %i.011, i64* %arrayidx8, align 8
%inc = add nsw i64 %k.02, 1
%exitcond13 = icmp ne i64 %inc, %n
br i1 %exitcond13, label %for.body6, label %for.cond10.loopexit.loopexit
for.cond10.loopexit.loopexit: ; preds = %for.body6
br label %for.cond10.loopexit
for.cond10.loopexit: ; preds = %for.cond10.loopexit.loopexit, %for.cond4.preheader
%cmp113 = icmp sgt i64 %n, 0
br i1 %cmp113, label %for.body12.preheader, label %for.inc21
for.body12.preheader: ; preds = %for.cond10.loopexit
br label %for.body12
;; Second k loop: loads A[i+3][j+2][k+1] and stores it through the B pointer,
;; which is then advanced by one element.
for.body12: ; preds = %for.body12.preheader, %for.body12
%k9.05 = phi i64 [ %inc19, %for.body12 ], [ 0, %for.body12.preheader ]
%B.addr.24 = phi i64* [ %incdec.ptr, %for.body12 ], [ %B.addr.18, %for.body12.preheader ]
%add = add nsw i64 %k9.05, 1
%add13 = add nsw i64 %j.07, 2
%add14 = add nsw i64 %i.011, 3
%arrayidx17 = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %A, i64 %add14, i64 %add13, i64 %add
%0 = load i64, i64* %arrayidx17, align 8
%incdec.ptr = getelementptr inbounds i64, i64* %B.addr.24, i64 1
store i64 %0, i64* %B.addr.24, align 8
%inc19 = add nsw i64 %k9.05, 1
%exitcond = icmp ne i64 %inc19, %n
br i1 %exitcond, label %for.body12, label %for.inc21.loopexit
;; On exit from the second k loop, the B pointer carried to the next j
;; iteration is recomputed as the j-loop entry value plus n elements.
for.inc21.loopexit: ; preds = %for.body12
%scevgep = getelementptr i64, i64* %B.addr.18, i64 %n
br label %for.inc21
;; Latch of the j loop.
for.inc21: ; preds = %for.inc21.loopexit, %for.cond10.loopexit
%B.addr.2.lcssa = phi i64* [ %B.addr.18, %for.cond10.loopexit ], [ %scevgep, %for.inc21.loopexit ]
%inc22 = add nsw i64 %j.07, 1
%exitcond14 = icmp ne i64 %inc22, %n
br i1 %exitcond14, label %for.cond4.preheader, label %for.inc24.loopexit
for.inc24.loopexit: ; preds = %for.inc21
%B.addr.2.lcssa.lcssa = phi i64* [ %B.addr.2.lcssa, %for.inc21 ]
br label %for.inc24
;; Latch of the i loop.
for.inc24: ; preds = %for.inc24.loopexit, %for.cond1.preheader
%B.addr.1.lcssa = phi i64* [ %B.addr.012, %for.cond1.preheader ], [ %B.addr.2.lcssa.lcssa, %for.inc24.loopexit ]
%inc25 = add nsw i64 %i.011, 1
%exitcond15 = icmp ne i64 %inc25, %n
br i1 %exitcond15, label %for.cond1.preheader, label %for.end26.loopexit
for.end26.loopexit: ; preds = %for.inc24
br label %for.end26
for.end26: ; preds = %for.end26.loopexit, %entry
ret void
}

View File

@ -0,0 +1,120 @@
; RUN: opt < %s -disable-output -passes="print<da>" \
; RUN: -da-disable-delinearization-checks 2>&1 | FileCheck %s
; RUN: opt < %s -da -analyze -da-disable-delinearization-checks | FileCheck %s
; CHECK-LABEL: t1
; CHECK: da analyze - none!
; CHECK: da analyze - consistent anti [1 -2]!
; CHECK: da analyze - none!
;; #define N 1024
;; #define M 2048
;; void t1(int a[N][M]) {
;; for (int i = 0; i < N-1; ++i)
;; for (int j = 2; j < M; ++j)
;; a[i][j] = a[i+1][j-2];
;; }
;; Two-deep loop nest over rows of type [2048 x i32]. Each inner iteration
;; loads a[i+1][j-2] and stores the loaded value to a[i][j]; both addresses
;; are formed by a single two-index GEP on %a.
define void @t1([2048 x i32]* %a) {
entry:
br label %for.body
;; Outer loop: i starts at 0 and the loop exits once i+1 equals 1023.
for.body: ; preds = %entry, %for.inc11
%indvars.iv4 = phi i64 [ 0, %entry ], [ %indvars.iv.next5, %for.inc11 ]
br label %for.body4
;; Inner loop: j starts at 2 and the loop exits once j+1 equals 2048.
for.body4: ; preds = %for.body, %for.body4
%indvars.iv = phi i64 [ 2, %for.body ], [ %indvars.iv.next, %for.body4 ]
%0 = add nuw nsw i64 %indvars.iv4, 1
%1 = add nsw i64 %indvars.iv, -2
%arrayidx6 = getelementptr inbounds [2048 x i32], [2048 x i32]* %a, i64 %0, i64 %1
%2 = load i32, i32* %arrayidx6, align 4
%arrayidx10 = getelementptr inbounds [2048 x i32], [2048 x i32]* %a, i64 %indvars.iv4, i64 %indvars.iv
store i32 %2, i32* %arrayidx10, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp ne i64 %indvars.iv.next, 2048
br i1 %exitcond, label %for.body4, label %for.inc11
;; Latch of the outer loop.
for.inc11: ; preds = %for.body4
%indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1
%exitcond7 = icmp ne i64 %indvars.iv.next5, 1023
br i1 %exitcond7, label %for.body, label %for.end13
for.end13: ; preds = %for.inc11
ret void
}
; CHECK-LABEL: t2
; CHECK: da analyze - none!
; CHECK: da analyze - consistent anti [1 -2 0 -3 2]!
; CHECK: da analyze - none!
;; #define N 1024
;; #define M 2048
;; void t2(int a[][N][N][N][M]) {
;; for (int i1 = 0; i1 < N-1; ++i1)
;; for (int i2 = 2; i2 < N; ++i2)
;; for (int i3 = 0; i3 < N; ++i3)
;; for (int i4 = 3; i4 < N; ++i4)
;; for (int i5 = 0; i5 < M-2; ++i5)
;; a[i1][i2][i3][i4][i5] = a[i1+1][i2-2][i3][i4-3][i5+2];
;; }
;; Five-deep loop nest over a pointer to [1024 x [1024 x [1024 x [2048 x i32]]]].
;; Each innermost iteration loads a[i1+1][i2-2][i3][i4-3][i5+2] and stores the
;; loaded value to a[i1][i2][i3][i4][i5]; both addresses are formed by a
;; single five-index GEP on %a.
define void @t2([1024 x [1024 x [1024 x [2048 x i32]]]]* %a) {
entry:
br label %for.body
;; i1 loop: starts at 0, exits once i1+1 equals 1023.
for.body: ; preds = %entry, %for.inc46
%indvars.iv18 = phi i64 [ 0, %entry ], [ %indvars.iv.next19, %for.inc46 ]
br label %for.body4
;; i2 loop: starts at 2, exits once i2+1 equals 1024.
for.body4: ; preds = %for.body, %for.inc43
%indvars.iv14 = phi i64 [ 2, %for.body ], [ %indvars.iv.next15, %for.inc43 ]
br label %for.body8
;; i3 loop: starts at 0, exits once i3+1 equals 1024.
for.body8: ; preds = %for.body4, %for.inc40
%indvars.iv11 = phi i64 [ 0, %for.body4 ], [ %indvars.iv.next12, %for.inc40 ]
br label %for.body12
;; i4 loop: starts at 3, exits once i4+1 equals 1024.
for.body12: ; preds = %for.body8, %for.inc37
%indvars.iv7 = phi i64 [ 3, %for.body8 ], [ %indvars.iv.next8, %for.inc37 ]
br label %for.body16
;; i5 loop (innermost): starts at 0, exits once i5+1 equals 2046. %0..%3 are
;; the offset indices used by the load.
for.body16: ; preds = %for.body12, %for.body16
%indvars.iv = phi i64 [ 0, %for.body12 ], [ %indvars.iv.next, %for.body16 ]
%0 = add nuw nsw i64 %indvars.iv18, 1
%1 = add nsw i64 %indvars.iv14, -2
%2 = add nsw i64 %indvars.iv7, -3
%3 = add nuw nsw i64 %indvars.iv, 2
%arrayidx26 = getelementptr inbounds [1024 x [1024 x [1024 x [2048 x i32]]]], [1024 x [1024 x [1024 x [2048 x i32]]]]* %a, i64 %0, i64 %1, i64 %indvars.iv11, i64 %2, i64 %3
%4 = load i32, i32* %arrayidx26, align 4
%arrayidx36 = getelementptr inbounds [1024 x [1024 x [1024 x [2048 x i32]]]], [1024 x [1024 x [1024 x [2048 x i32]]]]* %a, i64 %indvars.iv18, i64 %indvars.iv14, i64 %indvars.iv11, i64 %indvars.iv7, i64 %indvars.iv
store i32 %4, i32* %arrayidx36, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp ne i64 %indvars.iv.next, 2046
br i1 %exitcond, label %for.body16, label %for.inc37
;; Latch of the i4 loop.
for.inc37: ; preds = %for.body16
%indvars.iv.next8 = add nuw nsw i64 %indvars.iv7, 1
%exitcond10 = icmp ne i64 %indvars.iv.next8, 1024
br i1 %exitcond10, label %for.body12, label %for.inc40
;; Latch of the i3 loop.
for.inc40: ; preds = %for.inc37
%indvars.iv.next12 = add nuw nsw i64 %indvars.iv11, 1
%exitcond13 = icmp ne i64 %indvars.iv.next12, 1024
br i1 %exitcond13, label %for.body8, label %for.inc43
;; Latch of the i2 loop.
for.inc43: ; preds = %for.inc40
%indvars.iv.next15 = add nuw nsw i64 %indvars.iv14, 1
%exitcond17 = icmp ne i64 %indvars.iv.next15, 1024
br i1 %exitcond17, label %for.body4, label %for.inc46
;; Latch of the i1 loop.
for.inc46: ; preds = %for.inc43
%indvars.iv.next19 = add nuw nsw i64 %indvars.iv18, 1
%exitcond21 = icmp ne i64 %indvars.iv.next19, 1023
br i1 %exitcond21, label %for.body, label %for.end48
for.end48: ; preds = %for.inc46
ret void
}

View File

@ -2,6 +2,11 @@
; RUN: -pass-remarks-output=%t -verify-loop-info -verify-dom-info -S | FileCheck -check-prefix=IR %s
; RUN: FileCheck --input-file=%t %s
; RUN: opt < %s -basicaa -loop-interchange -pass-remarks-missed='loop-interchange' \
; RUN: -da-disable-delinearization-checks -pass-remarks-output=%t \
; RUN: -verify-loop-info -verify-dom-info -S | FileCheck -check-prefix=IR %s
; RUN: FileCheck --check-prefix=DELIN --input-file=%t %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@ -16,13 +21,14 @@ target triple = "x86_64-unknown-linux-gnu"
;; for(int j=1;j<N-1;j++)
;; A[j+1][i+1] = A[j+1][i+1] + k;
; FIXME: Currently fails because of DA changes.
; IR-LABEL: @interchange_01
; IR-NOT: split
; CHECK: Name: Dependence
; CHECK-NEXT: Function: interchange_01
; DELIN: Name: UnsupportedInsBetweenInduction
; DELIN-NEXT: Function: interchange_01
define void @interchange_01(i32 %k, i32 %N) {
entry:
%sub = add nsw i32 %N, -1

View File

@ -5,6 +5,11 @@
; RUN: -pass-remarks='loop-interchange' -S
; RUN: cat %t | FileCheck %s
; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -verify-loop-info \
; RUN: -pass-remarks-output=%t -pass-remarks-missed='loop-interchange' \
; RUN: -pass-remarks='loop-interchange' -S -da-disable-delinearization-checks
; RUN: cat %t | FileCheck --check-prefix=DELIN %s
@A = common global [100 x [100 x i32]] zeroinitializer
@B = common global [100 x [100 x i32]] zeroinitializer
@C = common global [100 x i32] zeroinitializer
@ -61,6 +66,18 @@ for.end19:
; CHECK-NEXT: - String: Cannot interchange loops due to dependences.
; CHECK-NEXT: ...
; DELIN: --- !Missed
; DELIN-NEXT: Pass: loop-interchange
; DELIN-NEXT: Name: InterchangeNotProfitable
; DELIN-NEXT: Function: test01
; DELIN-NEXT: Args:
; DELIN-NEXT: - String: 'Interchanging loops is too costly (cost='
; DELIN-NEXT: - Cost: '2'
; DELIN-NEXT: - String: ', threshold='
; DELIN-NEXT: - Threshold: '0'
; DELIN-NEXT: - String: ') and it does not improve parallelism.'
; DELIN-NEXT: ...
;;--------------------------------------Test case 02------------------------------------
;; [FIXME] This loop though valid is currently not interchanged due to the
;; limitation that we cannot split the inner loop latch due to multiple use of inner induction
@ -113,6 +130,14 @@ define void @test02(i32 %k, i32 %N) {
; CHECK-NEXT: - String: Cannot interchange loops due to dependences.
; CHECK-NEXT: ...
; DELIN: --- !Missed
; DELIN-NEXT: Pass: loop-interchange
; DELIN-NEXT: Name: UnsupportedInsBetweenInduction
; DELIN-NEXT: Function: test02
; DELIN-NEXT: Args:
; DELIN-NEXT: - String: Found unsupported instruction between induction variable increment and branch.
; DELIN-NEXT: ...
;;-----------------------------------Test case 03-------------------------------
;; Test to make sure we can handle output dependencies.
;;
@ -161,6 +186,14 @@ for.body4: ; preds = %for.body4, %for.con
; CHECK-NEXT: - String: Cannot interchange loops due to dependences.
; CHECK-NEXT: ...
; DELIN: --- !Passed
; DELIN-NEXT: Pass: loop-interchange
; DELIN-NEXT: Name: Interchanged
; DELIN-NEXT: Function: test03
; DELIN-NEXT: Args:
; DELIN-NEXT: - String: Loop interchanged with enclosing loop.
; DELIN-NEXT: ...
;;--------------------------------------Test case 04-------------------------------------
;; Loops not tightly nested are not interchanged
;; for(int j=0;j<N;j++) {
@ -215,3 +248,11 @@ for.end17:
; CHECK-NEXT: Args:
; CHECK-NEXT: - String: Cannot interchange loops due to dependences.
; CHECK-NEXT: ...
; DELIN: --- !Missed
; DELIN-NEXT: Pass: loop-interchange
; DELIN-NEXT: Name: NotTightlyNested
; DELIN-NEXT: Function: test04
; DELIN-NEXT: Args:
; DELIN-NEXT: - String: Cannot interchange loops because they are not tightly nested.
; DELIN-NEXT: ...

View File

@ -2,6 +2,11 @@
; RUN: -pass-remarks=loop-interchange -pass-remarks-missed=loop-interchange
; RUN: FileCheck -input-file %t %s
; RUN: opt < %s -loop-interchange -pass-remarks-output=%t -verify-dom-info -verify-loop-info \
; RUN: -pass-remarks=loop-interchange -pass-remarks-missed=loop-interchange \
; RUN: -da-disable-delinearization-checks
; RUN: FileCheck --check-prefix=DELIN -input-file %t %s
;; We test profitability model in these test cases.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@ -15,10 +20,13 @@ target triple = "x86_64-unknown-linux-gnu"
;; for(int i=1;i<100;i++)
;; for(int j=1;j<100;j++)
;; A[j][i] = A[j - 1][i] + B[j][i];
;; FIXME: DA misses this case after D35430
; CHECK: Name: Dependence
; CHECK-NEXT: Function: interchange_01
; DELIN: Name: Interchanged
; DELIN-NEXT: Function: interchange_01
define void @interchange_01() {
entry:
br label %for2.preheader