[DA] Delinearization of fixed-size multi-dimensional arrays

Summary: Currently the dependence analysis in LLVM is unable to compute accurate dependence vectors for multi-dimensional fixed-size arrays. This is mainly because the delinearization algorithm in scalar evolution relies on parametric terms being present in the access functions. In the case of fixed-size arrays such parametric terms are not present, but we can use the indices from GEP instructions to recover the subscripts for each dimension of the arrays. This patch adds this ability under the existing option `-da-disable-delinearization-checks`. Authored By: bmahjour Reviewer: Meinersbur, sebpop, fhahn, dmgreen, grosser, etiotto, bollu Reviewed By: Meinersbur Subscribers: hiraditya, arphaman, Whitney, ppc-slack, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D72178
2025-01-31 12:41:49 +01:00 · 2020-02-26 12:51:58 -05:00 · 2020-02-26 12:51:58 -05:00 · 4af1e9e981
commit 4af1e9e981
parent a5ed19f7c7
7 changed files with 427 additions and 38 deletions
--- a/include/llvm/Analysis/DependenceAnalysis.h
+++ b/include/llvm/Analysis/DependenceAnalysis.h
@ -924,10 +924,28 @@ template <typename T> class ArrayRef;
    void updateDirection(Dependence::DVEntry &Level,
                         const Constraint &CurConstraint) const;

+    /// Given a linear access function, tries to recover subscripts
+    /// for each dimension of the array element access.
    bool tryDelinearize(Instruction *Src, Instruction *Dst,
                        SmallVectorImpl<Subscript> &Pair);

-  private:
+    /// Tries to delinearize access function for a fixed size multi-dimensional
+    /// array, by deriving subscripts from GEP instructions. Returns true upon
+    /// success and false otherwise.
+    bool tryDelinearizeFixedSize(Instruction *Src, Instruction *Dst,
+                                 const SCEV *SrcAccessFn,
+                                 const SCEV *DstAccessFn,
+                                 SmallVectorImpl<const SCEV *> &SrcSubscripts,
+                                 SmallVectorImpl<const SCEV *> &DstSubscripts);
+
+    /// Tries to delinearize access function for a multi-dimensional array with
+    /// symbolic runtime sizes.
+    /// Returns true upon success and false otherwise.
+    bool tryDelinearizeParametricSize(
+        Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
+        const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
+        SmallVectorImpl<const SCEV *> &DstSubscripts);
+
    /// checkSubscript - Helper function for checkSrcSubscript and
    /// checkDstSubscript to avoid duplicate code
    bool checkSubscript(const SCEV *Expr, const Loop *LoopNest,
--- a/lib/Analysis/DependenceAnalysis.cpp
+++ b/lib/Analysis/DependenceAnalysis.cpp
@ -3264,16 +3264,10 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
  assert(isLoadOrStore(Dst) && "instruction is not load or store");
  Value *SrcPtr = getLoadStorePointerOperand(Src);
  Value *DstPtr = getLoadStorePointerOperand(Dst);
-
  Loop *SrcLoop = LI->getLoopFor(Src->getParent());
  Loop *DstLoop = LI->getLoopFor(Dst->getParent());
-
-  // Below code mimics the code in Delinearization.cpp
-  const SCEV *SrcAccessFn =
-    SE->getSCEVAtScope(SrcPtr, SrcLoop);
-  const SCEV *DstAccessFn =
-    SE->getSCEVAtScope(DstPtr, DstLoop);
-
+  const SCEV *SrcAccessFn = SE->getSCEVAtScope(SrcPtr, SrcLoop);
+  const SCEV *DstAccessFn = SE->getSCEVAtScope(DstPtr, DstLoop);
  const SCEVUnknown *SrcBase =
      dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
  const SCEVUnknown *DstBase =
@ -3282,6 +3276,123 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
  if (!SrcBase || !DstBase || SrcBase != DstBase)
    return false;

+  SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts;
+
+  if (!tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn,
+                               SrcSubscripts, DstSubscripts) &&
+      !tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn,
+                                    SrcSubscripts, DstSubscripts))
+    return false;
+
+  int Size = SrcSubscripts.size();
+  LLVM_DEBUG({
+    dbgs() << "\nSrcSubscripts: ";
+    for (int I = 0; I < Size; I++)
+      dbgs() << *SrcSubscripts[I];
+    dbgs() << "\nDstSubscripts: ";
+    for (int I = 0; I < Size; I++)
+      dbgs() << *DstSubscripts[I];
+  });
+
+  // The delinearization transforms a single-subscript MIV dependence test into
+  // a multi-subscript SIV dependence test that is easier to compute. So we
+  // resize Pair to contain as many pairs of subscripts as the delinearization
+  // has found, and then initialize the pairs following the delinearization.
+  Pair.resize(Size);
+  for (int I = 0; I < Size; ++I) {
+    Pair[I].Src = SrcSubscripts[I];
+    Pair[I].Dst = DstSubscripts[I];
+    unifySubscriptType(&Pair[I]);
+  }
+
+  return true;
+}
+
+bool DependenceInfo::tryDelinearizeFixedSize(
+    Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
+    const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
+    SmallVectorImpl<const SCEV *> &DstSubscripts) {
+  // Recovers per-dimension subscripts for Src and Dst from their GEP indices.
+  // In general we cannot safely assume that the subscripts recovered from GEPs
+  // are in the range of values defined for their corresponding array
+  // dimensions. For example, some C language usages/interpretations make it
+  // impossible to verify this at compile time. As such we give up here unless
+  // we can assume that the subscripts do not overlap into neighboring
+  // dimensions and that the number of dimensions matches the number of
+  // subscripts being recovered.
+  if (!DisableDelinearizationChecks)
+    return false; // not opted in via -da-disable-delinearization-checks
+
+  Value *SrcPtr = getLoadStorePointerOperand(Src);
+  Value *DstPtr = getLoadStorePointerOperand(Dst);
+  const SCEVUnknown *SrcBase =
+      dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
+  const SCEVUnknown *DstBase =
+      dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
+  assert(SrcBase && DstBase && SrcBase == DstBase &&
+         "expected src and dst scev unknowns to be equal");
+
+  // Check the simple case where the array dimensions are fixed size.
+  auto *SrcGEP = dyn_cast<GetElementPtrInst>(SrcPtr);
+  auto *DstGEP = dyn_cast<GetElementPtrInst>(DstPtr);
+  if (!SrcGEP || !DstGEP)
+    return false; // need a GEP on both sides to read indices from
+
+  SmallVector<int, 4> SrcSizes, DstSizes;
+  SE->getIndexExpressionsFromGEP(SrcGEP, SrcSubscripts, SrcSizes);
+  SE->getIndexExpressionsFromGEP(DstGEP, DstSubscripts, DstSizes);
+
+  // Require at least two subscripts, and that the two size arrays are
+  // non-empty and equal in length and value.
+  if (SrcSizes.empty() || SrcSubscripts.size() <= 1 ||
+      SrcSizes.size() != DstSizes.size() ||
+      !std::equal(SrcSizes.begin(), SrcSizes.end(), DstSizes.begin())) {
+    SrcSubscripts.clear(); // discard partially recovered subscripts
+    DstSubscripts.clear();
+    return false;
+  }
+
+  Value *SrcBasePtr = SrcGEP->getOperand(0);
+  Value *DstBasePtr = DstGEP->getOperand(0);
+  while (auto *PCast = dyn_cast<BitCastInst>(SrcBasePtr))
+    SrcBasePtr = PCast->getOperand(0); // look through the bitcast
+  while (auto *PCast = dyn_cast<BitCastInst>(DstBasePtr))
+    DstBasePtr = PCast->getOperand(0); // look through the bitcast
+
+  // Check that for identical base pointers we do not miss index offsets
+  // that have been added before this GEP is applied.
+  if (SrcBasePtr == SrcBase->getValue() && DstBasePtr == DstBase->getValue()) {
+    assert(SrcSubscripts.size() == DstSubscripts.size() &&
+           SrcSubscripts.size() == SrcSizes.size() + 1 &&
+           "Expected equal number of entries in the list of sizes and "
+           "subscripts.");
+    LLVM_DEBUG({
+      dbgs() << "Delinearized subscripts of fixed-size array\n"
+             << "SrcGEP:" << *SrcGEP << "\n"
+             << "DstGEP:" << *DstGEP << "\n";
+    });
+    return true; // subscripts are left in the output vectors
+  }
+  // A GEP is not applied directly to the shared base pointer; give up.
+  SrcSubscripts.clear();
+  DstSubscripts.clear();
+  return false;
+}
+
+bool DependenceInfo::tryDelinearizeParametricSize(
+    Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
+    const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
+    SmallVectorImpl<const SCEV *> &DstSubscripts) {
+
+  Value *SrcPtr = getLoadStorePointerOperand(Src);
+  Value *DstPtr = getLoadStorePointerOperand(Dst);
+  const SCEVUnknown *SrcBase =
+      dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
+  const SCEVUnknown *DstBase =
+      dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
+  assert(SrcBase && DstBase && SrcBase == DstBase &&
+         "expected src and dst scev unknowns to be equal");
+
  const SCEV *ElementSize = SE->getElementSize(Src);
  if (ElementSize != SE->getElementSize(Dst))
    return false;
@ -3304,7 +3415,6 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
  SE->findArrayDimensions(Terms, Sizes, ElementSize);

  // Third step: compute the access functions for each subscript.
-  SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts;
  SE->computeAccessFunctions(SrcAR, SrcSubscripts, Sizes);
  SE->computeAccessFunctions(DstAR, DstSubscripts, Sizes);

@ -3313,7 +3423,7 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
      SrcSubscripts.size() != DstSubscripts.size())
    return false;

-  int size = SrcSubscripts.size();
+  size_t Size = SrcSubscripts.size();

  // Statically check that the array bounds are in-range. The first subscript we
  // don't have a size for and it cannot overflow into another subscript, so is
@ -3322,40 +3432,20 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
  // FIXME: It may be better to record these sizes and add them as constraints
  // to the dependency checks.
  if (!DisableDelinearizationChecks)
-    for (int i = 1; i < size; ++i) {
-      if (!isKnownNonNegative(SrcSubscripts[i], SrcPtr))
+    for (size_t I = 1; I < Size; ++I) {
+      if (!isKnownNonNegative(SrcSubscripts[I], SrcPtr))
        return false;

-      if (!isKnownLessThan(SrcSubscripts[i], Sizes[i - 1]))
+      if (!isKnownLessThan(SrcSubscripts[I], Sizes[I - 1]))
        return false;

-      if (!isKnownNonNegative(DstSubscripts[i], DstPtr))
+      if (!isKnownNonNegative(DstSubscripts[I], DstPtr))
        return false;

-      if (!isKnownLessThan(DstSubscripts[i], Sizes[i - 1]))
+      if (!isKnownLessThan(DstSubscripts[I], Sizes[I - 1]))
        return false;
    }

-  LLVM_DEBUG({
-    dbgs() << "\nSrcSubscripts: ";
-    for (int i = 0; i < size; i++)
-      dbgs() << *SrcSubscripts[i];
-    dbgs() << "\nDstSubscripts: ";
-    for (int i = 0; i < size; i++)
-      dbgs() << *DstSubscripts[i];
-  });
-
-  // The delinearization transforms a single-subscript MIV dependence test into
-  // a multi-subscript SIV dependence test that is easier to compute. So we
-  // resize Pair to contain as many pairs of subscripts as the delinearization
-  // has found, and then initialize the pairs following the delinearization.
-  Pair.resize(size);
-  for (int i = 0; i < size; ++i) {
-    Pair[i].Src = SrcSubscripts[i];
-    Pair[i].Dst = DstSubscripts[i];
-    unifySubscriptType(&Pair[i]);
-  }
-
  return true;
 }

--- a/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll
+++ b/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll
@ -0,0 +1,106 @@
+; RUN: opt < %s -disable-output "-passes=print<da>" -aa-pipeline=basic-aa 2>&1 \
+; RUN:   -da-disable-delinearization-checks | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.6.0"
+
+;;  for (long int i = 0; i < n; i++) {
+;;    for (long int j = 0; j < n; j++) {
+;;      for (long int k = 0; k < n; k++) {
+;;        A[i][j][k] = i;
+;;      }
+;;      for (long int k = 0; k < n; k++) {
+;;        *B++ = A[i + 3][j + 2][k + 1];
+
+define void @p2(i64 %n, [100 x [100 x i64]]* %A, i64* %B) nounwind uwtable ssp {
+entry:
+  %cmp10 = icmp sgt i64 %n, 0
+  br i1 %cmp10, label %for.cond1.preheader.preheader, label %for.end26
+
+; CHECK-LABEL: p2
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [-3 -2]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [* * *]!
+
+for.cond1.preheader.preheader:                    ; preds = %entry
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %for.cond1.preheader.preheader, %for.inc24
+  %B.addr.012 = phi i64* [ %B.addr.1.lcssa, %for.inc24 ], [ %B, %for.cond1.preheader.preheader ]
+  %i.011 = phi i64 [ %inc25, %for.inc24 ], [ 0, %for.cond1.preheader.preheader ]
+  %cmp26 = icmp sgt i64 %n, 0
+  br i1 %cmp26, label %for.cond4.preheader.preheader, label %for.inc24
+
+for.cond4.preheader.preheader:                    ; preds = %for.cond1.preheader
+  br label %for.cond4.preheader
+
+for.cond4.preheader:                              ; preds = %for.cond4.preheader.preheader, %for.inc21
+  %B.addr.18 = phi i64* [ %B.addr.2.lcssa, %for.inc21 ], [ %B.addr.012, %for.cond4.preheader.preheader ]
+  %j.07 = phi i64 [ %inc22, %for.inc21 ], [ 0, %for.cond4.preheader.preheader ]
+  %cmp51 = icmp sgt i64 %n, 0
+  br i1 %cmp51, label %for.body6.preheader, label %for.cond10.loopexit
+
+for.body6.preheader:                              ; preds = %for.cond4.preheader
+  br label %for.body6
+
+for.body6:                                        ; preds = %for.body6.preheader, %for.body6
+  %k.02 = phi i64 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]
+  %arrayidx8 = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %A, i64 %i.011, i64 %j.07, i64 %k.02
+  store i64 %i.011, i64* %arrayidx8, align 8
+  %inc = add nsw i64 %k.02, 1
+  %exitcond13 = icmp ne i64 %inc, %n
+  br i1 %exitcond13, label %for.body6, label %for.cond10.loopexit.loopexit
+
+for.cond10.loopexit.loopexit:                     ; preds = %for.body6
+  br label %for.cond10.loopexit
+
+for.cond10.loopexit:                              ; preds = %for.cond10.loopexit.loopexit, %for.cond4.preheader
+  %cmp113 = icmp sgt i64 %n, 0
+  br i1 %cmp113, label %for.body12.preheader, label %for.inc21
+
+for.body12.preheader:                             ; preds = %for.cond10.loopexit
+  br label %for.body12
+
+for.body12:                                       ; preds = %for.body12.preheader, %for.body12
+  %k9.05 = phi i64 [ %inc19, %for.body12 ], [ 0, %for.body12.preheader ]
+  %B.addr.24 = phi i64* [ %incdec.ptr, %for.body12 ], [ %B.addr.18, %for.body12.preheader ]
+  %add = add nsw i64 %k9.05, 1
+  %add13 = add nsw i64 %j.07, 2
+  %add14 = add nsw i64 %i.011, 3
+  %arrayidx17 = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %A, i64 %add14, i64 %add13, i64 %add
+  %0 = load i64, i64* %arrayidx17, align 8
+  %incdec.ptr = getelementptr inbounds i64, i64* %B.addr.24, i64 1
+  store i64 %0, i64* %B.addr.24, align 8
+  %inc19 = add nsw i64 %k9.05, 1
+  %exitcond = icmp ne i64 %inc19, %n
+  br i1 %exitcond, label %for.body12, label %for.inc21.loopexit
+
+for.inc21.loopexit:                               ; preds = %for.body12
+  %scevgep = getelementptr i64, i64* %B.addr.18, i64 %n
+  br label %for.inc21
+
+for.inc21:                                        ; preds = %for.inc21.loopexit, %for.cond10.loopexit
+  %B.addr.2.lcssa = phi i64* [ %B.addr.18, %for.cond10.loopexit ], [ %scevgep, %for.inc21.loopexit ]
+  %inc22 = add nsw i64 %j.07, 1
+  %exitcond14 = icmp ne i64 %inc22, %n
+  br i1 %exitcond14, label %for.cond4.preheader, label %for.inc24.loopexit
+
+for.inc24.loopexit:                               ; preds = %for.inc21
+  %B.addr.2.lcssa.lcssa = phi i64* [ %B.addr.2.lcssa, %for.inc21 ]
+  br label %for.inc24
+
+for.inc24:                                        ; preds = %for.inc24.loopexit, %for.cond1.preheader
+  %B.addr.1.lcssa = phi i64* [ %B.addr.012, %for.cond1.preheader ], [ %B.addr.2.lcssa.lcssa, %for.inc24.loopexit ]
+  %inc25 = add nsw i64 %i.011, 1
+  %exitcond15 = icmp ne i64 %inc25, %n
+  br i1 %exitcond15, label %for.cond1.preheader, label %for.end26.loopexit
+
+for.end26.loopexit:                               ; preds = %for.inc24
+  br label %for.end26
+
+for.end26:                                        ; preds = %for.end26.loopexit, %entry
+  ret void
+}
--- a/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheckFixedSize.ll
+++ b/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheckFixedSize.ll
@ -0,0 +1,120 @@
+; RUN: opt < %s -disable-output -passes="print<da>"                            \
+; RUN: -da-disable-delinearization-checks 2>&1 | FileCheck %s
+; RUN: opt < %s -da -analyze -da-disable-delinearization-checks | FileCheck %s
+
+; CHECK-LABEL: t1
+; CHECK: da analyze - none!
+; CHECK: da analyze - consistent anti [1 -2]!
+; CHECK: da analyze - none!
+
+;; #define N 1024
+;; #define M 2048
+;; void t1(int a[N][M]) {
+;;   for (int i = 0; i < N-1; ++i)
+;;     for (int j = 2; j < M; ++j)
+;;       a[i][j] = a[i+1][j-2];
+;; }
+
+define void @t1([2048 x i32]* %a) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.inc11
+  %indvars.iv4 = phi i64 [ 0, %entry ], [ %indvars.iv.next5, %for.inc11 ]
+  br label %for.body4
+
+for.body4:                                        ; preds = %for.body, %for.body4
+  %indvars.iv = phi i64 [ 2, %for.body ], [ %indvars.iv.next, %for.body4 ]
+  %0 = add nuw nsw i64 %indvars.iv4, 1
+  %1 = add nsw i64 %indvars.iv, -2
+  %arrayidx6 = getelementptr inbounds [2048 x i32], [2048 x i32]* %a, i64 %0, i64 %1
+  %2 = load i32, i32* %arrayidx6, align 4
+  %arrayidx10 = getelementptr inbounds [2048 x i32], [2048 x i32]* %a, i64 %indvars.iv4, i64 %indvars.iv
+  store i32 %2, i32* %arrayidx10, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp ne i64 %indvars.iv.next, 2048
+  br i1 %exitcond, label %for.body4, label %for.inc11
+
+for.inc11:                                        ; preds = %for.body4
+  %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1
+  %exitcond7 = icmp ne i64 %indvars.iv.next5, 1023
+  br i1 %exitcond7, label %for.body, label %for.end13
+
+for.end13:                                        ; preds = %for.inc11
+  ret void
+}
+
+
+; CHECK-LABEL: t2
+; CHECK: da analyze - none!
+; CHECK: da analyze - consistent anti [1 -2 0 -3 2]!
+; CHECK: da analyze - none!
+
+;; #define N 1024
+;; #define M 2048
+;; void t2(int a[][N][N][N][M]) {
+;;   for (int i1 = 0; i1 < N-1; ++i1)
+;;     for (int i2 = 2; i2 < N; ++i2)
+;;       for (int i3 = 0; i3 < N; ++i3)
+;;         for (int i4 = 3; i4 < N; ++i4)
+;;           for (int i5 = 0; i5 < M-2; ++i5)
+;;             a[i1][i2][i3][i4][i5] = a[i1+1][i2-2][i3][i4-3][i5+2];
+;; }
+
+define void @t2([1024 x [1024 x [1024 x [2048 x i32]]]]* %a) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.inc46
+  %indvars.iv18 = phi i64 [ 0, %entry ], [ %indvars.iv.next19, %for.inc46 ]
+  br label %for.body4
+
+for.body4:                                        ; preds = %for.body, %for.inc43
+  %indvars.iv14 = phi i64 [ 2, %for.body ], [ %indvars.iv.next15, %for.inc43 ]
+  br label %for.body8
+
+for.body8:                                        ; preds = %for.body4, %for.inc40
+  %indvars.iv11 = phi i64 [ 0, %for.body4 ], [ %indvars.iv.next12, %for.inc40 ]
+  br label %for.body12
+
+for.body12:                                       ; preds = %for.body8, %for.inc37
+  %indvars.iv7 = phi i64 [ 3, %for.body8 ], [ %indvars.iv.next8, %for.inc37 ]
+  br label %for.body16
+
+for.body16:                                       ; preds = %for.body12, %for.body16
+  %indvars.iv = phi i64 [ 0, %for.body12 ], [ %indvars.iv.next, %for.body16 ]
+  %0 = add nuw nsw i64 %indvars.iv18, 1
+  %1 = add nsw i64 %indvars.iv14, -2
+  %2 = add nsw i64 %indvars.iv7, -3
+  %3 = add nuw nsw i64 %indvars.iv, 2
+  %arrayidx26 = getelementptr inbounds [1024 x [1024 x [1024 x [2048 x i32]]]], [1024 x [1024 x [1024 x [2048 x i32]]]]* %a, i64 %0, i64 %1, i64 %indvars.iv11, i64 %2, i64 %3
+  %4 = load i32, i32* %arrayidx26, align 4
+  %arrayidx36 = getelementptr inbounds [1024 x [1024 x [1024 x [2048 x i32]]]], [1024 x [1024 x [1024 x [2048 x i32]]]]* %a, i64 %indvars.iv18, i64 %indvars.iv14, i64 %indvars.iv11, i64 %indvars.iv7, i64 %indvars.iv
+  store i32 %4, i32* %arrayidx36, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp ne i64 %indvars.iv.next, 2046
+  br i1 %exitcond, label %for.body16, label %for.inc37
+
+for.inc37:                                        ; preds = %for.body16
+  %indvars.iv.next8 = add nuw nsw i64 %indvars.iv7, 1
+  %exitcond10 = icmp ne i64 %indvars.iv.next8, 1024
+  br i1 %exitcond10, label %for.body12, label %for.inc40
+
+for.inc40:                                        ; preds = %for.inc37
+  %indvars.iv.next12 = add nuw nsw i64 %indvars.iv11, 1
+  %exitcond13 = icmp ne i64 %indvars.iv.next12, 1024
+  br i1 %exitcond13, label %for.body8, label %for.inc43
+
+for.inc43:                                        ; preds = %for.inc40
+  %indvars.iv.next15 = add nuw nsw i64 %indvars.iv14, 1
+  %exitcond17 = icmp ne i64 %indvars.iv.next15, 1024
+  br i1 %exitcond17, label %for.body4, label %for.inc46
+
+for.inc46:                                        ; preds = %for.inc43
+  %indvars.iv.next19 = add nuw nsw i64 %indvars.iv18, 1
+  %exitcond21 = icmp ne i64 %indvars.iv.next19, 1023
+  br i1 %exitcond21, label %for.body, label %for.end48
+
+for.end48:                                        ; preds = %for.inc46
+  ret void
+}
--- a/test/Transforms/LoopInterchange/currentLimitation.ll
+++ b/test/Transforms/LoopInterchange/currentLimitation.ll
@ -2,6 +2,11 @@
 ; RUN:   -pass-remarks-output=%t -verify-loop-info -verify-dom-info -S | FileCheck -check-prefix=IR %s
 ; RUN: FileCheck --input-file=%t %s

+; RUN: opt < %s -basicaa -loop-interchange -pass-remarks-missed='loop-interchange' \
+; RUN:   -da-disable-delinearization-checks -pass-remarks-output=%t             \
+; RUN:   -verify-loop-info -verify-dom-info -S | FileCheck -check-prefix=IR %s
+; RUN: FileCheck --check-prefix=DELIN --input-file=%t %s
+
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
@ -16,13 +21,14 @@ target triple = "x86_64-unknown-linux-gnu"
 ;;    for(int j=1;j<N-1;j++)
 ;;      A[j+1][i+1] = A[j+1][i+1] + k;

-; FIXME: Currently fails because of DA changes.
 ; IR-LABEL: @interchange_01
 ; IR-NOT: split

 ; CHECK:      Name:            Dependence
 ; CHECK-NEXT: Function:        interchange_01

+; DELIN:      Name:            UnsupportedInsBetweenInduction
+; DELIN-NEXT: Function:        interchange_01
 define void @interchange_01(i32 %k, i32 %N) {
 entry:
   %sub = add nsw i32 %N, -1
--- a/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll
+++ b/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll
@ -5,6 +5,11 @@
 ; RUN:     -pass-remarks='loop-interchange' -S
 ; RUN: cat %t |  FileCheck %s

+; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -verify-loop-info \
+; RUN:     -pass-remarks-output=%t -pass-remarks-missed='loop-interchange' \
+; RUN:     -pass-remarks='loop-interchange' -S -da-disable-delinearization-checks
+; RUN: cat %t |  FileCheck --check-prefix=DELIN %s
+
@A = common global [100 x [100 x i32]] zeroinitializer
@B = common global [100 x [100 x i32]] zeroinitializer
@C = common global [100 x i32] zeroinitializer
@ -61,6 +66,18 @@ for.end19:
 ; CHECK-NEXT:   - String:          Cannot interchange loops due to dependences.
 ; CHECK-NEXT: ...

+; DELIN: --- !Missed
+; DELIN-NEXT: Pass:            loop-interchange
+; DELIN-NEXT: Name:            InterchangeNotProfitable
+; DELIN-NEXT: Function:        test01
+; DELIN-NEXT: Args:
+; DELIN-NEXT:   - String:          'Interchanging loops is too costly (cost='
+; DELIN-NEXT:   - Cost:            '2'
+; DELIN-NEXT:   - String:          ', threshold='
+; DELIN-NEXT:   - Threshold:       '0'
+; DELIN-NEXT:   - String:          ') and it does not improve parallelism.'
+; DELIN-NEXT: ...
+
 ;;--------------------------------------Test case 02------------------------------------
 ;; [FIXME] This loop though valid is currently not interchanged due to the
 ;; limitation that we cannot split the inner loop latch due to multiple use of inner induction
@ -113,6 +130,14 @@ define void @test02(i32 %k, i32 %N) {
 ; CHECK-NEXT:   - String:          Cannot interchange loops due to dependences.
 ; CHECK-NEXT: ...

+; DELIN: --- !Missed
+; DELIN-NEXT: Pass:            loop-interchange
+; DELIN-NEXT: Name:            UnsupportedInsBetweenInduction
+; DELIN-NEXT: Function:        test02
+; DELIN-NEXT: Args:
+; DELIN-NEXT:   - String:          Found unsupported instruction between induction variable increment and branch.
+; DELIN-NEXT: ...
+
 ;;-----------------------------------Test case 03-------------------------------
 ;; Test to make sure we can handle output dependencies.
 ;;
@ -161,6 +186,14 @@ for.body4:                                        ; preds = %for.body4, %for.con
 ; CHECK-NEXT:   - String:          Cannot interchange loops due to dependences.
 ; CHECK-NEXT: ...

+; DELIN: --- !Passed
+; DELIN-NEXT: Pass:            loop-interchange
+; DELIN-NEXT: Name:            Interchanged
+; DELIN-NEXT: Function:        test03
+; DELIN-NEXT: Args:
+; DELIN-NEXT:  - String:          Loop interchanged with enclosing loop.
+; DELIN-NEXT: ...
+
 ;;--------------------------------------Test case 04-------------------------------------
 ;; Loops not tightly nested are not interchanged
 ;;  for(int j=0;j<N;j++) {
@ -215,3 +248,11 @@ for.end17:
 ; CHECK-NEXT: Args:
 ; CHECK-NEXT:   - String:          Cannot interchange loops due to dependences.
 ; CHECK-NEXT: ...
+
+; DELIN: --- !Missed
+; DELIN-NEXT: Pass:            loop-interchange
+; DELIN-NEXT: Name:            NotTightlyNested
+; DELIN-NEXT: Function:        test04
+; DELIN-NEXT: Args:
+; DELIN-NEXT:  - String:          Cannot interchange loops because they are not tightly nested.
+; DELIN-NEXT: ...
--- a/test/Transforms/LoopInterchange/profitability.ll
+++ b/test/Transforms/LoopInterchange/profitability.ll
@ -2,6 +2,11 @@
 ; RUN:     -pass-remarks=loop-interchange -pass-remarks-missed=loop-interchange
 ; RUN: FileCheck -input-file %t %s

+; RUN: opt < %s -loop-interchange -pass-remarks-output=%t -verify-dom-info -verify-loop-info \
+; RUN:     -pass-remarks=loop-interchange -pass-remarks-missed=loop-interchange \
+; RUN:     -da-disable-delinearization-checks
+; RUN: FileCheck --check-prefix=DELIN -input-file %t %s
+
 ;; We test profitability model in these test cases.

 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@ -15,10 +20,13 @@ target triple = "x86_64-unknown-linux-gnu"
 ;;   for(int i=1;i<100;i++)
 ;;     for(int j=1;j<100;j++)
 ;;       A[j][i] = A[j - 1][i] + B[j][i];
-;; FIXME: DA misses this case after D35430

 ; CHECK:      Name:            Dependence
 ; CHECK-NEXT: Function:        interchange_01
+
+; DELIN:      Name:            Interchanged
+; DELIN-NEXT: Function:        interchange_01
+
 define void @interchange_01() {
 entry:
  br label %for2.preheader