[Dependence Analysis] Enable delinearization of fixed sized arrays

Patch by Artem Radzikhovskyy!

Allow delinearization of fixed-size arrays if we can prove that the GEP indices do not overflow the array dimensions. The checks applied are similar to the ones that are used for delinearization of parametric-size arrays: make sure that the GEP indices are non-negative and that they are smaller than the size of the corresponding dimension.

Changes Summary:
- Updated the LIT tests with more exact values, as we are now able to delinearize and apply more exact tests.
- profitability.ll - now able to delinearize in all cases; no need to use the -da-disable-delinearization-checks flag and run the test twice.
- loop-interchange-optimization-remarks.ll - in one of the cases we are now able to delinearize without using -da-disable-delinearization-checks.
- SimpleSIVNoValidityCheckFixedSize.ll - removed the unnecessary "-da-disable-delinearization-checks" flag; the exact answer can now be obtained without it.
- SimpleSIVNoValidityCheck.ll and PreliminaryNoValidityCheckFixedSize.ll - made the negative tests more explicit, in order to demonstrate the need for the "-da-disable-delinearization-checks" flag.

Differential Revision: https://reviews.llvm.org/D101486
2025-01-31 20:51:52 +01:00 · 2021-05-10 10:01:43 -07:00 · 2021-05-10 10:01:43 -07:00 · cc0c445bfc
commit cc0c445bfc
parent 0afed2f182
9 changed files with 107 additions and 51 deletions
--- a/lib/Analysis/DependenceAnalysis.cpp
+++ b/lib/Analysis/DependenceAnalysis.cpp
@ -3304,16 +3304,6 @@ bool DependenceInfo::tryDelinearizeFixedSize(
    const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
    SmallVectorImpl<const SCEV *> &DstSubscripts) {

-  // In general we cannot safely assume that the subscripts recovered from GEPs
-  // are in the range of values defined for their corresponding array
-  // dimensions. For example some C language usage/interpretation make it
-  // impossible to verify this at compile-time. As such we give up here unless
-  // we can assume that the subscripts do not overlap into neighboring
-  // dimensions and that the number of dimensions matches the number of
-  // subscripts being recovered.
-  if (!DisableDelinearizationChecks)
-    return false;
-
  Value *SrcPtr = getLoadStorePointerOperand(Src);
  Value *DstPtr = getLoadStorePointerOperand(Dst);
  const SCEVUnknown *SrcBase =
@ -3352,22 +3342,55 @@ bool DependenceInfo::tryDelinearizeFixedSize(

  // Check that for identical base pointers we do not miss index offsets
  // that have been added before this GEP is applied.
-  if (SrcBasePtr == SrcBase->getValue() && DstBasePtr == DstBase->getValue()) {
-    assert(SrcSubscripts.size() == DstSubscripts.size() &&
-           SrcSubscripts.size() == SrcSizes.size() + 1 &&
-           "Expected equal number of entries in the list of sizes and "
-           "subscripts.");
-    LLVM_DEBUG({
-      dbgs() << "Delinearized subscripts of fixed-size array\n"
-             << "SrcGEP:" << *SrcGEP << "\n"
-             << "DstGEP:" << *DstGEP << "\n";
-    });
-    return true;
+  if (SrcBasePtr != SrcBase->getValue() || DstBasePtr != DstBase->getValue()) {
+    SrcSubscripts.clear();
+    DstSubscripts.clear();
+    return false;
  }

-  SrcSubscripts.clear();
-  DstSubscripts.clear();
-  return false;
+  assert(SrcSubscripts.size() == DstSubscripts.size() &&
+         SrcSubscripts.size() == SrcSizes.size() + 1 &&
+         "Expected equal number of entries in the list of sizes and "
+         "subscripts.");
+
+  // In general we cannot safely assume that the subscripts recovered from GEPs
+  // are in the range of values defined for their corresponding array
+  // dimensions. For example some C language usages/interpretations make it
+  // impossible to verify this at compile-time. As such we can only delinearize
+  // if the subscripts are known to be non-negative and less than the size of
+  // the corresponding dimension.
+  if (!DisableDelinearizationChecks) {
+    auto AllIndiciesInRange = [&](SmallVector<int, 4> &DimensionSizes,
+                                  SmallVectorImpl<const SCEV *> &Subscripts,
+                                  Value *Ptr) {
+      size_t SSize = Subscripts.size();
+      for (size_t I = 1; I < SSize; ++I) {
+        const SCEV *S = Subscripts[I];
+        if (!isKnownNonNegative(S, Ptr))
+          return false;
+        if (auto *SType = dyn_cast<IntegerType>(S->getType())) {
+          const SCEV *Range = SE->getConstant(
+              ConstantInt::get(SType, DimensionSizes[I - 1], false));
+          if (!isKnownLessThan(S, Range))
+            return false;
+        }
+      }
+      return true;
+    };
+
+    if (!AllIndiciesInRange(SrcSizes, SrcSubscripts, SrcPtr) ||
+        !AllIndiciesInRange(DstSizes, DstSubscripts, DstPtr)) {
+      SrcSubscripts.clear();
+      DstSubscripts.clear();
+      return false;
+    }
+  }
+  LLVM_DEBUG({
+    dbgs() << "Delinearized subscripts of fixed-size array\n"
+           << "SrcGEP:" << *SrcGEP << "\n"
+           << "DstGEP:" << *DstGEP << "\n";
+  });
+  return true;
 }

 bool DependenceInfo::tryDelinearizeParametricSize(
--- a/test/Analysis/DependenceAnalysis/Coupled.ll
+++ b/test/Analysis/DependenceAnalysis/Coupled.ll
@ -438,7 +438,8 @@ entry:

 ; CHECK-LABEL: couple11
 ; CHECK: da analyze - none!
-; CHECK: da analyze - flow [=|<]!
+; CHECK: da analyze - flow [=|<] splitable!
+; CHECK: da analyze - split level = 1, iteration = 9!
 ; CHECK: da analyze - confused!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
@ -476,7 +477,8 @@ entry:

 ; CHECK-LABEL: couple12
 ; CHECK: da analyze - none!
-; CHECK: da analyze - flow [<]!
+; CHECK: da analyze - flow [<] splitable!
+; CHECK: da analyze - split level = 1, iteration = 11!
 ; CHECK: da analyze - confused!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
--- a/test/Analysis/DependenceAnalysis/Invariant.ll
+++ b/test/Analysis/DependenceAnalysis/Invariant.ll
@ -5,9 +5,20 @@
 ; Test for a bug, which caused an assert when an invalid
 ; SCEVAddRecExpr is created in addToCoefficient.

+; float foo (float g, float* rr[40]) {
+;   float res= 0.0f;
+;   for (int i = 0; i < 40; i += 5) {
+;     for (int j = 0; j < 40; j += 5) {
+;       float add = rr[j][j] + rr[i][j];
+;       res = add > g? add : res;
+;     }
+;   }
+;   return res;
+; }
+
 ; CHECK-LABEL: foo
 ; CHECK: da analyze - consistent input [S 0]!
-; CHECK: da analyze - input [* *|<]!
+; CHECK: da analyze - input [* 0|<]!
 ; CHECK: da analyze - none!

 define float @foo(float %g, [40 x float]* %rr) nounwind {
--- a/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll
+++ b/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll
@ -1,5 +1,7 @@
 ; RUN: opt < %s -disable-output "-passes=print<da>" -aa-pipeline=basic-aa 2>&1 \
 ; RUN:   -da-disable-delinearization-checks | FileCheck %s
+; RUN: opt < %s -disable-output "-passes=print<da>" -aa-pipeline=basic-aa 2>&1 \
+; RUN:   | FileCheck --check-prefix=LIN %s

 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.6.0"
@ -25,6 +27,14 @@ entry:
 ; CHECK: da analyze - confused!
 ; CHECK: da analyze - output [* * *]!

+; LIN-LABEL: p2
+; LIN: da analyze - output [* * *]!
+; LIN: da analyze - flow [* *|<]!
+; LIN: da analyze - confused!
+; LIN: da analyze - input [* * *]!
+; LIN: da analyze - confused!
+; LIN: da analyze - output [* * *]!
+
 for.cond1.preheader.preheader:                    ; preds = %entry
  br label %for.cond1.preheader

--- a/test/Analysis/DependenceAnalysis/Separability.ll
+++ b/test/Analysis/DependenceAnalysis/Separability.ll
@ -17,7 +17,7 @@ define void @sep0([100 x [100 x i32]]* %A, i32* %B, i32 %n) nounwind uwtable ssp
 entry:
  br label %for.cond1.preheader

-; CHECK: da analyze - output [= * * S]!
+; CHECK: da analyze - output [0 * * S]!
 ; CHECK: da analyze - flow [* * * *|<]!
 ; CHECK: da analyze - confused!
 ; CHECK: da analyze - input [* * S *]!
@ -92,7 +92,7 @@ define void @sep1([100 x [100 x i32]]* %A, i32* %B, i32 %n) nounwind uwtable ssp
 entry:
  br label %for.cond1.preheader

-; CHECK: da analyze - output [= * * S]!
+; CHECK: da analyze - output [0 * * S]!
 ; CHECK: da analyze - flow [* * * *|<]!
 ; CHECK: da analyze - confused!
 ; CHECK: da analyze - input [* * S *]!
@ -166,10 +166,10 @@ define void @sep2([100 x [100 x [100 x i32]]]* %A, i32* %B, i32 %n) nounwind uwt
 entry:
  br label %for.cond1.preheader

-; CHECK: da analyze - output [= S = =]!
-; CHECK: da analyze - flow [* * * <>]!
+; CHECK: da analyze - consistent output [0 S 0 0]!
+; CHECK: da analyze - flow [> * * -10]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - input [= * * *]!
+; CHECK: da analyze - input [0 * * 0]!
 ; CHECK: da analyze - confused!
 ; CHECK: da analyze - none!

@ -240,10 +240,10 @@ define void @sep3([100 x [100 x [100 x i32]]]* %A, i32* %B, i32 %n) nounwind uwt
 entry:
  br label %for.cond1.preheader

-; CHECK: da analyze - output [= S = =]!
-; CHECK: da analyze - flow [* * * *|<]!
+; CHECK: da analyze - consistent output [0 S 0 0]!
+; CHECK: da analyze - flow [> * * *]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - input [= * * *]!
+; CHECK: da analyze - input [0 * * 0]!
 ; CHECK: da analyze - confused!
 ; CHECK: da analyze - none!

--- a/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheck.ll
+++ b/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheck.ll
@ -1,12 +1,18 @@
 ; RUN: opt < %s -disable-output -passes="print<da>"                            \
 ; RUN: -da-disable-delinearization-checks 2>&1 | FileCheck %s
-; RUN: opt < %s -da -analyze -enable-new-pm=0 -da-disable-delinearization-checks | FileCheck %s
+; RUN: opt < %s -disable-output -passes="print<da>"                            \
+; RUN: 2>&1 | FileCheck --check-prefix=LIN %s

 ; CHECK-LABEL: t1
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - consistent anti [1 -2]!
 ; CHECK: da analyze - none!

+; LIN-LABEL: t1
+; LIN: da analyze - input [* *]!
+; LIN: da analyze - anti [* *|<]!
+; LIN: da analyze - output [* *]!
+
 ;; void t1(int n, int m, int a[][m]) {
 ;;   for (int i = 0; i < n-1; ++i)
 ;;     for (int j = 2; j < m; ++j)
@ -58,6 +64,11 @@ for.end14:                                        ; preds = %entry, %for.inc12
 ; CHECK: da analyze - consistent anti [1 -2 0 -3 2]!
 ; CHECK: da analyze - none!

+; LIN-LABEL: t2
+; LIN: da analyze - input [* * * * *]!
+; LIN: da analyze - anti [* * * * *|<]!
+; LIN: da analyze - output [* * * * *]!
+
 ;; void t2(int n, int m, int a[][n][n][n][m]) {
 ;;   for (int i1 = 0; i1 < n-1; ++i1)
 ;;     for (int i2 = 2; i2 < n; ++i2)
@ -179,6 +190,11 @@ for.end50:                                        ; preds = %entry, %for.inc48
 ; CHECK: da analyze - consistent anti [1 -2]!
 ; CHECK: da analyze - none!

+; LIN-LABEL: t3
+; LIN: da analyze - input [* *]!
+; LIN: da analyze - anti [* *|<]!
+; LIN: da analyze - output [* *]!
+
 ;; // No sign or zero extension, but with compile-time unknown loop lower bound.
 ;; void t3(unsigned long long n, unsigned long long m, unsigned long long lb, float a[][m]) {
 ;;   for (unsigned long long i = 0; i < n-1; ++i)
--- a/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheckFixedSize.ll
+++ b/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheckFixedSize.ll
@ -1,6 +1,8 @@
-; RUN: opt < %s -disable-output -passes="print<da>"                            \
-; RUN: -da-disable-delinearization-checks 2>&1 | FileCheck %s
-; RUN: opt < %s -da -analyze -enable-new-pm=0 -da-disable-delinearization-checks | FileCheck %s
+; RUN: opt < %s -disable-output -passes="print<da>" 2>&1 | FileCheck %s
+; RUN: opt < %s -da -analyze -enable-new-pm=0 | FileCheck %s
+
+; Note: exact results can be achieved even if
+; "-da-disable-delinearization-checks" is not used.

 ; CHECK-LABEL: t1
 ; CHECK: da analyze - none!
--- a/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll
+++ b/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll
@ -178,12 +178,12 @@ for.body4:                                        ; preds = %for.body4, %for.con
  br i1 %exitcond, label %for.body4, label %for.cond.loopexit
 }

-; CHECK: --- !Missed
+; CHECK: --- !Passed
 ; CHECK-NEXT: Pass:            loop-interchange
-; CHECK-NEXT: Name:            Dependence
+; CHECK-NEXT: Name:            Interchanged
 ; CHECK-NEXT: Function:        test03
 ; CHECK-NEXT: Args:
-; CHECK-NEXT:   - String:          Cannot interchange loops due to dependences.
+; CHECK-NEXT:   - String:          Loop interchanged with enclosing loop.
 ; CHECK-NEXT: ...

 ; DELIN: --- !Passed
--- a/test/Transforms/LoopInterchange/profitability.ll
+++ b/test/Transforms/LoopInterchange/profitability.ll
@ -2,11 +2,6 @@
 ; RUN:     -pass-remarks=loop-interchange -pass-remarks-missed=loop-interchange
 ; RUN: FileCheck -input-file %t %s

-; RUN: opt < %s -loop-interchange -pass-remarks-output=%t -verify-dom-info -verify-loop-info \
-; RUN:     -pass-remarks=loop-interchange -pass-remarks-missed=loop-interchange \
-; RUN:     -da-disable-delinearization-checks
-; RUN: FileCheck --check-prefix=DELIN -input-file %t %s
-
 ;; We test profitability model in these test cases.

 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@ -21,12 +16,9 @@ target triple = "x86_64-unknown-linux-gnu"
 ;;     for(int j=1;j<100;j++)
 ;;       A[j][i] = A[j - 1][i] + B[j][i];

-; CHECK:      Name:            Dependence
+; CHECK:      Name:            Interchanged
 ; CHECK-NEXT: Function:        interchange_01

-; DELIN:      Name:            Interchanged
-; DELIN-NEXT: Function:        interchange_01
-
 define void @interchange_01() {
 entry:
  br label %for2.preheader