[LAA] Enable symbolic stride speculation for all LAA clients

This is a functional change for LLE and LDist. The other clients (LV, LVerLICM) already had this explicitly enabled. The temporary boolean parameter to LAA is removed that allowed turning off speculation of symbolic strides. This makes LAA's caching interface LAA::getInfo only take the loop as the parameter. This makes the interface more friendly to the new Pass Manager. The flag -enable-mem-access-versioning is moved from LV to a LAA which now allows turning off speculation globally. llvm-svn: 273064
2024-11-26 04:32:44 +01:00 · 2016-06-17 22:35:41 +00:00 · 2016-06-17 22:35:41 +00:00 · 62a274f0e9
commit 62a274f0e9
parent 12e6050372
6 changed files with 154 additions and 50 deletions
--- a/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/include/llvm/Analysis/LoopAccessAnalysis.h
@ -513,8 +513,7 @@ class LoopAccessInfo {
 public:
  LoopAccessInfo(Loop *L, ScalarEvolution *SE, const DataLayout &DL,
                 const TargetLibraryInfo *TLI, AliasAnalysis *AA,
-                 DominatorTree *DT, LoopInfo *LI,
-                 bool SpeculateSymbolicStrides);
+                 DominatorTree *DT, LoopInfo *LI);

  /// Return true we can analyze the memory accesses in the loop and there are
  /// no memory dependence cycles.
@ -585,11 +584,6 @@ public:
  /// \brief Print the information about the memory accesses in the loop.
  void print(raw_ostream &OS, unsigned Depth = 0) const;

-  /// \brief Used to ensure that if the analysis was run with speculating the
-  /// value of symbolic strides, the client queries it with the same assumption.
-  /// Only used in DEBUG build but we don't want NDEBUG-dependent ABI.
-  bool SpeculateSymbolicStrides;
-
  /// \brief Checks existence of store to invariant address inside loop.
  /// If the loop has any store to invariant address, then it returns true,
  /// else returns false.
@ -715,11 +709,8 @@ public:

  /// \brief Query the result of the loop access information for the loop \p L.
  ///
-  /// \p SpeculateSymbolicStrides enables symbolic value speculation.  The
-  /// corresponding run-time checks are collected in LAI::PSE.
-  ///
  /// If there is no cached result available run the analysis.
-  const LoopAccessInfo &getInfo(Loop *L, bool SpeculateSymbolicStrides = false);
+  const LoopAccessInfo &getInfo(Loop *L);

  void releaseMemory() override {
    // Invalidate the cache when the pass is freed.
--- a/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/lib/Analysis/LoopAccessAnalysis.cpp
@ -65,6 +65,21 @@ static cl::opt<unsigned>
                            "loop-access analysis (default = 100)"),
                   cl::init(100));

+/// This enables versioning on the strides of symbolically striding memory
+/// accesses in code like the following.
+///   for (i = 0; i < N; ++i)
+///     A[i * Stride1] += B[i * Stride2] ...
+///
+/// Will be roughly translated to
+///    if (Stride1 == 1 && Stride2 == 1) {
+///      for (i = 0; i < N; i+=4)
+///       A[i:i+3] += ...
+///    } else
+///      ...
+static cl::opt<bool> EnableMemAccessVersioning(
+    "enable-mem-access-versioning", cl::init(true), cl::Hidden,
+    cl::desc("Enable symbolic stride memory access versioning"));
+
 /// \brief Enable store-to-load forwarding conflict detection. This option can
 /// be disabled for correctness testing.
 static cl::opt<bool> EnableForwardingConflictDetection(
@ -1540,7 +1555,7 @@ void LoopAccessInfo::analyzeLoop() {
        NumLoads++;
        Loads.push_back(Ld);
        DepChecker.addAccess(Ld);
-        if (SpeculateSymbolicStrides)
+        if (EnableMemAccessVersioning)
          collectStridedAccess(Ld);
        continue;
      }
@ -1564,7 +1579,7 @@ void LoopAccessInfo::analyzeLoop() {
        NumStores++;
        Stores.push_back(St);
        DepChecker.addAccess(St);
-        if (SpeculateSymbolicStrides)
+        if (EnableMemAccessVersioning)
          collectStridedAccess(St);
      }
    } // Next instr.
@ -1904,11 +1919,9 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
 LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
                               const DataLayout &DL,
                               const TargetLibraryInfo *TLI, AliasAnalysis *AA,
-                               DominatorTree *DT, LoopInfo *LI,
-                               bool SpeculateSymbolicStrides)
-    : SpeculateSymbolicStrides(SpeculateSymbolicStrides), PSE(*SE, *L),
-      PtrRtChecking(SE), DepChecker(PSE, L), TheLoop(L), DL(DL), TLI(TLI),
-      AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
+                               DominatorTree *DT, LoopInfo *LI)
+    : PSE(*SE, *L), PtrRtChecking(SE), DepChecker(PSE, L), TheLoop(L), DL(DL),
+      TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
      MaxSafeDepDistBytes(-1U), CanVecMem(false),
      StoreToLoopInvariantAddress(false) {
  if (canAnalyzeLoop())
@ -1955,19 +1968,12 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
  PSE.print(OS, Depth);
 }

-const LoopAccessInfo &
-LoopAccessAnalysis::getInfo(Loop *L, bool SpeculateSymbolicStrides) {
+const LoopAccessInfo &LoopAccessAnalysis::getInfo(Loop *L) {
  auto &LAI = LoopAccessInfoMap[L];

-#ifndef NDEBUG
-  assert((!LAI || LAI->SpeculateSymbolicStrides == SpeculateSymbolicStrides) &&
-         "Symbolic strides changed for loop");
-#endif
-
  if (!LAI) {
    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
-    LAI = llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, LI,
-                                            SpeculateSymbolicStrides);
+    LAI = llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, LI);
  }
  return *LAI.get();
 }
--- a/lib/Transforms/Scalar/LoopVersioningLICM.cpp
+++ b/lib/Transforms/Scalar/LoopVersioningLICM.cpp
@ -385,7 +385,7 @@ bool LoopVersioningLICM::legalLoopInstructions() {
        return false;
    }
  // Get LoopAccessInfo from current loop.
-  LAI = &LAA->getInfo(CurLoop, true);
+  LAI = &LAA->getInfo(CurLoop);
  // Check LoopAccessInfo for need of runtime check.
  if (LAI->getRuntimePointerChecking()->getChecks().empty()) {
    DEBUG(dbgs() << "    LAA: Runtime check not found !!\n");
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@ -130,21 +130,6 @@ static cl::opt<bool> MaximizeBandwidth(
    cl::desc("Maximize bandwidth when selecting vectorization factor which "
             "will be determined by the smallest type in loop."));

-/// This enables versioning on the strides of symbolically striding memory
-/// accesses in code like the following.
-///   for (i = 0; i < N; ++i)
-///     A[i * Stride1] += B[i * Stride2] ...
-///
-/// Will be roughly translated to
-///    if (Stride1 == 1 && Stride2 == 1) {
-///      for (i = 0; i < N; i+=4)
-///       A[i:i+3] += ...
-///    } else
-///      ...
-static cl::opt<bool> EnableMemAccessVersioning(
-    "enable-mem-access-versioning", cl::init(true), cl::Hidden,
-    cl::desc("Enable symbolic stride memory access versioning"));
-
 static cl::opt<bool> EnableInterleavedMemAccesses(
    "enable-interleaved-mem-accesses", cl::init(false), cl::Hidden,
    cl::desc("Enable vectorization on interleaved memory accesses in a loop"));
@ -4970,7 +4955,7 @@ void LoopVectorizationLegality::collectLoopUniforms() {
 }

 bool LoopVectorizationLegality::canVectorizeMemory() {
-  LAI = &LAA->getInfo(TheLoop, EnableMemAccessVersioning);
+  LAI = &LAA->getInfo(TheLoop);
  auto &OptionalReport = LAI->getReport();
  if (OptionalReport)
    emitAnalysis(VectorizationReport(*OptionalReport));
--- a/test/Transforms/LoopDistribute/symbolic-stride.ll
+++ b/test/Transforms/LoopDistribute/symbolic-stride.ll
@ -0,0 +1,65 @@
+; RUN: opt -basicaa -loop-distribute -S < %s | \
+; RUN:     FileCheck %s --check-prefix=ALL --check-prefix=STRIDE_SPEC
+
+; RUN: opt -basicaa -loop-distribute -S -enable-mem-access-versioning=0 < %s | \
+; RUN:     FileCheck %s --check-prefix=ALL --check-prefix=NO_STRIDE_SPEC
+
+; If we don't speculate stride for 1 we can't distribute along the line
+; because we could have a backward dependence:
+;
+;   for (i = 0; i < n; i++) {
+;     A[i + 1] = A[i] * B[i];
+;     =======================
+;     C[i] = D[i] * A[stride * i];
+;   }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+; ALL-LABEL: @f(
+define void @f(i32* noalias %a,
+               i32* noalias %b,
+               i32* noalias %c,
+               i32* noalias %d,
+               i64 %stride) {
+entry:
+  br label %for.body
+
+; STRIDE_SPEC: %ident.check = icmp ne i64 %stride, 1
+
+; STRIDE_SPEC: for.body.ldist1:
+; NO_STRIDE_SPEC-NOT: for.body.ldist1:
+
+for.body:                                         ; preds = %for.body, %entry
+  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+
+  %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
+  %loadA = load i32, i32* %arrayidxA, align 4
+
+  %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
+  %loadB = load i32, i32* %arrayidxB, align 4
+
+  %mulA = mul i32 %loadB, %loadA
+
+  %add = add nuw nsw i64 %ind, 1
+  %arrayidxA_plus_4 = getelementptr inbounds i32, i32* %a, i64 %add
+  store i32 %mulA, i32* %arrayidxA_plus_4, align 4
+
+  %arrayidxD = getelementptr inbounds i32, i32* %d, i64 %ind
+  %loadD = load i32, i32* %arrayidxD, align 4
+
+  %mul = mul i64 %ind, %stride
+  %arrayidxStridedA = getelementptr inbounds i32, i32* %a, i64 %mul
+  %loadStridedA = load i32, i32* %arrayidxStridedA, align 4
+
+  %mulC = mul i32 %loadD, %loadStridedA
+
+  %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
+  store i32 %mulC, i32* %arrayidxC, align 4
+
+  %exitcond = icmp eq i64 %add, 20
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
--- a/test/Transforms/LoopLoadElim/symbolic-stride.ll
+++ b/test/Transforms/LoopLoadElim/symbolic-stride.ll
@ -1,28 +1,44 @@
-; RUN: opt -loop-load-elim -S < %s | FileCheck %s
+; RUN: opt -loop-load-elim -S < %s | \
+; RUN:     FileCheck %s -check-prefix=ALL -check-prefix=ONE_STRIDE_SPEC \
+; RUN:                  -check-prefix=TWO_STRIDE_SPEC

-; Forwarding in the presence of symbolic strides is currently not supported:
+; RUN: opt -loop-load-elim -S -enable-mem-access-versioning=0 < %s | \
+; RUN:     FileCheck %s -check-prefix=ALL -check-prefix=NO_ONE_STRIDE_SPEC \
+; RUN:                  -check-prefix=NO_TWO_STRIDE_SPEC
+
+; RUN: opt -loop-load-elim -S -loop-load-elimination-scev-check-threshold=1 < %s | \
+; RUN:     FileCheck %s -check-prefix=ALL -check-prefix=ONE_STRIDE_SPEC \
+; RUN:                  -check-prefix=NO_TWO_STRIDE_SPEC
+
+; Forwarding in the presence of symbolic strides:
 ;
 ;   for (unsigned i = 0; i < 100; i++)
 ;     A[i + 1] = A[Stride * i] + B[i];

 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"

-; CHECK-LABEL: @f(
+; ALL-LABEL: @f(
 define void @f(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i64 %N,
               i64 %stride) {
+
+; ONE_STRIDE_SPEC: %ident.check = icmp ne i64 %stride, 1
+
 entry:
-; CHECK-NOT: %load_initial = load i32, i32* %A
+; NO_ONE_STRIDE_SPEC-NOT: %load_initial = load i32, i32* %A
+; ONE_STRIDE_SPEC: %load_initial = load i32, i32* %A
  br label %for.body

 for.body:                                         ; preds = %for.body, %entry
-; CHECK-NOT: %store_forwarded = phi i32 [ %load_initial, {{.*}} ], [ %add, %for.body ]
+; NO_ONE_STRIDE_SPEC-NOT: %store_forwarded = phi i32 [ %load_initial, {{.*}} ], [ %add, %for.body ]
+; ONE_STRIDE_SPEC: %store_forwarded = phi i32 [ %load_initial, {{.*}} ], [ %add, %for.body ]
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %mul = mul i64 %indvars.iv, %stride
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %mul
  %load = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
  %load_1 = load i32, i32* %arrayidx2, align 4
-; CHECK-NOT: %add = add i32 %load_1, %store_forwarded
+; NO_ONE_STRIDE_SPEC-NOT: %add = add i32 %load_1, %store_forwarded
+; ONE_STRIDE_SPEC: %add = add i32 %load_1, %store_forwarded
  %add = add i32 %load_1, %load
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %arrayidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
@ -33,3 +49,44 @@ for.body:                                         ; preds = %for.body, %entry
 for.end:                                          ; preds = %for.body
  ret void
 }
+
+; With two symbolic strides:
+;
+;   for (unsigned i = 0; i < 100; i++)
+;     A[Stride2 * (i + 1)] = A[Stride1 * i] + B[i];
+
+; ALL-LABEL: @two_strides(
+define void @two_strides(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i64 %N,
+                         i64 %stride.1, i64 %stride.2) {
+
+; TWO_STRIDE_SPEC: %ident.check = icmp ne i64 %stride.2, 1
+; TWO_STRIDE_SPEC: %ident.check1 = icmp ne i64 %stride.1, 1
+; NO_TWO_STRIDE_SPEC-NOT: %ident.check{{.*}} = icmp ne i64 %stride{{.*}}, 1
+
+entry:
+; NO_TWO_STRIDE_SPEC-NOT: %load_initial = load i32, i32* %A
+; TWO_STRIDE_SPEC: %load_initial = load i32, i32* %A
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+; NO_TWO_STRIDE_SPEC-NOT: %store_forwarded = phi i32 [ %load_initial, {{.*}} ], [ %add, %for.body ]
+; TWO_STRIDE_SPEC: %store_forwarded = phi i32 [ %load_initial, {{.*}} ], [ %add, %for.body ]
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %mul = mul i64 %indvars.iv, %stride.1
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %mul
+  %load = load i32, i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+  %load_1 = load i32, i32* %arrayidx2, align 4
+; NO_TWO_STRIDE_SPEC-NOT: %add = add i32 %load_1, %store_forwarded
+; TWO_STRIDE_SPEC: %add = add i32 %load_1, %store_forwarded
+  %add = add i32 %load_1, %load
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %mul.2 = mul i64 %indvars.iv.next, %stride.2
+  %arrayidx_next = getelementptr inbounds i32, i32* %A, i64 %mul.2
+  store i32 %add, i32* %arrayidx_next, align 4
+  %exitcond = icmp eq i64 %indvars.iv.next, %N
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}