[LICM] Report successful hoist/sink/promotion

Differential Revision: https://reviews.llvm.org/D27938 llvm-svn: 291646
2024-11-25 20:23:11 +01:00 · 2017-01-11 04:39:35 +00:00 · 2017-01-11 04:39:35 +00:00 · 7d55194036
commit 7d55194036
parent 63cd0c2c97
27 changed files with 161 additions and 50 deletions
--- a/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/include/llvm/Transforms/Utils/LoopUtils.h
@ -29,6 +29,7 @@ class DataLayout;
 class DominatorTree;
 class Loop;
 class LoopInfo;
+class OptimizationRemarkEmitter;
 class Pass;
 class PredicatedScalarEvolution;
 class PredIteratorCache;
@ -404,11 +405,11 @@ bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI,
 /// uses before definitions, allowing us to sink a loop body in one pass without
 /// iteration. Takes DomTreeNode, AliasAnalysis, LoopInfo, DominatorTree,
 /// DataLayout, TargetLibraryInfo, Loop, AliasSet information for all
-/// instructions of the loop and loop safety information as arguments.
-/// It returns changed status.
+/// instructions of the loop and loop safety information as
+/// arguments. Diagnostics is emitted via \p ORE. It returns changed status.
 bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
                TargetLibraryInfo *, Loop *, AliasSetTracker *,
-                LoopSafetyInfo *);
+                LoopSafetyInfo *, OptimizationRemarkEmitter *ORE);

 /// \brief Walk the specified region of the CFG (defined by all blocks
 /// dominated by the specified block, and that are in the current loop) in depth
@ -416,10 +417,11 @@ bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
 /// before uses, allowing us to hoist a loop body in one pass without iteration.
 /// Takes DomTreeNode, AliasAnalysis, LoopInfo, DominatorTree, DataLayout,
 /// TargetLibraryInfo, Loop, AliasSet information for all instructions of the
-/// loop and loop safety information as arguments. It returns changed status.
+/// loop and loop safety information as arguments. Diagnostics is emitted via \p
+/// ORE. It returns changed status.
 bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
                 TargetLibraryInfo *, Loop *, AliasSetTracker *,
-                 LoopSafetyInfo *);
+                 LoopSafetyInfo *, OptimizationRemarkEmitter *ORE);

 /// \brief Try to promote memory values to scalars by sinking stores out of
 /// the loop and moving loads to before the loop.  We do this by looping over
@ -427,12 +429,14 @@ bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
 /// loop invariant. It takes AliasSet, Loop exit blocks vector, loop exit blocks
 /// insertion point vector, PredIteratorCache, LoopInfo, DominatorTree, Loop,
 /// AliasSet information for all instructions of the loop and loop safety
-/// information as arguments. It returns changed status.
+/// information as arguments. Diagnostics is emitted via \p ORE. It returns
+/// changed status.
 bool promoteLoopAccessesToScalars(AliasSet &, SmallVectorImpl<BasicBlock *> &,
                                  SmallVectorImpl<Instruction *> &,
                                  PredIteratorCache &, LoopInfo *,
                                  DominatorTree *, const TargetLibraryInfo *,
-                                  Loop *, AliasSetTracker *, LoopSafetyInfo *);
+                                  Loop *, AliasSetTracker *, LoopSafetyInfo *,
+                                  OptimizationRemarkEmitter *);

 /// \brief Computes safety information for a loop
 /// checks loop body & header for the possibility of may throw
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@ -43,6 +43,7 @@
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/LoopPassManager.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
@ -84,10 +85,12 @@ static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
 static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,
                            const LoopSafetyInfo *SafetyInfo);
 static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
-                  const LoopSafetyInfo *SafetyInfo);
+                  const LoopSafetyInfo *SafetyInfo,
+                  OptimizationRemarkEmitter *ORE);
 static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT,
                 const Loop *CurLoop, AliasSetTracker *CurAST,
-                 const LoopSafetyInfo *SafetyInfo);
+                 const LoopSafetyInfo *SafetyInfo,
+                 OptimizationRemarkEmitter *ORE);
 static bool isSafeToExecuteUnconditionally(const Instruction &Inst,
                                           const DominatorTree *DT,
                                           const Loop *CurLoop,
@ -104,7 +107,8 @@ CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
 namespace {
 struct LoopInvariantCodeMotion {
  bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
-                 TargetLibraryInfo *TLI, ScalarEvolution *SE, bool DeleteAST);
+                 TargetLibraryInfo *TLI, ScalarEvolution *SE,
+                 OptimizationRemarkEmitter *ORE, bool DeleteAST);

  DenseMap<Loop *, AliasSetTracker *> &getLoopToAliasSetMap() {
    return LoopToAliasSetMap;
@ -135,12 +139,16 @@ struct LegacyLICMPass : public LoopPass {
    }

    auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
+    // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
+    // pass.  Function analyses need to be preserved across loop transformations
+    // but ORE cannot be preserved (see comment before the pass definition).
+    OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
    return LICM.runOnLoop(L,
                          &getAnalysis<AAResultsWrapperPass>().getAAResults(),
                          &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
                          &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
                          &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
-                          SE ? &SE->getSE() : nullptr, false);
+                          SE ? &SE->getSE() : nullptr, &ORE, false);
  }

  /// This transformation requires natural loop information & requires that
@ -186,11 +194,13 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM) {
  auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(*F);
  auto *TLI = FAM.getCachedResult<TargetLibraryAnalysis>(*F);
  auto *SE = FAM.getCachedResult<ScalarEvolutionAnalysis>(*F);
-  assert((AA && LI && DT && TLI && SE) && "Analyses for LICM not available");
+  auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
+  assert((AA && LI && DT && TLI && SE && ORE) &&
+         "Analyses for LICM not available");

  LoopInvariantCodeMotion LICM;

-  if (!LICM.runOnLoop(&L, AA, LI, DT, TLI, SE, true))
+  if (!LICM.runOnLoop(&L, AA, LI, DT, TLI, SE, ORE, true))
    return PreservedAnalyses::all();

  // FIXME: There is no setPreservesCFG in the new PM. When that becomes
@ -217,7 +227,9 @@ Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
 bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AliasAnalysis *AA,
                                        LoopInfo *LI, DominatorTree *DT,
                                        TargetLibraryInfo *TLI,
-                                        ScalarEvolution *SE, bool DeleteAST) {
+                                        ScalarEvolution *SE,
+                                        OptimizationRemarkEmitter *ORE,
+                                        bool DeleteAST) {
  bool Changed = false;

  assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
@ -243,10 +255,10 @@ bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AliasAnalysis *AA,
  //
  if (L->hasDedicatedExits())
    Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
-                          CurAST, &SafetyInfo);
+                          CurAST, &SafetyInfo, ORE);
  if (Preheader)
    Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
-                           CurAST, &SafetyInfo);
+                           CurAST, &SafetyInfo, ORE);

  // Now that all loop invariants have been removed from the loop, promote any
  // memory references to scalars that we can.
@ -279,7 +291,7 @@ bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AliasAnalysis *AA,
      for (AliasSet &AS : *CurAST)
        Promoted |=
            promoteLoopAccessesToScalars(AS, ExitBlocks, InsertPts, PIC, LI, DT,
-                                         TLI, L, CurAST, &SafetyInfo);
+                                         TLI, L, CurAST, &SafetyInfo, ORE);

      // Once we have promoted values across the loop body we have to
      // recursively reform LCSSA as any nested loop may now have values defined
@ -320,7 +332,8 @@ bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AliasAnalysis *AA,
 ///
 bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
                      DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
-                      AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo) {
+                      AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo,
+                      OptimizationRemarkEmitter *ORE) {

  // Verify inputs.
  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
@ -336,7 +349,8 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
  bool Changed = false;
  const std::vector<DomTreeNode *> &Children = N->getChildren();
  for (DomTreeNode *Child : Children)
-    Changed |= sinkRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo);
+    Changed |=
+        sinkRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo, ORE);

  // Only need to process the contents of this block if it is not part of a
  // subloop (which would already have been processed).
@ -365,7 +379,7 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
    if (isNotUsedInLoop(I, CurLoop, SafetyInfo) &&
        canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo)) {
      ++II;
-      Changed |= sink(I, LI, DT, CurLoop, CurAST, SafetyInfo);
+      Changed |= sink(I, LI, DT, CurLoop, CurAST, SafetyInfo, ORE);
    }
  }
  return Changed;
@ -378,7 +392,8 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
 ///
 bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
                       DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
-                       AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo) {
+                       AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo,
+                       OptimizationRemarkEmitter *ORE) {
  // Verify inputs.
  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
         CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr &&
@ -421,12 +436,13 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
          isSafeToExecuteUnconditionally(
              I, DT, CurLoop, SafetyInfo,
              CurLoop->getLoopPreheader()->getTerminator()))
-        Changed |= hoist(I, DT, CurLoop, SafetyInfo);
+        Changed |= hoist(I, DT, CurLoop, SafetyInfo, ORE);
    }

  const std::vector<DomTreeNode *> &Children = N->getChildren();
  for (DomTreeNode *Child : Children)
-    Changed |= hoistRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo);
+    Changed |=
+        hoistRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo, ORE);
  return Changed;
 }

@ -680,8 +696,11 @@ CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
 ///
 static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT,
                 const Loop *CurLoop, AliasSetTracker *CurAST,
-                 const LoopSafetyInfo *SafetyInfo) {
+                 const LoopSafetyInfo *SafetyInfo,
+                 OptimizationRemarkEmitter *ORE) {
  DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
+  ORE->emit(OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
+            << "sinking " << ore::NV("Inst", &I));
  bool Changed = false;
  if (isa<LoadInst>(I))
    ++NumMovedLoads;
@ -748,10 +767,13 @@ static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT,
 /// is safe to hoist, this instruction is called to do the dirty work.
 ///
 static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
-                  const LoopSafetyInfo *SafetyInfo) {
+                  const LoopSafetyInfo *SafetyInfo,
+                  OptimizationRemarkEmitter *ORE) {
  auto *Preheader = CurLoop->getLoopPreheader();
  DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I
               << "\n");
+  ORE->emit(OptimizationRemark(DEBUG_TYPE, "Hoisted", &I)
+            << "hosting " << ore::NV("Inst", &I));

  // Metadata can be dependent on conditions we are hoisting above.
  // Conservatively strip all metadata on the instruction unless we were
@ -882,7 +904,8 @@ bool llvm::promoteLoopAccessesToScalars(
    AliasSet &AS, SmallVectorImpl<BasicBlock *> &ExitBlocks,
    SmallVectorImpl<Instruction *> &InsertPts, PredIteratorCache &PIC,
    LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
-    Loop *CurLoop, AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo) {
+    Loop *CurLoop, AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo,
+    OptimizationRemarkEmitter *ORE) {
  // Verify inputs.
  assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
         CurAST != nullptr && SafetyInfo != nullptr &&
@ -1074,6 +1097,9 @@ bool llvm::promoteLoopAccessesToScalars(
  // Otherwise, this is safe to promote, lets do it!
  DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr
               << '\n');
+  ORE->emit(
+      OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar", LoopUses[0])
+      << "Moving accesses to memory location out of the loop");
  ++NumPromoted;

  // Grab a debug location for the inserted loads/stores; given that the
--- a/test/Transforms/LICM/argmemonly-call.ll
+++ b/test/Transforms/LICM/argmemonly-call.ll
@ -1,5 +1,5 @@
 ; RUN: opt -S -basicaa -licm %s | FileCheck %s
-; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' < %s -S | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s
 declare i32 @foo() readonly argmemonly nounwind
 declare i32 @foo2() readonly nounwind
 declare i32 @bar(i32* %loc2) readonly argmemonly nounwind
--- a/test/Transforms/LICM/assume.ll
+++ b/test/Transforms/LICM/assume.ll
@ -1,5 +1,5 @@
 ; RUN: opt -licm -basicaa < %s -S | FileCheck %s
-; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' < %s -S | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s

 define void @f_0(i1 %p) nounwind ssp {
 ; CHECK-LABEL: @f_0(
--- a/test/Transforms/LICM/atomics.ll
+++ b/test/Transforms/LICM/atomics.ll
@ -1,5 +1,5 @@
 ; RUN: opt < %s -S -basicaa -licm | FileCheck %s
-; RUN: opt -aa-pipeline=basic-aa -passes='lcssa,require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' < %s -S | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='lcssa,require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s

 ; Check that we can hoist unordered loads
 define i32 @test1(i32* nocapture %y) nounwind uwtable ssp {
--- a/test/Transforms/LICM/basictest.ll
+++ b/test/Transforms/LICM/basictest.ll
@ -1,5 +1,5 @@
 ; RUN: opt < %s -licm | llvm-dis
-; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' < %s | llvm-dis
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s | llvm-dis

 define void @testfunc(i32 %i) {
 ; <label>:0
--- a/test/Transforms/LICM/constexpr.ll
+++ b/test/Transforms/LICM/constexpr.ll
@ -1,5 +1,5 @@
 ; RUN: opt < %s -S -basicaa -licm | FileCheck %s
-; RUN: opt -aa-pipeline=basic-aa -passes='lcssa,require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' < %s -S | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='lcssa,require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s
 ; This fixes PR22460

 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
--- a/test/Transforms/LICM/crash.ll
+++ b/test/Transforms/LICM/crash.ll
@ -1,5 +1,5 @@
 ; RUN: opt -licm -disable-output < %s
-; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -disable-output < %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -disable-output < %s

 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
--- a/test/Transforms/LICM/debug-value.ll
+++ b/test/Transforms/LICM/debug-value.ll
@ -1,5 +1,5 @@
 ; RUN: opt -licm -basicaa < %s -S | FileCheck %s
-; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' < %s -S | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s

 define void @dgefa() nounwind ssp {
 entry:
--- a/test/Transforms/LICM/extra-copies.ll
+++ b/test/Transforms/LICM/extra-copies.ll
@ -1,5 +1,5 @@
 ; RUN: opt < %s -licm -S | FileCheck %s
-; RUN: opt -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' < %s -S | FileCheck %s
+; RUN: opt -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s
 ; PR19835
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
--- a/test/Transforms/LICM/funclet.ll
+++ b/test/Transforms/LICM/funclet.ll
@ -1,5 +1,5 @@
 ; RUN: opt < %s -licm -S | FileCheck %s
-; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' < %s -S | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s

 target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
 target triple = "i386-pc-windows-msvc18.0.0"
--- a/test/Transforms/LICM/hoist-bitcast-load.ll
+++ b/test/Transforms/LICM/hoist-bitcast-load.ll
@ -1,5 +1,5 @@
 ; RUN: opt -S -basicaa -licm < %s | FileCheck %s
-; RUN: opt -aa-pipeline=basic-aa -passes='loop-simplify,require<aa>,require<targetir>,require<scalar-evolution>,loop(simplify-cfg,licm)' -S < %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='loop-simplify,require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(simplify-cfg,licm)' -S < %s | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"

--- a/test/Transforms/LICM/hoist-deref-load.ll
+++ b/test/Transforms/LICM/hoist-deref-load.ll
@ -1,5 +1,5 @@
 ; RUN: opt -S -basicaa -licm < %s | FileCheck %s
-; RUN: opt -aa-pipeline=basic-aa -passes='loop-simplify,require<aa>,require<targetir>,require<scalar-evolution>,loop(simplify-cfg,licm)' -S < %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='loop-simplify,require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(simplify-cfg,licm)' -S < %s | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"

--- a/test/Transforms/LICM/hoist-nounwind.ll
+++ b/test/Transforms/LICM/hoist-nounwind.ll
@ -1,5 +1,5 @@
 ; RUN: opt -S -basicaa -licm < %s | FileCheck %s
-; RUN: opt -aa-pipeline=basic-aa -passes='lcssa,require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -S %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='lcssa,require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"

--- a/test/Transforms/LICM/hoist-round.ll
+++ b/test/Transforms/LICM/hoist-round.ll
@ -1,5 +1,5 @@
 ; RUN: opt -S -licm < %s | FileCheck %s
-; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -S %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s

 target datalayout = "E-m:e-p:32:32-i8:8:8-i16:16:16-i64:32:32-f64:32:32-v64:32:32-v128:32:32-a0:0:32-n32"

--- a/test/Transforms/LICM/hoisting.ll
+++ b/test/Transforms/LICM/hoisting.ll
@ -1,5 +1,5 @@
 ; RUN: opt < %s -licm -S | FileCheck %s
-; RUN: opt -lcssa %s | opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -S | FileCheck %s
+; RUN: opt -lcssa %s | opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S | FileCheck %s

@X = global i32 0		; <i32*> [#uses=1]

--- a/test/Transforms/LICM/lcssa-ssa-promoter.ll
+++ b/test/Transforms/LICM/lcssa-ssa-promoter.ll
@ -1,5 +1,5 @@
 ; RUN: opt -S -basicaa -licm < %s | FileCheck %s
-; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -S %s| FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S %s| FileCheck %s
 ;
 ; Manually validate LCSSA form is preserved even after SSAUpdater is used to
 ; promote things in the loop bodies.
--- a/test/Transforms/LICM/no-preheader-test.ll
+++ b/test/Transforms/LICM/no-preheader-test.ll
@ -1,6 +1,6 @@
 ; Test that LICM works when there is not a loop-preheader
 ; RUN: opt < %s -licm | llvm-dis
-; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' < %s | llvm-dis
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s | llvm-dis

 define void @testfunc(i32 %i.s, i1 %ifcond) {
 	br i1 %ifcond, label %Then, label %Else
--- a/test/Transforms/LICM/opt-remarks.ll
+++ b/test/Transforms/LICM/opt-remarks.ll
@ -0,0 +1,81 @@
+; RUN: opt < %s -licm -pass-remarks=licm -o /dev/null 2>&1 | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' %s -o /dev/null -pass-remarks=licm 2>&1 | FileCheck %s
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+define void @hoist(i32* %array, i32* noalias %p) {
+Entry:
+  br label %Loop
+
+Loop:
+  %j = phi i32 [ 0, %Entry ], [ %Next, %Loop ]
+  %addr = getelementptr i32, i32* %array, i32 %j
+  %a = load i32, i32* %addr
+; CHECK: remark: /tmp/kk.c:2:20: hosting load
+  %b = load i32, i32* %p, !dbg !8
+  %a2 = add i32 %a, %b
+  store i32 %a2, i32* %addr
+  %Next = add i32 %j, 1
+  %cond = icmp eq i32 %Next, 0
+  br i1 %cond, label %Out, label %Loop
+
+Out:
+  ret void
+}
+
+define i32 @sink(i32* %array, i32* noalias %p, i32 %b) {
+Entry:
+  br label %Loop
+
+Loop:
+  %j = phi i32 [ 0, %Entry ], [ %Next, %Loop ]
+  %addr = getelementptr i32, i32* %array, i32 %j
+  %a = load i32, i32* %addr
+  %a2 = add i32 %a, %b
+  store i32 %a2, i32* %addr
+; CHECK: remark: /tmp/kk.c:2:21: sinking add
+  %a3 = add i32 %a, 1, !dbg !9
+  %Next = add i32 %j, 1
+  %cond = icmp eq i32 %Next, 0
+  br i1 %cond, label %Out, label %Loop
+
+Out:
+  %a4 = phi i32 [ %a3, %Loop ]
+  ret i32 %a4
+}
+
+define void @promote(i32* %array, i32* noalias %p) {
+Entry:
+  br label %Loop
+
+Loop:
+  %j = phi i32 [ 0, %Entry ], [ %Next, %Loop ]
+  %addr = getelementptr i32, i32* %array, i32 %j
+  %a = load i32, i32* %addr
+  %b = load i32, i32* %p
+  %a2 = add i32 %a, %b
+  store i32 %a2, i32* %addr
+; CHECK: remark: /tmp/kk.c:2:22: Moving accesses to memory location out of the loop
+  store i32 %b, i32* %p, !dbg !10
+  %Next = add i32 %j, 1
+  %cond = icmp eq i32 %Next, 0
+  br i1 %cond, label %Out, label %Loop
+
+Out:
+  ret void
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2)
+!1 = !DIFile(filename: "/tmp/kk.c", directory: "/tmp")
+!2 = !{}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"PIC Level", i32 2}
+!5 = !{!"clang version 3.9.0 "}
+!6 = distinct !DISubprogram(name: "success", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !2)
+!7 = !DISubroutineType(types: !2)
+!8 = !DILocation(line: 2, column: 20, scope: !6)
+!9 = !DILocation(line: 2, column: 21, scope: !6)
+!10 = !DILocation(line: 2, column: 22, scope: !6)
--- a/test/Transforms/LICM/preheader-safe.ll
+++ b/test/Transforms/LICM/preheader-safe.ll
@ -1,5 +1,5 @@
 ; RUN: opt -S -licm < %s | FileCheck %s
-; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -S %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s

 declare void @use_nothrow(i64 %a) nounwind
 declare void @use(i64 %a)
--- a/test/Transforms/LICM/promote-order.ll
+++ b/test/Transforms/LICM/promote-order.ll
@ -1,5 +1,5 @@
 ; RUN: opt -tbaa -basicaa -licm -S < %s | FileCheck %s
-; RUN: opt -aa-pipeline=type-based-aa,basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -S %s | FileCheck %s
+; RUN: opt -aa-pipeline=type-based-aa,basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s

 ; LICM should keep the stores in their original order when it sinks/promotes them.
 ; rdar://12045203
--- a/test/Transforms/LICM/promote-tls.ll
+++ b/test/Transforms/LICM/promote-tls.ll
@ -1,5 +1,5 @@
 ; RUN: opt -tbaa -basicaa -licm -S < %s | FileCheck %s
-; RUN: opt -aa-pipeline=type-based-aa,basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -S %s | FileCheck %s
+; RUN: opt -aa-pipeline=type-based-aa,basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s

 ; If we can prove a local is thread local, we can insert stores during
 ; promotion which wouldn't be legal otherwise.
--- a/test/Transforms/LICM/scalar-promote-memmodel.ll
+++ b/test/Transforms/LICM/scalar-promote-memmodel.ll
@ -1,5 +1,5 @@
 ; RUN: opt < %s -basicaa -licm -S | FileCheck %s
-; RUN: opt -aa-pipeline=type-based-aa,basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -S %s | FileCheck %s
+; RUN: opt -aa-pipeline=type-based-aa,basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s

 ; Make sure we don't hoist a conditionally-executed store out of the loop;
 ; it would violate the concurrency memory model
--- a/test/Transforms/LICM/scalar_promote-unwind.ll
+++ b/test/Transforms/LICM/scalar_promote-unwind.ll
@ -1,5 +1,5 @@
 ; RUN: opt < %s -basicaa -licm -S | FileCheck %s
-; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -S %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s

 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
--- a/test/Transforms/LICM/scalar_promote.ll
+++ b/test/Transforms/LICM/scalar_promote.ll
@ -1,5 +1,5 @@
 ; RUN: opt < %s -basicaa -tbaa -licm -S | FileCheck %s
-; RUN: opt -aa-pipeline=type-based-aa,basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -S %s | FileCheck %s
+; RUN: opt -aa-pipeline=type-based-aa,basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"

@X = global i32 7   ; <i32*> [#uses=4]
--- a/test/Transforms/LICM/speculate.ll
+++ b/test/Transforms/LICM/speculate.ll
@ -1,5 +1,5 @@
 ; RUN: opt -S -licm < %s | FileCheck %s
-; RUN: opt -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -S %s | FileCheck %s
+; RUN: opt -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s

 ; UDiv is safe to speculate if the denominator is known non-zero.

--- a/test/Transforms/LICM/volatile-alias.ll
+++ b/test/Transforms/LICM/volatile-alias.ll
@ -1,5 +1,5 @@
 ; RUN: opt -basicaa -sroa -loop-rotate -licm -S < %s | FileCheck %s
-; RUN: opt -basicaa -sroa -loop-rotate %s | opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -S | FileCheck %s
+; RUN: opt -basicaa -sroa -loop-rotate %s | opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S | FileCheck %s
 ; The objects *p and *q are aliased to each other, but even though *q is
 ; volatile, *p can be considered invariant in the loop. Check if it is moved
 ; out of the loop.