1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 19:12:56 +02:00

[NFCI][LoopUnrollAndJam] Changing LoopUnrollAndJamPass to a function

pass.

Summary: This patch changes LoopUnrollAndJamPass to a function pass, and
keeps the loops traversal order same as defined in
FunctionToLoopPassAdaptor LoopPassManager.h.

The next patch will change the loop traversal to outer to inner order,
so more loops can be transform.

Discussion in llvm-dev mailing list:
https://groups.google.com/forum/#!topic/llvm-dev/LF4rUjkVI2g
Reviewer: dmgreen, jdoerfert, Meinersbur, kbarton, bmahjour, etiotto
Reviewed By: dmgreen
Subscribers: hiraditya, zzheng, llvm-commits
Tag: LLVM
Differential Revision: https://reviews.llvm.org/D72230
This commit is contained in:
Whitney Tsang 2020-01-09 16:15:53 +00:00
parent 14d68637c8
commit 55a5570570
8 changed files with 69 additions and 49 deletions

View File

@ -15,9 +15,7 @@
namespace llvm {
class Loop;
struct LoopStandardAnalysisResults;
class LPMUpdater;
class Function;
/// A simple loop rotation transformation.
class LoopUnrollAndJamPass : public PassInfoMixin<LoopUnrollAndJamPass> {
@ -25,8 +23,7 @@ class LoopUnrollAndJamPass : public PassInfoMixin<LoopUnrollAndJamPass> {
public:
explicit LoopUnrollAndJamPass(int OptLevel = 2) : OptLevel(OptLevel) {}
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR, LPMUpdater &U);
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
} // end namespace llvm

View File

@ -970,8 +970,7 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline(
// across the loop nests.
// We do UnrollAndJam in a separate LPM to ensure it happens before unroll
if (EnableUnrollAndJam && PTO.LoopUnrolling) {
OptimizePM.addPass(
createFunctionToLoopPassAdaptor(LoopUnrollAndJamPass(Level)));
OptimizePM.addPass(LoopUnrollAndJamPass(Level));
}
OptimizePM.addPass(LoopUnrollPass(
LoopUnrollOptions(Level, /*OnlyWhenForced=*/!PTO.LoopUnrolling,

View File

@ -235,6 +235,7 @@ FUNCTION_PASS("spec-phis", SpeculateAroundPHIsPass())
FUNCTION_PASS("sroa", SROA())
FUNCTION_PASS("tailcallelim", TailCallElimPass())
FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass())
FUNCTION_PASS("unroll-and-jam", LoopUnrollAndJamPass())
FUNCTION_PASS("verify", VerifierPass())
FUNCTION_PASS("verify<domtree>", DominatorTreeVerifierPass())
FUNCTION_PASS("verify<loops>", LoopVerifierPass())
@ -307,7 +308,6 @@ LOOP_PASS("simplify-cfg", LoopSimplifyCFGPass())
LOOP_PASS("strength-reduce", LoopStrengthReducePass())
LOOP_PASS("indvars", IndVarSimplifyPass())
LOOP_PASS("irce", IRCEPass())
LOOP_PASS("unroll-and-jam", LoopUnrollAndJamPass())
LOOP_PASS("unroll-full", LoopFullUnrollPass())
LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs()))
LOOP_PASS("print<ddg>", DDGAnalysisPrinterPass(dbgs()))

View File

@ -427,51 +427,76 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
return UnrollResult;
}
static bool tryToUnrollAndJamLoop(Function &F, DominatorTree &DT, LoopInfo &LI,
ScalarEvolution &SE,
const TargetTransformInfo &TTI,
AssumptionCache &AC, DependenceInfo &DI,
OptimizationRemarkEmitter &ORE,
int OptLevel) {
bool DidSomething = false;
// The loop unroll and jam pass requires loops to be in simplified form, and also needs LCSSA.
// Since simplification may add new inner loops, it has to run before the
// legality and profitability checks. This means running the loop unroll and jam pass
// will simplify all loops, regardless of whether anything end up being
// unroll and jammed.
for (auto &L : LI) {
DidSomething |=
simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, false /* PreserveLCSSA */);
DidSomething |= formLCSSARecursively(*L, DT, &LI, &SE);
}
SmallPriorityWorklist<Loop *, 4> Worklist;
internal::appendLoopsToWorklist(reverse(LI), Worklist);
while (!Worklist.empty()) {
Loop *L = Worklist.pop_back_val();
formLCSSA(*L, DT, &LI, &SE);
LoopUnrollResult Result =
tryToUnrollAndJamLoop(L, DT, &LI, SE, TTI, AC, DI, ORE, OptLevel);
if (Result != LoopUnrollResult::Unmodified)
DidSomething = true;
}
return DidSomething;
}
namespace {
class LoopUnrollAndJam : public LoopPass {
class LoopUnrollAndJam : public FunctionPass {
public:
static char ID; // Pass ID, replacement for typeid
unsigned OptLevel;
LoopUnrollAndJam(int OptLevel = 2) : LoopPass(ID), OptLevel(OptLevel) {
LoopUnrollAndJam(int OptLevel = 2) : FunctionPass(ID), OptLevel(OptLevel) {
initializeLoopUnrollAndJamPass(*PassRegistry::getPassRegistry());
}
bool runOnLoop(Loop *L, LPPassManager &LPM) override {
if (skipLoop(L))
bool runOnFunction(Function &F) override {
if (skipFunction(F))
return false;
Function &F = *L->getHeader()->getParent();
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
const TargetTransformInfo &TTI =
getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto &DI = getAnalysis<DependenceAnalysisWrapperPass>().getDI();
// For the old PM, we can't use OptimizationRemarkEmitter as an analysis
// pass. Function analyses need to be preserved across loop transformations
// but ORE cannot be preserved (see comment before the pass definition).
OptimizationRemarkEmitter ORE(&F);
auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
LoopUnrollResult Result =
tryToUnrollAndJamLoop(L, DT, LI, SE, TTI, AC, DI, ORE, OptLevel);
if (Result == LoopUnrollResult::FullyUnrolled)
LPM.markLoopAsDeleted(*L);
return Result != LoopUnrollResult::Unmodified;
return tryToUnrollAndJamLoop(F, DT, LI, SE, TTI, AC, DI, ORE, OptLevel);
}
/// This transformation requires natural loop information & requires that
/// loop preheaders be inserted into the CFG...
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DependenceAnalysisWrapperPass>();
getLoopAnalysisUsage(AU);
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
}
};
@ -481,10 +506,13 @@ char LoopUnrollAndJam::ID = 0;
INITIALIZE_PASS_BEGIN(LoopUnrollAndJam, "loop-unroll-and-jam",
"Unroll and Jam loops", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(LoopUnrollAndJam, "loop-unroll-and-jam",
"Unroll and Jam loops", false, false)
@ -492,26 +520,18 @@ Pass *llvm::createLoopUnrollAndJamPass(int OptLevel) {
return new LoopUnrollAndJam(OptLevel);
}
PreservedAnalyses LoopUnrollAndJamPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &) {
const auto &FAM =
AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
Function *F = L.getHeader()->getParent();
PreservedAnalyses LoopUnrollAndJamPass::run(Function &F,
FunctionAnalysisManager &AM) {
ScalarEvolution &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
DependenceInfo &DI = AM.getResult<DependenceAnalysis>(F);
OptimizationRemarkEmitter &ORE =
AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
// FIXME: This should probably be optional rather than required.
if (!ORE)
report_fatal_error(
"LoopUnrollAndJamPass: OptimizationRemarkEmitterAnalysis not cached at "
"a higher level");
DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI);
LoopUnrollResult Result = tryToUnrollAndJamLoop(
&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, DI, *ORE, OptLevel);
if (Result == LoopUnrollResult::Unmodified)
if (!tryToUnrollAndJamLoop(F, DT, LI, SE, TTI, AC, DI, ORE, OptLevel))
return PreservedAnalyses::all();
return getLoopPassPreservedAnalyses();

View File

@ -1,4 +1,5 @@
; RUN: opt -basicaa -loop-unroll-and-jam -allow-unroll-and-jam -unroll-and-jam-count=4 < %s -S | FileCheck %s
; RUN: opt -aa-pipeline=basic-aa -passes='unroll-and-jam' -allow-unroll-and-jam -unroll-and-jam-count=4 < %s -S | FileCheck %s
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"

View File

@ -1,4 +1,5 @@
; RUN: opt -loop-unroll-and-jam -allow-unroll-and-jam -unroll-and-jam-count=4 -pass-remarks=loop-unroll-and-jam < %s -S 2>&1 | FileCheck %s
; RUN: opt -passes='unroll-and-jam' -allow-unroll-and-jam -unroll-and-jam-count=4 -pass-remarks=loop-unroll-and-jam < %s -S 2>&1 | FileCheck %s
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"

View File

@ -1,4 +1,5 @@
; RUN: opt -loop-unroll-and-jam -allow-unroll-and-jam -unroll-runtime -unroll-partial-threshold=60 < %s -S | FileCheck %s
; RUN: opt -aa-pipeline=type-based-aa,basic-aa -passes='unroll-and-jam' -allow-unroll-and-jam -unroll-runtime -unroll-partial-threshold=60 < %s -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

View File

@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -basicaa -tbaa -loop-unroll-and-jam -allow-unroll-and-jam -unroll-and-jam-count=4 -unroll-remainder < %s -S | FileCheck %s
; RUN: opt -aa-pipeline=type-based-aa,basic-aa -passes='unroll-and-jam' -allow-unroll-and-jam -unroll-and-jam-count=4 -unroll-remainder < %s -S | FileCheck %s
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"