From 3984a320d20887d14f37e028faf68f8dae3492bd Mon Sep 17 00:00:00 2001 From: Fedor Sergeev Date: Wed, 31 Oct 2018 14:33:14 +0000 Subject: [PATCH] [LoopUnroll] allow customization for new-pass-manager version of LoopUnroll Unlike its legacy counterpart new pass manager's LoopUnrollPass does not provide any means to select which flavors of unroll to run (runtime, peeling, partial), relying on global defaults. In some cases having ability to run a restricted LoopUnroll that does more than LoopFullUnroll is needed. Introduced LoopUnrollOptions to select optional unroll behaviors. Added 'unroll' to PassRegistry mainly for the sake of testing. Reviewers: chandlerc, tejohnson Differential Revision: https://reviews.llvm.org/D53440 llvm-svn: 345723 --- .../llvm/Transforms/Scalar/LoopUnrollPass.h | 60 ++++++++++++++++++- lib/Passes/PassBuilder.cpp | 2 +- lib/Passes/PassRegistry.def | 1 + lib/Transforms/Scalar/LoopUnrollPass.cpp | 21 +++---- test/Transforms/LoopUnroll/peel-loop.ll | 2 + test/Transforms/LoopUnroll/runtime-loop.ll | 34 ++++++++++- 6 files changed, 104 insertions(+), 16 deletions(-) diff --git a/include/llvm/Transforms/Scalar/LoopUnrollPass.h b/include/llvm/Transforms/Scalar/LoopUnrollPass.h index 9848e0d54f2..20c9a26b98c 100644 --- a/include/llvm/Transforms/Scalar/LoopUnrollPass.h +++ b/include/llvm/Transforms/Scalar/LoopUnrollPass.h @@ -10,6 +10,7 @@ #ifndef LLVM_TRANSFORMS_SCALAR_LOOPUNROLLPASS_H #define LLVM_TRANSFORMS_SCALAR_LOOPUNROLLPASS_H +#include "llvm/ADT/Optional.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/IR/PassManager.h" @@ -30,16 +31,71 @@ public: LoopStandardAnalysisResults &AR, LPMUpdater &U); }; +/// A set of parameters used to control various transforms performed by the +/// LoopUnroll pass. Each of the boolean parameters can be set to: +/// true - enabling the transformation. +/// false - disabling the transformation. +/// None - relying on a global default. 
+/// +/// There is also OptLevel parameter, which is used for additional loop unroll +/// tuning. +/// +/// Intended use is to create a default object, modify parameters with +/// additional setters and then pass it to LoopUnrollPass. +/// +struct LoopUnrollOptions { + Optional<bool> AllowPartial; + Optional<bool> AllowPeeling; + Optional<bool> AllowRuntime; + Optional<bool> AllowUpperBound; + int OptLevel; + + LoopUnrollOptions(int OptLevel = 2) : OptLevel(OptLevel) {} + + /// Enables or disables partial unrolling. When disabled only full unrolling + /// is allowed. + LoopUnrollOptions &setPartial(bool Partial) { + AllowPartial = Partial; + return *this; + } + + /// Enables or disables unrolling of loops with runtime trip count. + LoopUnrollOptions &setRuntime(bool Runtime) { + AllowRuntime = Runtime; + return *this; + } + + /// Enables or disables loop peeling. + LoopUnrollOptions &setPeeling(bool Peeling) { + AllowPeeling = Peeling; + return *this; + } + + /// Enables or disables the use of trip count upper bound + /// in loop unrolling. + LoopUnrollOptions &setUpperBound(bool UpperBound) { + AllowUpperBound = UpperBound; + return *this; + } + + // Sets "optimization level" tuning parameter for loop unrolling. + LoopUnrollOptions &setOptLevel(int O) { + OptLevel = O; + return *this; + } +}; + /// Loop unroll pass that will support both full and partial unrolling. /// It is a function pass to have access to function and module analyses. /// It will also put loops into canonical form (simplified and LCSSA). class LoopUnrollPass : public PassInfoMixin<LoopUnrollPass> { - const int OptLevel; + LoopUnrollOptions UnrollOpts; public: /// This uses the target information (or flags) to control the thresholds for /// different unrolling stategies but supports all of them. 
- explicit LoopUnrollPass(int OptLevel = 2) : OptLevel(OptLevel) {} + explicit LoopUnrollPass(LoopUnrollOptions UnrollOpts = {}) + : UnrollOpts(UnrollOpts) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp index c23c8c8d47a..0c6dfff06f1 100644 --- a/lib/Passes/PassBuilder.cpp +++ b/lib/Passes/PassBuilder.cpp @@ -830,7 +830,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, OptimizePM.addPass( createFunctionToLoopPassAdaptor(LoopUnrollAndJamPass(Level))); } - OptimizePM.addPass(LoopUnrollPass(Level)); + OptimizePM.addPass(LoopUnrollPass(LoopUnrollOptions(Level))); OptimizePM.addPass(InstCombinePass()); OptimizePM.addPass(RequireAnalysisPass()); OptimizePM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), DebugLogging)); diff --git a/lib/Passes/PassRegistry.def b/lib/Passes/PassRegistry.def index 8de4541a772..99df2ad2719 100644 --- a/lib/Passes/PassRegistry.def +++ b/lib/Passes/PassRegistry.def @@ -215,6 +215,7 @@ FUNCTION_PASS("sroa", SROA()) FUNCTION_PASS("tailcallelim", TailCallElimPass()) FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass()) FUNCTION_PASS("unroll", LoopUnrollPass()) +FUNCTION_PASS("unroll<peeling;no-runtime>",LoopUnrollPass(LoopUnrollOptions().setPeeling(true).setRuntime(false))) FUNCTION_PASS("verify", VerifierPass()) FUNCTION_PASS("verify", DominatorTreeVerifierPass()) FUNCTION_PASS("verify", LoopVerifierPass()) diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 34d2b2a8b27..d10dae124a7 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -1333,23 +1333,20 @@ PreservedAnalyses LoopUnrollPass::run(Function &F, Loop *ParentL = L.getParentLoop(); #endif - // The API here is quite complex to call, but there are only two interesting - // states we support: partial and full (or "simple") unrolling. 
However, to - // enable these things we actually pass "None" in for the optional to avoid - // providing an explicit choice. - Optional<bool> AllowPartialParam, RuntimeParam, UpperBoundParam, - AllowPeeling; // Check if the profile summary indicates that the profiled application // has a huge working set size, in which case we disable peeling to avoid // bloating it further. + Optional<bool> LocalAllowPeeling = UnrollOpts.AllowPeeling; if (PSI && PSI->hasHugeWorkingSetSize()) - AllowPeeling = false; + LocalAllowPeeling = false; std::string LoopName = L.getName(); - LoopUnrollResult Result = - tryToUnrollLoop(&L, DT, &LI, SE, TTI, AC, ORE, - /*PreserveLCSSA*/ true, OptLevel, /*Count*/ None, - /*Threshold*/ None, AllowPartialParam, RuntimeParam, - UpperBoundParam, AllowPeeling); + // The API here is quite complex to call and we allow to select some + // flavors of unrolling during construction time (by setting UnrollOpts). + LoopUnrollResult Result = tryToUnrollLoop( + &L, DT, &LI, SE, TTI, AC, ORE, + /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, /*Count*/ None, + /*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime, + UnrollOpts.AllowUpperBound, LocalAllowPeeling); Changed |= Result != LoopUnrollResult::Unmodified; // The parent must not be damaged by unrolling! 
diff --git a/test/Transforms/LoopUnroll/peel-loop.ll b/test/Transforms/LoopUnroll/peel-loop.ll index d535414b3eb..eb3d29cb494 100644 --- a/test/Transforms/LoopUnroll/peel-loop.ll +++ b/test/Transforms/LoopUnroll/peel-loop.ll @@ -1,4 +1,6 @@ ; RUN: opt < %s -S -loop-unroll -unroll-force-peel-count=3 -verify-dom-info -simplifycfg -instcombine | FileCheck %s +; RUN: opt < %s -S -passes='require,unroll,simplify-cfg,instcombine' -unroll-force-peel-count=3 -verify-dom-info | FileCheck %s +; RUN: opt < %s -S -passes='require,unroll<peeling;no-runtime>,simplify-cfg,instcombine' -unroll-force-peel-count=3 -verify-dom-info | FileCheck %s ; Basic loop peeling - check that we can peel-off the first 3 loop iterations ; when explicitly requested. diff --git a/test/Transforms/LoopUnroll/runtime-loop.ll b/test/Transforms/LoopUnroll/runtime-loop.ll index 34eaa4ec333..19072855d25 100644 --- a/test/Transforms/LoopUnroll/runtime-loop.ll +++ b/test/Transforms/LoopUnroll/runtime-loop.ll @@ -1,8 +1,16 @@ ; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefixes=EPILOG,COMMON ; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefixes=PROLOG,COMMON - +; ; RUN: opt < %s -S -passes='require,unroll' -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefixes=EPILOG,COMMON ; RUN: opt < %s -S -passes='require,unroll' -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefixes=PROLOG,COMMON +; +; Restricted versions of unroll (unroll<peeling;no-runtime>, unroll-full) should not be doing runtime unrolling +; even if it is globally enabled through -unroll-runtime option +; +; RUN: opt < %s -S -passes='require,unroll<peeling;no-runtime>' -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefixes=NOEPILOG,COMMON +; RUN: opt < %s -S -passes='require,unroll<peeling;no-runtime>' -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefixes=NOPROLOG,COMMON +; RUN: opt < %s -S 
-passes='require,loop(unroll-full)' -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefixes=NOEPILOG,COMMON +; RUN: opt < %s -S -passes='require,loop(unroll-full)' -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefixes=NOPROLOG,COMMON target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -14,22 +22,32 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; EPILOG: %lcmp.mod = icmp ne i32 %xtraiter, 0 ; EPILOG: br i1 %lcmp.mod, label %for.body.epil.preheader, label %for.end.loopexit +; NOEPILOG-NOT: %xtraiter = and i32 %n + ; PROLOG: %xtraiter = and i32 %n ; PROLOG: %lcmp.mod = icmp ne i32 %xtraiter, 0 ; PROLOG: br i1 %lcmp.mod, label %for.body.prol.preheader, label %for.body.prol.loopexit +; NOPROLOG-NOT: %xtraiter = and i32 %n + ; EPILOG: for.body.epil: ; EPILOG: %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %for.body.epil ], [ %indvars.iv.unr, %for.body.epil.preheader ] ; EPILOG: %epil.iter.sub = sub i32 %epil.iter, 1 ; EPILOG: %epil.iter.cmp = icmp ne i32 %epil.iter.sub, 0 ; EPILOG: br i1 %epil.iter.cmp, label %for.body.epil, label %for.end.loopexit.epilog-lcssa, !llvm.loop !0 +; NOEPILOG: for.body: +; NOEPILOG-NOT: for.body.epil: + ; PROLOG: for.body.prol: ; PROLOG: %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %for.body.prol ], [ 0, %for.body.prol.preheader ] ; PROLOG: %prol.iter.sub = sub i32 %prol.iter, 1 ; PROLOG: %prol.iter.cmp = icmp ne i32 %prol.iter.sub, 0 ; PROLOG: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.prol.loopexit.unr-lcssa, !llvm.loop !0 +; NOPROLOG: for.body: +; NOPROLOG-NOT: for.body.prol: + define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly { entry: @@ -86,6 +104,8 @@ for.end: ; preds = %for.body ; COMMON-LABEL: @foo( ; EPILOG: bb72.2: ; PROLOG: bb72.2: +; NOEPILOG-NOT: bb72.2: +; NOPROLOG-NOT: 
bb72.2: define void @foo(i32 %trips) { entry: @@ -111,9 +131,15 @@ cond_true138: ; EPILOG: for.body.epil: ; EPILOG: br i1 %epil.iter.cmp, label %for.body.epil, label %for.cond.for.end_crit_edge.epilog-lcssa +; NOEPILOG: for.body: +; NOEPILOG-NOT: for.body.epil: + ; PROLOG: for.body.prol: ; PROLOG: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.prol.loopexit +; NOPROLOG: for.body: +; NOPROLOG-NOT: for.body.prol: + define zeroext i16 @down(i16* nocapture %p, i32 %len) nounwind uwtable readonly { entry: %cmp2 = icmp eq i32 %len, 0 @@ -146,9 +172,15 @@ for.end: ; preds = %for.cond.for.end_cr ; EPILOG: for.body: ; EPILOG-NOT: for.body.epil: +; NOEPILOG: for.body: +; NOEPILOG-NOT: for.body.epil: + ; PROLOG: for.body: ; PROLOG-NOT: for.body.prol: +; NOPROLOG: for.body: +; NOPROLOG-NOT: for.body.prol: + define zeroext i16 @test2(i16* nocapture %p, i32 %len) nounwind uwtable readonly { entry: %cmp2 = icmp eq i32 %len, 0