From 76fb79614fe3cf353e4a517495601a91a9a63412 Mon Sep 17 00:00:00 2001 From: Sameer Sahasrabuddhe Date: Tue, 16 Feb 2021 10:26:45 +0530 Subject: [PATCH] [NewPM] Introduce (GPU)DivergenceAnalysis in the new pass manager The GPUDivergenceAnalysis is now renamed to just "DivergenceAnalysis" since there is no conflict with LegacyDivergenceAnalysis. In the legacy PM, this analysis can only be used through the legacy DA serving as a wrapper. It is now made available as a pass in the new PM, and has no relation with the legacy DA. The new DA currently cannot handle irreducible control flow; its presence can cause the analysis to run indefinitely. The analysis is now modified to detect this and report all instructions in the function as divergent. This is super conservative, but allows the analysis to be used without hanging the compiler. Reviewed By: aeubanks Differential Revision: https://reviews.llvm.org/D96615 --- include/llvm/Analysis/DivergenceAnalysis.h | 72 +++++++--- .../llvm/Analysis/LegacyDivergenceAnalysis.h | 4 +- lib/Analysis/DivergenceAnalysis.cpp | 129 ++++++++++-------- lib/Analysis/LegacyDivergenceAnalysis.cpp | 3 +- lib/Passes/PassBuilder.cpp | 1 + lib/Passes/PassRegistry.def | 2 + .../AMDGPU/always_uniform.ll | 5 +- .../DivergenceAnalysis/AMDGPU/atomics.ll | 3 +- .../AMDGPU/b42473-r1-crash.ll | 5 +- .../AMDGPU/control-flow-intrinsics.ll | 15 +- .../AMDGPU/hidden_diverge.ll | 6 +- .../AMDGPU/hidden_loopdiverge.ll | 13 +- .../DivergenceAnalysis/AMDGPU/inline-asm.ll | 28 ++-- .../DivergenceAnalysis/AMDGPU/interp_f16.ll | 3 +- .../DivergenceAnalysis/AMDGPU/intrinsics.ll | 3 +- .../DivergenceAnalysis/AMDGPU/irreducible.ll | 14 +- .../AMDGPU/join-at-loop-exit.ll | 3 +- .../DivergenceAnalysis/AMDGPU/kernel-args.ll | 9 +- .../AMDGPU/llvm.amdgcn.buffer.atomic.ll | 3 +- .../AMDGPU/llvm.amdgcn.image.atomic.ll | 3 +- .../AMDGPU/no-return-blocks.ll | 3 +- .../DivergenceAnalysis/AMDGPU/phi-undef.ll | 3 +- .../AMDGPU/propagate-loop-live-out.ll | 3 +- .../AMDGPU/temporal_diverge.ll | 13 +- .../AMDGPU/trivial-join-at-loop-exit.ll | 3 +- .../AMDGPU/unreachable-loop-block.ll | 3 +- .../AMDGPU/workitem-intrinsics.ll | 3 +- .../DivergenceAnalysis/NVPTX/daorder.ll | 5 +- .../DivergenceAnalysis/NVPTX/diverge.ll | 15 +- .../NVPTX/hidden_diverge.ll | 5 +- .../DivergenceAnalysis/NVPTX/irreducible.ll | 12 +- .../AMDGPU/atomics.ll | 2 +- .../AMDGPU/intrinsics.ll | 2 +- .../AMDGPU/kernel-args.ll | 2 +- .../AMDGPU/llvm.amdgcn.buffer.atomic.ll | 2 +- .../AMDGPU/llvm.amdgcn.image.atomic.ll | 2 +- .../LegacyDivergenceAnalysis/AMDGPU/loads.ll | 2 +- .../AMDGPU/no-return-blocks.ll | 2 +- .../AMDGPU/phi-undef.ll | 2 +- .../AMDGPU/unreachable-loop-block.ll | 2 +- .../AMDGPU/workitem-intrinsics.ll | 2 +- .../LegacyDivergenceAnalysis/NVPTX/diverge.ll | 2 +- tools/opt/opt.cpp | 2 +- unittests/Analysis/DivergenceAnalysisTest.cpp | 16 +-- 44 files changed, 263 insertions(+), 169 deletions(-) diff --git a/include/llvm/Analysis/DivergenceAnalysis.h b/include/llvm/Analysis/DivergenceAnalysis.h index a6530b2eb49..0b36ef35aa5 100644 --- a/include/llvm/Analysis/DivergenceAnalysis.h +++ b/include/llvm/Analysis/DivergenceAnalysis.h @@ -34,7 +34,7 @@ class TargetTransformInfo; /// This analysis propagates divergence in a data-parallel context from sources /// of divergence to all users. It requires reducible CFGs. All assignments /// should be in SSA form. -class DivergenceAnalysis { +class DivergenceAnalysisImpl { public: /// \brief This instance will analyze the whole function \p F or the loop \p /// RegionLoop. @@ -43,9 +43,9 @@ public: /// Otherwise the whole function is analyzed. /// \param IsLCSSAForm whether the analysis may assume that the IR in the /// region in in LCSSA form. - DivergenceAnalysis(const Function &F, const Loop *RegionLoop, - const DominatorTree &DT, const LoopInfo &LI, - SyncDependenceAnalysis &SDA, bool IsLCSSAForm); + DivergenceAnalysisImpl(const Function &F, const Loop *RegionLoop, + const DominatorTree &DT, const LoopInfo &LI, + SyncDependenceAnalysis &SDA, bool IsLCSSAForm); /// \brief The loop that defines the analyzed region (if any). const Loop *getRegionLoop() const { return RegionLoop; } @@ -82,8 +82,6 @@ public: /// divergent. bool isDivergentUse(const Use &U) const; - void print(raw_ostream &OS, const Module *) const; - private: /// \brief Mark \p Term as divergent and push all Instructions that become /// divergent as a result on the worklist. @@ -152,28 +150,39 @@ private: std::vector Worklist; }; -/// \brief Divergence analysis frontend for GPU kernels. -class GPUDivergenceAnalysis { - SyncDependenceAnalysis SDA; - DivergenceAnalysis DA; +class DivergenceInfo { + Function &F; + + // If the function contains an irreducible region the divergence + // analysis can run indefinitely. We set ContainsIrreducible and no + // analysis is actually performed on the function. All values in + // this function are conservatively reported as divergent instead. + bool ContainsIrreducible; + std::unique_ptr SDA; + std::unique_ptr DA; public: - /// Runs the divergence analysis on @F, a GPU kernel - GPUDivergenceAnalysis(Function &F, const DominatorTree &DT, - const PostDominatorTree &PDT, const LoopInfo &LI, - const TargetTransformInfo &TTI); + DivergenceInfo(Function &F, const DominatorTree &DT, + const PostDominatorTree &PDT, const LoopInfo &LI, + const TargetTransformInfo &TTI, bool KnownReducible); /// Whether any divergence was detected. - bool hasDivergence() const { return DA.hasDetectedDivergence(); } + bool hasDivergence() const { + return ContainsIrreducible || DA->hasDetectedDivergence(); + } /// The GPU kernel this analysis result is for - const Function &getFunction() const { return DA.getFunction(); } + const Function &getFunction() const { return F; } /// Whether \p V is divergent at its definition. - bool isDivergent(const Value &V) const; + bool isDivergent(const Value &V) const { + return ContainsIrreducible || DA->isDivergent(V); + } /// Whether \p U is divergent. Uses of a uniform value can be divergent. - bool isDivergentUse(const Use &U) const; + bool isDivergentUse(const Use &U) const { + return ContainsIrreducible || DA->isDivergentUse(U); + } /// Whether \p V is uniform/non-divergent. bool isUniform(const Value &V) const { return !isDivergent(V); } @@ -181,11 +190,32 @@ public: /// Whether \p U is uniform/non-divergent. Uses of a uniform value can be /// divergent. bool isUniformUse(const Use &U) const { return !isDivergentUse(U); } - - /// Print all divergent values in the kernel. - void print(raw_ostream &OS, const Module *) const; }; +/// \brief Divergence analysis frontend for GPU kernels. +class DivergenceAnalysis : public AnalysisInfoMixin { + friend AnalysisInfoMixin; + + static AnalysisKey Key; + +public: + using Result = DivergenceInfo; + + /// Runs the divergence analysis on @F, a GPU kernel + Result run(Function &F, FunctionAnalysisManager &AM); +}; + +/// Printer pass to dump divergence analysis results. +struct DivergenceAnalysisPrinterPass + : public PassInfoMixin { + DivergenceAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {} + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); + +private: + raw_ostream &OS; +}; // class DivergenceAnalysisPrinterPass + } // namespace llvm #endif // LLVM_ANALYSIS_DIVERGENCEANALYSIS_H diff --git a/include/llvm/Analysis/LegacyDivergenceAnalysis.h b/include/llvm/Analysis/LegacyDivergenceAnalysis.h index 6215af3d323..0132c88077d 100644 --- a/include/llvm/Analysis/LegacyDivergenceAnalysis.h +++ b/include/llvm/Analysis/LegacyDivergenceAnalysis.h @@ -20,8 +20,8 @@ #include namespace llvm { +class DivergenceInfo; class Function; -class GPUDivergenceAnalysis; class Module; class raw_ostream; class TargetTransformInfo; @@ -63,7 +63,7 @@ private: const TargetTransformInfo &TTI) const; // (optional) handle to new DivergenceAnalysis - std::unique_ptr gpuDA; + std::unique_ptr gpuDA; // Stores all divergent values. DenseSet DivergentValues; diff --git a/lib/Analysis/DivergenceAnalysis.cpp b/lib/Analysis/DivergenceAnalysis.cpp index 287c1327801..81120b3fe92 100644 --- a/lib/Analysis/DivergenceAnalysis.cpp +++ b/lib/Analysis/DivergenceAnalysis.cpp @@ -31,10 +31,10 @@ // Ralf Karrenberg and Sebastian Hack // CC '12 // -// This DivergenceAnalysis implementation is generic in the sense that it does +// This implementation is generic in the sense that it does // not itself identify original sources of divergence. // Instead specialized adapter classes, (LoopDivergenceAnalysis) for loops and -// (GPUDivergenceAnalysis) for GPU programs, identify the sources of divergence +// (DivergenceAnalysis) for functions, identify the sources of divergence // (e.g., special variables that hold the thread ID or the iteration variable). // // The generic implementation propagates divergence to variables that are data @@ -61,7 +61,7 @@ // The sync dependence detection (which branch induces divergence in which join // points) is implemented in the SyncDependenceAnalysis. // -// The current DivergenceAnalysis implementation has the following limitations: +// The current implementation has the following limitations: // 1. intra-procedural. It conservatively considers the arguments of a // non-kernel-entry function and the return value of a function call as // divergent. @@ -73,6 +73,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/DivergenceAnalysis.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/PostDominators.h" @@ -87,16 +88,15 @@ using namespace llvm; -#define DEBUG_TYPE "divergence-analysis" +#define DEBUG_TYPE "divergence" -// class DivergenceAnalysis -DivergenceAnalysis::DivergenceAnalysis( +DivergenceAnalysisImpl::DivergenceAnalysisImpl( const Function &F, const Loop *RegionLoop, const DominatorTree &DT, const LoopInfo &LI, SyncDependenceAnalysis &SDA, bool IsLCSSAForm) : F(F), RegionLoop(RegionLoop), DT(DT), LI(LI), SDA(SDA), IsLCSSAForm(IsLCSSAForm) {} -bool DivergenceAnalysis::markDivergent(const Value &DivVal) { +bool DivergenceAnalysisImpl::markDivergent(const Value &DivVal) { if (isAlwaysUniform(DivVal)) return false; assert(isa(DivVal) || isa(DivVal)); @@ -104,12 +104,12 @@ bool DivergenceAnalysis::markDivergent(const Value &DivVal) { return DivergentValues.insert(&DivVal).second; } -void DivergenceAnalysis::addUniformOverride(const Value &UniVal) { +void DivergenceAnalysisImpl::addUniformOverride(const Value &UniVal) { UniformOverrides.insert(&UniVal); } -bool DivergenceAnalysis::isTemporalDivergent(const BasicBlock &ObservingBlock, - const Value &Val) const { +bool DivergenceAnalysisImpl::isTemporalDivergent( + const BasicBlock &ObservingBlock, const Value &Val) const { const auto *Inst = dyn_cast(&Val); if (!Inst) return false; @@ -125,15 +125,15 @@ bool DivergenceAnalysis::isTemporalDivergent(const BasicBlock &ObservingBlock, return false; } -bool DivergenceAnalysis::inRegion(const Instruction &I) const { +bool DivergenceAnalysisImpl::inRegion(const Instruction &I) const { return I.getParent() && inRegion(*I.getParent()); } -bool DivergenceAnalysis::inRegion(const BasicBlock &BB) const { +bool DivergenceAnalysisImpl::inRegion(const BasicBlock &BB) const { return (!RegionLoop && BB.getParent() == &F) || RegionLoop->contains(&BB); } -void DivergenceAnalysis::pushUsers(const Value &V) { +void DivergenceAnalysisImpl::pushUsers(const Value &V) { const auto *I = dyn_cast(&V); if (I && I->isTerminator()) { @@ -166,8 +166,8 @@ static const Instruction *getIfCarriedInstruction(const Use &U, return I; } -void DivergenceAnalysis::analyzeTemporalDivergence(const Instruction &I, - const Loop &OuterDivLoop) { +void DivergenceAnalysisImpl::analyzeTemporalDivergence( + const Instruction &I, const Loop &OuterDivLoop) { if (isAlwaysUniform(I)) return; if (isDivergent(I)) @@ -188,8 +188,8 @@ void DivergenceAnalysis::analyzeTemporalDivergence(const Instruction &I, // marks all users of loop-carried values of the loop headed by LoopHeader as // divergent -void DivergenceAnalysis::analyzeLoopExitDivergence(const BasicBlock &DivExit, - const Loop &OuterDivLoop) { +void DivergenceAnalysisImpl::analyzeLoopExitDivergence( + const BasicBlock &DivExit, const Loop &OuterDivLoop) { // All users are in immediate exit blocks if (IsLCSSAForm) { for (const auto &Phi : DivExit.phis()) { @@ -242,8 +242,8 @@ void DivergenceAnalysis::analyzeLoopExitDivergence(const BasicBlock &DivExit, } while (!TaintStack.empty()); } -void DivergenceAnalysis::propagateLoopExitDivergence(const BasicBlock &DivExit, - const Loop &InnerDivLoop) { +void DivergenceAnalysisImpl::propagateLoopExitDivergence( + const BasicBlock &DivExit, const Loop &InnerDivLoop) { LLVM_DEBUG(dbgs() << "\tpropLoopExitDiv " << DivExit.getName() << "\n"); // Find outer-most loop that does not contain \p DivExit @@ -265,7 +265,7 @@ void DivergenceAnalysis::propagateLoopExitDivergence(const BasicBlock &DivExit, // this is a divergent join point - mark all phi nodes as divergent and push // them onto the stack. -void DivergenceAnalysis::taintAndPushPhiNodes(const BasicBlock &JoinBlock) { +void DivergenceAnalysisImpl::taintAndPushPhiNodes(const BasicBlock &JoinBlock) { LLVM_DEBUG(dbgs() << "taintAndPushPhiNodes in " << JoinBlock.getName() << "\n"); @@ -287,7 +287,7 @@ void DivergenceAnalysis::taintAndPushPhiNodes(const BasicBlock &JoinBlock) { } } -void DivergenceAnalysis::analyzeControlDivergence(const Instruction &Term) { +void DivergenceAnalysisImpl::analyzeControlDivergence(const Instruction &Term) { LLVM_DEBUG(dbgs() << "analyzeControlDiv " << Term.getParent()->getName() << "\n"); @@ -310,7 +310,7 @@ void DivergenceAnalysis::analyzeControlDivergence(const Instruction &Term) { } } -void DivergenceAnalysis::compute() { +void DivergenceAnalysisImpl::compute() { // Initialize worklist. auto DivValuesCopy = DivergentValues; for (const auto *DivVal : DivValuesCopy) { @@ -330,63 +330,82 @@ void DivergenceAnalysis::compute() { } } -bool DivergenceAnalysis::isAlwaysUniform(const Value &V) const { +bool DivergenceAnalysisImpl::isAlwaysUniform(const Value &V) const { return UniformOverrides.contains(&V); } -bool DivergenceAnalysis::isDivergent(const Value &V) const { +bool DivergenceAnalysisImpl::isDivergent(const Value &V) const { return DivergentValues.contains(&V); } -bool DivergenceAnalysis::isDivergentUse(const Use &U) const { +bool DivergenceAnalysisImpl::isDivergentUse(const Use &U) const { Value &V = *U.get(); Instruction &I = *cast(U.getUser()); return isDivergent(V) || isTemporalDivergent(*I.getParent(), V); } -void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const { - if (DivergentValues.empty()) - return; - // iterate instructions using instructions() to ensure a deterministic order. - for (auto &I : instructions(F)) { - if (isDivergent(I)) - OS << "DIVERGENT:" << I << '\n'; +DivergenceInfo::DivergenceInfo(Function &F, const DominatorTree &DT, + const PostDominatorTree &PDT, const LoopInfo &LI, + const TargetTransformInfo &TTI, + bool KnownReducible) + : F(F), ContainsIrreducible(false) { + if (!KnownReducible) { + using RPOTraversal = ReversePostOrderTraversal; + RPOTraversal FuncRPOT(&F); + if (containsIrreducibleCFG(FuncRPOT, LI)) { + ContainsIrreducible = true; + return; + } } -} - -// class GPUDivergenceAnalysis -GPUDivergenceAnalysis::GPUDivergenceAnalysis(Function &F, - const DominatorTree &DT, - const PostDominatorTree &PDT, - const LoopInfo &LI, - const TargetTransformInfo &TTI) - : SDA(DT, PDT, LI), DA(F, nullptr, DT, LI, SDA, /* LCSSA */ false) { + SDA = std::make_unique(DT, PDT, LI); + DA = std::make_unique(F, nullptr, DT, LI, *SDA, + /* LCSSA */ false); for (auto &I : instructions(F)) { if (TTI.isSourceOfDivergence(&I)) { - DA.markDivergent(I); + DA->markDivergent(I); } else if (TTI.isAlwaysUniform(&I)) { - DA.addUniformOverride(I); + DA->addUniformOverride(I); } } for (auto &Arg : F.args()) { if (TTI.isSourceOfDivergence(&Arg)) { - DA.markDivergent(Arg); + DA->markDivergent(Arg); } } - DA.compute(); + DA->compute(); } -bool GPUDivergenceAnalysis::isDivergent(const Value &val) const { - return DA.isDivergent(val); +AnalysisKey DivergenceAnalysis::Key; + +DivergenceAnalysis::Result +DivergenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) { + auto &DT = AM.getResult(F); + auto &PDT = AM.getResult(F); + auto &LI = AM.getResult(F); + auto &TTI = AM.getResult(F); + + return DivergenceInfo(F, DT, PDT, LI, TTI, /* KnownReducible = */ false); } -bool GPUDivergenceAnalysis::isDivergentUse(const Use &use) const { - return DA.isDivergentUse(use); -} - -void GPUDivergenceAnalysis::print(raw_ostream &OS, const Module *mod) const { - OS << "Divergence of kernel " << DA.getFunction().getName() << " {\n"; - DA.print(OS, mod); - OS << "}\n"; +PreservedAnalyses +DivergenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) { + auto &DI = FAM.getResult(F); + OS << "'Divergence Analysis' for function '" << F.getName() << "':\n"; + if (DI.hasDivergence()) { + for (auto &Arg : F.args()) { + OS << (DI.isDivergent(Arg) ? "DIVERGENT: " : " "); + OS << Arg << "\n"; + } + for (auto BI = F.begin(), BE = F.end(); BI != BE; ++BI) { + auto &BB = *BI; + OS << "\n " << BB.getName() << ":\n"; + for (auto &I : BB.instructionsWithoutDebug()) { + OS << (DI.isDivergent(I) ? "DIVERGENT: " : " "); + OS << I << "\n"; + } + } + } + return PreservedAnalyses::all(); } diff --git a/lib/Analysis/LegacyDivergenceAnalysis.cpp b/lib/Analysis/LegacyDivergenceAnalysis.cpp index be8a18a21f2..031bf3bae51 100644 --- a/lib/Analysis/LegacyDivergenceAnalysis.cpp +++ b/lib/Analysis/LegacyDivergenceAnalysis.cpp @@ -339,7 +339,8 @@ bool LegacyDivergenceAnalysis::runOnFunction(Function &F) { if (shouldUseGPUDivergenceAnalysis(F, TTI)) { // run the new GPU divergence analysis auto &LI = getAnalysis().getLoopInfo(); - gpuDA = std::make_unique(F, DT, PDT, LI, TTI); + gpuDA = std::make_unique(F, DT, PDT, LI, TTI, + /* KnownReducible = */ true); } else { // run LLVM's existing DivergenceAnalysis diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp index 92cabeaafa3..f0e9f475b9d 100644 --- a/lib/Passes/PassBuilder.cpp +++ b/lib/Passes/PassBuilder.cpp @@ -32,6 +32,7 @@ #include "llvm/Analysis/Delinearization.h" #include "llvm/Analysis/DemandedBits.h" #include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/Analysis/DivergenceAnalysis.h" #include "llvm/Analysis/DominanceFrontier.h" #include "llvm/Analysis/FunctionPropertiesAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" diff --git a/lib/Passes/PassRegistry.def b/lib/Passes/PassRegistry.def index 877cb9ed13b..5d8a1a07634 100644 --- a/lib/Passes/PassRegistry.def +++ b/lib/Passes/PassRegistry.def @@ -172,6 +172,7 @@ FUNCTION_ANALYSIS("targetir", TM ? TM->getTargetIRAnalysis() : TargetIRAnalysis()) FUNCTION_ANALYSIS("verify", VerifierAnalysis()) FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) +FUNCTION_ANALYSIS("divergence", DivergenceAnalysis()) #ifndef FUNCTION_ALIAS_ANALYSIS #define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) \ @@ -273,6 +274,7 @@ FUNCTION_PASS("print", AssumptionPrinterPass(dbgs())) FUNCTION_PASS("print", BlockFrequencyPrinterPass(dbgs())) FUNCTION_PASS("print", BranchProbabilityPrinterPass(dbgs())) FUNCTION_PASS("print", DependenceAnalysisPrinterPass(dbgs())) +FUNCTION_PASS("print", DivergenceAnalysisPrinterPass(dbgs())) FUNCTION_PASS("print", DominatorTreePrinterPass(dbgs())) FUNCTION_PASS("print", PostDominatorTreePrinterPass(dbgs())) FUNCTION_PASS("print", DelinearizationPrinterPass(dbgs())) diff --git a/test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll b/test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll index 3d948553409..13c7ba78f91 100644 --- a/test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll +++ b/test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll @@ -1,4 +1,5 @@ -; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s ; CHECK-LABEL: for function 'readfirstlane': define amdgpu_kernel void @readfirstlane() { @@ -39,7 +40,7 @@ define i32 @asm_sgpr(i32 %divergent) { ret i32 %sgpr } -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'asm_mixed_sgpr_vgpr': +; CHECK-LABEL: Divergence Analysis' for function 'asm_mixed_sgpr_vgpr': ; CHECK: DIVERGENT: %asm = call { i32, i32 } asm "; def $0, $1, $2", "=s,=v,v"(i32 %divergent) ; CHECK-NEXT: {{^[ \t]+}}%sgpr = extractvalue { i32, i32 } %asm, 0 ; CHECK-NEXT: DIVERGENT: %vgpr = extractvalue { i32, i32 } %asm, 1 diff --git a/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll b/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll index 521d528d795..97ead49a81a 100644 --- a/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll +++ b/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll @@ -1,4 +1,5 @@ -; RUN: opt -mtriple=amdgcn-- -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-- -passes='print' -disable-output %s 2>&1 | FileCheck %s ; CHECK: DIVERGENT: %orig = atomicrmw xchg i32* %ptr, i32 %val seq_cst define i32 @test1(i32* %ptr, i32 %val) #0 { diff --git a/test/Analysis/DivergenceAnalysis/AMDGPU/b42473-r1-crash.ll b/test/Analysis/DivergenceAnalysis/AMDGPU/b42473-r1-crash.ll index cb3e42de363..8fc86e95bb0 100644 --- a/test/Analysis/DivergenceAnalysis/AMDGPU/b42473-r1-crash.ll +++ b/test/Analysis/DivergenceAnalysis/AMDGPU/b42473-r1-crash.ll @@ -1,10 +1,11 @@ -; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s declare i32 @gf2(i32) declare i32 @gf1(i32) define void @tw1(i32 addrspace(4)* noalias nocapture readonly %A, i32 addrspace(4)* noalias nocapture %B) local_unnamed_addr #2 { -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'tw1': +; CHECK: Divergence Analysis' for function 'tw1': ; CHECK: DIVERGENT: i32 addrspace(4)* %A ; CHECK: DIVERGENT: i32 addrspace(4)* %B entry: diff --git a/test/Analysis/DivergenceAnalysis/AMDGPU/control-flow-intrinsics.ll b/test/Analysis/DivergenceAnalysis/AMDGPU/control-flow-intrinsics.ll index 9446a7e8e9f..88503d70ca4 100644 --- a/test/Analysis/DivergenceAnalysis/AMDGPU/control-flow-intrinsics.ll +++ b/test/Analysis/DivergenceAnalysis/AMDGPU/control-flow-intrinsics.ll @@ -1,8 +1,9 @@ -; RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-mesa-mesa3d -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-mesa-mesa3d -passes='print' -disable-output %s 2>&1 | FileCheck %s ; Tests control flow intrinsics that should be treated as uniform -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_if_break': +; CHECK: Divergence Analysis' for function 'test_if_break': ; CHECK: DIVERGENT: %cond = icmp eq i32 %arg0, 0 ; CHECK-NOT: DIVERGENT ; CHECK: ret void @@ -14,7 +15,7 @@ entry: ret void } -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_if': +; CHECK: Divergence Analysis' for function 'test_if': ; CHECK: DIVERGENT: %cond = icmp eq i32 %arg0, 0 ; CHECK-NEXT: DIVERGENT: %if = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %cond) ; CHECK-NEXT: DIVERGENT: %if.bool = extractvalue { i1, i64 } %if, 0 @@ -33,7 +34,7 @@ entry: } ; The result should still be treated as divergent, even with a uniform source. -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_if_uniform': +; CHECK: Divergence Analysis' for function 'test_if_uniform': ; CHECK-NOT: DIVERGENT ; CHECK: DIVERGENT: %if = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %cond) ; CHECK-NEXT: DIVERGENT: %if.bool = extractvalue { i1, i64 } %if, 0 @@ -51,7 +52,7 @@ entry: ret void } -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_loop_uniform': +; CHECK: Divergence Analysis' for function 'test_loop_uniform': ; CHECK: DIVERGENT: %loop = call i1 @llvm.amdgcn.loop.i64(i64 %mask) define amdgpu_ps void @test_loop_uniform(i64 inreg %mask) { entry: @@ -61,7 +62,7 @@ entry: ret void } -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_else': +; CHECK: Divergence Analysis' for function 'test_else': ; CHECK: DIVERGENT: %else = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %mask) ; CHECK: DIVERGENT: %else.bool = extractvalue { i1, i64 } %else, 0 ; CHECK: {{^[ \t]+}}%else.mask = extractvalue { i1, i64 } %else, 1 @@ -77,7 +78,7 @@ entry: } ; This case is probably always broken -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_else_divergent_mask': +; CHECK: Divergence Analysis' for function 'test_else_divergent_mask': ; CHECK: DIVERGENT: %if = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %mask) ; CHECK-NEXT: DIVERGENT: %if.bool = extractvalue { i1, i64 } %if, 0 ; CHECK-NOT: DIVERGENT diff --git a/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll b/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll index 889553d3471..767e2c1bd66 100644 --- a/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll +++ b/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll @@ -1,7 +1,7 @@ -; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s define amdgpu_kernel void @hidden_diverge(i32 %n, i32 %a, i32 %b) #0 { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_diverge' +; CHECK-LABEL: 'Divergence Analysis' for function 'hidden_diverge' entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() %cond.var = icmp slt i32 %tid, 0 @@ -22,7 +22,7 @@ merge: } define amdgpu_kernel void @hidden_loop_ipd(i32 %n, i32 %a, i32 %b) #0 { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_loop_ipd' +; CHECK-LABEL: 'Divergence Analysis' for function 'hidden_loop_ipd' entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() %cond.var = icmp slt i32 %tid, 0 diff --git a/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_loopdiverge.ll b/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_loopdiverge.ll index 774e995c7ca..ee963dde686 100644 --- a/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_loopdiverge.ll +++ b/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_loopdiverge.ll @@ -1,9 +1,10 @@ -; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s ; divergent loop (H
, B) ; the divergent join point in %exit is obscured by uniform control joining in %X define amdgpu_kernel void @hidden_loop_diverge(i32 %n, i32 %a, i32 %b) #0 { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_loop_diverge': +; CHECK-LABEL: Divergence Analysis' for function 'hidden_loop_diverge' ; CHECK-NOT: DIVERGENT: %uni. ; CHECK-NOT: DIVERGENT: br i1 %uni. @@ -45,7 +46,7 @@ exit: ; divergent loop (H
, B) ; the phi nodes in X and Y don't actually receive divergent values define amdgpu_kernel void @unobserved_loop_diverge(i32 %n, i32 %a, i32 %b) #0 { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'unobserved_loop_diverge': +; CHECK-LABEL: Divergence Analysis' for function 'unobserved_loop_diverge': ; CHECK-NOT: DIVERGENT: %uni. ; CHECK-NOT: DIVERGENT: br i1 %uni. @@ -86,7 +87,7 @@ exit: ; the inner loop has no exit to top level. ; the outer loop becomes divergent as its exiting branch in C is control-dependent on the inner loop's divergent loop exit in D. define amdgpu_kernel void @hidden_nestedloop_diverge(i32 %n, i32 %a, i32 %b) #0 { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_nestedloop_diverge': +; CHECK-LABEL: Divergence Analysis' for function 'hidden_nestedloop_diverge': ; CHECK-NOT: DIVERGENT: %uni. ; CHECK-NOT: DIVERGENT: br i1 %uni. @@ -137,7 +138,7 @@ exit: ; the outer loop has no immediately divergent exiting edge. ; the inner exiting edge is exiting to top-level through the outer loop causing both to become divergent. define amdgpu_kernel void @hidden_doublebreak_diverge(i32 %n, i32 %a, i32 %b) #0 { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_doublebreak_diverge': +; CHECK-LABEL: Divergence Analysis' for function 'hidden_doublebreak_diverge': ; CHECK-NOT: DIVERGENT: %uni. ; CHECK-NOT: DIVERGENT: br i1 %uni. @@ -179,7 +180,7 @@ Y: ; divergent loop (G
, L) contained inside a uniform loop (H
, B, G, L , D) define amdgpu_kernel void @hidden_containedloop_diverge(i32 %n, i32 %a, i32 %b) #0 { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_containedloop_diverge': +; CHECK-LABEL: Divergence Analysis' for function 'hidden_containedloop_diverge': ; CHECK-NOT: DIVERGENT: %uni. ; CHECK-NOT: DIVERGENT: br i1 %uni. diff --git a/test/Analysis/DivergenceAnalysis/AMDGPU/inline-asm.ll b/test/Analysis/DivergenceAnalysis/AMDGPU/inline-asm.ll index 8443b82f388..b9af7fcd9ef 100644 --- a/test/Analysis/DivergenceAnalysis/AMDGPU/inline-asm.ll +++ b/test/Analysis/DivergenceAnalysis/AMDGPU/inline-asm.ll @@ -1,50 +1,52 @@ -; RUN: opt -mtriple=amdgcn-unknown-amdhsa -mcpu=tahiti -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s -; RUN: opt -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx908 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-unknown-amdhsa -mcpu=tahiti -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx908 -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -mcpu=tahiti -passes='print' -disable-output %s 2>&1 | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -mcpu=gfx908 -passes='print' -disable-output %s 2>&1 | FileCheck %s ; Make sure nothing crashes on targets with or without AGPRs -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_sgpr_virtreg_output': +; CHECK: Divergence Analysis' for function 'inline_asm_1_sgpr_virtreg_output': ; CHECK-NOT: DIVERGENT define i32 @inline_asm_1_sgpr_virtreg_output() { %sgpr = call i32 asm "s_mov_b32 $0, 0", "=s"() ret i32 %sgpr } -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_sgpr_physreg_output': +; CHECK: Divergence Analysis' for function 'inline_asm_1_sgpr_physreg_output': ; CHECK-NOT: DIVERGENT define i32 @inline_asm_1_sgpr_physreg_output() { %sgpr = call i32 asm "s_mov_b32 s0, 0", "={s0}"() ret i32 %sgpr } -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_vgpr_virtreg_output': +; CHECK: Divergence Analysis' for function 'inline_asm_1_vgpr_virtreg_output': ; CHECK: DIVERGENT: %vgpr = call i32 asm "v_mov_b32 $0, 0", "=v"() define i32 @inline_asm_1_vgpr_virtreg_output() { %vgpr = call i32 asm "v_mov_b32 $0, 0", "=v"() ret i32 %vgpr } -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_vgpr_physreg_output': +; CHECK: Divergence Analysis' for function 'inline_asm_1_vgpr_physreg_output': ; CHECK: DIVERGENT: %vgpr = call i32 asm "v_mov_b32 v0, 0", "={v0}"() define i32 @inline_asm_1_vgpr_physreg_output() { %vgpr = call i32 asm "v_mov_b32 v0, 0", "={v0}"() ret i32 %vgpr } -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_agpr_virtreg_output': +; CHECK: Divergence Analysis' for function 'inline_asm_1_agpr_virtreg_output': ; CHECK: DIVERGENT: %vgpr = call i32 asm "; def $0", "=a"() define i32 @inline_asm_1_agpr_virtreg_output() { %vgpr = call i32 asm "; def $0", "=a"() ret i32 %vgpr } -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_agpr_physreg_output': +; CHECK: Divergence Analysis' for function 'inline_asm_1_agpr_physreg_output': ; CHECK: DIVERGENT: %vgpr = call i32 asm "; def a0", "={a0}"() define i32 @inline_asm_1_agpr_physreg_output() { %vgpr = call i32 asm "; def a0", "={a0}"() ret i32 %vgpr } -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_2_sgpr_virtreg_output': +; CHECK: Divergence Analysis' for function 'inline_asm_2_sgpr_virtreg_output': ; CHECK-NOT: DIVERGENT define void @inline_asm_2_sgpr_virtreg_output() { %asm = call { i32, i32 } asm "; def $0, $1", "=s,=s"() @@ -56,7 +58,7 @@ define void @inline_asm_2_sgpr_virtreg_output() { } ; One output is SGPR, one is VGPR. Infer divergent for the aggregate, but uniform on the SGPR extract -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_sgpr_vgpr_virtreg_output': +; CHECK: Divergence Analysis' for function 'inline_asm_sgpr_vgpr_virtreg_output': ; CHECK: DIVERGENT: %asm = call { i32, i32 } asm "; def $0, $1", "=s,=v"() ; CHECK-NEXT: {{^[ \t]+}}%sgpr = extractvalue { i32, i32 } %asm, 0 ; CHECK-NEXT: DIVERGENT: %vgpr = extractvalue { i32, i32 } %asm, 1 @@ -69,7 +71,7 @@ define void @inline_asm_sgpr_vgpr_virtreg_output() { ret void } -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_vgpr_sgpr_virtreg_output': +; CHECK: Divergence Analysis' for function 'inline_asm_vgpr_sgpr_virtreg_output': ; CHECK: DIVERGENT: %asm = call { i32, i32 } asm "; def $0, $1", "=v,=s"() ; CHECK-NEXT: DIVERGENT: %vgpr = extractvalue { i32, i32 } %asm, 0 ; CHECK-NEXT: {{^[ \t]+}}%sgpr = extractvalue { i32, i32 } %asm, 1 @@ -83,7 +85,7 @@ define void @inline_asm_vgpr_sgpr_virtreg_output() { } ; Have an extra output constraint -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'multi_sgpr_inline_asm_output_input_constraint': +; CHECK: Divergence Analysis' for function 'multi_sgpr_inline_asm_output_input_constraint': ; CHECK-NOT: DIVERGENT define void @multi_sgpr_inline_asm_output_input_constraint() { %asm = call { i32, i32 } asm "; def $0, $1", "=s,=s,s"(i32 1234) @@ -94,7 +96,7 @@ define void @multi_sgpr_inline_asm_output_input_constraint() { ret void } -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_vgpr_sgpr_virtreg_output_input_constraint': +; CHECK: Divergence Analysis' for function 'inline_asm_vgpr_sgpr_virtreg_output_input_constraint': ; CHECK: DIVERGENT: %asm = call { i32, i32 } asm "; def $0, $1", "=v,=s,v"(i32 1234) ; CHECK-NEXT: DIVERGENT: %vgpr = extractvalue { i32, i32 } %asm, 0 ; CHECK-NEXT: {{^[ \t]+}}%sgpr = extractvalue { i32, i32 } %asm, 1 diff --git a/test/Analysis/DivergenceAnalysis/AMDGPU/interp_f16.ll b/test/Analysis/DivergenceAnalysis/AMDGPU/interp_f16.ll index 174dd567978..da5ba6774ec 100644 --- a/test/Analysis/DivergenceAnalysis/AMDGPU/interp_f16.ll +++ b/test/Analysis/DivergenceAnalysis/AMDGPU/interp_f16.ll @@ -1,4 +1,5 @@ -; RUN: opt -mtriple=amdgcn-- -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-- -passes='print' -disable-output %s 2>&1 | FileCheck %s ; CHECK: for function 'interp_p1_f16' ; CHECK: DIVERGENT: %p1 = call float @llvm.amdgcn.interp.p1.f16 diff --git a/test/Analysis/DivergenceAnalysis/AMDGPU/intrinsics.ll b/test/Analysis/DivergenceAnalysis/AMDGPU/intrinsics.ll index e9c753f027a..88b178cb9c0 100644 --- a/test/Analysis/DivergenceAnalysis/AMDGPU/intrinsics.ll +++ b/test/Analysis/DivergenceAnalysis/AMDGPU/intrinsics.ll @@ -1,4 +1,5 @@ -; RUN: opt -mtriple=amdgcn-- -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-- -passes='print' -disable-output %s 2>&1 | FileCheck %s ; CHECK: DIVERGENT: %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0 define amdgpu_kernel void @ds_swizzle(i32 addrspace(1)* %out, i32 %src) #0 { diff --git a/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible.ll b/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible.ll index 9a94328be67..abe85e4e045 100644 --- a/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible.ll +++ b/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible.ll @@ -1,4 +1,14 @@ -; RUN: opt -mtriple=amdgcn-- -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt %s -mtriple amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s +; RUN: opt %s -mtriple amdgcn-- -passes='print' -disable-output 2>&1 | FileCheck %s + +; NOTE: The new pass manager does not fall back on legacy divergence +; analysis even when the function contains an irreducible loop. The +; (new) divergence analysis conservatively reports all values as +; divergent. This test does not check for this conservative +; behaviour. Instead, it only checks for the values that are known to +; be divergent according to the legacy analysis. + +; RUN: opt -mtriple amdgcn-- -passes='print' -disable-output %s 2>&1 | FileCheck %s ; This test contains an unstructured loop. ; +-------------- entry ----------------+ @@ -14,7 +24,7 @@ ; if (i3 == 5) // divergent ; because sync dependent on (tid / i3). define i32 @unstructured_loop(i1 %entry_cond) { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'unstructured_loop' +; CHECK-LABEL: Divergence Analysis' for function 'unstructured_loop' entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() br i1 %entry_cond, label %loop_entry_1, label %loop_entry_2 diff --git a/test/Analysis/DivergenceAnalysis/AMDGPU/join-at-loop-exit.ll b/test/Analysis/DivergenceAnalysis/AMDGPU/join-at-loop-exit.ll index 2b9bce7657a..1ddb869f985 100644 --- a/test/Analysis/DivergenceAnalysis/AMDGPU/join-at-loop-exit.ll +++ b/test/Analysis/DivergenceAnalysis/AMDGPU/join-at-loop-exit.ll @@ -1,4 +1,5 @@ -; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s ; CHECK: bb3: ; CHECK: DIVERGENT: %Guard.bb4 = phi i1 [ true, %bb1 ], [ false, %bb2 ] diff --git a/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll b/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll index bc9ed6fb879..21379e8fbf7 100644 --- a/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll +++ b/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll @@ -1,6 +1,7 @@ -; RUN: opt %s -mtriple amdgcn-- -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s +; RUN: opt %s -mtriple amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s +; RUN: opt -mtriple amdgcn-- -passes='print' -disable-output %s 2>&1 | FileCheck %s -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'test_amdgpu_ps': +; CHECK-LABEL: Divergence Analysis' for function 'test_amdgpu_ps': ; CHECK: DIVERGENT: [4 x <16 x i8>] addrspace(4)* %arg0 ; CHECK-NOT: DIVERGENT ; CHECK: DIVERGENT: <2 x i32> %arg3 @@ -12,7 +13,7 @@ define amdgpu_ps void @test_amdgpu_ps([4 x <16 x i8>] addrspace(4)* byref([4 x < ret void } -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'test_amdgpu_kernel': +; CHECK-LABEL: Divergence Analysis' for function 'test_amdgpu_kernel': ; CHECK-NOT: %arg0 ; CHECK-NOT: %arg1 ; CHECK-NOT: %arg2 @@ -24,7 +25,7 @@ define amdgpu_kernel void @test_amdgpu_kernel([4 x <16 x i8>] addrspace(4)* byre ret void } -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'test_c': +; CHECK-LABEL: Divergence Analysis' for function 'test_c': ; CHECK: DIVERGENT: ; CHECK: DIVERGENT: ; CHECK: DIVERGENT: diff --git a/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll b/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll index 5bc5fe34cda..cdcc401e7a0 100644 --- a/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll +++ b/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll @@ -1,4 +1,5 @@ -;RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +;RUN: opt -mtriple=amdgcn-mesa-mesa3d -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-mesa-mesa3d -passes='print' -disable-output %s 2>&1 | FileCheck %s ;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.swap.i32( define float @buffer_atomic_swap(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { diff --git a/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll b/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll index 97ef984dc81..616bebd5cc9 100644 --- a/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll +++ b/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll @@ -1,4 +1,5 @@ -;RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +;RUN: opt -mtriple=amdgcn-mesa-mesa3d -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-mesa-mesa3d -passes='print' -disable-output %s 2>&1 | FileCheck %s ;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32( define float @image_atomic_swap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { diff --git a/test/Analysis/DivergenceAnalysis/AMDGPU/no-return-blocks.ll b/test/Analysis/DivergenceAnalysis/AMDGPU/no-return-blocks.ll index fb7c041e2d1..dabded9955b 100644 --- a/test/Analysis/DivergenceAnalysis/AMDGPU/no-return-blocks.ll +++ b/test/Analysis/DivergenceAnalysis/AMDGPU/no-return-blocks.ll @@ -1,4 +1,5 @@ -; RUN: opt %s -mtriple amdgcn-- -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s +; RUN: opt %s -mtriple amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s +; RUN: opt -mtriple amdgcn-- -passes='print' -disable-output %s 2>&1 | FileCheck %s ; CHECK: DIVERGENT: %tmp5 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp2 ; CHECK: DIVERGENT: %tmp10 = load volatile float, float addrspace(1)* %tmp5, align 4 diff --git a/test/Analysis/DivergenceAnalysis/AMDGPU/phi-undef.ll b/test/Analysis/DivergenceAnalysis/AMDGPU/phi-undef.ll index 978bc4232b1..69cfd3d971d 100644 --- a/test/Analysis/DivergenceAnalysis/AMDGPU/phi-undef.ll +++ b/test/Analysis/DivergenceAnalysis/AMDGPU/phi-undef.ll @@ -1,4 +1,5 @@ -; RUN: opt -mtriple=amdgcn-- -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-- -passes='print' -disable-output %s 2>&1 | FileCheck %s ; CHECK-LABEL: 'test1': ; CHECK-NEXT: DIVERGENT: i32 %bound diff --git a/test/Analysis/DivergenceAnalysis/AMDGPU/propagate-loop-live-out.ll b/test/Analysis/DivergenceAnalysis/AMDGPU/propagate-loop-live-out.ll index 9ed3b0df0d5..252b6ff7335 100644 --- a/test/Analysis/DivergenceAnalysis/AMDGPU/propagate-loop-live-out.ll +++ b/test/Analysis/DivergenceAnalysis/AMDGPU/propagate-loop-live-out.ll @@ -1,4 +1,5 @@ -; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s ; CHECK: bb6: ; CHECK: DIVERGENT: %.126.i355.i = phi i1 [ false, %bb5 ], [ true, %bb4 ] diff --git a/test/Analysis/DivergenceAnalysis/AMDGPU/temporal_diverge.ll b/test/Analysis/DivergenceAnalysis/AMDGPU/temporal_diverge.ll index 4211ca28ad6..1895b0d84b2 100644 --- a/test/Analysis/DivergenceAnalysis/AMDGPU/temporal_diverge.ll +++ b/test/Analysis/DivergenceAnalysis/AMDGPU/temporal_diverge.ll @@ -1,8 +1,9 @@ -; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s ; temporal-divergent use of value carried by divergent loop define amdgpu_kernel void @temporal_diverge(i32 %n, i32 %a, i32 %b) #0 { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'temporal_diverge': +; CHECK-LABEL: Divergence Analysis' for function 'temporal_diverge': ; CHECK-NOT: DIVERGENT: %uni. ; CHECK-NOT: DIVERGENT: br i1 %uni. @@ -26,7 +27,7 @@ X: ; temporal-divergent use of value carried by divergent loop inside a top-level loop define amdgpu_kernel void @temporal_diverge_inloop(i32 %n, i32 %a, i32 %b) #0 { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'temporal_diverge_inloop': +; CHECK-LABEL: Divergence Analysis' for function 'temporal_diverge_inloop': ; CHECK-NOT: DIVERGENT: %uni. ; CHECK-NOT: DIVERGENT: br i1 %uni. @@ -58,7 +59,7 @@ Y: ; temporal-uniform use of a valud, definition and users are carried by a surrounding divergent loop define amdgpu_kernel void @temporal_uniform_indivloop(i32 %n, i32 %a, i32 %b) #0 { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'temporal_uniform_indivloop': +; CHECK-LABEL: Divergence Analysis' for function 'temporal_uniform_indivloop': ; CHECK-NOT: DIVERGENT: %uni. ; CHECK-NOT: DIVERGENT: br i1 %uni. @@ -90,7 +91,7 @@ Y: ; temporal-divergent use of value carried by divergent loop, user is inside sibling loop define amdgpu_kernel void @temporal_diverge_loopuser(i32 %n, i32 %a, i32 %b) #0 { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'temporal_diverge_loopuser': +; CHECK-LABEL: Divergence Analysis' for function 'temporal_diverge_loopuser': ; CHECK-NOT: DIVERGENT: %uni. ; CHECK-NOT: DIVERGENT: br i1 %uni. @@ -120,7 +121,7 @@ Y: ; temporal-divergent use of value carried by divergent loop, user is inside sibling loop, defs and use are carried by a uniform loop define amdgpu_kernel void @temporal_diverge_loopuser_nested(i32 %n, i32 %a, i32 %b) #0 { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'temporal_diverge_loopuser_nested': +; CHECK-LABEL: Divergence Analysis' for function 'temporal_diverge_loopuser_nested': ; CHECK-NOT: DIVERGENT: %uni. ; CHECK-NOT: DIVERGENT: br i1 %uni. diff --git a/test/Analysis/DivergenceAnalysis/AMDGPU/trivial-join-at-loop-exit.ll b/test/Analysis/DivergenceAnalysis/AMDGPU/trivial-join-at-loop-exit.ll index b872dd8966b..ee4167e90ae 100644 --- a/test/Analysis/DivergenceAnalysis/AMDGPU/trivial-join-at-loop-exit.ll +++ b/test/Analysis/DivergenceAnalysis/AMDGPU/trivial-join-at-loop-exit.ll @@ -1,4 +1,5 @@ -; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s ; CHECK: bb2: ; CHECK-NOT: DIVERGENT: %Guard.bb2 = phi i1 [ true, %bb1 ], [ false, %bb0 ] diff --git a/test/Analysis/DivergenceAnalysis/AMDGPU/unreachable-loop-block.ll b/test/Analysis/DivergenceAnalysis/AMDGPU/unreachable-loop-block.ll index af3db4c8881..48294d714f2 100644 --- a/test/Analysis/DivergenceAnalysis/AMDGPU/unreachable-loop-block.ll +++ b/test/Analysis/DivergenceAnalysis/AMDGPU/unreachable-loop-block.ll @@ -1,4 +1,5 @@ -; RUN: opt %s -mtriple amdgcn-- -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s +; RUN: opt %s -mtriple amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s +; RUN: opt -mtriple amdgcn-- -passes='print' -disable-output %s 2>&1 | FileCheck %s ; CHECK: DIVERGENT: %tmp = cmpxchg volatile define amdgpu_kernel void @unreachable_loop(i32 %tidx) #0 { diff --git a/test/Analysis/DivergenceAnalysis/AMDGPU/workitem-intrinsics.ll b/test/Analysis/DivergenceAnalysis/AMDGPU/workitem-intrinsics.ll index b22c5f11abe..15f79a7ef61 100644 --- a/test/Analysis/DivergenceAnalysis/AMDGPU/workitem-intrinsics.ll +++ b/test/Analysis/DivergenceAnalysis/AMDGPU/workitem-intrinsics.ll @@ -1,4 +1,5 @@ -; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s declare i32 @llvm.amdgcn.workitem.id.x() #0 declare i32 @llvm.amdgcn.workitem.id.y() #0 diff --git a/test/Analysis/DivergenceAnalysis/NVPTX/daorder.ll b/test/Analysis/DivergenceAnalysis/NVPTX/daorder.ll index 89954b6f7c0..eb0938e76ae 100644 --- a/test/Analysis/DivergenceAnalysis/NVPTX/daorder.ll +++ b/test/Analysis/DivergenceAnalysis/NVPTX/daorder.ll @@ -1,10 +1,11 @@ -; RUN: opt %s -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s +; RUN: opt %s -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s +; RUN: opt %s -passes='print' -disable-output 2>&1 | FileCheck %s target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" target triple = "nvptx64-nvidia-cuda" define i32 @daorder(i32 %n) { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'daorder' +; CHECK-LABEL: Divergence Analysis' for function 'daorder' entry: %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() %cond = icmp slt i32 %tid, 0 diff --git a/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll b/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll index e2e54728220..10bcd106c8c 100644 --- a/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll +++ b/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll @@ -1,11 +1,12 @@ -; RUN: opt %s -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s +; RUN: opt %s -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s +; RUN: opt %s -passes='print' -disable-output 2>&1 | FileCheck %s target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" target triple = "nvptx64-nvidia-cuda" ; return (n < 0 ? a + threadIdx.x : b + threadIdx.x) define i32 @no_diverge(i32 %n, i32 %a, i32 %b) { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'no_diverge' +; CHECK-LABEL: Divergence Analysis' for function 'no_diverge' entry: %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() %cond = icmp slt i32 %n, 0 @@ -27,7 +28,7 @@ merge: ; c = b; ; return c; // c is divergent: sync dependent define i32 @sync(i32 %a, i32 %b) { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'sync' +; CHECK-LABEL: Divergence Analysis' for function 'sync' bb1: %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.y() %cond = icmp slt i32 %tid, 5 @@ -48,7 +49,7 @@ bb3: ; // c here is divergent because it is sync dependent on threadIdx.x >= 5 ; return c; define i32 @mixed(i32 %n, i32 %a, i32 %b) { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'mixed' +; CHECK-LABEL: Divergence Analysis' for function 'mixed' bb1: %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.z() %cond = icmp slt i32 %tid, 5 @@ -73,7 +74,7 @@ bb6: ; We conservatively treats all parameters of a __device__ function as divergent. define i32 @device(i32 %n, i32 %a, i32 %b) { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'device' +; CHECK-LABEL: Divergence Analysis' for function 'device' ; CHECK: DIVERGENT: i32 %n ; CHECK: DIVERGENT: i32 %a ; CHECK: DIVERGENT: i32 %b @@ -98,7 +99,7 @@ merge: ; ; The i defined in the loop is used outside. define i32 @loop() { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'loop' +; CHECK-LABEL: Divergence Analysis' for function 'loop' entry: %laneid = call i32 @llvm.nvvm.read.ptx.sreg.laneid() br label %loop @@ -120,7 +121,7 @@ else: ; Same as @loop, but the loop is in the LCSSA form. define i32 @lcssa() { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'lcssa' +; CHECK-LABEL: Divergence Analysis' for function 'lcssa' entry: %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() br label %loop diff --git a/test/Analysis/DivergenceAnalysis/NVPTX/hidden_diverge.ll b/test/Analysis/DivergenceAnalysis/NVPTX/hidden_diverge.ll index 3d61986657e..ea15a7c86f4 100644 --- a/test/Analysis/DivergenceAnalysis/NVPTX/hidden_diverge.ll +++ b/test/Analysis/DivergenceAnalysis/NVPTX/hidden_diverge.ll @@ -1,10 +1,11 @@ -; RUN: opt %s -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s +; RUN: opt %s -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s +; RUN: opt %s -passes='print' -disable-output 2>&1 | FileCheck %s target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" target triple = "nvptx64-nvidia-cuda" define i32 @hidden_diverge(i32 %n, i32 %a, i32 %b) { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_diverge' +; CHECK-LABEL: Divergence Analysis' for function 'hidden_diverge' entry: %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() %cond.var = icmp slt i32 %tid, 0 diff --git a/test/Analysis/DivergenceAnalysis/NVPTX/irreducible.ll b/test/Analysis/DivergenceAnalysis/NVPTX/irreducible.ll index 2e1686a446d..1693d64fd1a 100644 --- a/test/Analysis/DivergenceAnalysis/NVPTX/irreducible.ll +++ b/test/Analysis/DivergenceAnalysis/NVPTX/irreducible.ll @@ -1,4 +1,12 @@ -; RUN: opt %s -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s +; RUN: opt %s -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s +; RUN: opt %s -passes='print' -disable-output 2>&1 | FileCheck %s + +; NOTE: The new pass manager does not fall back on legacy divergence +; analysis even when the function contains an irreducible loop. The +; (new) divergence analysis conservatively reports all values as +; divergent. This test does not check for this conservative +; behaviour. Instead, it only checks for the values that are known to +; be divergent according to the legacy analysis. target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" target triple = "nvptx64-nvidia-cuda" @@ -17,7 +25,7 @@ target triple = "nvptx64-nvidia-cuda" ; if (i3 == 5) // divergent ; because sync dependent on (tid / i3). define i32 @unstructured_loop(i1 %entry_cond) { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'unstructured_loop' +; CHECK-LABEL: Divergence Analysis' for function 'unstructured_loop' entry: %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() br i1 %entry_cond, label %loop_entry_1, label %loop_entry_2 diff --git a/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll b/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll index 6c4b24e114b..965d9f22a24 100644 --- a/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll +++ b/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence %s | FileCheck %s ; CHECK: DIVERGENT: %orig = atomicrmw xchg i32* %ptr, i32 %val seq_cst define i32 @test1(i32* %ptr, i32 %val) #0 { diff --git a/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/intrinsics.ll b/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/intrinsics.ll index 596e8143633..894de06c4bc 100644 --- a/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/intrinsics.ll +++ b/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/intrinsics.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=amdgcn-- -analyze -amdgpu-use-legacy-divergence-analysis -divergence %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-- -enable-new-pm=0 -analyze -amdgpu-use-legacy-divergence-analysis -divergence %s | FileCheck %s ; CHECK: DIVERGENT: %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0 define amdgpu_kernel void @ds_swizzle(i32 addrspace(1)* %out, i32 %src) #0 { diff --git a/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/kernel-args.ll b/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/kernel-args.ll index f06c9e2a315..e2675f98015 100644 --- a/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/kernel-args.ll +++ b/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/kernel-args.ll @@ -1,4 +1,4 @@ -; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence | FileCheck %s +; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence | FileCheck %s ; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'test_amdgpu_ps': ; CHECK: DIVERGENT: [4 x <16 x i8>] addrspace(4)* %arg0 diff --git a/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll b/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll index 9f82cd96ffe..639a95575c4 100644 --- a/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll +++ b/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll @@ -1,4 +1,4 @@ -;RUN: opt -mtriple=amdgcn-mesa-mesa3d -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s +;RUN: opt -mtriple=amdgcn-mesa-mesa3d -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence %s | FileCheck %s ;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.swap.i32( define float @buffer_atomic_swap(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { diff --git a/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll b/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll index 81489aaf74b..c8b9e1dacaf 100644 --- a/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll +++ b/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll @@ -1,4 +1,4 @@ -;RUN: opt -mtriple=amdgcn-mesa-mesa3d -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s +;RUN: opt -mtriple=amdgcn-mesa-mesa3d -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence %s | FileCheck %s ;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32( define float @image_atomic_swap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { diff --git a/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/loads.ll b/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/loads.ll index 122c14f389f..903858bab37 100644 --- a/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/loads.ll +++ b/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/loads.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence %s | FileCheck %s ; Test that we consider loads from flat and private addrspaces to be divergent. diff --git a/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/no-return-blocks.ll b/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/no-return-blocks.ll index 44eed135985..e9a640f97c3 100644 --- a/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/no-return-blocks.ll +++ b/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/no-return-blocks.ll @@ -1,4 +1,4 @@ -; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence | FileCheck %s +; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence | FileCheck %s ; CHECK: DIVERGENT: %tmp5 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp2 ; CHECK: DIVERGENT: %tmp10 = load volatile float, float addrspace(1)* %tmp5, align 4 diff --git a/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/phi-undef.ll b/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/phi-undef.ll index 6fffc811116..5bc388cac0e 100644 --- a/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/phi-undef.ll +++ b/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/phi-undef.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence %s | FileCheck %s ; CHECK-LABEL: 'test1': ; CHECK-NEXT: DIVERGENT: i32 %bound diff --git a/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/unreachable-loop-block.ll b/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/unreachable-loop-block.ll index 5ee1a56cc41..49657d253ba 100644 --- a/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/unreachable-loop-block.ll +++ b/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/unreachable-loop-block.ll @@ -1,4 +1,4 @@ -; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence | FileCheck %s +; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence | FileCheck %s ; CHECK: DIVERGENT: %tmp = cmpxchg volatile define amdgpu_kernel void @unreachable_loop(i32 %tidx) #0 { diff --git a/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/workitem-intrinsics.ll b/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/workitem-intrinsics.ll index d10b4be49aa..0fd25c85ff7 100644 --- a/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/workitem-intrinsics.ll +++ b/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/workitem-intrinsics.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple amdgcn-unknown-amdhsa -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence %s | FileCheck %s declare i32 @llvm.amdgcn.workitem.id.x() #0 declare i32 @llvm.amdgcn.workitem.id.y() #0 diff --git a/test/Analysis/LegacyDivergenceAnalysis/NVPTX/diverge.ll b/test/Analysis/LegacyDivergenceAnalysis/NVPTX/diverge.ll index 4e7163d3a51..9ff837a11e8 100644 --- a/test/Analysis/LegacyDivergenceAnalysis/NVPTX/diverge.ll +++ b/test/Analysis/LegacyDivergenceAnalysis/NVPTX/diverge.ll @@ -1,4 +1,4 @@ -; RUN: opt %s -analyze -divergence | FileCheck %s +; RUN: opt %s -enable-new-pm=0 -analyze -divergence | FileCheck %s target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" target triple = "nvptx64-nvidia-cuda" diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp index e7ba330bb61..bf2d59b5eed 100644 --- a/tools/opt/opt.cpp +++ b/tools/opt/opt.cpp @@ -496,7 +496,7 @@ static bool shouldPinPassToLegacyPM(StringRef Pass) { "safe-stack", "cost-model", "codegenprepare", "interleaved-load-combine", "unreachableblockelim", "verify-safepoint-ir", - "divergence", "atomic-expand", + "atomic-expand", "hardware-loops", "type-promotion", "mve-tail-predication", "interleaved-access", "global-merge", "pre-isel-intrinsic-lowering", diff --git a/unittests/Analysis/DivergenceAnalysisTest.cpp b/unittests/Analysis/DivergenceAnalysisTest.cpp index 9416e592012..0737e7773fb 100644 --- a/unittests/Analysis/DivergenceAnalysisTest.cpp +++ b/unittests/Analysis/DivergenceAnalysisTest.cpp @@ -38,7 +38,7 @@ BasicBlock *GetBlockByName(StringRef BlockName, Function &F) { return nullptr; } -// We use this fixture to ensure that we clean up DivergenceAnalysis before +// We use this fixture to ensure that we clean up DivergenceAnalysisImpl before // deleting the PassManager. class DivergenceAnalysisTest : public testing::Test { protected: @@ -54,21 +54,21 @@ protected: DivergenceAnalysisTest() : M("", Context), TLII(), TLI(TLII) {} - DivergenceAnalysis buildDA(Function &F, bool IsLCSSA) { + DivergenceAnalysisImpl buildDA(Function &F, bool IsLCSSA) { DT.reset(new DominatorTree(F)); PDT.reset(new PostDominatorTree(F)); LI.reset(new LoopInfo(*DT)); SDA.reset(new SyncDependenceAnalysis(*DT, *PDT, *LI)); - return DivergenceAnalysis(F, nullptr, *DT, *LI, *SDA, IsLCSSA); + return DivergenceAnalysisImpl(F, nullptr, *DT, *LI, *SDA, IsLCSSA); } void runWithDA( Module &M, StringRef FuncName, bool IsLCSSA, - function_ref + function_ref Test) { auto *F = M.getFunction(FuncName); ASSERT_NE(F, nullptr) << "Could not find " << FuncName; - DivergenceAnalysis DA = buildDA(*F, IsLCSSA); + DivergenceAnalysisImpl DA = buildDA(*F, IsLCSSA); Test(*F, *LI, DA); } }; @@ -82,7 +82,7 @@ TEST_F(DivergenceAnalysisTest, DAInitialState) { BasicBlock *BB = BasicBlock::Create(Context, "entry", F); ReturnInst::Create(Context, nullptr, BB); - DivergenceAnalysis DA = buildDA(*F, false); + DivergenceAnalysisImpl DA = buildDA(*F, false); // Whole function region EXPECT_EQ(DA.getRegionLoop(), nullptr); @@ -135,7 +135,7 @@ TEST_F(DivergenceAnalysisTest, DANoLCSSA) { Err, C); Function *F = M->getFunction("f_1"); - DivergenceAnalysis DA = buildDA(*F, false); + DivergenceAnalysisImpl DA = buildDA(*F, false); EXPECT_FALSE(DA.hasDetectedDivergence()); auto ItArg = F->arg_begin(); @@ -189,7 +189,7 @@ TEST_F(DivergenceAnalysisTest, DALCSSA) { Err, C); Function *F = M->getFunction("f_lcssa"); - DivergenceAnalysis DA = buildDA(*F, true); + DivergenceAnalysisImpl DA = buildDA(*F, true); EXPECT_FALSE(DA.hasDetectedDivergence()); auto ItArg = F->arg_begin();