[NewPM] Introduce (GPU)DivergenceAnalysis in the new pass manager
The GPUDivergenceAnalysis is now renamed to just "DivergenceAnalysis" since there is no conflict with LegacyDivergenceAnalysis. In the legacy PM, this analysis can only be used through the legacy DA serving as a wrapper. It is now made available as a pass in the new PM, and has no relation with the legacy DA.

The new DA currently cannot handle irreducible control flow; its presence can cause the analysis to run indefinitely. The analysis is now modified to detect this and report all instructions in the function as divergent. This is super conservative, but allows the analysis to be used without hanging the compiler.

Reviewed By: aeubanks

Differential Revision: https://reviews.llvm.org/D96615
parent 4bfb393b4c
commit 76fb79614f
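For context, here is a minimal sketch (not part of the commit) of how a new-PM transform could consume the analysis this patch introduces. Only `DivergenceAnalysis`, its `DivergenceInfo` result type, and their methods come from the diff below; the pass name `CountDivergentPass` is hypothetical:

```cpp
#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Hypothetical client pass; sketch only.
struct CountDivergentPass : PassInfoMixin<CountDivergentPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM) {
    // The analysis result is a DivergenceInfo. On irreducible CFGs it
    // conservatively reports every value as divergent instead of hanging.
    DivergenceInfo &DI = FAM.getResult<DivergenceAnalysis>(F);
    unsigned NumDivergent = 0;
    if (DI.hasDivergence())
      for (Instruction &I : instructions(F))
        NumDivergent += DI.isDivergent(I);
    errs() << F.getName() << ": " << NumDivergent << " divergent values\n";
    return PreservedAnalyses::all(); // read-only, nothing invalidated
  }
};
```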
@@ -34,7 +34,7 @@ class TargetTransformInfo;
 /// This analysis propagates divergence in a data-parallel context from sources
 /// of divergence to all users. It requires reducible CFGs. All assignments
 /// should be in SSA form.
-class DivergenceAnalysis {
+class DivergenceAnalysisImpl {
 public:
   /// \brief This instance will analyze the whole function \p F or the loop \p
   /// RegionLoop.
@@ -43,9 +43,9 @@ public:
   /// Otherwise the whole function is analyzed.
   /// \param IsLCSSAForm whether the analysis may assume that the IR in the
   /// region in in LCSSA form.
-  DivergenceAnalysis(const Function &F, const Loop *RegionLoop,
-                     const DominatorTree &DT, const LoopInfo &LI,
-                     SyncDependenceAnalysis &SDA, bool IsLCSSAForm);
+  DivergenceAnalysisImpl(const Function &F, const Loop *RegionLoop,
+                         const DominatorTree &DT, const LoopInfo &LI,
+                         SyncDependenceAnalysis &SDA, bool IsLCSSAForm);
 
   /// \brief The loop that defines the analyzed region (if any).
   const Loop *getRegionLoop() const { return RegionLoop; }
@@ -82,8 +82,6 @@ public:
   /// divergent.
   bool isDivergentUse(const Use &U) const;
 
-  void print(raw_ostream &OS, const Module *) const;
-
 private:
   /// \brief Mark \p Term as divergent and push all Instructions that become
   /// divergent as a result on the worklist.
@@ -152,28 +150,39 @@ private:
   std::vector<const Instruction *> Worklist;
 };
 
 /// \brief Divergence analysis frontend for GPU kernels.
-class GPUDivergenceAnalysis {
-  SyncDependenceAnalysis SDA;
-  DivergenceAnalysis DA;
+class DivergenceInfo {
+  Function &F;
+
+  // If the function contains an irreducible region the divergence
+  // analysis can run indefinitely. We set ContainsIrreducible and no
+  // analysis is actually performed on the function. All values in
+  // this function are conservatively reported as divergent instead.
+  bool ContainsIrreducible;
+  std::unique_ptr<SyncDependenceAnalysis> SDA;
+  std::unique_ptr<DivergenceAnalysisImpl> DA;
 
 public:
   /// Runs the divergence analysis on @F, a GPU kernel
-  GPUDivergenceAnalysis(Function &F, const DominatorTree &DT,
-                        const PostDominatorTree &PDT, const LoopInfo &LI,
-                        const TargetTransformInfo &TTI);
+  DivergenceInfo(Function &F, const DominatorTree &DT,
+                 const PostDominatorTree &PDT, const LoopInfo &LI,
+                 const TargetTransformInfo &TTI, bool KnownReducible);
 
   /// Whether any divergence was detected.
-  bool hasDivergence() const { return DA.hasDetectedDivergence(); }
+  bool hasDivergence() const {
+    return ContainsIrreducible || DA->hasDetectedDivergence();
+  }
 
   /// The GPU kernel this analysis result is for
-  const Function &getFunction() const { return DA.getFunction(); }
+  const Function &getFunction() const { return F; }
 
   /// Whether \p V is divergent at its definition.
-  bool isDivergent(const Value &V) const;
+  bool isDivergent(const Value &V) const {
+    return ContainsIrreducible || DA->isDivergent(V);
+  }
 
   /// Whether \p U is divergent. Uses of a uniform value can be divergent.
-  bool isDivergentUse(const Use &U) const;
+  bool isDivergentUse(const Use &U) const {
+    return ContainsIrreducible || DA->isDivergentUse(U);
+  }
 
   /// Whether \p V is uniform/non-divergent.
   bool isUniform(const Value &V) const { return !isDivergent(V); }
@@ -181,11 +190,32 @@ public:
   /// Whether \p U is uniform/non-divergent. Uses of a uniform value can be
   /// divergent.
   bool isUniformUse(const Use &U) const { return !isDivergentUse(U); }
 
   /// Print all divergent values in the kernel.
   void print(raw_ostream &OS, const Module *) const;
 };
 
+/// \brief Divergence analysis frontend for GPU kernels.
+class DivergenceAnalysis : public AnalysisInfoMixin<DivergenceAnalysis> {
+  friend AnalysisInfoMixin<DivergenceAnalysis>;
+
+  static AnalysisKey Key;
+
+public:
+  using Result = DivergenceInfo;
+
+  /// Runs the divergence analysis on @F, a GPU kernel
+  Result run(Function &F, FunctionAnalysisManager &AM);
+};
+
+/// Printer pass to dump divergence analysis results.
+struct DivergenceAnalysisPrinterPass
+    : public PassInfoMixin<DivergenceAnalysisPrinterPass> {
+  DivergenceAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {}
+
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+
+private:
+  raw_ostream &OS;
+}; // class DivergenceAnalysisPrinterPass
+
 } // namespace llvm
 
 #endif // LLVM_ANALYSIS_DIVERGENCEANALYSIS_H
@@ -20,8 +20,8 @@
 #include <memory>
 
 namespace llvm {
+class DivergenceInfo;
 class Function;
-class GPUDivergenceAnalysis;
 class Module;
 class raw_ostream;
 class TargetTransformInfo;
@@ -63,7 +63,7 @@ private:
                             const TargetTransformInfo &TTI) const;
 
   // (optional) handle to new DivergenceAnalysis
-  std::unique_ptr<GPUDivergenceAnalysis> gpuDA;
+  std::unique_ptr<DivergenceInfo> gpuDA;
 
   // Stores all divergent values.
   DenseSet<const Value *> DivergentValues;
@@ -31,10 +31,10 @@
 // Ralf Karrenberg and Sebastian Hack
 // CC '12
 //
-// This DivergenceAnalysis implementation is generic in the sense that it does
+// This implementation is generic in the sense that it does
 // not itself identify original sources of divergence.
 // Instead specialized adapter classes, (LoopDivergenceAnalysis) for loops and
-// (GPUDivergenceAnalysis) for GPU programs, identify the sources of divergence
+// (DivergenceAnalysis) for functions, identify the sources of divergence
 // (e.g., special variables that hold the thread ID or the iteration variable).
 //
 // The generic implementation propagates divergence to variables that are data
@@ -61,7 +61,7 @@
 // The sync dependence detection (which branch induces divergence in which join
 // points) is implemented in the SyncDependenceAnalysis.
 //
-// The current DivergenceAnalysis implementation has the following limitations:
+// The current implementation has the following limitations:
 // 1. intra-procedural. It conservatively considers the arguments of a
 //    non-kernel-entry function and the return value of a function call as
 //    divergent.
@@ -73,6 +73,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/PostDominators.h"
@@ -87,16 +88,15 @@
 
 using namespace llvm;
 
-#define DEBUG_TYPE "divergence-analysis"
+#define DEBUG_TYPE "divergence"
 
-// class DivergenceAnalysis
-DivergenceAnalysis::DivergenceAnalysis(
+DivergenceAnalysisImpl::DivergenceAnalysisImpl(
     const Function &F, const Loop *RegionLoop, const DominatorTree &DT,
     const LoopInfo &LI, SyncDependenceAnalysis &SDA, bool IsLCSSAForm)
     : F(F), RegionLoop(RegionLoop), DT(DT), LI(LI), SDA(SDA),
       IsLCSSAForm(IsLCSSAForm) {}
 
-bool DivergenceAnalysis::markDivergent(const Value &DivVal) {
+bool DivergenceAnalysisImpl::markDivergent(const Value &DivVal) {
   if (isAlwaysUniform(DivVal))
     return false;
   assert(isa<Instruction>(DivVal) || isa<Argument>(DivVal));
@@ -104,12 +104,12 @@ bool DivergenceAnalysis::markDivergent(const Value &DivVal) {
   return DivergentValues.insert(&DivVal).second;
 }
 
-void DivergenceAnalysis::addUniformOverride(const Value &UniVal) {
+void DivergenceAnalysisImpl::addUniformOverride(const Value &UniVal) {
   UniformOverrides.insert(&UniVal);
 }
 
-bool DivergenceAnalysis::isTemporalDivergent(const BasicBlock &ObservingBlock,
-                                             const Value &Val) const {
+bool DivergenceAnalysisImpl::isTemporalDivergent(
    const BasicBlock &ObservingBlock, const Value &Val) const {
   const auto *Inst = dyn_cast<const Instruction>(&Val);
   if (!Inst)
     return false;
@@ -125,15 +125,15 @@ bool DivergenceAnalysis::isTemporalDivergent(const BasicBlock &ObservingBlock,
   return false;
 }
 
-bool DivergenceAnalysis::inRegion(const Instruction &I) const {
+bool DivergenceAnalysisImpl::inRegion(const Instruction &I) const {
   return I.getParent() && inRegion(*I.getParent());
 }
 
-bool DivergenceAnalysis::inRegion(const BasicBlock &BB) const {
+bool DivergenceAnalysisImpl::inRegion(const BasicBlock &BB) const {
   return (!RegionLoop && BB.getParent() == &F) || RegionLoop->contains(&BB);
 }
 
-void DivergenceAnalysis::pushUsers(const Value &V) {
+void DivergenceAnalysisImpl::pushUsers(const Value &V) {
   const auto *I = dyn_cast<const Instruction>(&V);
 
   if (I && I->isTerminator()) {
@@ -166,8 +166,8 @@ static const Instruction *getIfCarriedInstruction(const Use &U,
   return I;
 }
 
-void DivergenceAnalysis::analyzeTemporalDivergence(const Instruction &I,
-                                                   const Loop &OuterDivLoop) {
+void DivergenceAnalysisImpl::analyzeTemporalDivergence(
+    const Instruction &I, const Loop &OuterDivLoop) {
   if (isAlwaysUniform(I))
     return;
   if (isDivergent(I))
@@ -188,8 +188,8 @@ void DivergenceAnalysis::analyzeTemporalDivergence(const Instruction &I,
 
 // marks all users of loop-carried values of the loop headed by LoopHeader as
 // divergent
-void DivergenceAnalysis::analyzeLoopExitDivergence(const BasicBlock &DivExit,
-                                                   const Loop &OuterDivLoop) {
+void DivergenceAnalysisImpl::analyzeLoopExitDivergence(
+    const BasicBlock &DivExit, const Loop &OuterDivLoop) {
   // All users are in immediate exit blocks
   if (IsLCSSAForm) {
     for (const auto &Phi : DivExit.phis()) {
@@ -242,8 +242,8 @@ void DivergenceAnalysis::analyzeLoopExitDivergence(const BasicBlock &DivExit,
   } while (!TaintStack.empty());
 }
 
-void DivergenceAnalysis::propagateLoopExitDivergence(const BasicBlock &DivExit,
-                                                     const Loop &InnerDivLoop) {
+void DivergenceAnalysisImpl::propagateLoopExitDivergence(
+    const BasicBlock &DivExit, const Loop &InnerDivLoop) {
   LLVM_DEBUG(dbgs() << "\tpropLoopExitDiv " << DivExit.getName() << "\n");
 
   // Find outer-most loop that does not contain \p DivExit
@@ -265,7 +265,7 @@ void DivergenceAnalysis::propagateLoopExitDivergence(const BasicBlock &DivExit,
 
 // this is a divergent join point - mark all phi nodes as divergent and push
 // them onto the stack.
-void DivergenceAnalysis::taintAndPushPhiNodes(const BasicBlock &JoinBlock) {
+void DivergenceAnalysisImpl::taintAndPushPhiNodes(const BasicBlock &JoinBlock) {
   LLVM_DEBUG(dbgs() << "taintAndPushPhiNodes in " << JoinBlock.getName()
                     << "\n");
 
@@ -287,7 +287,7 @@ void DivergenceAnalysis::taintAndPushPhiNodes(const BasicBlock &JoinBlock) {
   }
 }
 
-void DivergenceAnalysis::analyzeControlDivergence(const Instruction &Term) {
+void DivergenceAnalysisImpl::analyzeControlDivergence(const Instruction &Term) {
   LLVM_DEBUG(dbgs() << "analyzeControlDiv " << Term.getParent()->getName()
                     << "\n");
 
@@ -310,7 +310,7 @@ void DivergenceAnalysis::analyzeControlDivergence(const Instruction &Term) {
   }
 }
 
-void DivergenceAnalysis::compute() {
+void DivergenceAnalysisImpl::compute() {
   // Initialize worklist.
   auto DivValuesCopy = DivergentValues;
   for (const auto *DivVal : DivValuesCopy) {
@@ -330,63 +330,82 @@ void DivergenceAnalysis::compute() {
   }
 }
 
-bool DivergenceAnalysis::isAlwaysUniform(const Value &V) const {
+bool DivergenceAnalysisImpl::isAlwaysUniform(const Value &V) const {
   return UniformOverrides.contains(&V);
 }
 
-bool DivergenceAnalysis::isDivergent(const Value &V) const {
+bool DivergenceAnalysisImpl::isDivergent(const Value &V) const {
   return DivergentValues.contains(&V);
 }
 
-bool DivergenceAnalysis::isDivergentUse(const Use &U) const {
+bool DivergenceAnalysisImpl::isDivergentUse(const Use &U) const {
   Value &V = *U.get();
   Instruction &I = *cast<Instruction>(U.getUser());
   return isDivergent(V) || isTemporalDivergent(*I.getParent(), V);
 }
 
-void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
-  if (DivergentValues.empty())
-    return;
-  // iterate instructions using instructions() to ensure a deterministic order.
-  for (auto &I : instructions(F)) {
-    if (isDivergent(I))
-      OS << "DIVERGENT:" << I << '\n';
+DivergenceInfo::DivergenceInfo(Function &F, const DominatorTree &DT,
                               const PostDominatorTree &PDT, const LoopInfo &LI,
+                               const TargetTransformInfo &TTI,
+                               bool KnownReducible)
+    : F(F), ContainsIrreducible(false) {
+  if (!KnownReducible) {
+    using RPOTraversal = ReversePostOrderTraversal<const Function *>;
+    RPOTraversal FuncRPOT(&F);
+    if (containsIrreducibleCFG<const BasicBlock *, const RPOTraversal,
+                               const LoopInfo>(FuncRPOT, LI)) {
+      ContainsIrreducible = true;
+      return;
+    }
   }
-}
-
-// class GPUDivergenceAnalysis
-GPUDivergenceAnalysis::GPUDivergenceAnalysis(Function &F,
-                                             const DominatorTree &DT,
-                                             const PostDominatorTree &PDT,
-                                             const LoopInfo &LI,
-                                             const TargetTransformInfo &TTI)
-    : SDA(DT, PDT, LI), DA(F, nullptr, DT, LI, SDA, /* LCSSA */ false) {
+  SDA = std::make_unique<SyncDependenceAnalysis>(DT, PDT, LI);
+  DA = std::make_unique<DivergenceAnalysisImpl>(F, nullptr, DT, LI, *SDA,
+                                                /* LCSSA */ false);
   for (auto &I : instructions(F)) {
     if (TTI.isSourceOfDivergence(&I)) {
-      DA.markDivergent(I);
+      DA->markDivergent(I);
     } else if (TTI.isAlwaysUniform(&I)) {
-      DA.addUniformOverride(I);
+      DA->addUniformOverride(I);
    }
   }
   for (auto &Arg : F.args()) {
     if (TTI.isSourceOfDivergence(&Arg)) {
-      DA.markDivergent(Arg);
+      DA->markDivergent(Arg);
    }
   }
 
-  DA.compute();
+  DA->compute();
 }
 
-bool GPUDivergenceAnalysis::isDivergent(const Value &val) const {
-  return DA.isDivergent(val);
-}
+AnalysisKey DivergenceAnalysis::Key;
 
-bool GPUDivergenceAnalysis::isDivergentUse(const Use &use) const {
-  return DA.isDivergentUse(use);
-}
+DivergenceAnalysis::Result
+DivergenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
+  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+  auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
+  auto &LI = AM.getResult<LoopAnalysis>(F);
+  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
 
-void GPUDivergenceAnalysis::print(raw_ostream &OS, const Module *mod) const {
-  OS << "Divergence of kernel " << DA.getFunction().getName() << " {\n";
-  DA.print(OS, mod);
-  OS << "}\n";
+  return DivergenceInfo(F, DT, PDT, LI, TTI, /* KnownReducible = */ false);
+}
+
+PreservedAnalyses
+DivergenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) {
+  auto &DI = FAM.getResult<DivergenceAnalysis>(F);
+  OS << "'Divergence Analysis' for function '" << F.getName() << "':\n";
+  if (DI.hasDivergence()) {
+    for (auto &Arg : F.args()) {
+      OS << (DI.isDivergent(Arg) ? "DIVERGENT: " : "           ");
+      OS << Arg << "\n";
+    }
+    for (auto BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+      auto &BB = *BI;
+      OS << "\n           " << BB.getName() << ":\n";
+      for (auto &I : BB.instructionsWithoutDebug()) {
+        OS << (DI.isDivergent(I) ? "DIVERGENT: " : "           ");
        OS << I << "\n";
      }
    }
  }
  return PreservedAnalyses::all();
 }
@@ -339,7 +339,8 @@ bool LegacyDivergenceAnalysis::runOnFunction(Function &F) {
   if (shouldUseGPUDivergenceAnalysis(F, TTI)) {
     // run the new GPU divergence analysis
     auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
-    gpuDA = std::make_unique<GPUDivergenceAnalysis>(F, DT, PDT, LI, TTI);
+    gpuDA = std::make_unique<DivergenceInfo>(F, DT, PDT, LI, TTI,
+                                             /* KnownReducible = */ true);
 
   } else {
     // run LLVM's existing DivergenceAnalysis
@@ -32,6 +32,7 @@
 #include "llvm/Analysis/Delinearization.h"
 #include "llvm/Analysis/DemandedBits.h"
 #include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/DivergenceAnalysis.h"
 #include "llvm/Analysis/DominanceFrontier.h"
 #include "llvm/Analysis/FunctionPropertiesAnalysis.h"
 #include "llvm/Analysis/GlobalsModRef.h"
@@ -172,6 +172,7 @@ FUNCTION_ANALYSIS("targetir",
                   TM ? TM->getTargetIRAnalysis() : TargetIRAnalysis())
 FUNCTION_ANALYSIS("verify", VerifierAnalysis())
 FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
+FUNCTION_ANALYSIS("divergence", DivergenceAnalysis())
 
 #ifndef FUNCTION_ALIAS_ANALYSIS
 #define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
@@ -273,6 +274,7 @@ FUNCTION_PASS("print<assumptions>", AssumptionPrinterPass(dbgs()))
 FUNCTION_PASS("print<block-freq>", BlockFrequencyPrinterPass(dbgs()))
 FUNCTION_PASS("print<branch-prob>", BranchProbabilityPrinterPass(dbgs()))
 FUNCTION_PASS("print<da>", DependenceAnalysisPrinterPass(dbgs()))
+FUNCTION_PASS("print<divergence>", DivergenceAnalysisPrinterPass(dbgs()))
 FUNCTION_PASS("print<domtree>", DominatorTreePrinterPass(dbgs()))
 FUNCTION_PASS("print<postdomtree>", PostDominatorTreePrinterPass(dbgs()))
 FUNCTION_PASS("print<delinearization>", DelinearizationPrinterPass(dbgs()))
@@ -1,4 +1,5 @@
-; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
 
 ; CHECK-LABEL: for function 'readfirstlane':
 define amdgpu_kernel void @readfirstlane() {
@@ -39,7 +40,7 @@ define i32 @asm_sgpr(i32 %divergent) {
   ret i32 %sgpr
 }
 
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'asm_mixed_sgpr_vgpr':
+; CHECK-LABEL: Divergence Analysis' for function 'asm_mixed_sgpr_vgpr':
 ; CHECK: DIVERGENT: %asm = call { i32, i32 } asm "; def $0, $1, $2", "=s,=v,v"(i32 %divergent)
 ; CHECK-NEXT: {{^[ \t]+}}%sgpr = extractvalue { i32, i32 } %asm, 0
 ; CHECK-NEXT: DIVERGENT: %vgpr = extractvalue { i32, i32 } %asm, 1
@@ -1,4 +1,5 @@
-; RUN: opt -mtriple=amdgcn-- -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
 
 ; CHECK: DIVERGENT: %orig = atomicrmw xchg i32* %ptr, i32 %val seq_cst
 define i32 @test1(i32* %ptr, i32 %val) #0 {
@@ -1,10 +1,11 @@
-; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
 
 declare i32 @gf2(i32)
 declare i32 @gf1(i32)
 
 define void @tw1(i32 addrspace(4)* noalias nocapture readonly %A, i32 addrspace(4)* noalias nocapture %B) local_unnamed_addr #2 {
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'tw1':
+; CHECK: Divergence Analysis' for function 'tw1':
 ; CHECK: DIVERGENT: i32 addrspace(4)* %A
 ; CHECK: DIVERGENT: i32 addrspace(4)* %B
 entry:
@@ -1,8 +1,9 @@
-; RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-mesa-mesa3d -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-mesa-mesa3d -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
 
 ; Tests control flow intrinsics that should be treated as uniform
 
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_if_break':
+; CHECK: Divergence Analysis' for function 'test_if_break':
 ; CHECK: DIVERGENT: %cond = icmp eq i32 %arg0, 0
 ; CHECK-NOT: DIVERGENT
 ; CHECK: ret void
@@ -14,7 +15,7 @@ entry:
   ret void
 }
 
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_if':
+; CHECK: Divergence Analysis' for function 'test_if':
 ; CHECK: DIVERGENT: %cond = icmp eq i32 %arg0, 0
 ; CHECK-NEXT: DIVERGENT: %if = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %cond)
 ; CHECK-NEXT: DIVERGENT: %if.bool = extractvalue { i1, i64 } %if, 0
@@ -33,7 +34,7 @@ entry:
 }
 
 ; The result should still be treated as divergent, even with a uniform source.
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_if_uniform':
+; CHECK: Divergence Analysis' for function 'test_if_uniform':
 ; CHECK-NOT: DIVERGENT
 ; CHECK: DIVERGENT: %if = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %cond)
 ; CHECK-NEXT: DIVERGENT: %if.bool = extractvalue { i1, i64 } %if, 0
@@ -51,7 +52,7 @@ entry:
   ret void
 }
 
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_loop_uniform':
+; CHECK: Divergence Analysis' for function 'test_loop_uniform':
 ; CHECK: DIVERGENT: %loop = call i1 @llvm.amdgcn.loop.i64(i64 %mask)
 define amdgpu_ps void @test_loop_uniform(i64 inreg %mask) {
 entry:
@@ -61,7 +62,7 @@ entry:
   ret void
 }
 
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_else':
+; CHECK: Divergence Analysis' for function 'test_else':
 ; CHECK: DIVERGENT: %else = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %mask)
 ; CHECK: DIVERGENT: %else.bool = extractvalue { i1, i64 } %else, 0
 ; CHECK: {{^[ \t]+}}%else.mask = extractvalue { i1, i64 } %else, 1
@@ -77,7 +78,7 @@ entry:
 }
 
 ; This case is probably always broken
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_else_divergent_mask':
+; CHECK: Divergence Analysis' for function 'test_else_divergent_mask':
 ; CHECK: DIVERGENT: %if = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %mask)
 ; CHECK-NEXT: DIVERGENT: %if.bool = extractvalue { i1, i64 } %if, 0
 ; CHECK-NOT: DIVERGENT
@@ -1,7 +1,7 @@
-; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
 
 define amdgpu_kernel void @hidden_diverge(i32 %n, i32 %a, i32 %b) #0 {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_diverge'
+; CHECK-LABEL: 'Divergence Analysis' for function 'hidden_diverge'
 entry:
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %cond.var = icmp slt i32 %tid, 0
@@ -22,7 +22,7 @@ merge:
 }
 
 define amdgpu_kernel void @hidden_loop_ipd(i32 %n, i32 %a, i32 %b) #0 {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_loop_ipd'
+; CHECK-LABEL: 'Divergence Analysis' for function 'hidden_loop_ipd'
 entry:
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %cond.var = icmp slt i32 %tid, 0
@@ -1,9 +1,10 @@
-; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
 
 ; divergent loop (H<header><exiting to X>, B<exiting to Y>)
 ; the divergent join point in %exit is obscured by uniform control joining in %X
 define amdgpu_kernel void @hidden_loop_diverge(i32 %n, i32 %a, i32 %b) #0 {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_loop_diverge':
+; CHECK-LABEL: Divergence Analysis' for function 'hidden_loop_diverge'
 ; CHECK-NOT: DIVERGENT: %uni.
 ; CHECK-NOT: DIVERGENT: br i1 %uni.
 
@@ -45,7 +46,7 @@ exit:
 ; divergent loop (H<header><exiting to X>, B<exiting to Y>)
 ; the phi nodes in X and Y don't actually receive divergent values
 define amdgpu_kernel void @unobserved_loop_diverge(i32 %n, i32 %a, i32 %b) #0 {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'unobserved_loop_diverge':
+; CHECK-LABEL: Divergence Analysis' for function 'unobserved_loop_diverge':
 ; CHECK-NOT: DIVERGENT: %uni.
 ; CHECK-NOT: DIVERGENT: br i1 %uni.
 
@@ -86,7 +87,7 @@ exit:
 ; the inner loop has no exit to top level.
 ; the outer loop becomes divergent as its exiting branch in C is control-dependent on the inner loop's divergent loop exit in D.
 define amdgpu_kernel void @hidden_nestedloop_diverge(i32 %n, i32 %a, i32 %b) #0 {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_nestedloop_diverge':
+; CHECK-LABEL: Divergence Analysis' for function 'hidden_nestedloop_diverge':
 ; CHECK-NOT: DIVERGENT: %uni.
 ; CHECK-NOT: DIVERGENT: br i1 %uni.
 
@@ -137,7 +138,7 @@ exit:
 ; the outer loop has no immediately divergent exiting edge.
 ; the inner exiting edge is exiting to top-level through the outer loop causing both to become divergent.
 define amdgpu_kernel void @hidden_doublebreak_diverge(i32 %n, i32 %a, i32 %b) #0 {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_doublebreak_diverge':
+; CHECK-LABEL: Divergence Analysis' for function 'hidden_doublebreak_diverge':
 ; CHECK-NOT: DIVERGENT: %uni.
 ; CHECK-NOT: DIVERGENT: br i1 %uni.
 
@@ -179,7 +180,7 @@ Y:
 
 ; divergent loop (G<header>, L<exiting to D>) contained inside a uniform loop (H<header>, B, G, L , D<exiting to x>)
 define amdgpu_kernel void @hidden_containedloop_diverge(i32 %n, i32 %a, i32 %b) #0 {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_containedloop_diverge':
+; CHECK-LABEL: Divergence Analysis' for function 'hidden_containedloop_diverge':
 ; CHECK-NOT: DIVERGENT: %uni.
 ; CHECK-NOT: DIVERGENT: br i1 %uni.
@@ -1,50 +1,52 @@
-; RUN: opt -mtriple=amdgcn-unknown-amdhsa -mcpu=tahiti -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
-; RUN: opt -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx908 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-unknown-amdhsa -mcpu=tahiti -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx908 -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -mcpu=tahiti -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -mcpu=gfx908 -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
 ; Make sure nothing crashes on targets with or without AGPRs
 
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_sgpr_virtreg_output':
+; CHECK: Divergence Analysis' for function 'inline_asm_1_sgpr_virtreg_output':
 ; CHECK-NOT: DIVERGENT
 define i32 @inline_asm_1_sgpr_virtreg_output() {
   %sgpr = call i32 asm "s_mov_b32 $0, 0", "=s"()
   ret i32 %sgpr
 }
 
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_sgpr_physreg_output':
+; CHECK: Divergence Analysis' for function 'inline_asm_1_sgpr_physreg_output':
 ; CHECK-NOT: DIVERGENT
 define i32 @inline_asm_1_sgpr_physreg_output() {
   %sgpr = call i32 asm "s_mov_b32 s0, 0", "={s0}"()
   ret i32 %sgpr
 }
 
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_vgpr_virtreg_output':
+; CHECK: Divergence Analysis' for function 'inline_asm_1_vgpr_virtreg_output':
 ; CHECK: DIVERGENT: %vgpr = call i32 asm "v_mov_b32 $0, 0", "=v"()
 define i32 @inline_asm_1_vgpr_virtreg_output() {
   %vgpr = call i32 asm "v_mov_b32 $0, 0", "=v"()
   ret i32 %vgpr
 }
 
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_vgpr_physreg_output':
+; CHECK: Divergence Analysis' for function 'inline_asm_1_vgpr_physreg_output':
 ; CHECK: DIVERGENT: %vgpr = call i32 asm "v_mov_b32 v0, 0", "={v0}"()
 define i32 @inline_asm_1_vgpr_physreg_output() {
   %vgpr = call i32 asm "v_mov_b32 v0, 0", "={v0}"()
   ret i32 %vgpr
 }
 
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_agpr_virtreg_output':
+; CHECK: Divergence Analysis' for function 'inline_asm_1_agpr_virtreg_output':
 ; CHECK: DIVERGENT: %vgpr = call i32 asm "; def $0", "=a"()
 define i32 @inline_asm_1_agpr_virtreg_output() {
   %vgpr = call i32 asm "; def $0", "=a"()
   ret i32 %vgpr
 }
 
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_agpr_physreg_output':
+; CHECK: Divergence Analysis' for function 'inline_asm_1_agpr_physreg_output':
 ; CHECK: DIVERGENT: %vgpr = call i32 asm "; def a0", "={a0}"()
 define i32 @inline_asm_1_agpr_physreg_output() {
   %vgpr = call i32 asm "; def a0", "={a0}"()
   ret i32 %vgpr
 }
 
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_2_sgpr_virtreg_output':
+; CHECK: Divergence Analysis' for function 'inline_asm_2_sgpr_virtreg_output':
 ; CHECK-NOT: DIVERGENT
 define void @inline_asm_2_sgpr_virtreg_output() {
   %asm = call { i32, i32 } asm "; def $0, $1", "=s,=s"()
@@ -56,7 +58,7 @@ define void @inline_asm_2_sgpr_virtreg_output() {
 }
 
 ; One output is SGPR, one is VGPR. Infer divergent for the aggregate, but uniform on the SGPR extract
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_sgpr_vgpr_virtreg_output':
+; CHECK: Divergence Analysis' for function 'inline_asm_sgpr_vgpr_virtreg_output':
 ; CHECK: DIVERGENT: %asm = call { i32, i32 } asm "; def $0, $1", "=s,=v"()
 ; CHECK-NEXT: {{^[ \t]+}}%sgpr = extractvalue { i32, i32 } %asm, 0
 ; CHECK-NEXT: DIVERGENT: %vgpr = extractvalue { i32, i32 } %asm, 1
@@ -69,7 +71,7 @@ define void @inline_asm_sgpr_vgpr_virtreg_output() {
   ret void
 }
 
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_vgpr_sgpr_virtreg_output':
+; CHECK: Divergence Analysis' for function 'inline_asm_vgpr_sgpr_virtreg_output':
 ; CHECK: DIVERGENT: %asm = call { i32, i32 } asm "; def $0, $1", "=v,=s"()
 ; CHECK-NEXT: DIVERGENT: %vgpr = extractvalue { i32, i32 } %asm, 0
 ; CHECK-NEXT: {{^[ \t]+}}%sgpr = extractvalue { i32, i32 } %asm, 1
@@ -83,7 +85,7 @@ define void @inline_asm_vgpr_sgpr_virtreg_output() {
 }
 
 ; Have an extra output constraint
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'multi_sgpr_inline_asm_output_input_constraint':
+; CHECK: Divergence Analysis' for function 'multi_sgpr_inline_asm_output_input_constraint':
 ; CHECK-NOT: DIVERGENT
 define void @multi_sgpr_inline_asm_output_input_constraint() {
   %asm = call { i32, i32 } asm "; def $0, $1", "=s,=s,s"(i32 1234)
@@ -94,7 +96,7 @@ define void @multi_sgpr_inline_asm_output_input_constraint() {
   ret void
 }
 
-; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_vgpr_sgpr_virtreg_output_input_constraint':
+; CHECK: Divergence Analysis' for function 'inline_asm_vgpr_sgpr_virtreg_output_input_constraint':
 ; CHECK: DIVERGENT: %asm = call { i32, i32 } asm "; def $0, $1", "=v,=s,v"(i32 1234)
 ; CHECK-NEXT: DIVERGENT: %vgpr = extractvalue { i32, i32 } %asm, 0
 ; CHECK-NEXT: {{^[ \t]+}}%sgpr = extractvalue { i32, i32 } %asm, 1
@@ -1,4 +1,5 @@
-; RUN: opt -mtriple=amdgcn-- -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
 
 ; CHECK: for function 'interp_p1_f16'
 ; CHECK: DIVERGENT: %p1 = call float @llvm.amdgcn.interp.p1.f16
@@ -1,4 +1,5 @@
-; RUN: opt -mtriple=amdgcn-- -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
 
 ; CHECK: DIVERGENT: %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0
 define amdgpu_kernel void @ds_swizzle(i32 addrspace(1)* %out, i32 %src) #0 {
@@ -1,4 +1,14 @@
-; RUN: opt -mtriple=amdgcn-- -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt %s -mtriple amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+; RUN: opt %s -mtriple amdgcn-- -passes='print<divergence>' -disable-output 2>&1 | FileCheck %s
+
+; NOTE: The new pass manager does not fall back on legacy divergence
+; analysis even when the function contains an irreducible loop. The
+; (new) divergence analysis conservatively reports all values as
+; divergent. This test does not check for this conservative
+; behaviour. Instead, it only checks for the values that are known to
+; be divergent according to the legacy analysis.
+
+; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
 
 ; This test contains an unstructured loop.
 ; +-------------- entry ----------------+
@@ -14,7 +24,7 @@
 ;     if (i3 == 5) // divergent
 ; because sync dependent on (tid / i3).
 define i32 @unstructured_loop(i1 %entry_cond) {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'unstructured_loop'
+; CHECK-LABEL: Divergence Analysis' for function 'unstructured_loop'
 entry:
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   br i1 %entry_cond, label %loop_entry_1, label %loop_entry_2
@@ -1,4 +1,5 @@
-; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
 
 ; CHECK: bb3:
 ; CHECK: DIVERGENT: %Guard.bb4 = phi i1 [ true, %bb1 ], [ false, %bb2 ]
@@ -1,6 +1,7 @@
-; RUN: opt %s -mtriple amdgcn-- -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+; RUN: opt %s -mtriple amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
 
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'test_amdgpu_ps':
+; CHECK-LABEL: Divergence Analysis' for function 'test_amdgpu_ps':
 ; CHECK: DIVERGENT: [4 x <16 x i8>] addrspace(4)* %arg0
 ; CHECK-NOT: DIVERGENT
 ; CHECK: DIVERGENT: <2 x i32> %arg3
@@ -12,7 +13,7 @@ define amdgpu_ps void @test_amdgpu_ps([4 x <16 x i8>] addrspace(4)* byref([4 x <
   ret void
 }
 
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'test_amdgpu_kernel':
+; CHECK-LABEL: Divergence Analysis' for function 'test_amdgpu_kernel':
 ; CHECK-NOT: %arg0
 ; CHECK-NOT: %arg1
 ; CHECK-NOT: %arg2
@@ -24,7 +25,7 @@ define amdgpu_kernel void @test_amdgpu_kernel([4 x <16 x i8>] addrspace(4)* byre
   ret void
 }
 
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'test_c':
+; CHECK-LABEL: Divergence Analysis' for function 'test_c':
 ; CHECK: DIVERGENT:
 ; CHECK: DIVERGENT:
 ; CHECK: DIVERGENT:
@@ -1,4 +1,5 @@
-;RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+;RUN: opt -mtriple=amdgcn-mesa-mesa3d -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-mesa-mesa3d -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
 
 ;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.swap.i32(
 define float @buffer_atomic_swap(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
@@ -1,4 +1,5 @@
-;RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+;RUN: opt -mtriple=amdgcn-mesa-mesa3d -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-mesa-mesa3d -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
 
 ;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(
 define float @image_atomic_swap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
@@ -1,4 +1,5 @@
-; RUN: opt %s -mtriple amdgcn-- -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+; RUN: opt %s -mtriple amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
 
 ; CHECK: DIVERGENT: %tmp5 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp2
 ; CHECK: DIVERGENT: %tmp10 = load volatile float, float addrspace(1)* %tmp5, align 4
@@ -1,4 +1,5 @@
-; RUN: opt -mtriple=amdgcn-- -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
 
 ; CHECK-LABEL: 'test1':
 ; CHECK-NEXT: DIVERGENT: i32 %bound
@@ -1,4 +1,5 @@
-; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
 
 ; CHECK: bb6:
 ; CHECK: DIVERGENT: %.126.i355.i = phi i1 [ false, %bb5 ], [ true, %bb4 ]
@@ -1,8 +1,9 @@
-; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
 
 ; temporal-divergent use of value carried by divergent loop
 define amdgpu_kernel void @temporal_diverge(i32 %n, i32 %a, i32 %b) #0 {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'temporal_diverge':
+; CHECK-LABEL: Divergence Analysis' for function 'temporal_diverge':
 ; CHECK-NOT: DIVERGENT: %uni.
 ; CHECK-NOT: DIVERGENT: br i1 %uni.
 
@@ -26,7 +27,7 @@ X:
 
 ; temporal-divergent use of value carried by divergent loop inside a top-level loop
 define amdgpu_kernel void @temporal_diverge_inloop(i32 %n, i32 %a, i32 %b) #0 {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'temporal_diverge_inloop':
+; CHECK-LABEL: Divergence Analysis' for function 'temporal_diverge_inloop':
 ; CHECK-NOT: DIVERGENT: %uni.
 ; CHECK-NOT: DIVERGENT: br i1 %uni.
 
@@ -58,7 +59,7 @@ Y:
 
 ; temporal-uniform use of a valud, definition and users are carried by a surrounding divergent loop
 define amdgpu_kernel void @temporal_uniform_indivloop(i32 %n, i32 %a, i32 %b) #0 {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'temporal_uniform_indivloop':
+; CHECK-LABEL: Divergence Analysis' for function 'temporal_uniform_indivloop':
 ; CHECK-NOT: DIVERGENT: %uni.
 ; CHECK-NOT: DIVERGENT: br i1 %uni.
 
@@ -90,7 +91,7 @@ Y:
 
 ; temporal-divergent use of value carried by divergent loop, user is inside sibling loop
 define amdgpu_kernel void @temporal_diverge_loopuser(i32 %n, i32 %a, i32 %b) #0 {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'temporal_diverge_loopuser':
+; CHECK-LABEL: Divergence Analysis' for function 'temporal_diverge_loopuser':
 ; CHECK-NOT: DIVERGENT: %uni.
 ; CHECK-NOT: DIVERGENT: br i1 %uni.
 
@@ -120,7 +121,7 @@ Y:
 
 ; temporal-divergent use of value carried by divergent loop, user is inside sibling loop, defs and use are carried by a uniform loop
 define amdgpu_kernel void @temporal_diverge_loopuser_nested(i32 %n, i32 %a, i32 %b) #0 {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'temporal_diverge_loopuser_nested':
+; CHECK-LABEL: Divergence Analysis' for function 'temporal_diverge_loopuser_nested':
 ; CHECK-NOT: DIVERGENT: %uni.
 ; CHECK-NOT: DIVERGENT: br i1 %uni.
 
@@ -1,4 +1,5 @@
-; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
 
 ; CHECK: bb2:
 ; CHECK-NOT: DIVERGENT: %Guard.bb2 = phi i1 [ true, %bb1 ], [ false, %bb0 ]
@@ -1,4 +1,5 @@
-; RUN: opt %s -mtriple amdgcn-- -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+; RUN: opt %s -mtriple amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
 
 ; CHECK: DIVERGENT: %tmp = cmpxchg volatile
 define amdgpu_kernel void @unreachable_loop(i32 %tidx) #0 {
@@ -1,4 +1,5 @@
-; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
 
 declare i32 @llvm.amdgcn.workitem.id.x() #0
 declare i32 @llvm.amdgcn.workitem.id.y() #0
@@ -1,10 +1,11 @@
-; RUN: opt %s -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+; RUN: opt %s -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+; RUN: opt %s -passes='print<divergence>' -disable-output 2>&1 | FileCheck %s
 
 target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
 target triple = "nvptx64-nvidia-cuda"
 
 define i32 @daorder(i32 %n) {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'daorder'
+; CHECK-LABEL: Divergence Analysis' for function 'daorder'
 entry:
   %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
   %cond = icmp slt i32 %tid, 0
@@ -1,11 +1,12 @@
-; RUN: opt %s -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+; RUN: opt %s -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+; RUN: opt %s -passes='print<divergence>' -disable-output 2>&1 | FileCheck %s
 
 target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
 target triple = "nvptx64-nvidia-cuda"
 
 ; return (n < 0 ? a + threadIdx.x : b + threadIdx.x)
 define i32 @no_diverge(i32 %n, i32 %a, i32 %b) {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'no_diverge'
+; CHECK-LABEL: Divergence Analysis' for function 'no_diverge'
 entry:
   %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
   %cond = icmp slt i32 %n, 0
@@ -27,7 +28,7 @@ merge:
 ;   c = b;
 ; return c; // c is divergent: sync dependent
 define i32 @sync(i32 %a, i32 %b) {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'sync'
+; CHECK-LABEL: Divergence Analysis' for function 'sync'
 bb1:
   %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
   %cond = icmp slt i32 %tid, 5
@@ -48,7 +49,7 @@ bb3:
 ;   // c here is divergent because it is sync dependent on threadIdx.x >= 5
 ; return c;
 define i32 @mixed(i32 %n, i32 %a, i32 %b) {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'mixed'
+; CHECK-LABEL: Divergence Analysis' for function 'mixed'
 bb1:
   %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
   %cond = icmp slt i32 %tid, 5
@@ -73,7 +74,7 @@ bb6:
 
 ; We conservatively treats all parameters of a __device__ function as divergent.
 define i32 @device(i32 %n, i32 %a, i32 %b) {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'device'
+; CHECK-LABEL: Divergence Analysis' for function 'device'
 ; CHECK: DIVERGENT: i32 %n
 ; CHECK: DIVERGENT: i32 %a
 ; CHECK: DIVERGENT: i32 %b
@@ -98,7 +99,7 @@ merge:
 ;
 ; The i defined in the loop is used outside.
 define i32 @loop() {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'loop'
+; CHECK-LABEL: Divergence Analysis' for function 'loop'
 entry:
   %laneid = call i32 @llvm.nvvm.read.ptx.sreg.laneid()
   br label %loop
@@ -120,7 +121,7 @@ else:
 
 ; Same as @loop, but the loop is in the LCSSA form.
 define i32 @lcssa() {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'lcssa'
+; CHECK-LABEL: Divergence Analysis' for function 'lcssa'
 entry:
   %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
   br label %loop
@@ -1,10 +1,11 @@
-; RUN: opt %s -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+; RUN: opt %s -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+; RUN: opt %s -passes='print<divergence>' -disable-output 2>&1 | FileCheck %s
 
 target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
 target triple = "nvptx64-nvidia-cuda"
 
 define i32 @hidden_diverge(i32 %n, i32 %a, i32 %b) {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_diverge'
+; CHECK-LABEL: Divergence Analysis' for function 'hidden_diverge'
 entry:
   %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
   %cond.var = icmp slt i32 %tid, 0
@@ -1,4 +1,12 @@
-; RUN: opt %s -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+; RUN: opt %s -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
+; RUN: opt %s -passes='print<divergence>' -disable-output 2>&1 | FileCheck %s
+
+; NOTE: The new pass manager does not fall back on legacy divergence
+; analysis even when the function contains an irreducible loop. The
+; (new) divergence analysis conservatively reports all values as
+; divergent. This test does not check for this conservative
+; behaviour. Instead, it only checks for the values that are known to
+; be divergent according to the legacy analysis.
 
 target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
 target triple = "nvptx64-nvidia-cuda"
@@ -17,7 +25,7 @@ target triple = "nvptx64-nvidia-cuda"
 ;     if (i3 == 5) // divergent
 ; because sync dependent on (tid / i3).
 define i32 @unstructured_loop(i1 %entry_cond) {
-; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'unstructured_loop'
+; CHECK-LABEL: Divergence Analysis' for function 'unstructured_loop'
 entry:
   %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
   br i1 %entry_cond, label %loop_entry_1, label %loop_entry_2
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence %s | FileCheck %s
 
 ; CHECK: DIVERGENT: %orig = atomicrmw xchg i32* %ptr, i32 %val seq_cst
 define i32 @test1(i32* %ptr, i32 %val) #0 {
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=amdgcn-- -analyze -amdgpu-use-legacy-divergence-analysis -divergence %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-- -enable-new-pm=0 -analyze -amdgpu-use-legacy-divergence-analysis -divergence %s | FileCheck %s
 
 ; CHECK: DIVERGENT: %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0
 define amdgpu_kernel void @ds_swizzle(i32 addrspace(1)* %out, i32 %src) #0 {
@@ -1,4 +1,4 @@
-; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence | FileCheck %s
+; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence | FileCheck %s
 
 ; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'test_amdgpu_ps':
 ; CHECK: DIVERGENT: [4 x <16 x i8>] addrspace(4)* %arg0
@@ -1,4 +1,4 @@
-;RUN: opt -mtriple=amdgcn-mesa-mesa3d -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s
+;RUN: opt -mtriple=amdgcn-mesa-mesa3d -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence %s | FileCheck %s
 
 ;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.swap.i32(
 define float @buffer_atomic_swap(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
@@ -1,4 +1,4 @@
-;RUN: opt -mtriple=amdgcn-mesa-mesa3d -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s
+;RUN: opt -mtriple=amdgcn-mesa-mesa3d -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence %s | FileCheck %s
 
 ;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(
 define float @image_atomic_swap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence %s | FileCheck %s
 
 ; Test that we consider loads from flat and private addrspaces to be divergent.
 
@@ -1,4 +1,4 @@
-; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence | FileCheck %s
+; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence | FileCheck %s
 
 ; CHECK: DIVERGENT: %tmp5 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp2
 ; CHECK: DIVERGENT: %tmp10 = load volatile float, float addrspace(1)* %tmp5, align 4
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence %s | FileCheck %s
 
 ; CHECK-LABEL: 'test1':
 ; CHECK-NEXT: DIVERGENT: i32 %bound
@@ -1,4 +1,4 @@
-; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence | FileCheck %s
+; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence | FileCheck %s
 
 ; CHECK: DIVERGENT: %tmp = cmpxchg volatile
 define amdgpu_kernel void @unreachable_loop(i32 %tidx) #0 {
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple amdgcn-unknown-amdhsa -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence %s | FileCheck %s
 
 declare i32 @llvm.amdgcn.workitem.id.x() #0
 declare i32 @llvm.amdgcn.workitem.id.y() #0
@@ -1,4 +1,4 @@
-; RUN: opt %s -analyze -divergence | FileCheck %s
+; RUN: opt %s -enable-new-pm=0 -analyze -divergence | FileCheck %s
 
 target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
 target triple = "nvptx64-nvidia-cuda"
@@ -496,7 +496,7 @@ static bool shouldPinPassToLegacyPM(StringRef Pass) {
       "safe-stack", "cost-model",
       "codegenprepare", "interleaved-load-combine",
       "unreachableblockelim", "verify-safepoint-ir",
-      "divergence", "atomic-expand",
+      "atomic-expand",
       "hardware-loops", "type-promotion",
       "mve-tail-predication", "interleaved-access",
       "global-merge", "pre-isel-intrinsic-lowering",
@@ -38,7 +38,7 @@ BasicBlock *GetBlockByName(StringRef BlockName, Function &F) {
   return nullptr;
 }
 
-// We use this fixture to ensure that we clean up DivergenceAnalysis before
+// We use this fixture to ensure that we clean up DivergenceAnalysisImpl before
 // deleting the PassManager.
 class DivergenceAnalysisTest : public testing::Test {
 protected:
@@ -54,21 +54,21 @@ protected:
 
   DivergenceAnalysisTest() : M("", Context), TLII(), TLI(TLII) {}
 
-  DivergenceAnalysis buildDA(Function &F, bool IsLCSSA) {
+  DivergenceAnalysisImpl buildDA(Function &F, bool IsLCSSA) {
     DT.reset(new DominatorTree(F));
     PDT.reset(new PostDominatorTree(F));
     LI.reset(new LoopInfo(*DT));
     SDA.reset(new SyncDependenceAnalysis(*DT, *PDT, *LI));
-    return DivergenceAnalysis(F, nullptr, *DT, *LI, *SDA, IsLCSSA);
+    return DivergenceAnalysisImpl(F, nullptr, *DT, *LI, *SDA, IsLCSSA);
   }
 
   void runWithDA(
       Module &M, StringRef FuncName, bool IsLCSSA,
-      function_ref<void(Function &F, LoopInfo &LI, DivergenceAnalysis &DA)>
+      function_ref<void(Function &F, LoopInfo &LI, DivergenceAnalysisImpl &DA)>
          Test) {
     auto *F = M.getFunction(FuncName);
     ASSERT_NE(F, nullptr) << "Could not find " << FuncName;
-    DivergenceAnalysis DA = buildDA(*F, IsLCSSA);
+    DivergenceAnalysisImpl DA = buildDA(*F, IsLCSSA);
     Test(*F, *LI, DA);
   }
 };
@@ -82,7 +82,7 @@ TEST_F(DivergenceAnalysisTest, DAInitialState) {
   BasicBlock *BB = BasicBlock::Create(Context, "entry", F);
   ReturnInst::Create(Context, nullptr, BB);
 
-  DivergenceAnalysis DA = buildDA(*F, false);
+  DivergenceAnalysisImpl DA = buildDA(*F, false);
 
   // Whole function region
   EXPECT_EQ(DA.getRegionLoop(), nullptr);
@@ -135,7 +135,7 @@ TEST_F(DivergenceAnalysisTest, DANoLCSSA) {
                                  Err, C);
 
   Function *F = M->getFunction("f_1");
-  DivergenceAnalysis DA = buildDA(*F, false);
+  DivergenceAnalysisImpl DA = buildDA(*F, false);
   EXPECT_FALSE(DA.hasDetectedDivergence());
 
   auto ItArg = F->arg_begin();
@@ -189,7 +189,7 @@ TEST_F(DivergenceAnalysisTest, DALCSSA) {
                                  Err, C);
 
   Function *F = M->getFunction("f_lcssa");
-  DivergenceAnalysis DA = buildDA(*F, true);
+  DivergenceAnalysisImpl DA = buildDA(*F, true);
   EXPECT_FALSE(DA.hasDetectedDivergence());
 
   auto ItArg = F->arg_begin();