
[NewPM] Introduce (GPU)DivergenceAnalysis in the new pass manager

The GPUDivergenceAnalysis is now renamed to just "DivergenceAnalysis"
since there is no conflict with LegacyDivergenceAnalysis. In the
legacy PM, this analysis can only be used through the legacy DA
serving as a wrapper. It is now made available as a pass in the new
PM, independent of the legacy DA.

The new DA currently cannot handle irreducible control flow; its
presence can cause the analysis to run indefinitely. The analysis is
now modified to detect this and report all instructions in the
function as divergent. This is super conservative, but allows the
analysis to be used without hanging the compiler.

Reviewed By: aeubanks

Differential Revision: https://reviews.llvm.org/D96615
Author: Sameer Sahasrabuddhe
Date: 2021-02-16 10:26:45 +05:30
parent 4bfb393b4c
commit 76fb79614f
44 changed files with 263 additions and 169 deletions
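
A minimal sketch of how a new-PM function pass would query this analysis,
modeled on the DivergenceAnalysisPrinterPass added below; the enclosing
CountDivergentPass is hypothetical:

#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

struct CountDivergentPass : PassInfoMixin<CountDivergentPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM) {
    // DivergenceAnalysis::Result is DivergenceInfo; for a function with an
    // irreducible CFG it conservatively reports every value as divergent.
    DivergenceInfo &DI = FAM.getResult<DivergenceAnalysis>(F);
    unsigned NumDivergent = 0;
    for (Instruction &I : instructions(F))
      if (DI.isDivergent(I))
        ++NumDivergent; // a real client would act on divergent values here
    (void)NumDivergent;
    return PreservedAnalyses::all(); // analysis-only: nothing invalidated
  }
};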


@ -34,7 +34,7 @@ class TargetTransformInfo;
/// This analysis propagates divergence in a data-parallel context from sources
/// of divergence to all users. It requires reducible CFGs. All assignments
/// should be in SSA form.
class DivergenceAnalysis {
class DivergenceAnalysisImpl {
public:
/// \brief This instance will analyze the whole function \p F or the loop \p
/// RegionLoop.
@ -43,9 +43,9 @@ public:
/// Otherwise the whole function is analyzed.
/// \param IsLCSSAForm whether the analysis may assume that the IR in the
/// region is in LCSSA form.
DivergenceAnalysis(const Function &F, const Loop *RegionLoop,
const DominatorTree &DT, const LoopInfo &LI,
SyncDependenceAnalysis &SDA, bool IsLCSSAForm);
DivergenceAnalysisImpl(const Function &F, const Loop *RegionLoop,
const DominatorTree &DT, const LoopInfo &LI,
SyncDependenceAnalysis &SDA, bool IsLCSSAForm);
/// \brief The loop that defines the analyzed region (if any).
const Loop *getRegionLoop() const { return RegionLoop; }
@ -82,8 +82,6 @@ public:
/// divergent.
bool isDivergentUse(const Use &U) const;
void print(raw_ostream &OS, const Module *) const;
private:
/// \brief Mark \p Term as divergent and push all Instructions that become
/// divergent as a result on the worklist.
@ -152,28 +150,39 @@ private:
std::vector<const Instruction *> Worklist;
};
/// \brief Divergence analysis frontend for GPU kernels.
class GPUDivergenceAnalysis {
SyncDependenceAnalysis SDA;
DivergenceAnalysis DA;
class DivergenceInfo {
Function &F;
// If the function contains an irreducible region the divergence
// analysis can run indefinitely. We set ContainsIrreducible and no
// analysis is actually performed on the function. All values in
// this function are conservatively reported as divergent instead.
bool ContainsIrreducible;
std::unique_ptr<SyncDependenceAnalysis> SDA;
std::unique_ptr<DivergenceAnalysisImpl> DA;
public:
/// Runs the divergence analysis on @F, a GPU kernel
GPUDivergenceAnalysis(Function &F, const DominatorTree &DT,
const PostDominatorTree &PDT, const LoopInfo &LI,
const TargetTransformInfo &TTI);
DivergenceInfo(Function &F, const DominatorTree &DT,
const PostDominatorTree &PDT, const LoopInfo &LI,
const TargetTransformInfo &TTI, bool KnownReducible);
/// Whether any divergence was detected.
bool hasDivergence() const { return DA.hasDetectedDivergence(); }
bool hasDivergence() const {
return ContainsIrreducible || DA->hasDetectedDivergence();
}
/// The GPU kernel this analysis result is for
const Function &getFunction() const { return DA.getFunction(); }
const Function &getFunction() const { return F; }
/// Whether \p V is divergent at its definition.
bool isDivergent(const Value &V) const;
bool isDivergent(const Value &V) const {
return ContainsIrreducible || DA->isDivergent(V);
}
/// Whether \p U is divergent. Uses of a uniform value can be divergent.
bool isDivergentUse(const Use &U) const;
bool isDivergentUse(const Use &U) const {
return ContainsIrreducible || DA->isDivergentUse(U);
}
/// Whether \p V is uniform/non-divergent.
bool isUniform(const Value &V) const { return !isDivergent(V); }
@ -181,11 +190,32 @@ public:
/// Whether \p U is uniform/non-divergent. Uses of a uniform value can be
/// divergent.
bool isUniformUse(const Use &U) const { return !isDivergentUse(U); }
/// Print all divergent values in the kernel.
void print(raw_ostream &OS, const Module *) const;
};
/// \brief Divergence analysis frontend for GPU kernels.
class DivergenceAnalysis : public AnalysisInfoMixin<DivergenceAnalysis> {
friend AnalysisInfoMixin<DivergenceAnalysis>;
static AnalysisKey Key;
public:
using Result = DivergenceInfo;
/// Runs the divergence analysis on @F, a GPU kernel
Result run(Function &F, FunctionAnalysisManager &AM);
};
/// Printer pass to dump divergence analysis results.
struct DivergenceAnalysisPrinterPass
: public PassInfoMixin<DivergenceAnalysisPrinterPass> {
DivergenceAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {}
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
private:
raw_ostream &OS;
}; // class DivergenceAnalysisPrinterPass
} // namespace llvm
#endif // LLVM_ANALYSIS_DIVERGENCEANALYSIS_H


@ -20,8 +20,8 @@
#include <memory>
namespace llvm {
class DivergenceInfo;
class Function;
class GPUDivergenceAnalysis;
class Module;
class raw_ostream;
class TargetTransformInfo;
@ -63,7 +63,7 @@ private:
const TargetTransformInfo &TTI) const;
// (optional) handle to new DivergenceAnalysis
std::unique_ptr<GPUDivergenceAnalysis> gpuDA;
std::unique_ptr<DivergenceInfo> gpuDA;
// Stores all divergent values.
DenseSet<const Value *> DivergentValues;


@ -31,10 +31,10 @@
// Ralf Karrenberg and Sebastian Hack
// CC '12
//
// This DivergenceAnalysis implementation is generic in the sense that it does
// This implementation is generic in the sense that it does
// not itself identify original sources of divergence.
// Instead specialized adapter classes, (LoopDivergenceAnalysis) for loops and
// (GPUDivergenceAnalysis) for GPU programs, identify the sources of divergence
// (DivergenceAnalysis) for functions, identify the sources of divergence
// (e.g., special variables that hold the thread ID or the iteration variable).
//
// The generic implementation propagates divergence to variables that are data
@ -61,7 +61,7 @@
// The sync dependence detection (which branch induces divergence in which join
// points) is implemented in the SyncDependenceAnalysis.
//
// The current DivergenceAnalysis implementation has the following limitations:
// The current implementation has the following limitations:
// 1. intra-procedural. It conservatively considers the arguments of a
// non-kernel-entry function and the return value of a function call as
// divergent.
@ -73,6 +73,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/PostDominators.h"
@ -87,16 +88,15 @@
using namespace llvm;
#define DEBUG_TYPE "divergence-analysis"
#define DEBUG_TYPE "divergence"
// class DivergenceAnalysis
DivergenceAnalysis::DivergenceAnalysis(
DivergenceAnalysisImpl::DivergenceAnalysisImpl(
const Function &F, const Loop *RegionLoop, const DominatorTree &DT,
const LoopInfo &LI, SyncDependenceAnalysis &SDA, bool IsLCSSAForm)
: F(F), RegionLoop(RegionLoop), DT(DT), LI(LI), SDA(SDA),
IsLCSSAForm(IsLCSSAForm) {}
bool DivergenceAnalysis::markDivergent(const Value &DivVal) {
bool DivergenceAnalysisImpl::markDivergent(const Value &DivVal) {
if (isAlwaysUniform(DivVal))
return false;
assert(isa<Instruction>(DivVal) || isa<Argument>(DivVal));
@ -104,12 +104,12 @@ bool DivergenceAnalysis::markDivergent(const Value &DivVal) {
return DivergentValues.insert(&DivVal).second;
}
void DivergenceAnalysis::addUniformOverride(const Value &UniVal) {
void DivergenceAnalysisImpl::addUniformOverride(const Value &UniVal) {
UniformOverrides.insert(&UniVal);
}
bool DivergenceAnalysis::isTemporalDivergent(const BasicBlock &ObservingBlock,
const Value &Val) const {
bool DivergenceAnalysisImpl::isTemporalDivergent(
const BasicBlock &ObservingBlock, const Value &Val) const {
const auto *Inst = dyn_cast<const Instruction>(&Val);
if (!Inst)
return false;
@ -125,15 +125,15 @@ bool DivergenceAnalysis::isTemporalDivergent(const BasicBlock &ObservingBlock,
return false;
}
bool DivergenceAnalysis::inRegion(const Instruction &I) const {
bool DivergenceAnalysisImpl::inRegion(const Instruction &I) const {
return I.getParent() && inRegion(*I.getParent());
}
bool DivergenceAnalysis::inRegion(const BasicBlock &BB) const {
bool DivergenceAnalysisImpl::inRegion(const BasicBlock &BB) const {
return (!RegionLoop && BB.getParent() == &F) || RegionLoop->contains(&BB);
}
void DivergenceAnalysis::pushUsers(const Value &V) {
void DivergenceAnalysisImpl::pushUsers(const Value &V) {
const auto *I = dyn_cast<const Instruction>(&V);
if (I && I->isTerminator()) {
@ -166,8 +166,8 @@ static const Instruction *getIfCarriedInstruction(const Use &U,
return I;
}
void DivergenceAnalysis::analyzeTemporalDivergence(const Instruction &I,
const Loop &OuterDivLoop) {
void DivergenceAnalysisImpl::analyzeTemporalDivergence(
const Instruction &I, const Loop &OuterDivLoop) {
if (isAlwaysUniform(I))
return;
if (isDivergent(I))
@ -188,8 +188,8 @@ void DivergenceAnalysis::analyzeTemporalDivergence(const Instruction &I,
// marks all users of loop-carried values of the loop headed by LoopHeader as
// divergent
void DivergenceAnalysis::analyzeLoopExitDivergence(const BasicBlock &DivExit,
const Loop &OuterDivLoop) {
void DivergenceAnalysisImpl::analyzeLoopExitDivergence(
const BasicBlock &DivExit, const Loop &OuterDivLoop) {
// All users are in immediate exit blocks
if (IsLCSSAForm) {
for (const auto &Phi : DivExit.phis()) {
@ -242,8 +242,8 @@ void DivergenceAnalysis::analyzeLoopExitDivergence(const BasicBlock &DivExit,
} while (!TaintStack.empty());
}
void DivergenceAnalysis::propagateLoopExitDivergence(const BasicBlock &DivExit,
const Loop &InnerDivLoop) {
void DivergenceAnalysisImpl::propagateLoopExitDivergence(
const BasicBlock &DivExit, const Loop &InnerDivLoop) {
LLVM_DEBUG(dbgs() << "\tpropLoopExitDiv " << DivExit.getName() << "\n");
// Find outer-most loop that does not contain \p DivExit
@ -265,7 +265,7 @@ void DivergenceAnalysis::propagateLoopExitDivergence(const BasicBlock &DivExit,
// this is a divergent join point - mark all phi nodes as divergent and push
// them onto the stack.
void DivergenceAnalysis::taintAndPushPhiNodes(const BasicBlock &JoinBlock) {
void DivergenceAnalysisImpl::taintAndPushPhiNodes(const BasicBlock &JoinBlock) {
LLVM_DEBUG(dbgs() << "taintAndPushPhiNodes in " << JoinBlock.getName()
<< "\n");
@ -287,7 +287,7 @@ void DivergenceAnalysis::taintAndPushPhiNodes(const BasicBlock &JoinBlock) {
}
}
void DivergenceAnalysis::analyzeControlDivergence(const Instruction &Term) {
void DivergenceAnalysisImpl::analyzeControlDivergence(const Instruction &Term) {
LLVM_DEBUG(dbgs() << "analyzeControlDiv " << Term.getParent()->getName()
<< "\n");
@ -310,7 +310,7 @@ void DivergenceAnalysis::analyzeControlDivergence(const Instruction &Term) {
}
}
void DivergenceAnalysis::compute() {
void DivergenceAnalysisImpl::compute() {
// Initialize worklist.
auto DivValuesCopy = DivergentValues;
for (const auto *DivVal : DivValuesCopy) {
@ -330,63 +330,82 @@ void DivergenceAnalysis::compute() {
}
}
bool DivergenceAnalysis::isAlwaysUniform(const Value &V) const {
bool DivergenceAnalysisImpl::isAlwaysUniform(const Value &V) const {
return UniformOverrides.contains(&V);
}
bool DivergenceAnalysis::isDivergent(const Value &V) const {
bool DivergenceAnalysisImpl::isDivergent(const Value &V) const {
return DivergentValues.contains(&V);
}
bool DivergenceAnalysis::isDivergentUse(const Use &U) const {
bool DivergenceAnalysisImpl::isDivergentUse(const Use &U) const {
Value &V = *U.get();
Instruction &I = *cast<Instruction>(U.getUser());
return isDivergent(V) || isTemporalDivergent(*I.getParent(), V);
}
void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
if (DivergentValues.empty())
return;
// iterate instructions using instructions() to ensure a deterministic order.
for (auto &I : instructions(F)) {
if (isDivergent(I))
OS << "DIVERGENT:" << I << '\n';
DivergenceInfo::DivergenceInfo(Function &F, const DominatorTree &DT,
const PostDominatorTree &PDT, const LoopInfo &LI,
const TargetTransformInfo &TTI,
bool KnownReducible)
: F(F), ContainsIrreducible(false) {
if (!KnownReducible) {
using RPOTraversal = ReversePostOrderTraversal<const Function *>;
RPOTraversal FuncRPOT(&F);
if (containsIrreducibleCFG<const BasicBlock *, const RPOTraversal,
const LoopInfo>(FuncRPOT, LI)) {
ContainsIrreducible = true;
return;
}
}
}
// class GPUDivergenceAnalysis
GPUDivergenceAnalysis::GPUDivergenceAnalysis(Function &F,
const DominatorTree &DT,
const PostDominatorTree &PDT,
const LoopInfo &LI,
const TargetTransformInfo &TTI)
: SDA(DT, PDT, LI), DA(F, nullptr, DT, LI, SDA, /* LCSSA */ false) {
SDA = std::make_unique<SyncDependenceAnalysis>(DT, PDT, LI);
DA = std::make_unique<DivergenceAnalysisImpl>(F, nullptr, DT, LI, *SDA,
/* LCSSA */ false);
for (auto &I : instructions(F)) {
if (TTI.isSourceOfDivergence(&I)) {
DA.markDivergent(I);
DA->markDivergent(I);
} else if (TTI.isAlwaysUniform(&I)) {
DA.addUniformOverride(I);
DA->addUniformOverride(I);
}
}
for (auto &Arg : F.args()) {
if (TTI.isSourceOfDivergence(&Arg)) {
DA.markDivergent(Arg);
DA->markDivergent(Arg);
}
}
DA.compute();
DA->compute();
}
bool GPUDivergenceAnalysis::isDivergent(const Value &val) const {
return DA.isDivergent(val);
AnalysisKey DivergenceAnalysis::Key;
DivergenceAnalysis::Result
DivergenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
auto &LI = AM.getResult<LoopAnalysis>(F);
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
return DivergenceInfo(F, DT, PDT, LI, TTI, /* KnownReducible = */ false);
}
bool GPUDivergenceAnalysis::isDivergentUse(const Use &use) const {
return DA.isDivergentUse(use);
}
void GPUDivergenceAnalysis::print(raw_ostream &OS, const Module *mod) const {
OS << "Divergence of kernel " << DA.getFunction().getName() << " {\n";
DA.print(OS, mod);
OS << "}\n";
PreservedAnalyses
DivergenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) {
auto &DI = FAM.getResult<DivergenceAnalysis>(F);
OS << "'Divergence Analysis' for function '" << F.getName() << "':\n";
if (DI.hasDivergence()) {
for (auto &Arg : F.args()) {
OS << (DI.isDivergent(Arg) ? "DIVERGENT: " : " ");
OS << Arg << "\n";
}
for (auto BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
auto &BB = *BI;
OS << "\n " << BB.getName() << ":\n";
for (auto &I : BB.instructionsWithoutDebug()) {
OS << (DI.isDivergent(I) ? "DIVERGENT: " : " ");
OS << I << "\n";
}
}
}
return PreservedAnalyses::all();
}
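
For a hypothetical kernel with a divergent argument %tid, the printed form
looks roughly like this (non-divergent lines get a run of spaces instead of
the "DIVERGENT: " prefix):

'Divergence Analysis' for function 'kernel':
DIVERGENT: i32 %tid

 entry:
DIVERGENT:   %cond = icmp slt i32 %tid, 0
               %uni = add i32 1, 2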


@ -339,7 +339,8 @@ bool LegacyDivergenceAnalysis::runOnFunction(Function &F) {
if (shouldUseGPUDivergenceAnalysis(F, TTI)) {
// run the new GPU divergence analysis
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
gpuDA = std::make_unique<GPUDivergenceAnalysis>(F, DT, PDT, LI, TTI);
gpuDA = std::make_unique<DivergenceInfo>(F, DT, PDT, LI, TTI,
/* KnownReducible = */ true);
} else {
// run LLVM's existing DivergenceAnalysis


@ -32,6 +32,7 @@
#include "llvm/Analysis/Delinearization.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/DominanceFrontier.h"
#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"


@ -172,6 +172,7 @@ FUNCTION_ANALYSIS("targetir",
TM ? TM->getTargetIRAnalysis() : TargetIRAnalysis())
FUNCTION_ANALYSIS("verify", VerifierAnalysis())
FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
FUNCTION_ANALYSIS("divergence", DivergenceAnalysis())
#ifndef FUNCTION_ALIAS_ANALYSIS
#define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
@ -273,6 +274,7 @@ FUNCTION_PASS("print<assumptions>", AssumptionPrinterPass(dbgs()))
FUNCTION_PASS("print<block-freq>", BlockFrequencyPrinterPass(dbgs()))
FUNCTION_PASS("print<branch-prob>", BranchProbabilityPrinterPass(dbgs()))
FUNCTION_PASS("print<da>", DependenceAnalysisPrinterPass(dbgs()))
FUNCTION_PASS("print<divergence>", DivergenceAnalysisPrinterPass(dbgs()))
FUNCTION_PASS("print<domtree>", DominatorTreePrinterPass(dbgs()))
FUNCTION_PASS("print<postdomtree>", PostDominatorTreePrinterPass(dbgs()))
FUNCTION_PASS("print<delinearization>", DelinearizationPrinterPass(dbgs()))


@ -1,4 +1,5 @@
; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; CHECK-LABEL: for function 'readfirstlane':
define amdgpu_kernel void @readfirstlane() {
@ -39,7 +40,7 @@ define i32 @asm_sgpr(i32 %divergent) {
ret i32 %sgpr
}
; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'asm_mixed_sgpr_vgpr':
; CHECK-LABEL: Divergence Analysis' for function 'asm_mixed_sgpr_vgpr':
; CHECK: DIVERGENT: %asm = call { i32, i32 } asm "; def $0, $1, $2", "=s,=v,v"(i32 %divergent)
; CHECK-NEXT: {{^[ \t]+}}%sgpr = extractvalue { i32, i32 } %asm, 0
; CHECK-NEXT: DIVERGENT: %vgpr = extractvalue { i32, i32 } %asm, 1


@ -1,4 +1,5 @@
; RUN: opt -mtriple=amdgcn-- -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; RUN: opt -mtriple=amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; CHECK: DIVERGENT: %orig = atomicrmw xchg i32* %ptr, i32 %val seq_cst
define i32 @test1(i32* %ptr, i32 %val) #0 {


@ -1,10 +1,11 @@
; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
declare i32 @gf2(i32)
declare i32 @gf1(i32)
define void @tw1(i32 addrspace(4)* noalias nocapture readonly %A, i32 addrspace(4)* noalias nocapture %B) local_unnamed_addr #2 {
; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'tw1':
; CHECK: Divergence Analysis' for function 'tw1':
; CHECK: DIVERGENT: i32 addrspace(4)* %A
; CHECK: DIVERGENT: i32 addrspace(4)* %B
entry:


@ -1,8 +1,9 @@
; RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; RUN: opt -mtriple=amdgcn-mesa-mesa3d -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; RUN: opt -mtriple amdgcn-mesa-mesa3d -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; Tests control flow intrinsics that should be treated as uniform
; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_if_break':
; CHECK: Divergence Analysis' for function 'test_if_break':
; CHECK: DIVERGENT: %cond = icmp eq i32 %arg0, 0
; CHECK-NOT: DIVERGENT
; CHECK: ret void
@ -14,7 +15,7 @@ entry:
ret void
}
; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_if':
; CHECK: Divergence Analysis' for function 'test_if':
; CHECK: DIVERGENT: %cond = icmp eq i32 %arg0, 0
; CHECK-NEXT: DIVERGENT: %if = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %cond)
; CHECK-NEXT: DIVERGENT: %if.bool = extractvalue { i1, i64 } %if, 0
@ -33,7 +34,7 @@ entry:
}
; The result should still be treated as divergent, even with a uniform source.
; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_if_uniform':
; CHECK: Divergence Analysis' for function 'test_if_uniform':
; CHECK-NOT: DIVERGENT
; CHECK: DIVERGENT: %if = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %cond)
; CHECK-NEXT: DIVERGENT: %if.bool = extractvalue { i1, i64 } %if, 0
@ -51,7 +52,7 @@ entry:
ret void
}
; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_loop_uniform':
; CHECK: Divergence Analysis' for function 'test_loop_uniform':
; CHECK: DIVERGENT: %loop = call i1 @llvm.amdgcn.loop.i64(i64 %mask)
define amdgpu_ps void @test_loop_uniform(i64 inreg %mask) {
entry:
@ -61,7 +62,7 @@ entry:
ret void
}
; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_else':
; CHECK: Divergence Analysis' for function 'test_else':
; CHECK: DIVERGENT: %else = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %mask)
; CHECK: DIVERGENT: %else.bool = extractvalue { i1, i64 } %else, 0
; CHECK: {{^[ \t]+}}%else.mask = extractvalue { i1, i64 } %else, 1
@ -77,7 +78,7 @@ entry:
}
; This case is probably always broken
; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_else_divergent_mask':
; CHECK: Divergence Analysis' for function 'test_else_divergent_mask':
; CHECK: DIVERGENT: %if = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %mask)
; CHECK-NEXT: DIVERGENT: %if.bool = extractvalue { i1, i64 } %if, 0
; CHECK-NOT: DIVERGENT


@ -1,7 +1,7 @@
; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
define amdgpu_kernel void @hidden_diverge(i32 %n, i32 %a, i32 %b) #0 {
; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_diverge'
; CHECK-LABEL: 'Divergence Analysis' for function 'hidden_diverge'
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%cond.var = icmp slt i32 %tid, 0
@ -22,7 +22,7 @@ merge:
}
define amdgpu_kernel void @hidden_loop_ipd(i32 %n, i32 %a, i32 %b) #0 {
; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_loop_ipd'
; CHECK-LABEL: 'Divergence Analysis' for function 'hidden_loop_ipd'
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%cond.var = icmp slt i32 %tid, 0


@ -1,9 +1,10 @@
; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; divergent loop (H<header><exiting to X>, B<exiting to Y>)
; the divergent join point in %exit is obscured by uniform control joining in %X
define amdgpu_kernel void @hidden_loop_diverge(i32 %n, i32 %a, i32 %b) #0 {
; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_loop_diverge':
; CHECK-LABEL: Divergence Analysis' for function 'hidden_loop_diverge'
; CHECK-NOT: DIVERGENT: %uni.
; CHECK-NOT: DIVERGENT: br i1 %uni.
@ -45,7 +46,7 @@ exit:
; divergent loop (H<header><exiting to X>, B<exiting to Y>)
; the phi nodes in X and Y don't actually receive divergent values
define amdgpu_kernel void @unobserved_loop_diverge(i32 %n, i32 %a, i32 %b) #0 {
; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'unobserved_loop_diverge':
; CHECK-LABEL: Divergence Analysis' for function 'unobserved_loop_diverge':
; CHECK-NOT: DIVERGENT: %uni.
; CHECK-NOT: DIVERGENT: br i1 %uni.
@ -86,7 +87,7 @@ exit:
; the inner loop has no exit to top level.
; the outer loop becomes divergent as its exiting branch in C is control-dependent on the inner loop's divergent loop exit in D.
define amdgpu_kernel void @hidden_nestedloop_diverge(i32 %n, i32 %a, i32 %b) #0 {
; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_nestedloop_diverge':
; CHECK-LABEL: Divergence Analysis' for function 'hidden_nestedloop_diverge':
; CHECK-NOT: DIVERGENT: %uni.
; CHECK-NOT: DIVERGENT: br i1 %uni.
@ -137,7 +138,7 @@ exit:
; the outer loop has no immediately divergent exiting edge.
; the inner exiting edge is exiting to top-level through the outer loop causing both to become divergent.
define amdgpu_kernel void @hidden_doublebreak_diverge(i32 %n, i32 %a, i32 %b) #0 {
; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_doublebreak_diverge':
; CHECK-LABEL: Divergence Analysis' for function 'hidden_doublebreak_diverge':
; CHECK-NOT: DIVERGENT: %uni.
; CHECK-NOT: DIVERGENT: br i1 %uni.
@ -179,7 +180,7 @@ Y:
; divergent loop (G<header>, L<exiting to D>) contained inside a uniform loop (H<header>, B, G, L , D<exiting to x>)
define amdgpu_kernel void @hidden_containedloop_diverge(i32 %n, i32 %a, i32 %b) #0 {
; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_containedloop_diverge':
; CHECK-LABEL: Divergence Analysis' for function 'hidden_containedloop_diverge':
; CHECK-NOT: DIVERGENT: %uni.
; CHECK-NOT: DIVERGENT: br i1 %uni.


@ -1,50 +1,52 @@
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -mcpu=tahiti -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx908 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -mcpu=tahiti -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx908 -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -mcpu=tahiti -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -mcpu=gfx908 -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; Make sure nothing crashes on targets with or without AGPRs
; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_sgpr_virtreg_output':
; CHECK: Divergence Analysis' for function 'inline_asm_1_sgpr_virtreg_output':
; CHECK-NOT: DIVERGENT
define i32 @inline_asm_1_sgpr_virtreg_output() {
%sgpr = call i32 asm "s_mov_b32 $0, 0", "=s"()
ret i32 %sgpr
}
; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_sgpr_physreg_output':
; CHECK: Divergence Analysis' for function 'inline_asm_1_sgpr_physreg_output':
; CHECK-NOT: DIVERGENT
define i32 @inline_asm_1_sgpr_physreg_output() {
%sgpr = call i32 asm "s_mov_b32 s0, 0", "={s0}"()
ret i32 %sgpr
}
; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_vgpr_virtreg_output':
; CHECK: Divergence Analysis' for function 'inline_asm_1_vgpr_virtreg_output':
; CHECK: DIVERGENT: %vgpr = call i32 asm "v_mov_b32 $0, 0", "=v"()
define i32 @inline_asm_1_vgpr_virtreg_output() {
%vgpr = call i32 asm "v_mov_b32 $0, 0", "=v"()
ret i32 %vgpr
}
; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_vgpr_physreg_output':
; CHECK: Divergence Analysis' for function 'inline_asm_1_vgpr_physreg_output':
; CHECK: DIVERGENT: %vgpr = call i32 asm "v_mov_b32 v0, 0", "={v0}"()
define i32 @inline_asm_1_vgpr_physreg_output() {
%vgpr = call i32 asm "v_mov_b32 v0, 0", "={v0}"()
ret i32 %vgpr
}
; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_agpr_virtreg_output':
; CHECK: Divergence Analysis' for function 'inline_asm_1_agpr_virtreg_output':
; CHECK: DIVERGENT: %vgpr = call i32 asm "; def $0", "=a"()
define i32 @inline_asm_1_agpr_virtreg_output() {
%vgpr = call i32 asm "; def $0", "=a"()
ret i32 %vgpr
}
; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_agpr_physreg_output':
; CHECK: Divergence Analysis' for function 'inline_asm_1_agpr_physreg_output':
; CHECK: DIVERGENT: %vgpr = call i32 asm "; def a0", "={a0}"()
define i32 @inline_asm_1_agpr_physreg_output() {
%vgpr = call i32 asm "; def a0", "={a0}"()
ret i32 %vgpr
}
; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_2_sgpr_virtreg_output':
; CHECK: Divergence Analysis' for function 'inline_asm_2_sgpr_virtreg_output':
; CHECK-NOT: DIVERGENT
define void @inline_asm_2_sgpr_virtreg_output() {
%asm = call { i32, i32 } asm "; def $0, $1", "=s,=s"()
@ -56,7 +58,7 @@ define void @inline_asm_2_sgpr_virtreg_output() {
}
; One output is SGPR, one is VGPR. Infer divergent for the aggregate, but uniform on the SGPR extract
; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_sgpr_vgpr_virtreg_output':
; CHECK: Divergence Analysis' for function 'inline_asm_sgpr_vgpr_virtreg_output':
; CHECK: DIVERGENT: %asm = call { i32, i32 } asm "; def $0, $1", "=s,=v"()
; CHECK-NEXT: {{^[ \t]+}}%sgpr = extractvalue { i32, i32 } %asm, 0
; CHECK-NEXT: DIVERGENT: %vgpr = extractvalue { i32, i32 } %asm, 1
@ -69,7 +71,7 @@ define void @inline_asm_sgpr_vgpr_virtreg_output() {
ret void
}
; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_vgpr_sgpr_virtreg_output':
; CHECK: Divergence Analysis' for function 'inline_asm_vgpr_sgpr_virtreg_output':
; CHECK: DIVERGENT: %asm = call { i32, i32 } asm "; def $0, $1", "=v,=s"()
; CHECK-NEXT: DIVERGENT: %vgpr = extractvalue { i32, i32 } %asm, 0
; CHECK-NEXT: {{^[ \t]+}}%sgpr = extractvalue { i32, i32 } %asm, 1
@ -83,7 +85,7 @@ define void @inline_asm_vgpr_sgpr_virtreg_output() {
}
; Have an extra output constraint
; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'multi_sgpr_inline_asm_output_input_constraint':
; CHECK: Divergence Analysis' for function 'multi_sgpr_inline_asm_output_input_constraint':
; CHECK-NOT: DIVERGENT
define void @multi_sgpr_inline_asm_output_input_constraint() {
%asm = call { i32, i32 } asm "; def $0, $1", "=s,=s,s"(i32 1234)
@ -94,7 +96,7 @@ define void @multi_sgpr_inline_asm_output_input_constraint() {
ret void
}
; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_vgpr_sgpr_virtreg_output_input_constraint':
; CHECK: Divergence Analysis' for function 'inline_asm_vgpr_sgpr_virtreg_output_input_constraint':
; CHECK: DIVERGENT: %asm = call { i32, i32 } asm "; def $0, $1", "=v,=s,v"(i32 1234)
; CHECK-NEXT: DIVERGENT: %vgpr = extractvalue { i32, i32 } %asm, 0
; CHECK-NEXT: {{^[ \t]+}}%sgpr = extractvalue { i32, i32 } %asm, 1


@ -1,4 +1,5 @@
; RUN: opt -mtriple=amdgcn-- -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; RUN: opt -mtriple=amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; CHECK: for function 'interp_p1_f16'
; CHECK: DIVERGENT: %p1 = call float @llvm.amdgcn.interp.p1.f16


@ -1,4 +1,5 @@
; RUN: opt -mtriple=amdgcn-- -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; RUN: opt -mtriple=amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; CHECK: DIVERGENT: %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0
define amdgpu_kernel void @ds_swizzle(i32 addrspace(1)* %out, i32 %src) #0 {


@ -1,4 +1,14 @@
; RUN: opt -mtriple=amdgcn-- -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; RUN: opt %s -mtriple amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
; RUN: opt %s -mtriple amdgcn-- -passes='print<divergence>' -disable-output 2>&1 | FileCheck %s
; NOTE: The new pass manager does not fall back on legacy divergence
; analysis even when the function contains an irreducible loop. The
; (new) divergence analysis conservatively reports all values as
; divergent. This test does not check for this conservative
; behaviour. Instead, it only checks for the values that are known to
; be divergent according to the legacy analysis.
; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; This test contains an unstructured loop.
; +-------------- entry ----------------+
@ -14,7 +24,7 @@
; if (i3 == 5) // divergent
; because sync dependent on (tid / i3).
define i32 @unstructured_loop(i1 %entry_cond) {
; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'unstructured_loop'
; CHECK-LABEL: Divergence Analysis' for function 'unstructured_loop'
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
br i1 %entry_cond, label %loop_entry_1, label %loop_entry_2


@ -1,4 +1,5 @@
; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; CHECK: bb3:
; CHECK: DIVERGENT: %Guard.bb4 = phi i1 [ true, %bb1 ], [ false, %bb2 ]


@ -1,6 +1,7 @@
; RUN: opt %s -mtriple amdgcn-- -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
; RUN: opt %s -mtriple amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'test_amdgpu_ps':
; CHECK-LABEL: Divergence Analysis' for function 'test_amdgpu_ps':
; CHECK: DIVERGENT: [4 x <16 x i8>] addrspace(4)* %arg0
; CHECK-NOT: DIVERGENT
; CHECK: DIVERGENT: <2 x i32> %arg3
@ -12,7 +13,7 @@ define amdgpu_ps void @test_amdgpu_ps([4 x <16 x i8>] addrspace(4)* byref([4 x <
ret void
}
; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'test_amdgpu_kernel':
; CHECK-LABEL: Divergence Analysis' for function 'test_amdgpu_kernel':
; CHECK-NOT: %arg0
; CHECK-NOT: %arg1
; CHECK-NOT: %arg2
@ -24,7 +25,7 @@ define amdgpu_kernel void @test_amdgpu_kernel([4 x <16 x i8>] addrspace(4)* byre
ret void
}
; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'test_c':
; CHECK-LABEL: Divergence Analysis' for function 'test_c':
; CHECK: DIVERGENT:
; CHECK: DIVERGENT:
; CHECK: DIVERGENT:


@ -1,4 +1,5 @@
;RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
;RUN: opt -mtriple=amdgcn-mesa-mesa3d -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; RUN: opt -mtriple amdgcn-mesa-mesa3d -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.swap.i32(
define float @buffer_atomic_swap(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {


@ -1,4 +1,5 @@
;RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
;RUN: opt -mtriple=amdgcn-mesa-mesa3d -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; RUN: opt -mtriple amdgcn-mesa-mesa3d -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(
define float @image_atomic_swap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {


@ -1,4 +1,5 @@
; RUN: opt %s -mtriple amdgcn-- -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
; RUN: opt %s -mtriple amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; CHECK: DIVERGENT: %tmp5 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp2
; CHECK: DIVERGENT: %tmp10 = load volatile float, float addrspace(1)* %tmp5, align 4


@ -1,4 +1,5 @@
; RUN: opt -mtriple=amdgcn-- -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; RUN: opt -mtriple=amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; CHECK-LABEL: 'test1':
; CHECK-NEXT: DIVERGENT: i32 %bound


@ -1,4 +1,5 @@
; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; CHECK: bb6:
; CHECK: DIVERGENT: %.126.i355.i = phi i1 [ false, %bb5 ], [ true, %bb4 ]


@ -1,8 +1,9 @@
; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; temporal-divergent use of value carried by divergent loop
define amdgpu_kernel void @temporal_diverge(i32 %n, i32 %a, i32 %b) #0 {
; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'temporal_diverge':
; CHECK-LABEL: Divergence Analysis' for function 'temporal_diverge':
; CHECK-NOT: DIVERGENT: %uni.
; CHECK-NOT: DIVERGENT: br i1 %uni.
@ -26,7 +27,7 @@ X:
; temporal-divergent use of value carried by divergent loop inside a top-level loop
define amdgpu_kernel void @temporal_diverge_inloop(i32 %n, i32 %a, i32 %b) #0 {
; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'temporal_diverge_inloop':
; CHECK-LABEL: Divergence Analysis' for function 'temporal_diverge_inloop':
; CHECK-NOT: DIVERGENT: %uni.
; CHECK-NOT: DIVERGENT: br i1 %uni.
@ -58,7 +59,7 @@ Y:
; temporal-uniform use of a value, definition and users are carried by a surrounding divergent loop
define amdgpu_kernel void @temporal_uniform_indivloop(i32 %n, i32 %a, i32 %b) #0 {
; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'temporal_uniform_indivloop':
; CHECK-LABEL: Divergence Analysis' for function 'temporal_uniform_indivloop':
; CHECK-NOT: DIVERGENT: %uni.
; CHECK-NOT: DIVERGENT: br i1 %uni.
@ -90,7 +91,7 @@ Y:
; temporal-divergent use of value carried by divergent loop, user is inside sibling loop
define amdgpu_kernel void @temporal_diverge_loopuser(i32 %n, i32 %a, i32 %b) #0 {
; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'temporal_diverge_loopuser':
; CHECK-LABEL: Divergence Analysis' for function 'temporal_diverge_loopuser':
; CHECK-NOT: DIVERGENT: %uni.
; CHECK-NOT: DIVERGENT: br i1 %uni.
@ -120,7 +121,7 @@ Y:
; temporal-divergent use of value carried by divergent loop, user is inside sibling loop, defs and use are carried by a uniform loop
define amdgpu_kernel void @temporal_diverge_loopuser_nested(i32 %n, i32 %a, i32 %b) #0 {
; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'temporal_diverge_loopuser_nested':
; CHECK-LABEL: Divergence Analysis' for function 'temporal_diverge_loopuser_nested':
; CHECK-NOT: DIVERGENT: %uni.
; CHECK-NOT: DIVERGENT: br i1 %uni.


@ -1,4 +1,5 @@
; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; CHECK: bb2:
; CHECK-NOT: DIVERGENT: %Guard.bb2 = phi i1 [ true, %bb1 ], [ false, %bb0 ]


@ -1,4 +1,5 @@
; RUN: opt %s -mtriple amdgcn-- -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
; RUN: opt %s -mtriple amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; CHECK: DIVERGENT: %tmp = cmpxchg volatile
define amdgpu_kernel void @unreachable_loop(i32 %tidx) #0 {


@ -1,4 +1,5 @@
; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0


@ -1,10 +1,11 @@
; RUN: opt %s -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
; RUN: opt %s -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
; RUN: opt %s -passes='print<divergence>' -disable-output 2>&1 | FileCheck %s
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"
define i32 @daorder(i32 %n) {
; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'daorder'
; CHECK-LABEL: Divergence Analysis' for function 'daorder'
entry:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
%cond = icmp slt i32 %tid, 0


@ -1,11 +1,12 @@
; RUN: opt %s -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
; RUN: opt %s -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
; RUN: opt %s -passes='print<divergence>' -disable-output 2>&1 | FileCheck %s
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"
; return (n < 0 ? a + threadIdx.x : b + threadIdx.x)
define i32 @no_diverge(i32 %n, i32 %a, i32 %b) {
; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'no_diverge'
; CHECK-LABEL: Divergence Analysis' for function 'no_diverge'
entry:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
%cond = icmp slt i32 %n, 0
@ -27,7 +28,7 @@ merge:
; c = b;
; return c; // c is divergent: sync dependent
define i32 @sync(i32 %a, i32 %b) {
; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'sync'
; CHECK-LABEL: Divergence Analysis' for function 'sync'
bb1:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
%cond = icmp slt i32 %tid, 5
@ -48,7 +49,7 @@ bb3:
; // c here is divergent because it is sync dependent on threadIdx.x >= 5
; return c;
define i32 @mixed(i32 %n, i32 %a, i32 %b) {
; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'mixed'
; CHECK-LABEL: Divergence Analysis' for function 'mixed'
bb1:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
%cond = icmp slt i32 %tid, 5
@ -73,7 +74,7 @@ bb6:
; We conservatively treat all parameters of a __device__ function as divergent.
define i32 @device(i32 %n, i32 %a, i32 %b) {
; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'device'
; CHECK-LABEL: Divergence Analysis' for function 'device'
; CHECK: DIVERGENT: i32 %n
; CHECK: DIVERGENT: i32 %a
; CHECK: DIVERGENT: i32 %b
@ -98,7 +99,7 @@ merge:
;
; The i defined in the loop is used outside.
define i32 @loop() {
; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'loop'
; CHECK-LABEL: Divergence Analysis' for function 'loop'
entry:
%laneid = call i32 @llvm.nvvm.read.ptx.sreg.laneid()
br label %loop
@ -120,7 +121,7 @@ else:
; Same as @loop, but the loop is in the LCSSA form.
define i32 @lcssa() {
; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'lcssa'
; CHECK-LABEL: Divergence Analysis' for function 'lcssa'
entry:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
br label %loop


@ -1,10 +1,11 @@
; RUN: opt %s -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
; RUN: opt %s -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
; RUN: opt %s -passes='print<divergence>' -disable-output 2>&1 | FileCheck %s
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"
define i32 @hidden_diverge(i32 %n, i32 %a, i32 %b) {
; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_diverge'
; CHECK-LABEL: Divergence Analysis' for function 'hidden_diverge'
entry:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
%cond.var = icmp slt i32 %tid, 0


@ -1,4 +1,12 @@
; RUN: opt %s -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
; RUN: opt %s -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s
; RUN: opt %s -passes='print<divergence>' -disable-output 2>&1 | FileCheck %s
; NOTE: The new pass manager does not fall back on legacy divergence
; analysis even when the function contains an irreducible loop. The
; (new) divergence analysis conservatively reports all values as
; divergent. This test does not check for this conservative
; behaviour. Instead, it only checks for the values that are known to
; be divergent according to the legacy analysis.
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"
@ -17,7 +25,7 @@ target triple = "nvptx64-nvidia-cuda"
; if (i3 == 5) // divergent
; because sync dependent on (tid / i3).
define i32 @unstructured_loop(i1 %entry_cond) {
; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'unstructured_loop'
; CHECK-LABEL: Divergence Analysis' for function 'unstructured_loop'
entry:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
br i1 %entry_cond, label %loop_entry_1, label %loop_entry_2


@ -1,4 +1,4 @@
; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s
; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence %s | FileCheck %s
; CHECK: DIVERGENT: %orig = atomicrmw xchg i32* %ptr, i32 %val seq_cst
define i32 @test1(i32* %ptr, i32 %val) #0 {


@ -1,4 +1,4 @@
; RUN: opt -mtriple=amdgcn-- -analyze -amdgpu-use-legacy-divergence-analysis -divergence %s | FileCheck %s
; RUN: opt -mtriple=amdgcn-- -enable-new-pm=0 -analyze -amdgpu-use-legacy-divergence-analysis -divergence %s | FileCheck %s
; CHECK: DIVERGENT: %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0
define amdgpu_kernel void @ds_swizzle(i32 addrspace(1)* %out, i32 %src) #0 {


@ -1,4 +1,4 @@
; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence | FileCheck %s
; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence | FileCheck %s
; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'test_amdgpu_ps':
; CHECK: DIVERGENT: [4 x <16 x i8>] addrspace(4)* %arg0


@ -1,4 +1,4 @@
;RUN: opt -mtriple=amdgcn-mesa-mesa3d -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s
;RUN: opt -mtriple=amdgcn-mesa-mesa3d -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence %s | FileCheck %s
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.swap.i32(
define float @buffer_atomic_swap(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {


@ -1,4 +1,4 @@
;RUN: opt -mtriple=amdgcn-mesa-mesa3d -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s
;RUN: opt -mtriple=amdgcn-mesa-mesa3d -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence %s | FileCheck %s
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(
define float @image_atomic_swap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {


@ -1,4 +1,4 @@
; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s
; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence %s | FileCheck %s
; Test that we consider loads from flat and private addrspaces to be divergent.


@ -1,4 +1,4 @@
; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence | FileCheck %s
; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence | FileCheck %s
; CHECK: DIVERGENT: %tmp5 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp2
; CHECK: DIVERGENT: %tmp10 = load volatile float, float addrspace(1)* %tmp5, align 4


@ -1,4 +1,4 @@
; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s
; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence %s | FileCheck %s
; CHECK-LABEL: 'test1':
; CHECK-NEXT: DIVERGENT: i32 %bound


@ -1,4 +1,4 @@
; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence | FileCheck %s
; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence | FileCheck %s
; CHECK: DIVERGENT: %tmp = cmpxchg volatile
define amdgpu_kernel void @unreachable_loop(i32 %tidx) #0 {


@ -1,4 +1,4 @@
; RUN: opt -mtriple amdgcn-unknown-amdhsa -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s
; RUN: opt -mtriple amdgcn-unknown-amdhsa -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence %s | FileCheck %s
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0


@ -1,4 +1,4 @@
; RUN: opt %s -analyze -divergence | FileCheck %s
; RUN: opt %s -enable-new-pm=0 -analyze -divergence | FileCheck %s
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"


@ -496,7 +496,7 @@ static bool shouldPinPassToLegacyPM(StringRef Pass) {
"safe-stack", "cost-model",
"codegenprepare", "interleaved-load-combine",
"unreachableblockelim", "verify-safepoint-ir",
"divergence", "atomic-expand",
"atomic-expand",
"hardware-loops", "type-promotion",
"mve-tail-predication", "interleaved-access",
"global-merge", "pre-isel-intrinsic-lowering",


@ -38,7 +38,7 @@ BasicBlock *GetBlockByName(StringRef BlockName, Function &F) {
return nullptr;
}
// We use this fixture to ensure that we clean up DivergenceAnalysis before
// We use this fixture to ensure that we clean up DivergenceAnalysisImpl before
// deleting the PassManager.
class DivergenceAnalysisTest : public testing::Test {
protected:
@ -54,21 +54,21 @@ protected:
DivergenceAnalysisTest() : M("", Context), TLII(), TLI(TLII) {}
DivergenceAnalysis buildDA(Function &F, bool IsLCSSA) {
DivergenceAnalysisImpl buildDA(Function &F, bool IsLCSSA) {
DT.reset(new DominatorTree(F));
PDT.reset(new PostDominatorTree(F));
LI.reset(new LoopInfo(*DT));
SDA.reset(new SyncDependenceAnalysis(*DT, *PDT, *LI));
return DivergenceAnalysis(F, nullptr, *DT, *LI, *SDA, IsLCSSA);
return DivergenceAnalysisImpl(F, nullptr, *DT, *LI, *SDA, IsLCSSA);
}
void runWithDA(
Module &M, StringRef FuncName, bool IsLCSSA,
function_ref<void(Function &F, LoopInfo &LI, DivergenceAnalysis &DA)>
function_ref<void(Function &F, LoopInfo &LI, DivergenceAnalysisImpl &DA)>
Test) {
auto *F = M.getFunction(FuncName);
ASSERT_NE(F, nullptr) << "Could not find " << FuncName;
DivergenceAnalysis DA = buildDA(*F, IsLCSSA);
DivergenceAnalysisImpl DA = buildDA(*F, IsLCSSA);
Test(*F, *LI, DA);
}
};
@ -82,7 +82,7 @@ TEST_F(DivergenceAnalysisTest, DAInitialState) {
BasicBlock *BB = BasicBlock::Create(Context, "entry", F);
ReturnInst::Create(Context, nullptr, BB);
DivergenceAnalysis DA = buildDA(*F, false);
DivergenceAnalysisImpl DA = buildDA(*F, false);
// Whole function region
EXPECT_EQ(DA.getRegionLoop(), nullptr);
@ -135,7 +135,7 @@ TEST_F(DivergenceAnalysisTest, DANoLCSSA) {
Err, C);
Function *F = M->getFunction("f_1");
DivergenceAnalysis DA = buildDA(*F, false);
DivergenceAnalysisImpl DA = buildDA(*F, false);
EXPECT_FALSE(DA.hasDetectedDivergence());
auto ItArg = F->arg_begin();
@ -189,7 +189,7 @@ TEST_F(DivergenceAnalysisTest, DALCSSA) {
Err, C);
Function *F = M->getFunction("f_lcssa");
DivergenceAnalysis DA = buildDA(*F, true);
DivergenceAnalysisImpl DA = buildDA(*F, true);
EXPECT_FALSE(DA.hasDetectedDivergence());
auto ItArg = F->arg_begin();