Resubmit: [DA][TTI][AMDGPU] Add option to select GPUDA with TTI

Summary: Enable the new diveregence analysis by default for AMDGPU. Resubmit with test updates since GPUDA was causing failures on Windows. Reviewers: rampitec, nhaehnle, arsenm, thakis Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D73315
2025-01-31 12:41:49 +01:00 · 2020-01-20 07:25:20 -08:00 · 2020-01-20 07:25:20 -08:00 · 0fa8b03aac
commit 0fa8b03aac
parent a53f5e2907
18 changed files with 41 additions and 16 deletions
--- a/include/llvm/Analysis/LegacyDivergenceAnalysis.h
+++ b/include/llvm/Analysis/LegacyDivergenceAnalysis.h
@ -54,7 +54,8 @@ public:

 private:
  // Whether analysis should be performed by GPUDivergenceAnalysis.
-  bool shouldUseGPUDivergenceAnalysis(const Function &F) const;
+  bool shouldUseGPUDivergenceAnalysis(const Function &F,
+                                      const TargetTransformInfo &TTI) const;

  // (optional) handle to new DivergenceAnalysis
  std::unique_ptr<GPUDivergenceAnalysis> gpuDA;
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@ -342,6 +342,10 @@ public:
  /// branches.
  bool hasBranchDivergence() const;

+  /// Return true if the target prefers to use GPU divergence analysis to
+  /// replace the legacy version.
+  bool useGPUDivergenceAnalysis() const;
+
  /// Returns whether V is a source of divergence.
  ///
  /// This function provides the target-dependent information for
@ -1198,6 +1202,7 @@ public:
  virtual int
  getUserCost(const User *U, ArrayRef<const Value *> Operands) = 0;
  virtual bool hasBranchDivergence() = 0;
+  virtual bool useGPUDivergenceAnalysis() = 0;
  virtual bool isSourceOfDivergence(const Value *V) = 0;
  virtual bool isAlwaysUniform(const Value *V) = 0;
  virtual unsigned getFlatAddressSpace() = 0;
@ -1452,6 +1457,7 @@ public:
    return Impl.getUserCost(U, Operands);
  }
  bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
+  bool useGPUDivergenceAnalysis() override { return Impl.useGPUDivergenceAnalysis(); }
  bool isSourceOfDivergence(const Value *V) override {
    return Impl.isSourceOfDivergence(V);
  }
--- a/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/include/llvm/Analysis/TargetTransformInfoImpl.h
@ -152,6 +152,8 @@ public:

  bool hasBranchDivergence() { return false; }

+  bool useGPUDivergenceAnalysis() { return false; }
+
  bool isSourceOfDivergence(const Value *V) { return false; }

  bool isAlwaysUniform(const Value *V) { return false; }
--- a/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/include/llvm/CodeGen/BasicTTIImpl.h
@ -207,6 +207,8 @@ public:

  bool hasBranchDivergence() { return false; }

+  bool useGPUDivergenceAnalysis() { return false; }
+
  bool isSourceOfDivergence(const Value *V) { return false; }

  bool isAlwaysUniform(const Value *V) { return false; }
--- a/lib/Analysis/LegacyDivergenceAnalysis.cpp
+++ b/lib/Analysis/LegacyDivergenceAnalysis.cpp
@ -301,14 +301,13 @@ FunctionPass *llvm::createLegacyDivergenceAnalysisPass() {
 void LegacyDivergenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<PostDominatorTreeWrapperPass>();
-  if (UseGPUDA)
-    AU.addRequired<LoopInfoWrapperPass>();
+  AU.addRequired<LoopInfoWrapperPass>();
  AU.setPreservesAll();
 }

 bool LegacyDivergenceAnalysis::shouldUseGPUDivergenceAnalysis(
-    const Function &F) const {
-  if (!UseGPUDA)
+    const Function &F, const TargetTransformInfo &TTI) const {
+  if (!(UseGPUDA || TTI.useGPUDivergenceAnalysis()))
    return false;

  // GPUDivergenceAnalysis requires a reducible CFG.
@ -337,7 +336,7 @@ bool LegacyDivergenceAnalysis::runOnFunction(Function &F) {
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();

-  if (shouldUseGPUDivergenceAnalysis(F)) {
+  if (shouldUseGPUDivergenceAnalysis(F, TTI)) {
    // run the new GPU divergence analysis
    auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    gpuDA = std::make_unique<GPUDivergenceAnalysis>(F, DT, PDT, LI, TTI);
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@ -212,6 +212,10 @@ bool TargetTransformInfo::hasBranchDivergence() const {
  return TTIImpl->hasBranchDivergence();
 }

+bool TargetTransformInfo::useGPUDivergenceAnalysis() const {
+  return TTIImpl->useGPUDivergenceAnalysis();
+}
+
 bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const {
  return TTIImpl->isSourceOfDivergence(V);
 }
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@ -69,6 +69,11 @@ static cl::opt<unsigned> UnrollThresholdIf(
  cl::desc("Unroll threshold increment for AMDGPU for each if statement inside loop"),
  cl::init(150), cl::Hidden);

+static cl::opt<bool> UseLegacyDA(
+  "amdgpu-use-legacy-divergence-analysis",
+  cl::desc("Enable legacy divergence analysis for AMDGPU"),
+  cl::init(false), cl::Hidden);
+
 static bool dependsOnLocalPhi(const Loop *L, const Value *Cond,
                              unsigned Depth = 0) {
  const Instruction *I = dyn_cast<Instruction>(Cond);
@ -601,6 +606,11 @@ static bool isArgPassedInSGPR(const Argument *A) {
  }
 }

+/// \returns true if the new GPU divergence analysis is enabled.
+bool GCNTTIImpl::useGPUDivergenceAnalysis() const {
+  return !UseLegacyDA;
+}
+
 /// \returns true if the result of the value could potentially be
 /// different across workitems in a wavefront.
 bool GCNTTIImpl::isSourceOfDivergence(const Value *V) const {
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@ -136,6 +136,7 @@ public:
      HasFP32Denormals(ST->hasFP32Denormals(F)) { }

  bool hasBranchDivergence() { return true; }
+  bool useGPUDivergenceAnalysis() const;

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
--- a/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll
+++ b/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll
@ -1,4 +1,4 @@
-; RUN: opt -mtriple=amdgcn-- -analyze -divergence %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s

 ; CHECK: DIVERGENT: %orig = atomicrmw xchg i32* %ptr, i32 %val seq_cst
 define i32 @test1(i32* %ptr, i32 %val) #0 {
--- a/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/intrinsics.ll
+++ b/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/intrinsics.ll
@ -1,4 +1,4 @@
-; RUN: opt -mtriple=amdgcn-- -analyze -divergence %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-- -analyze -amdgpu-use-legacy-divergence-analysis -divergence %s | FileCheck %s

 ; CHECK: DIVERGENT: %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0
 define amdgpu_kernel void @ds_swizzle(i32 addrspace(1)* %out, i32 %src) #0 {
--- a/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/kernel-args.ll
+++ b/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/kernel-args.ll
@ -1,4 +1,4 @@
-; RUN: opt %s -mtriple amdgcn-- -analyze -divergence | FileCheck %s
+; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence | FileCheck %s

 ; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'test_amdgpu_ps':
 ; CHECK: DIVERGENT:
--- a/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll
+++ b/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll
@ -1,4 +1,4 @@
-;RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence %s | FileCheck %s
+;RUN: opt -mtriple=amdgcn-mesa-mesa3d -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s

 ;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.swap.i32(
 define float @buffer_atomic_swap(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
--- a/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll
+++ b/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll
@ -1,4 +1,4 @@
-;RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence %s | FileCheck %s
+;RUN: opt -mtriple=amdgcn-mesa-mesa3d -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s

 ;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(
 define float @image_atomic_swap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
--- a/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/loads.ll
+++ b/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/loads.ll
@ -1,4 +1,4 @@
-; RUN: opt -mtriple=amdgcn-- -analyze -divergence %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s

 ; Test that we consider loads from flat and private addrspaces to be divergent.

--- a/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/no-return-blocks.ll
+++ b/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/no-return-blocks.ll
@ -1,4 +1,4 @@
-; RUN: opt %s -mtriple amdgcn-- -analyze -divergence | FileCheck %s
+; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence | FileCheck %s

 ; CHECK: DIVERGENT:  %tmp5 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp2
 ; CHECK: DIVERGENT:  %tmp10 = load volatile float, float addrspace(1)* %tmp5, align 4
--- a/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/phi-undef.ll
+++ b/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/phi-undef.ll
@ -1,4 +1,4 @@
-; RUN: opt -mtriple=amdgcn-- -analyze -divergence %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s

 ; CHECK-LABEL: 'test1':
 ; CHECK-NEXT: DIVERGENT: i32 %bound
--- a/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/unreachable-loop-block.ll
+++ b/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/unreachable-loop-block.ll
@ -1,4 +1,4 @@
-; RUN: opt %s -mtriple amdgcn-- -analyze -divergence | FileCheck %s
+; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence | FileCheck %s

 ; CHECK: DIVERGENT:  %tmp = cmpxchg volatile
 define amdgpu_kernel void @unreachable_loop(i32 %tidx) #0 {
--- a/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/workitem-intrinsics.ll
+++ b/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/workitem-intrinsics.ll
@ -1,4 +1,4 @@
-; RUN: opt  -mtriple amdgcn-unknown-amdhsa -analyze -divergence %s | FileCheck %s
+; RUN: opt  -mtriple amdgcn-unknown-amdhsa -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s

 declare i32 @llvm.amdgcn.workitem.id.x() #0
 declare i32 @llvm.amdgcn.workitem.id.y() #0