From 1374d1a4d3942410e6f61e805d41b45f788a6e12 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Tue, 29 Dec 2020 20:48:27 -0800 Subject: [PATCH] [NewPM][NVPTX] Port NVPTX opt passes There are only two used in the IR optimization pipeline. Port these and add them to the default pipeline. Similar to https://reviews.llvm.org/D93863. I added -mtriple to some tests since under the new PM, the passes are only available when the TargetMachine is specified. Reviewed By: rnk Differential Revision: https://reviews.llvm.org/D93930 --- lib/Target/NVPTX/NVPTX.h | 19 +++++++++ lib/Target/NVPTX/NVPTXTargetMachine.cpp | 27 +++++++++++++ lib/Target/NVPTX/NVPTXTargetMachine.h | 2 + lib/Target/NVPTX/NVVMIntrRange.cpp | 40 +++++++++++++------ lib/Target/NVPTX/NVVMReflect.cpp | 15 ++++++- test/CodeGen/NVPTX/intrinsic-old.ll | 5 +++ .../CodeGen/NVPTX/nvvm-reflect-module-flag.ll | 3 +- test/CodeGen/NVPTX/nvvm-reflect.ll | 4 +- tools/opt/opt.cpp | 2 + 9 files changed, 101 insertions(+), 16 deletions(-) diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h index dfe0b9cb5ee..c2fd090da08 100644 --- a/lib/Target/NVPTX/NVPTX.h +++ b/lib/Target/NVPTX/NVPTX.h @@ -14,6 +14,7 @@ #ifndef LLVM_LIB_TARGET_NVPTX_NVPTX_H #define LLVM_LIB_TARGET_NVPTX_NVPTX_H +#include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include "llvm/Support/CodeGen.h" @@ -47,6 +48,24 @@ FunctionPass *createNVPTXLowerAllocaPass(); MachineFunctionPass *createNVPTXPeephole(); MachineFunctionPass *createNVPTXProxyRegErasurePass(); +struct NVVMIntrRangePass : PassInfoMixin<NVVMIntrRangePass> { + NVVMIntrRangePass(); + NVVMIntrRangePass(unsigned SmVersion) : SmVersion(SmVersion) {} + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + +private: + unsigned SmVersion; +}; + +struct NVVMReflectPass : PassInfoMixin<NVVMReflectPass> { + NVVMReflectPass(); + NVVMReflectPass(unsigned SmVersion) : SmVersion(SmVersion) {} + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + +private: + unsigned SmVersion; +}; + namespace NVPTX { 
enum DrvInterface { NVCL, diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 21da566b639..f1a82f1cf60 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/Pass.h" +#include "llvm/Passes/PassBuilder.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetMachine.h" @@ -205,6 +206,32 @@ void NVPTXTargetMachine::adjustPassManager(PassManagerBuilder &Builder) { }); } +void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB, + bool DebugPassManager) { + PB.registerPipelineParsingCallback( + [](StringRef PassName, FunctionPassManager &PM, + ArrayRef<PassBuilder::PipelineElement>) { + if (PassName == "nvvm-reflect") { + PM.addPass(NVVMReflectPass()); + return true; + } + if (PassName == "nvvm-intr-range") { + PM.addPass(NVVMIntrRangePass()); + return true; + } + return false; + }); + + PB.registerPipelineStartEPCallback( + [this, DebugPassManager](ModulePassManager &PM, + PassBuilder::OptimizationLevel Level) { + FunctionPassManager FPM(DebugPassManager); + FPM.addPass(NVVMReflectPass(Subtarget.getSmVersion())); + FPM.addPass(NVVMIntrRangePass(Subtarget.getSmVersion())); + PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + }); +} + TargetTransformInfo NVPTXTargetMachine::getTargetTransformInfo(const Function &F) { return TargetTransformInfo(NVPTXTTIImpl(this, F)); diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h index 5b1e77958eb..bef541c2b28 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -62,6 +62,8 @@ public: } void adjustPassManager(PassManagerBuilder &) override; + void registerPassBuilderCallbacks(PassBuilder &PB, + bool DebugPassManager) override; TargetTransformInfo getTargetTransformInfo(const Function &F) 
override; diff --git a/lib/Target/NVPTX/NVVMIntrRange.cpp b/lib/Target/NVPTX/NVVMIntrRange.cpp index baaedc7ac87..5381646434e 100644 --- a/lib/Target/NVPTX/NVVMIntrRange.cpp +++ b/lib/Target/NVPTX/NVVMIntrRange.cpp @@ -17,6 +17,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsNVPTX.h" +#include "llvm/IR/PassManager.h" #include "llvm/Support/CommandLine.h" using namespace llvm; @@ -32,21 +33,13 @@ static cl::opt<unsigned> NVVMIntrRangeSM("nvvm-intr-range-sm", cl::init(20), namespace { class NVVMIntrRange : public FunctionPass { private: - struct { - unsigned x, y, z; - } MaxBlockSize, MaxGridSize; + unsigned SmVersion; public: static char ID; NVVMIntrRange() : NVVMIntrRange(NVVMIntrRangeSM) {} - NVVMIntrRange(unsigned int SmVersion) : FunctionPass(ID) { - MaxBlockSize.x = 1024; - MaxBlockSize.y = 1024; - MaxBlockSize.z = 64; - - MaxGridSize.x = SmVersion >= 30 ? 0x7fffffff : 0xffff; - MaxGridSize.y = 0xffff; - MaxGridSize.z = 0xffff; + NVVMIntrRange(unsigned int SmVersion) + : FunctionPass(ID), SmVersion(SmVersion) { initializeNVVMIntrRangePass(*PassRegistry::getPassRegistry()); } @@ -79,7 +72,18 @@ static bool addRangeMetadata(uint64_t Low, uint64_t High, CallInst *C) { return true; } -bool NVVMIntrRange::runOnFunction(Function &F) { +static bool runNVVMIntrRange(Function &F, unsigned SmVersion) { + struct { + unsigned x, y, z; + } MaxBlockSize, MaxGridSize; + MaxBlockSize.x = 1024; + MaxBlockSize.y = 1024; + MaxBlockSize.z = 64; + + MaxGridSize.x = SmVersion >= 30 ? 0x7fffffff : 0xffff; + MaxGridSize.y = 0xffff; + MaxGridSize.z = 0xffff; + // Go through the calls in this function. 
bool Changed = false; for (Instruction &I : instructions(F)) { @@ -151,3 +155,15 @@ bool NVVMIntrRange::runOnFunction(Function &F) { return Changed; } + +bool NVVMIntrRange::runOnFunction(Function &F) { + return runNVVMIntrRange(F, SmVersion); +} + +NVVMIntrRangePass::NVVMIntrRangePass() : NVVMIntrRangePass(NVVMIntrRangeSM) {} + +PreservedAnalyses NVVMIntrRangePass::run(Function &F, + FunctionAnalysisManager &AM) { + return runNVVMIntrRange(F, SmVersion) ? PreservedAnalyses::none() + : PreservedAnalyses::all(); +} diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp index ae166dc5a8d..339f51d2108 100644 --- a/lib/Target/NVPTX/NVVMReflect.cpp +++ b/lib/Target/NVPTX/NVVMReflect.cpp @@ -29,6 +29,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsNVPTX.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" @@ -73,7 +74,7 @@ INITIALIZE_PASS(NVVMReflect, "nvvm-reflect", "Replace occurrences of __nvvm_reflect() calls with 0/1", false, false) -bool NVVMReflect::runOnFunction(Function &F) { +static bool runNVVMReflect(Function &F, unsigned SmVersion) { if (!NVVMReflectEnabled) return false; @@ -179,3 +180,15 @@ bool NVVMReflect::runOnFunction(Function &F) { return ToRemove.size() > 0; } + +bool NVVMReflect::runOnFunction(Function &F) { + return runNVVMReflect(F, SmVersion); +} + +NVVMReflectPass::NVVMReflectPass() : NVVMReflectPass(0) {} + +PreservedAnalyses NVVMReflectPass::run(Function &F, + FunctionAnalysisManager &AM) { + return runNVVMReflect(F, SmVersion) ? 
PreservedAnalyses::none() + : PreservedAnalyses::all(); +} diff --git a/test/CodeGen/NVPTX/intrinsic-old.ll b/test/CodeGen/NVPTX/intrinsic-old.ll index 4ce31c00771..e16786d70fb 100644 --- a/test/CodeGen/NVPTX/intrinsic-old.ll +++ b/test/CodeGen/NVPTX/intrinsic-old.ll @@ -2,9 +2,14 @@ ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck -allow-deprecated-dag-overlap %s ; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -nvvm-intr-range \ ; RUN: | FileCheck -allow-deprecated-dag-overlap --check-prefix=RANGE --check-prefix=RANGE_20 %s +; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -passes=nvvm-intr-range \ +; RUN: | FileCheck -allow-deprecated-dag-overlap --check-prefix=RANGE --check-prefix=RANGE_20 %s ; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda \ ; RUN: -nvvm-intr-range -nvvm-intr-range-sm=30 \ ; RUN: | FileCheck -allow-deprecated-dag-overlap --check-prefix=RANGE --check-prefix=RANGE_30 %s +; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda \ +; RUN: -passes=nvvm-intr-range -nvvm-intr-range-sm=30 \ +; RUN: | FileCheck -allow-deprecated-dag-overlap --check-prefix=RANGE --check-prefix=RANGE_30 %s define ptx_device i32 @test_tid_x() { ; CHECK: mov.u32 %r{{[0-9]+}}, %tid.x; diff --git a/test/CodeGen/NVPTX/nvvm-reflect-module-flag.ll b/test/CodeGen/NVPTX/nvvm-reflect-module-flag.ll index 4fdab5c087d..57ab3379870 100644 --- a/test/CodeGen/NVPTX/nvvm-reflect-module-flag.ll +++ b/test/CodeGen/NVPTX/nvvm-reflect-module-flag.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -S -nvvm-reflect | FileCheck %s +; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -nvvm-reflect | FileCheck %s +; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -passes=nvvm-reflect | FileCheck %s declare i32 @__nvvm_reflect(i8*) @str = private unnamed_addr addrspace(1) constant [11 x i8] c"__CUDA_FTZ\00" diff --git a/test/CodeGen/NVPTX/nvvm-reflect.ll b/test/CodeGen/NVPTX/nvvm-reflect.ll index 165597d6baf..f7403df2cd4 100644 --- a/test/CodeGen/NVPTX/nvvm-reflect.ll +++ b/test/CodeGen/NVPTX/nvvm-reflect.ll @@ -3,12 +3,12 @@ ; RUN: 
cat %s > %t.noftz ; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 0}' >> %t.noftz -; RUN: opt %t.noftz -S -nvvm-reflect -O2 \ +; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -nvvm-reflect -O2 \ ; RUN: | FileCheck %s --check-prefix=USE_FTZ_0 --check-prefix=CHECK ; RUN: cat %s > %t.ftz ; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}' >> %t.ftz -; RUN: opt %t.ftz -S -nvvm-reflect -O2 \ +; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -nvvm-reflect -O2 \ ; RUN: | FileCheck %s --check-prefix=USE_FTZ_1 --check-prefix=CHECK @str = private unnamed_addr addrspace(4) constant [11 x i8] c"__CUDA_FTZ\00" diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp index 2408aa939da..99197a8b5fd 100644 --- a/tools/opt/opt.cpp +++ b/tools/opt/opt.cpp @@ -463,6 +463,8 @@ struct TimeTracerRAII { // it exists. static bool shouldPinPassToLegacyPM(StringRef Pass) { std::vector<StringRef> PassNameExactToIgnore = { + "nvvm-reflect", + "nvvm-intr-range", "amdgpu-simplifylib", "amdgpu-usenative", "amdgpu-promote-alloca",