mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
[NewPM][NVPTX] Port NVPTX opt passes
There are only two used in the IR optimization pipeline. Port these and add them to the default pipeline. Similar to https://reviews.llvm.org/D93863. I added -mtriple to some tests since under the new PM, the passes are only available when the TargetMachine is specified. Reviewed By: rnk Differential Revision: https://reviews.llvm.org/D93930
This commit is contained in:
parent
50616d8f56
commit
1374d1a4d3
@ -14,6 +14,7 @@
|
|||||||
#ifndef LLVM_LIB_TARGET_NVPTX_NVPTX_H
|
#ifndef LLVM_LIB_TARGET_NVPTX_NVPTX_H
|
||||||
#define LLVM_LIB_TARGET_NVPTX_NVPTX_H
|
#define LLVM_LIB_TARGET_NVPTX_NVPTX_H
|
||||||
|
|
||||||
|
#include "llvm/IR/PassManager.h"
|
||||||
#include "llvm/Pass.h"
|
#include "llvm/Pass.h"
|
||||||
#include "llvm/Support/CodeGen.h"
|
#include "llvm/Support/CodeGen.h"
|
||||||
|
|
||||||
@ -47,6 +48,24 @@ FunctionPass *createNVPTXLowerAllocaPass();
|
|||||||
MachineFunctionPass *createNVPTXPeephole();
|
MachineFunctionPass *createNVPTXPeephole();
|
||||||
MachineFunctionPass *createNVPTXProxyRegErasurePass();
|
MachineFunctionPass *createNVPTXProxyRegErasurePass();
|
||||||
|
|
||||||
|
/// Function pass for the new pass manager (PassInfoMixin-based) that carries
/// an SM version and exposes the NVVM intrinsic-range logic through run().
struct NVVMIntrRangePass : PassInfoMixin<NVVMIntrRangePass> {
|
||||||
|
// Default constructor; declared here, defined out of line.
NVVMIntrRangePass();
|
||||||
|
// Constructs the pass for an explicitly supplied SM version.
NVVMIntrRangePass(unsigned SmVersion) : SmVersion(SmVersion) {}
|
||||||
|
// Entry point called by the function pass manager; defined out of line.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
|
||||||
|
|
||||||
|
private:
|
||||||
|
// SM version this pass instance was constructed with.
unsigned SmVersion;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Function pass for the new pass manager (PassInfoMixin-based) that carries
/// an SM version and exposes the NVVM reflect logic through run().
struct NVVMReflectPass : PassInfoMixin<NVVMReflectPass> {
|
||||||
|
// Default constructor; declared here, defined out of line.
NVVMReflectPass();
|
||||||
|
// Constructs the pass for an explicitly supplied SM version.
NVVMReflectPass(unsigned SmVersion) : SmVersion(SmVersion) {}
|
||||||
|
// Entry point called by the function pass manager; defined out of line.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
|
||||||
|
|
||||||
|
private:
|
||||||
|
// SM version this pass instance was constructed with.
unsigned SmVersion;
|
||||||
|
};
|
||||||
|
|
||||||
namespace NVPTX {
|
namespace NVPTX {
|
||||||
enum DrvInterface {
|
enum DrvInterface {
|
||||||
NVCL,
|
NVCL,
|
||||||
|
@ -24,6 +24,7 @@
|
|||||||
#include "llvm/CodeGen/TargetPassConfig.h"
|
#include "llvm/CodeGen/TargetPassConfig.h"
|
||||||
#include "llvm/IR/LegacyPassManager.h"
|
#include "llvm/IR/LegacyPassManager.h"
|
||||||
#include "llvm/Pass.h"
|
#include "llvm/Pass.h"
|
||||||
|
#include "llvm/Passes/PassBuilder.h"
|
||||||
#include "llvm/Support/CommandLine.h"
|
#include "llvm/Support/CommandLine.h"
|
||||||
#include "llvm/Support/TargetRegistry.h"
|
#include "llvm/Support/TargetRegistry.h"
|
||||||
#include "llvm/Target/TargetMachine.h"
|
#include "llvm/Target/TargetMachine.h"
|
||||||
@ -205,6 +206,32 @@ void NVPTXTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Registers two callbacks on the given PassBuilder:
///  1. a pipeline-parsing callback that maps the textual pass names
///     "nvvm-reflect" and "nvvm-intr-range" to the corresponding passes, and
///  2. a pipeline-start callback that schedules both passes, constructed with
///     the subtarget's SM version, at the start of the module pipeline.
/// \param PB               PassBuilder that receives the callbacks.
/// \param DebugPassManager Forwarded to the FunctionPassManager constructor.
void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB,
|
||||||
|
bool DebugPassManager) {
|
||||||
|
// Name-based lookup: the callback returns true when PassName matched and a
// pass was added to PM, false otherwise.
PB.registerPipelineParsingCallback(
|
||||||
|
[](StringRef PassName, FunctionPassManager &PM,
|
||||||
|
ArrayRef<PassBuilder::PipelineElement>) {
|
||||||
|
if (PassName == "nvvm-reflect") {
|
||||||
|
// Default-constructed: no SM version is passed on this path.
PM.addPass(NVVMReflectPass());
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (PassName == "nvvm-intr-range") {
|
||||||
|
// Default-constructed: no SM version is passed on this path.
PM.addPass(NVVMIntrRangePass());
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
});
|
||||||
|
|
||||||
|
// Pipeline-start hook: both passes are added regardless of Level, which is
// not read in the lambda body.
// NOTE(review): the lambda captures `this` to reach Subtarget; presumably the
// TargetMachine outlives every use of these callbacks - confirm.
PB.registerPipelineStartEPCallback(
|
||||||
|
[this, DebugPassManager](ModulePassManager &PM,
|
||||||
|
PassBuilder::OptimizationLevel Level) {
|
||||||
|
FunctionPassManager FPM(DebugPassManager);
|
||||||
|
// Both passes receive the SM version reported by the subtarget.
FPM.addPass(NVVMReflectPass(Subtarget.getSmVersion()));
|
||||||
|
FPM.addPass(NVVMIntrRangePass(Subtarget.getSmVersion()));
|
||||||
|
// Wrap the function pipeline so it can be added to the module pipeline.
PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
TargetTransformInfo
|
TargetTransformInfo
|
||||||
NVPTXTargetMachine::getTargetTransformInfo(const Function &F) {
|
NVPTXTargetMachine::getTargetTransformInfo(const Function &F) {
|
||||||
return TargetTransformInfo(NVPTXTTIImpl(this, F));
|
return TargetTransformInfo(NVPTXTTIImpl(this, F));
|
||||||
|
@ -62,6 +62,8 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
void adjustPassManager(PassManagerBuilder &) override;
|
void adjustPassManager(PassManagerBuilder &) override;
|
||||||
|
void registerPassBuilderCallbacks(PassBuilder &PB,
|
||||||
|
bool DebugPassManager) override;
|
||||||
|
|
||||||
TargetTransformInfo getTargetTransformInfo(const Function &F) override;
|
TargetTransformInfo getTargetTransformInfo(const Function &F) override;
|
||||||
|
|
||||||
|
@ -17,6 +17,7 @@
|
|||||||
#include "llvm/IR/Instructions.h"
|
#include "llvm/IR/Instructions.h"
|
||||||
#include "llvm/IR/Intrinsics.h"
|
#include "llvm/IR/Intrinsics.h"
|
||||||
#include "llvm/IR/IntrinsicsNVPTX.h"
|
#include "llvm/IR/IntrinsicsNVPTX.h"
|
||||||
|
#include "llvm/IR/PassManager.h"
|
||||||
#include "llvm/Support/CommandLine.h"
|
#include "llvm/Support/CommandLine.h"
|
||||||
|
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
@ -32,21 +33,13 @@ static cl::opt<unsigned> NVVMIntrRangeSM("nvvm-intr-range-sm", cl::init(20),
|
|||||||
namespace {
|
namespace {
|
||||||
class NVVMIntrRange : public FunctionPass {
|
class NVVMIntrRange : public FunctionPass {
|
||||||
private:
|
private:
|
||||||
struct {
|
unsigned SmVersion;
|
||||||
unsigned x, y, z;
|
|
||||||
} MaxBlockSize, MaxGridSize;
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
static char ID;
|
static char ID;
|
||||||
NVVMIntrRange() : NVVMIntrRange(NVVMIntrRangeSM) {}
|
NVVMIntrRange() : NVVMIntrRange(NVVMIntrRangeSM) {}
|
||||||
NVVMIntrRange(unsigned int SmVersion) : FunctionPass(ID) {
|
NVVMIntrRange(unsigned int SmVersion)
|
||||||
MaxBlockSize.x = 1024;
|
: FunctionPass(ID), SmVersion(SmVersion) {
|
||||||
MaxBlockSize.y = 1024;
|
|
||||||
MaxBlockSize.z = 64;
|
|
||||||
|
|
||||||
MaxGridSize.x = SmVersion >= 30 ? 0x7fffffff : 0xffff;
|
|
||||||
MaxGridSize.y = 0xffff;
|
|
||||||
MaxGridSize.z = 0xffff;
|
|
||||||
|
|
||||||
initializeNVVMIntrRangePass(*PassRegistry::getPassRegistry());
|
initializeNVVMIntrRangePass(*PassRegistry::getPassRegistry());
|
||||||
}
|
}
|
||||||
@ -79,7 +72,18 @@ static bool addRangeMetadata(uint64_t Low, uint64_t High, CallInst *C) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool NVVMIntrRange::runOnFunction(Function &F) {
|
static bool runNVVMIntrRange(Function &F, unsigned SmVersion) {
|
||||||
|
struct {
|
||||||
|
unsigned x, y, z;
|
||||||
|
} MaxBlockSize, MaxGridSize;
|
||||||
|
MaxBlockSize.x = 1024;
|
||||||
|
MaxBlockSize.y = 1024;
|
||||||
|
MaxBlockSize.z = 64;
|
||||||
|
|
||||||
|
MaxGridSize.x = SmVersion >= 30 ? 0x7fffffff : 0xffff;
|
||||||
|
MaxGridSize.y = 0xffff;
|
||||||
|
MaxGridSize.z = 0xffff;
|
||||||
|
|
||||||
// Go through the calls in this function.
|
// Go through the calls in this function.
|
||||||
bool Changed = false;
|
bool Changed = false;
|
||||||
for (Instruction &I : instructions(F)) {
|
for (Instruction &I : instructions(F)) {
|
||||||
@ -151,3 +155,15 @@ bool NVVMIntrRange::runOnFunction(Function &F) {
|
|||||||
|
|
||||||
return Changed;
|
return Changed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FunctionPass entry point: forwards to the shared static implementation,
// passing the SM version stored in this pass object, and returns its result.
bool NVVMIntrRange::runOnFunction(Function &F) {
|
||||||
|
return runNVVMIntrRange(F, SmVersion);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Default constructor: delegates to the SM-version constructor, using the
// value of the NVVMIntrRangeSM command-line option as the SM version.
NVVMIntrRangePass::NVVMIntrRangePass() : NVVMIntrRangePass(NVVMIntrRangeSM) {}
|
||||||
|
|
||||||
|
// New pass manager entry point: runs the shared implementation on F with the
// stored SM version. AM is not used.
PreservedAnalyses NVVMIntrRangePass::run(Function &F,
|
||||||
|
FunctionAnalysisManager &AM) {
|
||||||
|
// A true result (function changed) reports no analyses preserved;
// otherwise all analyses are reported as preserved.
return runNVVMIntrRange(F, SmVersion) ? PreservedAnalyses::none()
|
||||||
|
: PreservedAnalyses::all();
|
||||||
|
}
|
||||||
|
@ -29,6 +29,7 @@
|
|||||||
#include "llvm/IR/Intrinsics.h"
|
#include "llvm/IR/Intrinsics.h"
|
||||||
#include "llvm/IR/IntrinsicsNVPTX.h"
|
#include "llvm/IR/IntrinsicsNVPTX.h"
|
||||||
#include "llvm/IR/Module.h"
|
#include "llvm/IR/Module.h"
|
||||||
|
#include "llvm/IR/PassManager.h"
|
||||||
#include "llvm/IR/Type.h"
|
#include "llvm/IR/Type.h"
|
||||||
#include "llvm/Pass.h"
|
#include "llvm/Pass.h"
|
||||||
#include "llvm/Support/CommandLine.h"
|
#include "llvm/Support/CommandLine.h"
|
||||||
@ -73,7 +74,7 @@ INITIALIZE_PASS(NVVMReflect, "nvvm-reflect",
|
|||||||
"Replace occurrences of __nvvm_reflect() calls with 0/1", false,
|
"Replace occurrences of __nvvm_reflect() calls with 0/1", false,
|
||||||
false)
|
false)
|
||||||
|
|
||||||
bool NVVMReflect::runOnFunction(Function &F) {
|
static bool runNVVMReflect(Function &F, unsigned SmVersion) {
|
||||||
if (!NVVMReflectEnabled)
|
if (!NVVMReflectEnabled)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
@ -179,3 +180,15 @@ bool NVVMReflect::runOnFunction(Function &F) {
|
|||||||
|
|
||||||
return ToRemove.size() > 0;
|
return ToRemove.size() > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// runOnFunction entry point of the NVVMReflect pass class: forwards to the
// shared static implementation with the stored SM version and returns its
// result.
bool NVVMReflect::runOnFunction(Function &F) {
|
||||||
|
return runNVVMReflect(F, SmVersion);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Default constructor: delegates to the SM-version constructor with an SM
// version of 0.
NVVMReflectPass::NVVMReflectPass() : NVVMReflectPass(0) {}
|
||||||
|
|
||||||
|
// New pass manager entry point: runs the shared implementation on F with the
// stored SM version. AM is not used.
PreservedAnalyses NVVMReflectPass::run(Function &F,
|
||||||
|
FunctionAnalysisManager &AM) {
|
||||||
|
// A true result reports no analyses preserved; otherwise all analyses are
// reported as preserved.
return runNVVMReflect(F, SmVersion) ? PreservedAnalyses::none()
|
||||||
|
: PreservedAnalyses::all();
|
||||||
|
}
|
||||||
|
@ -2,9 +2,14 @@
|
|||||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck -allow-deprecated-dag-overlap %s
|
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck -allow-deprecated-dag-overlap %s
|
||||||
; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -nvvm-intr-range \
|
; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -nvvm-intr-range \
|
||||||
; RUN: | FileCheck -allow-deprecated-dag-overlap --check-prefix=RANGE --check-prefix=RANGE_20 %s
|
; RUN: | FileCheck -allow-deprecated-dag-overlap --check-prefix=RANGE --check-prefix=RANGE_20 %s
|
||||||
|
; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -passes=nvvm-intr-range \
|
||||||
|
; RUN: | FileCheck -allow-deprecated-dag-overlap --check-prefix=RANGE --check-prefix=RANGE_20 %s
|
||||||
; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda \
|
; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda \
|
||||||
; RUN: -nvvm-intr-range -nvvm-intr-range-sm=30 \
|
; RUN: -nvvm-intr-range -nvvm-intr-range-sm=30 \
|
||||||
; RUN: | FileCheck -allow-deprecated-dag-overlap --check-prefix=RANGE --check-prefix=RANGE_30 %s
|
; RUN: | FileCheck -allow-deprecated-dag-overlap --check-prefix=RANGE --check-prefix=RANGE_30 %s
|
||||||
|
; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda \
|
||||||
|
; RUN: -passes=nvvm-intr-range -nvvm-intr-range-sm=30 \
|
||||||
|
; RUN: | FileCheck -allow-deprecated-dag-overlap --check-prefix=RANGE --check-prefix=RANGE_30 %s
|
||||||
|
|
||||||
define ptx_device i32 @test_tid_x() {
|
define ptx_device i32 @test_tid_x() {
|
||||||
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.x;
|
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.x;
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
; RUN: opt < %s -S -nvvm-reflect | FileCheck %s
|
; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -nvvm-reflect | FileCheck %s
|
||||||
|
; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -passes=nvvm-reflect | FileCheck %s
|
||||||
|
|
||||||
declare i32 @__nvvm_reflect(i8*)
|
declare i32 @__nvvm_reflect(i8*)
|
||||||
@str = private unnamed_addr addrspace(1) constant [11 x i8] c"__CUDA_FTZ\00"
|
@str = private unnamed_addr addrspace(1) constant [11 x i8] c"__CUDA_FTZ\00"
|
||||||
|
@ -3,12 +3,12 @@
|
|||||||
|
|
||||||
; RUN: cat %s > %t.noftz
|
; RUN: cat %s > %t.noftz
|
||||||
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 0}' >> %t.noftz
|
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 0}' >> %t.noftz
|
||||||
; RUN: opt %t.noftz -S -nvvm-reflect -O2 \
|
; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -nvvm-reflect -O2 \
|
||||||
; RUN: | FileCheck %s --check-prefix=USE_FTZ_0 --check-prefix=CHECK
|
; RUN: | FileCheck %s --check-prefix=USE_FTZ_0 --check-prefix=CHECK
|
||||||
|
|
||||||
; RUN: cat %s > %t.ftz
|
; RUN: cat %s > %t.ftz
|
||||||
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}' >> %t.ftz
|
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}' >> %t.ftz
|
||||||
; RUN: opt %t.ftz -S -nvvm-reflect -O2 \
|
; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -nvvm-reflect -O2 \
|
||||||
; RUN: | FileCheck %s --check-prefix=USE_FTZ_1 --check-prefix=CHECK
|
; RUN: | FileCheck %s --check-prefix=USE_FTZ_1 --check-prefix=CHECK
|
||||||
|
|
||||||
@str = private unnamed_addr addrspace(4) constant [11 x i8] c"__CUDA_FTZ\00"
|
@str = private unnamed_addr addrspace(4) constant [11 x i8] c"__CUDA_FTZ\00"
|
||||||
|
@ -463,6 +463,8 @@ struct TimeTracerRAII {
|
|||||||
// it exists.
|
// it exists.
|
||||||
static bool shouldPinPassToLegacyPM(StringRef Pass) {
|
static bool shouldPinPassToLegacyPM(StringRef Pass) {
|
||||||
std::vector<StringRef> PassNameExactToIgnore = {
|
std::vector<StringRef> PassNameExactToIgnore = {
|
||||||
|
"nvvm-reflect",
|
||||||
|
"nvvm-intr-range",
|
||||||
"amdgpu-simplifylib",
|
"amdgpu-simplifylib",
|
||||||
"amdgpu-usenative",
|
"amdgpu-usenative",
|
||||||
"amdgpu-promote-alloca",
|
"amdgpu-promote-alloca",
|
||||||
|
Loading…
Reference in New Issue
Block a user