//===- AMDGPUAttributor.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass uses Attributor framework to deduce AMDGPU attributes.
//
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/Attributor.h"
#define DEBUG_TYPE "amdgpu-attributor"
using namespace llvm;
static constexpr StringLiteral ImplicitAttrNames[] = {
    // The x ids are included here even though propagating them to kernels is
    // redundant (they are always enabled there).
"amdgpu-work-item-id-x", "amdgpu-work-item-id-y",
"amdgpu-work-item-id-z", "amdgpu-work-group-id-x",
"amdgpu-work-group-id-y", "amdgpu-work-group-id-z",
"amdgpu-dispatch-ptr", "amdgpu-dispatch-id",
"amdgpu-queue-ptr", "amdgpu-implicitarg-ptr"};
// We do not need to note the x workitem or workgroup id because they are always
// initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
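/// Map an AMDGPU/R600 intrinsic to the implicit-input attribute it requires.
/// \p NonKernelOnly is set for the x ids, which only need an attribute on
/// non-kernel functions; \p IsQueuePtr is set when the intrinsic needs the
/// queue pointer. Returns an empty StringRef for unrelated intrinsics.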
static StringRef intrinsicToAttrName(Intrinsic::ID ID, bool &NonKernelOnly,
bool &IsQueuePtr) {
switch (ID) {
case Intrinsic::amdgcn_workitem_id_x:
NonKernelOnly = true;
return "amdgpu-work-item-id-x";
case Intrinsic::amdgcn_workgroup_id_x:
NonKernelOnly = true;
return "amdgpu-work-group-id-x";
case Intrinsic::amdgcn_workitem_id_y:
case Intrinsic::r600_read_tidig_y:
return "amdgpu-work-item-id-y";
case Intrinsic::amdgcn_workitem_id_z:
case Intrinsic::r600_read_tidig_z:
return "amdgpu-work-item-id-z";
case Intrinsic::amdgcn_workgroup_id_y:
case Intrinsic::r600_read_tgid_y:
return "amdgpu-work-group-id-y";
case Intrinsic::amdgcn_workgroup_id_z:
case Intrinsic::r600_read_tgid_z:
return "amdgpu-work-group-id-z";
case Intrinsic::amdgcn_dispatch_ptr:
return "amdgpu-dispatch-ptr";
case Intrinsic::amdgcn_dispatch_id:
return "amdgpu-dispatch-id";
case Intrinsic::amdgcn_kernarg_segment_ptr:
return "amdgpu-kernarg-segment-ptr";
case Intrinsic::amdgcn_implicitarg_ptr:
return "amdgpu-implicitarg-ptr";
case Intrinsic::amdgcn_queue_ptr:
case Intrinsic::amdgcn_is_shared:
case Intrinsic::amdgcn_is_private:
// TODO: Does not require queue ptr on gfx9+
case Intrinsic::trap:
case Intrinsic::debugtrap:
IsQueuePtr = true;
return "amdgpu-queue-ptr";
default:
return "";
}
}
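// A sketch of the deduction this mapping drives (illustrative IR, not from
// the test suite): given
//
//   define void @use_tid_y() {
//     %y = call i32 @llvm.amdgcn.workitem.id.y()
//     ret void
//   }
//
// the pass marks @use_tid_y with "amdgpu-work-item-id-y", and the attribute
// then flows to every function that (transitively) calls it.

/// Casts from the local or private address space to flat require the queue
/// pointer (to read the aperture bases) unless the subtarget has aperture
/// registers.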
static bool castRequiresQueuePtr(unsigned SrcAS) {
return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}
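/// Returns true if \p C is a global value in the LDS (local) or region
/// address space.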
static bool isDSAddress(const Constant *C) {
const GlobalValue *GV = dyn_cast<GlobalValue>(C);
if (!GV)
return false;
unsigned AS = GV->getAddressSpace();
return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}
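/// InformationCache specialized for AMDGPU: it carries the TargetMachine so
/// the abstract attributes below can query GCN subtarget features, and it
/// memoizes which constants touch LDS globals or address-space casts.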
class AMDGPUInformationCache : public InformationCache {
public:
AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
BumpPtrAllocator &Allocator,
SetVector<Function *> *CGSCC, TargetMachine &TM)
: InformationCache(M, AG, Allocator, CGSCC), TM(TM) {}
TargetMachine &TM;
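  /// Bitmask recording what a visited constant (transitively) contains: a
  /// reference to an LDS/region global, and/or an addrspacecast from the
  /// local or private address space.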
enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };
/// Check if the subtarget has aperture regs.
bool hasApertureRegs(Function &F) {
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
return ST.hasApertureRegs();
}
private:
/// Check if the ConstantExpr \p CE requires queue ptr attribute.
static bool visitConstExpr(const ConstantExpr *CE) {
if (CE->getOpcode() == Instruction::AddrSpaceCast) {
unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
return castRequiresQueuePtr(SrcAS);
}
return false;
}
/// Get the constant access bitmap for \p C.
uint8_t getConstantAccess(const Constant *C) {
auto It = ConstantStatus.find(C);
if (It != ConstantStatus.end())
return It->second;
uint8_t Result = 0;
if (isDSAddress(C))
Result = DS_GLOBAL;
if (const auto *CE = dyn_cast<ConstantExpr>(C))
if (visitConstExpr(CE))
Result |= ADDR_SPACE_CAST;
for (const Use &U : C->operands()) {
const auto *OpC = dyn_cast<Constant>(U);
if (!OpC)
continue;
Result |= getConstantAccess(OpC);
}
return Result;
}
public:
/// Returns true if \p Fn needs a queue ptr attribute because of \p C.
bool needsQueuePtr(const Constant *C, Function &Fn) {
bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
bool HasAperture = hasApertureRegs(Fn);
// No need to explore the constants.
if (!IsNonEntryFunc && HasAperture)
return false;
uint8_t Access = getConstantAccess(C);
    // We need to trap on DS globals in non-entry functions, and the trap
    // lowering itself needs the queue pointer.
if (IsNonEntryFunc && (Access & DS_GLOBAL))
return true;
return !HasAperture && (Access & ADDR_SPACE_CAST);
}
private:
/// Used to determine if the Constant needs a queue ptr attribute.
DenseMap<const Constant *, uint8_t> ConstantStatus;
};
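/// Abstract attribute that collects the implicit-input attributes (the
/// entries of ImplicitAttrNames, plus "amdgpu-calls" and
/// "amdgpu-stack-objects") that a function requires.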
struct AAAMDAttributes : public StateWrapper<BooleanState, AbstractAttribute> {
using Base = StateWrapper<BooleanState, AbstractAttribute>;
AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
/// Create an abstract attribute view for the position \p IRP.
static AAAMDAttributes &createForPosition(const IRPosition &IRP,
Attributor &A);
/// See AbstractAttribute::getName().
const std::string getName() const override { return "AAAMDAttributes"; }
/// See AbstractAttribute::getIdAddr().
const char *getIdAddr() const override { return &ID; }
/// This function should return true if the type of the \p AA is
/// AAAMDAttributes.
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
virtual const DenseSet<StringRef> &getAttributes() const = 0;
/// Unique ID (due to the unique address)
static const char ID;
};
const char AAAMDAttributes::ID = 0;
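/// Abstract attribute that propagates the "uniform-work-group-size"
/// attribute from kernels down to the functions they call.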
struct AAAMDWorkGroupSize
: public StateWrapper<BooleanState, AbstractAttribute> {
using Base = StateWrapper<BooleanState, AbstractAttribute>;
AAAMDWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
/// Create an abstract attribute view for the position \p IRP.
static AAAMDWorkGroupSize &createForPosition(const IRPosition &IRP,
Attributor &A);
/// See AbstractAttribute::getName().
const std::string getName() const override { return "AAAMDWorkGroupSize"; }
/// See AbstractAttribute::getIdAddr().
const char *getIdAddr() const override { return &ID; }
/// This function should return true if the type of the \p AA is
/// AAAMDAttributes.
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
/// Unique ID (due to the unique address)
static const char ID;
};
const char AAAMDWorkGroupSize::ID = 0;
struct AAAMDWorkGroupSizeFunction : public AAAMDWorkGroupSize {
AAAMDWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
: AAAMDWorkGroupSize(IRP, A) {}
void initialize(Attributor &A) override {
Function *F = getAssociatedFunction();
CallingConv::ID CC = F->getCallingConv();
if (CC != CallingConv::AMDGPU_KERNEL)
return;
bool InitialValue = false;
if (F->hasFnAttribute("uniform-work-group-size"))
InitialValue = F->getFnAttribute("uniform-work-group-size")
.getValueAsString()
.equals("true");
if (InitialValue)
indicateOptimisticFixpoint();
else
indicatePessimisticFixpoint();
}
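  // Meet over all call sites: a function may assume a uniform work-group
  // size only if every (known) caller does.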
ChangeStatus updateImpl(Attributor &A) override {
ChangeStatus Change = ChangeStatus::UNCHANGED;
auto CheckCallSite = [&](AbstractCallSite CS) {
Function *Caller = CS.getInstruction()->getFunction();
LLVM_DEBUG(dbgs() << "[AAAMDWorkGroupSize] Call " << Caller->getName()
<< "->" << getAssociatedFunction()->getName() << "\n");
const auto &CallerInfo = A.getAAFor<AAAMDWorkGroupSize>(
*this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
Change = Change | clampStateAndIndicateChange(this->getState(),
CallerInfo.getState());
return true;
};
bool AllCallSitesKnown = true;
if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
indicatePessimisticFixpoint();
return Change;
}
ChangeStatus manifest(Attributor &A) override {
SmallVector<Attribute, 8> AttrList;
LLVMContext &Ctx = getAssociatedFunction()->getContext();
AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
getAssumed() ? "true" : "false"));
return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
/* ForceReplace */ true);
}
bool isValidState() const override {
// This state is always valid, even when the state is false.
return true;
}
const std::string getAsStr() const override {
return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {}
};
AAAMDWorkGroupSize &AAAMDWorkGroupSize::createForPosition(const IRPosition &IRP,
Attributor &A) {
if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
return *new (A.Allocator) AAAMDWorkGroupSizeFunction(IRP, A);
llvm_unreachable("AAAMDWorkGroupSize is only valid for function position");
}
struct AAAMDAttributesFunction : public AAAMDAttributes {
AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
: AAAMDAttributes(IRP, A) {}
void initialize(Attributor &A) override {
Function *F = getAssociatedFunction();
CallingConv::ID CC = F->getCallingConv();
bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);
    // Don't add attributes to intrinsics.
if (F->isIntrinsic()) {
indicatePessimisticFixpoint();
return;
}
    // Ignore functions with graphics calling conventions; these are currently
    // not allowed to have kernel arguments.
if (AMDGPU::isGraphics(F->getCallingConv())) {
indicatePessimisticFixpoint();
return;
}
for (StringRef Attr : ImplicitAttrNames) {
if (F->hasFnAttribute(Attr))
Attributes.insert(Attr);
}
// TODO: We shouldn't need this in the future.
if (CallingConvSupportsAllImplicits &&
F->hasAddressTaken(nullptr, true, true, true)) {
for (StringRef AttrName : ImplicitAttrNames) {
Attributes.insert(AttrName);
}
}
}
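  // The update step: gather attributes implied by intrinsic callees, inherit
  // them from non-intrinsic callees, and finally scan the body (allocas,
  // addrspacecasts, constant operands) for stack-object and queue-ptr needs.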
ChangeStatus updateImpl(Attributor &A) override {
Function *F = getAssociatedFunction();
ChangeStatus Change = ChangeStatus::UNCHANGED;
bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
CallingConv::ID CC = F->getCallingConv();
bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);
auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
auto AddAttribute = [&](StringRef AttrName) {
if (Attributes.insert(AttrName).second)
Change = ChangeStatus::CHANGED;
};
// Check for Intrinsics and propagate attributes.
const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
*this, this->getIRPosition(), DepClassTy::REQUIRED);
// We have to assume that we can reach a function with these attributes.
    // We do not consider inline assembly as an unknown callee.
if (CallingConvSupportsAllImplicits && AAEdges.hasNonAsmUnknownCallee()) {
for (StringRef AttrName : ImplicitAttrNames) {
AddAttribute(AttrName);
}
}
bool NeedsQueuePtr = false;
bool HasCall = false;
for (Function *Callee : AAEdges.getOptimisticEdges()) {
Intrinsic::ID IID = Callee->getIntrinsicID();
if (IID != Intrinsic::not_intrinsic) {
if (!IsNonEntryFunc && IID == Intrinsic::amdgcn_kernarg_segment_ptr) {
AddAttribute("amdgpu-kernarg-segment-ptr");
continue;
}
bool NonKernelOnly = false;
StringRef AttrName =
intrinsicToAttrName(IID, NonKernelOnly, NeedsQueuePtr);
if (!AttrName.empty() && (IsNonEntryFunc || !NonKernelOnly))
AddAttribute(AttrName);
continue;
}
HasCall = true;
const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
*this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
const DenseSet<StringRef> &CalleeAttributes = AAAMD.getAttributes();
// Propagate implicit attributes from called function.
for (StringRef AttrName : ImplicitAttrNames)
if (CalleeAttributes.count(AttrName))
AddAttribute(AttrName);
}
HasCall |= AAEdges.hasUnknownCallee();
if (!IsNonEntryFunc && HasCall)
AddAttribute("amdgpu-calls");
// Check the function body.
auto CheckAlloca = [&](Instruction &I) {
AddAttribute("amdgpu-stack-objects");
return false;
};
bool UsedAssumedInformation = false;
A.checkForAllInstructions(CheckAlloca, *this, {Instruction::Alloca},
UsedAssumedInformation);
// If we found that we need amdgpu-queue-ptr, nothing else to do.
if (NeedsQueuePtr || Attributes.count("amdgpu-queue-ptr")) {
AddAttribute("amdgpu-queue-ptr");
return Change;
}
auto CheckAddrSpaceCasts = [&](Instruction &I) {
unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
if (castRequiresQueuePtr(SrcAS)) {
NeedsQueuePtr = true;
return false;
}
return true;
};
bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
    // `checkForAllInstructions` is much cheaper than the manual scan over all
    // instructions below, so try it first.
    // amdgpu-queue-ptr is not needed if aperture registers are present.
if (!HasApertureRegs)
A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
{Instruction::AddrSpaceCast},
UsedAssumedInformation);
// If we found that we need amdgpu-queue-ptr, nothing else to do.
if (NeedsQueuePtr) {
AddAttribute("amdgpu-queue-ptr");
return Change;
}
if (!IsNonEntryFunc && HasApertureRegs)
return Change;
for (BasicBlock &BB : *F) {
for (Instruction &I : BB) {
for (const Use &U : I.operands()) {
if (const auto *C = dyn_cast<Constant>(U)) {
if (InfoCache.needsQueuePtr(C, *F)) {
AddAttribute("amdgpu-queue-ptr");
return Change;
}
}
}
}
}
return Change;
}
ChangeStatus manifest(Attributor &A) override {
SmallVector<Attribute, 8> AttrList;
LLVMContext &Ctx = getAssociatedFunction()->getContext();
for (StringRef AttrName : Attributes)
AttrList.push_back(Attribute::get(Ctx, AttrName));
return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
/* ForceReplace */ true);
}
const std::string getAsStr() const override {
return "AMDInfo[" + std::to_string(Attributes.size()) + "]";
}
const DenseSet<StringRef> &getAttributes() const override {
return Attributes;
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {}
private:
DenseSet<StringRef> Attributes;
};
AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
Attributor &A) {
if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
llvm_unreachable("AAAMDAttributes is only valid for function position");
}
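/// Legacy module pass that runs the Attributor fixpoint iteration with the
/// two AMDGPU abstract attributes above seeded on every function.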
class AMDGPUAttributor : public ModulePass {
public:
AMDGPUAttributor() : ModulePass(ID) {}
  /// doInitialization - Grab the TargetMachine from TargetPassConfig; the
  /// subtarget is needed to answer aperture-register queries.
bool doInitialization(Module &) override {
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
if (!TPC)
report_fatal_error("TargetMachine is required");
TM = &TPC->getTM<TargetMachine>();
return false;
}
bool runOnModule(Module &M) override {
SetVector<Function *> Functions;
AnalysisGetter AG;
for (Function &F : M)
Functions.insert(&F);
CallGraphUpdater CGUpdater;
BumpPtrAllocator Allocator;
AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
Attributor A(Functions, InfoCache, CGUpdater);
for (Function &F : M) {
A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
A.getOrCreateAAFor<AAAMDWorkGroupSize>(IRPosition::function(F));
}
ChangeStatus Change = A.run();
return Change == ChangeStatus::CHANGED;
}
StringRef getPassName() const override { return "AMDGPU Attributor"; }
TargetMachine *TM;
static char ID;
};
char AMDGPUAttributor::ID = 0;
Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false)
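// INITIALIZE_PASS registers the pass under DEBUG_TYPE, so it can be run in
// isolation (a sketch, assuming the legacy pass manager and a build that
// includes the AMDGPU target):
//
//   opt -mtriple=amdgcn-- -amdgpu-attributor -S in.ll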