mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-19 11:02:59 +02:00
Revert "AMDGPU: Move isa version and EF_AMDGPU_MACH_* determination into TargetParser."
This reverts commit r341982. The change introduced a layering violation. Reverting to unbreak our integrate. llvm-svn: 342023
This commit is contained in:
parent
a6bdfdd0ca
commit
01270e7294
@ -320,13 +320,6 @@ enum GPUKind : uint32_t {
|
|||||||
GK_AMDGCN_LAST = GK_GFX906,
|
GK_AMDGCN_LAST = GK_GFX906,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Instruction set architecture version.
|
|
||||||
struct IsaVersion {
|
|
||||||
unsigned Major;
|
|
||||||
unsigned Minor;
|
|
||||||
unsigned Stepping;
|
|
||||||
};
|
|
||||||
|
|
||||||
// This isn't comprehensive for now, just things that are needed from the
|
// This isn't comprehensive for now, just things that are needed from the
|
||||||
// frontend driver.
|
// frontend driver.
|
||||||
enum ArchFeatureKind : uint32_t {
|
enum ArchFeatureKind : uint32_t {
|
||||||
@ -342,22 +335,18 @@ enum ArchFeatureKind : uint32_t {
|
|||||||
FEATURE_FAST_DENORMAL_F32 = 1 << 5
|
FEATURE_FAST_DENORMAL_F32 = 1 << 5
|
||||||
};
|
};
|
||||||
|
|
||||||
|
GPUKind parseArchAMDGCN(StringRef CPU);
|
||||||
|
GPUKind parseArchR600(StringRef CPU);
|
||||||
StringRef getArchNameAMDGCN(GPUKind AK);
|
StringRef getArchNameAMDGCN(GPUKind AK);
|
||||||
StringRef getArchNameR600(GPUKind AK);
|
StringRef getArchNameR600(GPUKind AK);
|
||||||
StringRef getCanonicalArchName(StringRef Arch);
|
StringRef getCanonicalArchName(StringRef Arch);
|
||||||
GPUKind parseArchAMDGCN(StringRef CPU);
|
|
||||||
GPUKind parseArchR600(StringRef CPU);
|
|
||||||
unsigned getArchAttrAMDGCN(GPUKind AK);
|
unsigned getArchAttrAMDGCN(GPUKind AK);
|
||||||
unsigned getArchAttrR600(GPUKind AK);
|
unsigned getArchAttrR600(GPUKind AK);
|
||||||
|
|
||||||
void fillValidArchListAMDGCN(SmallVectorImpl<StringRef> &Values);
|
void fillValidArchListAMDGCN(SmallVectorImpl<StringRef> &Values);
|
||||||
void fillValidArchListR600(SmallVectorImpl<StringRef> &Values);
|
void fillValidArchListR600(SmallVectorImpl<StringRef> &Values);
|
||||||
|
|
||||||
StringRef getArchNameFromElfMach(unsigned ElfMach);
|
}
|
||||||
unsigned getElfMach(StringRef GPU);
|
|
||||||
IsaVersion getIsaVersion(StringRef GPU);
|
|
||||||
|
|
||||||
} // namespace AMDGPU
|
|
||||||
|
|
||||||
} // namespace llvm
|
} // namespace llvm
|
||||||
|
|
||||||
|
@ -17,13 +17,11 @@
|
|||||||
#include "llvm/ADT/ArrayRef.h"
|
#include "llvm/ADT/ArrayRef.h"
|
||||||
#include "llvm/ADT/StringSwitch.h"
|
#include "llvm/ADT/StringSwitch.h"
|
||||||
#include "llvm/ADT/Twine.h"
|
#include "llvm/ADT/Twine.h"
|
||||||
#include "llvm/BinaryFormat/ELF.h"
|
|
||||||
#include <cctype>
|
#include <cctype>
|
||||||
|
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
using namespace ARM;
|
using namespace ARM;
|
||||||
using namespace AArch64;
|
using namespace AArch64;
|
||||||
using namespace AMDGPU;
|
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
@ -949,8 +947,6 @@ bool llvm::AArch64::isX18ReservedByDefault(const Triple &TT) {
|
|||||||
TT.isOSWindows();
|
TT.isOSWindows();
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace {
|
|
||||||
|
|
||||||
struct GPUInfo {
|
struct GPUInfo {
|
||||||
StringLiteral Name;
|
StringLiteral Name;
|
||||||
StringLiteral CanonicalName;
|
StringLiteral CanonicalName;
|
||||||
@ -958,9 +954,11 @@ struct GPUInfo {
|
|||||||
unsigned Features;
|
unsigned Features;
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr GPUInfo R600GPUs[26] = {
|
using namespace AMDGPU;
|
||||||
// Name Canonical Kind Features
|
static constexpr GPUInfo R600GPUs[26] = {
|
||||||
// Name
|
// Name Canonical Kind Features
|
||||||
|
// Name
|
||||||
|
//
|
||||||
{{"r600"}, {"r600"}, GK_R600, FEATURE_NONE },
|
{{"r600"}, {"r600"}, GK_R600, FEATURE_NONE },
|
||||||
{{"rv630"}, {"r600"}, GK_R600, FEATURE_NONE },
|
{{"rv630"}, {"r600"}, GK_R600, FEATURE_NONE },
|
||||||
{{"rv635"}, {"r600"}, GK_R600, FEATURE_NONE },
|
{{"rv635"}, {"r600"}, GK_R600, FEATURE_NONE },
|
||||||
@ -991,9 +989,9 @@ constexpr GPUInfo R600GPUs[26] = {
|
|||||||
|
|
||||||
// This table should be sorted by the value of GPUKind
|
// This table should be sorted by the value of GPUKind
|
||||||
// Don't bother listing the implicitly true features
|
// Don't bother listing the implicitly true features
|
||||||
constexpr GPUInfo AMDGCNGPUs[32] = {
|
static constexpr GPUInfo AMDGCNGPUs[32] = {
|
||||||
// Name Canonical Kind Features
|
// Name Canonical Kind Features
|
||||||
// Name
|
// Name
|
||||||
{{"gfx600"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32},
|
{{"gfx600"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32},
|
||||||
{{"tahiti"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32},
|
{{"tahiti"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32},
|
||||||
{{"gfx601"}, {"gfx601"}, GK_GFX601, FEATURE_NONE},
|
{{"gfx601"}, {"gfx601"}, GK_GFX601, FEATURE_NONE},
|
||||||
@ -1028,7 +1026,8 @@ constexpr GPUInfo AMDGCNGPUs[32] = {
|
|||||||
{{"gfx906"}, {"gfx906"}, GK_GFX906, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
|
{{"gfx906"}, {"gfx906"}, GK_GFX906, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
|
||||||
};
|
};
|
||||||
|
|
||||||
const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) {
|
static const GPUInfo *getArchEntry(AMDGPU::GPUKind AK,
|
||||||
|
ArrayRef<GPUInfo> Table) {
|
||||||
GPUInfo Search = { {""}, {""}, AK, AMDGPU::FEATURE_NONE };
|
GPUInfo Search = { {""}, {""}, AK, AMDGPU::FEATURE_NONE };
|
||||||
|
|
||||||
auto I = std::lower_bound(Table.begin(), Table.end(), Search,
|
auto I = std::lower_bound(Table.begin(), Table.end(), Search,
|
||||||
@ -1041,8 +1040,6 @@ const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) {
|
|||||||
return I;
|
return I;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace
|
|
||||||
|
|
||||||
StringRef llvm::AMDGPU::getArchNameAMDGCN(GPUKind AK) {
|
StringRef llvm::AMDGPU::getArchNameAMDGCN(GPUKind AK) {
|
||||||
if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs))
|
if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs))
|
||||||
return Entry->CanonicalName;
|
return Entry->CanonicalName;
|
||||||
@ -1095,118 +1092,3 @@ void AMDGPU::fillValidArchListR600(SmallVectorImpl<StringRef> &Values) {
|
|||||||
for (const auto C : R600GPUs)
|
for (const auto C : R600GPUs)
|
||||||
Values.push_back(C.Name);
|
Values.push_back(C.Name);
|
||||||
}
|
}
|
||||||
|
|
||||||
StringRef AMDGPU::getArchNameFromElfMach(unsigned ElfMach) {
|
|
||||||
AMDGPU::GPUKind AK;
|
|
||||||
|
|
||||||
switch (ElfMach) {
|
|
||||||
case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break;
|
|
||||||
case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break;
|
|
||||||
case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break;
|
|
||||||
case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break;
|
|
||||||
case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break;
|
|
||||||
case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break;
|
|
||||||
case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break;
|
|
||||||
case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break;
|
|
||||||
case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break;
|
|
||||||
case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break;
|
|
||||||
case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break;
|
|
||||||
case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break;
|
|
||||||
case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break;
|
|
||||||
case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break;
|
|
||||||
case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break;
|
|
||||||
case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break;
|
|
||||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break;
|
|
||||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break;
|
|
||||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break;
|
|
||||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break;
|
|
||||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break;
|
|
||||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break;
|
|
||||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break;
|
|
||||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break;
|
|
||||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break;
|
|
||||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break;
|
|
||||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break;
|
|
||||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break;
|
|
||||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break;
|
|
||||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break;
|
|
||||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break;
|
|
||||||
case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
|
|
||||||
}
|
|
||||||
|
|
||||||
StringRef GPUName = getArchNameAMDGCN(AK);
|
|
||||||
if (GPUName != "")
|
|
||||||
return GPUName;
|
|
||||||
return getArchNameR600(AK);
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned AMDGPU::getElfMach(StringRef GPU) {
|
|
||||||
AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
|
|
||||||
if (AK == AMDGPU::GPUKind::GK_NONE)
|
|
||||||
AK = parseArchR600(GPU);
|
|
||||||
|
|
||||||
switch (AK) {
|
|
||||||
case GK_R600: return ELF::EF_AMDGPU_MACH_R600_R600;
|
|
||||||
case GK_R630: return ELF::EF_AMDGPU_MACH_R600_R630;
|
|
||||||
case GK_RS880: return ELF::EF_AMDGPU_MACH_R600_RS880;
|
|
||||||
case GK_RV670: return ELF::EF_AMDGPU_MACH_R600_RV670;
|
|
||||||
case GK_RV710: return ELF::EF_AMDGPU_MACH_R600_RV710;
|
|
||||||
case GK_RV730: return ELF::EF_AMDGPU_MACH_R600_RV730;
|
|
||||||
case GK_RV770: return ELF::EF_AMDGPU_MACH_R600_RV770;
|
|
||||||
case GK_CEDAR: return ELF::EF_AMDGPU_MACH_R600_CEDAR;
|
|
||||||
case GK_CYPRESS: return ELF::EF_AMDGPU_MACH_R600_CYPRESS;
|
|
||||||
case GK_JUNIPER: return ELF::EF_AMDGPU_MACH_R600_JUNIPER;
|
|
||||||
case GK_REDWOOD: return ELF::EF_AMDGPU_MACH_R600_REDWOOD;
|
|
||||||
case GK_SUMO: return ELF::EF_AMDGPU_MACH_R600_SUMO;
|
|
||||||
case GK_BARTS: return ELF::EF_AMDGPU_MACH_R600_BARTS;
|
|
||||||
case GK_CAICOS: return ELF::EF_AMDGPU_MACH_R600_CAICOS;
|
|
||||||
case GK_CAYMAN: return ELF::EF_AMDGPU_MACH_R600_CAYMAN;
|
|
||||||
case GK_TURKS: return ELF::EF_AMDGPU_MACH_R600_TURKS;
|
|
||||||
case GK_GFX600: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600;
|
|
||||||
case GK_GFX601: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601;
|
|
||||||
case GK_GFX700: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700;
|
|
||||||
case GK_GFX701: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701;
|
|
||||||
case GK_GFX702: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702;
|
|
||||||
case GK_GFX703: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703;
|
|
||||||
case GK_GFX704: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704;
|
|
||||||
case GK_GFX801: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801;
|
|
||||||
case GK_GFX802: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802;
|
|
||||||
case GK_GFX803: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803;
|
|
||||||
case GK_GFX810: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810;
|
|
||||||
case GK_GFX900: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900;
|
|
||||||
case GK_GFX902: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902;
|
|
||||||
case GK_GFX904: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904;
|
|
||||||
case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
|
|
||||||
case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
|
|
||||||
}
|
|
||||||
|
|
||||||
llvm_unreachable("unknown GPU");
|
|
||||||
}
|
|
||||||
|
|
||||||
AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
|
|
||||||
if (GPU == "generic")
|
|
||||||
return {7, 0, 0};
|
|
||||||
|
|
||||||
AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
|
|
||||||
if (AK == AMDGPU::GPUKind::GK_NONE)
|
|
||||||
return {0, 0, 0};
|
|
||||||
|
|
||||||
switch (AK) {
|
|
||||||
case GK_GFX600: return {6, 0, 0};
|
|
||||||
case GK_GFX601: return {6, 0, 1};
|
|
||||||
case GK_GFX700: return {7, 0, 0};
|
|
||||||
case GK_GFX701: return {7, 0, 1};
|
|
||||||
case GK_GFX702: return {7, 0, 2};
|
|
||||||
case GK_GFX703: return {7, 0, 3};
|
|
||||||
case GK_GFX704: return {7, 0, 4};
|
|
||||||
case GK_GFX801: return {8, 0, 1};
|
|
||||||
case GK_GFX802: return {8, 0, 2};
|
|
||||||
case GK_GFX803: return {8, 0, 3};
|
|
||||||
case GK_GFX810: return {8, 1, 0};
|
|
||||||
case GK_GFX900: return {9, 0, 0};
|
|
||||||
case GK_GFX902: return {9, 0, 2};
|
|
||||||
case GK_GFX904: return {9, 0, 4};
|
|
||||||
case GK_GFX906: return {9, 0, 6};
|
|
||||||
default: return {0, 0, 0};
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
@ -40,7 +40,6 @@
|
|||||||
#include "llvm/MC/MCStreamer.h"
|
#include "llvm/MC/MCStreamer.h"
|
||||||
#include "llvm/Support/AMDGPUMetadata.h"
|
#include "llvm/Support/AMDGPUMetadata.h"
|
||||||
#include "llvm/Support/MathExtras.h"
|
#include "llvm/Support/MathExtras.h"
|
||||||
#include "llvm/Support/TargetParser.h"
|
|
||||||
#include "llvm/Support/TargetRegistry.h"
|
#include "llvm/Support/TargetRegistry.h"
|
||||||
#include "llvm/Target/TargetLoweringObjectFile.h"
|
#include "llvm/Target/TargetLoweringObjectFile.h"
|
||||||
|
|
||||||
@ -135,9 +134,9 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
|
|||||||
getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1);
|
getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1);
|
||||||
|
|
||||||
// HSA and PAL emit NT_AMDGPU_HSA_ISA for code objects v2.
|
// HSA and PAL emit NT_AMDGPU_HSA_ISA for code objects v2.
|
||||||
IsaVersion Version = getIsaVersion(getSTI()->getCPU());
|
IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(getSTI()->getFeatureBits());
|
||||||
getTargetStreamer()->EmitDirectiveHSACodeObjectISA(
|
getTargetStreamer()->EmitDirectiveHSACodeObjectISA(
|
||||||
Version.Major, Version.Minor, Version.Stepping, "AMD", "AMDGPU");
|
ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU");
|
||||||
}
|
}
|
||||||
|
|
||||||
void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
|
void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
|
||||||
@ -241,7 +240,7 @@ void AMDGPUAsmPrinter::EmitFunctionBodyEnd() {
|
|||||||
*getSTI(), KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
|
*getSTI(), KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
|
||||||
CurrentProgramInfo.NumVGPRsForWavesPerEU,
|
CurrentProgramInfo.NumVGPRsForWavesPerEU,
|
||||||
CurrentProgramInfo.NumSGPRsForWavesPerEU -
|
CurrentProgramInfo.NumSGPRsForWavesPerEU -
|
||||||
IsaInfo::getNumExtraSGPRs(getSTI(),
|
IsaInfo::getNumExtraSGPRs(getSTI()->getFeatureBits(),
|
||||||
CurrentProgramInfo.VCCUsed,
|
CurrentProgramInfo.VCCUsed,
|
||||||
CurrentProgramInfo.FlatUsed),
|
CurrentProgramInfo.FlatUsed),
|
||||||
CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
|
CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
|
||||||
@ -562,7 +561,7 @@ static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI,
|
|||||||
|
|
||||||
int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs(
|
int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs(
|
||||||
const GCNSubtarget &ST) const {
|
const GCNSubtarget &ST) const {
|
||||||
return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(&ST,
|
return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(),
|
||||||
UsesVCC, UsesFlatScratch);
|
UsesVCC, UsesFlatScratch);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -759,7 +758,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
|
|||||||
|
|
||||||
// 48 SGPRs - vcc, - flat_scr, -xnack
|
// 48 SGPRs - vcc, - flat_scr, -xnack
|
||||||
int MaxSGPRGuess =
|
int MaxSGPRGuess =
|
||||||
47 - IsaInfo::getNumExtraSGPRs(getSTI(), true,
|
47 - IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(), true,
|
||||||
ST.hasFlatAddressSpace());
|
ST.hasFlatAddressSpace());
|
||||||
MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess);
|
MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess);
|
||||||
MaxVGPR = std::max(MaxVGPR, 23);
|
MaxVGPR = std::max(MaxVGPR, 23);
|
||||||
@ -824,7 +823,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
|
|||||||
// duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be
|
// duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be
|
||||||
// unified.
|
// unified.
|
||||||
unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
|
unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
|
||||||
getSTI(), ProgInfo.VCCUsed, ProgInfo.FlatUsed);
|
STM.getFeatureBits(), ProgInfo.VCCUsed, ProgInfo.FlatUsed);
|
||||||
|
|
||||||
// Check the addressable register limit before we add ExtraSGPRs.
|
// Check the addressable register limit before we add ExtraSGPRs.
|
||||||
if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
|
if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
|
||||||
@ -906,9 +905,9 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
|
|||||||
}
|
}
|
||||||
|
|
||||||
ProgInfo.SGPRBlocks = IsaInfo::getNumSGPRBlocks(
|
ProgInfo.SGPRBlocks = IsaInfo::getNumSGPRBlocks(
|
||||||
getSTI(), ProgInfo.NumSGPRsForWavesPerEU);
|
STM.getFeatureBits(), ProgInfo.NumSGPRsForWavesPerEU);
|
||||||
ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks(
|
ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks(
|
||||||
getSTI(), ProgInfo.NumVGPRsForWavesPerEU);
|
STM.getFeatureBits(), ProgInfo.NumVGPRsForWavesPerEU);
|
||||||
|
|
||||||
// Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
|
// Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
|
||||||
// DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
|
// DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
|
||||||
@ -1138,7 +1137,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
|
|||||||
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||||
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
|
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
|
||||||
|
|
||||||
AMDGPU::initDefaultAMDKernelCodeT(Out, getSTI());
|
AMDGPU::initDefaultAMDKernelCodeT(Out, STM.getFeatureBits());
|
||||||
|
|
||||||
Out.compute_pgm_resource_registers =
|
Out.compute_pgm_resource_registers =
|
||||||
CurrentProgramInfo.ComputePGMRSrc1 |
|
CurrentProgramInfo.ComputePGMRSrc1 |
|
||||||
|
@ -124,8 +124,10 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
|
|||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) :
|
AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT,
|
||||||
|
const FeatureBitset &FeatureBits) :
|
||||||
TargetTriple(TT),
|
TargetTriple(TT),
|
||||||
|
SubtargetFeatureBits(FeatureBits),
|
||||||
Has16BitInsts(false),
|
Has16BitInsts(false),
|
||||||
HasMadMixInsts(false),
|
HasMadMixInsts(false),
|
||||||
FP32Denormals(false),
|
FP32Denormals(false),
|
||||||
@ -142,9 +144,9 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) :
|
|||||||
{ }
|
{ }
|
||||||
|
|
||||||
GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||||
const GCNTargetMachine &TM) :
|
const GCNTargetMachine &TM) :
|
||||||
AMDGPUGenSubtargetInfo(TT, GPU, FS),
|
AMDGPUGenSubtargetInfo(TT, GPU, FS),
|
||||||
AMDGPUSubtarget(TT),
|
AMDGPUSubtarget(TT, getFeatureBits()),
|
||||||
TargetTriple(TT),
|
TargetTriple(TT),
|
||||||
Gen(SOUTHERN_ISLANDS),
|
Gen(SOUTHERN_ISLANDS),
|
||||||
IsaVersion(ISAVersion0_0_0),
|
IsaVersion(ISAVersion0_0_0),
|
||||||
@ -446,7 +448,7 @@ unsigned AMDGPUSubtarget::getKernArgSegmentSize(const Function &F,
|
|||||||
R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||||
const TargetMachine &TM) :
|
const TargetMachine &TM) :
|
||||||
R600GenSubtargetInfo(TT, GPU, FS),
|
R600GenSubtargetInfo(TT, GPU, FS),
|
||||||
AMDGPUSubtarget(TT),
|
AMDGPUSubtarget(TT, getFeatureBits()),
|
||||||
InstrInfo(*this),
|
InstrInfo(*this),
|
||||||
FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
|
FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
|
||||||
FMA(false),
|
FMA(false),
|
||||||
|
@ -63,6 +63,7 @@ private:
|
|||||||
Triple TargetTriple;
|
Triple TargetTriple;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
const FeatureBitset &SubtargetFeatureBits;
|
||||||
bool Has16BitInsts;
|
bool Has16BitInsts;
|
||||||
bool HasMadMixInsts;
|
bool HasMadMixInsts;
|
||||||
bool FP32Denormals;
|
bool FP32Denormals;
|
||||||
@ -78,7 +79,7 @@ protected:
|
|||||||
unsigned WavefrontSize;
|
unsigned WavefrontSize;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
AMDGPUSubtarget(const Triple &TT);
|
AMDGPUSubtarget(const Triple &TT, const FeatureBitset &FeatureBits);
|
||||||
|
|
||||||
static const AMDGPUSubtarget &get(const MachineFunction &MF);
|
static const AMDGPUSubtarget &get(const MachineFunction &MF);
|
||||||
static const AMDGPUSubtarget &get(const TargetMachine &TM,
|
static const AMDGPUSubtarget &get(const TargetMachine &TM,
|
||||||
@ -202,21 +203,33 @@ public:
|
|||||||
|
|
||||||
/// \returns Maximum number of work groups per compute unit supported by the
|
/// \returns Maximum number of work groups per compute unit supported by the
|
||||||
/// subtarget and limited by given \p FlatWorkGroupSize.
|
/// subtarget and limited by given \p FlatWorkGroupSize.
|
||||||
virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;
|
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
|
||||||
|
return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(SubtargetFeatureBits,
|
||||||
|
FlatWorkGroupSize);
|
||||||
|
}
|
||||||
|
|
||||||
/// \returns Minimum flat work group size supported by the subtarget.
|
/// \returns Minimum flat work group size supported by the subtarget.
|
||||||
virtual unsigned getMinFlatWorkGroupSize() const = 0;
|
unsigned getMinFlatWorkGroupSize() const {
|
||||||
|
return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(SubtargetFeatureBits);
|
||||||
|
}
|
||||||
|
|
||||||
/// \returns Maximum flat work group size supported by the subtarget.
|
/// \returns Maximum flat work group size supported by the subtarget.
|
||||||
virtual unsigned getMaxFlatWorkGroupSize() const = 0;
|
unsigned getMaxFlatWorkGroupSize() const {
|
||||||
|
return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(SubtargetFeatureBits);
|
||||||
|
}
|
||||||
|
|
||||||
/// \returns Maximum number of waves per execution unit supported by the
|
/// \returns Maximum number of waves per execution unit supported by the
|
||||||
/// subtarget and limited by given \p FlatWorkGroupSize.
|
/// subtarget and limited by given \p FlatWorkGroupSize.
|
||||||
virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const = 0;
|
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
|
||||||
|
return AMDGPU::IsaInfo::getMaxWavesPerEU(SubtargetFeatureBits,
|
||||||
|
FlatWorkGroupSize);
|
||||||
|
}
|
||||||
|
|
||||||
/// \returns Minimum number of waves per execution unit supported by the
|
/// \returns Minimum number of waves per execution unit supported by the
|
||||||
/// subtarget.
|
/// subtarget.
|
||||||
virtual unsigned getMinWavesPerEU() const = 0;
|
unsigned getMinWavesPerEU() const {
|
||||||
|
return AMDGPU::IsaInfo::getMinWavesPerEU(SubtargetFeatureBits);
|
||||||
|
}
|
||||||
|
|
||||||
unsigned getMaxWavesPerEU() const { return 10; }
|
unsigned getMaxWavesPerEU() const { return 10; }
|
||||||
|
|
||||||
@ -695,19 +708,20 @@ public:
|
|||||||
/// \returns Number of execution units per compute unit supported by the
|
/// \returns Number of execution units per compute unit supported by the
|
||||||
/// subtarget.
|
/// subtarget.
|
||||||
unsigned getEUsPerCU() const {
|
unsigned getEUsPerCU() const {
|
||||||
return AMDGPU::IsaInfo::getEUsPerCU(this);
|
return AMDGPU::IsaInfo::getEUsPerCU(MCSubtargetInfo::getFeatureBits());
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \returns Maximum number of waves per compute unit supported by the
|
/// \returns Maximum number of waves per compute unit supported by the
|
||||||
/// subtarget without any kind of limitation.
|
/// subtarget without any kind of limitation.
|
||||||
unsigned getMaxWavesPerCU() const {
|
unsigned getMaxWavesPerCU() const {
|
||||||
return AMDGPU::IsaInfo::getMaxWavesPerCU(this);
|
return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits());
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \returns Maximum number of waves per compute unit supported by the
|
/// \returns Maximum number of waves per compute unit supported by the
|
||||||
/// subtarget and limited by given \p FlatWorkGroupSize.
|
/// subtarget and limited by given \p FlatWorkGroupSize.
|
||||||
unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
|
unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
|
||||||
return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize);
|
return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits(),
|
||||||
|
FlatWorkGroupSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \returns Maximum number of waves per execution unit supported by the
|
/// \returns Maximum number of waves per execution unit supported by the
|
||||||
@ -719,7 +733,8 @@ public:
|
|||||||
/// \returns Number of waves per work group supported by the subtarget and
|
/// \returns Number of waves per work group supported by the subtarget and
|
||||||
/// limited by given \p FlatWorkGroupSize.
|
/// limited by given \p FlatWorkGroupSize.
|
||||||
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
|
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
|
||||||
return AMDGPU::IsaInfo::getWavesPerWorkGroup(this, FlatWorkGroupSize);
|
return AMDGPU::IsaInfo::getWavesPerWorkGroup(
|
||||||
|
MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
// static wrappers
|
// static wrappers
|
||||||
@ -838,34 +853,39 @@ public:
|
|||||||
|
|
||||||
/// \returns SGPR allocation granularity supported by the subtarget.
|
/// \returns SGPR allocation granularity supported by the subtarget.
|
||||||
unsigned getSGPRAllocGranule() const {
|
unsigned getSGPRAllocGranule() const {
|
||||||
return AMDGPU::IsaInfo::getSGPRAllocGranule(this);
|
return AMDGPU::IsaInfo::getSGPRAllocGranule(
|
||||||
|
MCSubtargetInfo::getFeatureBits());
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \returns SGPR encoding granularity supported by the subtarget.
|
/// \returns SGPR encoding granularity supported by the subtarget.
|
||||||
unsigned getSGPREncodingGranule() const {
|
unsigned getSGPREncodingGranule() const {
|
||||||
return AMDGPU::IsaInfo::getSGPREncodingGranule(this);
|
return AMDGPU::IsaInfo::getSGPREncodingGranule(
|
||||||
|
MCSubtargetInfo::getFeatureBits());
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \returns Total number of SGPRs supported by the subtarget.
|
/// \returns Total number of SGPRs supported by the subtarget.
|
||||||
unsigned getTotalNumSGPRs() const {
|
unsigned getTotalNumSGPRs() const {
|
||||||
return AMDGPU::IsaInfo::getTotalNumSGPRs(this);
|
return AMDGPU::IsaInfo::getTotalNumSGPRs(MCSubtargetInfo::getFeatureBits());
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \returns Addressable number of SGPRs supported by the subtarget.
|
/// \returns Addressable number of SGPRs supported by the subtarget.
|
||||||
unsigned getAddressableNumSGPRs() const {
|
unsigned getAddressableNumSGPRs() const {
|
||||||
return AMDGPU::IsaInfo::getAddressableNumSGPRs(this);
|
return AMDGPU::IsaInfo::getAddressableNumSGPRs(
|
||||||
|
MCSubtargetInfo::getFeatureBits());
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \returns Minimum number of SGPRs that meets the given number of waves per
|
/// \returns Minimum number of SGPRs that meets the given number of waves per
|
||||||
/// execution unit requirement supported by the subtarget.
|
/// execution unit requirement supported by the subtarget.
|
||||||
unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
|
unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
|
||||||
return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
|
return AMDGPU::IsaInfo::getMinNumSGPRs(MCSubtargetInfo::getFeatureBits(),
|
||||||
|
WavesPerEU);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \returns Maximum number of SGPRs that meets the given number of waves per
|
/// \returns Maximum number of SGPRs that meets the given number of waves per
|
||||||
/// execution unit requirement supported by the subtarget.
|
/// execution unit requirement supported by the subtarget.
|
||||||
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
|
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
|
||||||
return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
|
return AMDGPU::IsaInfo::getMaxNumSGPRs(MCSubtargetInfo::getFeatureBits(),
|
||||||
|
WavesPerEU, Addressable);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \returns Reserved number of SGPRs for given function \p MF.
|
/// \returns Reserved number of SGPRs for given function \p MF.
|
||||||
@ -883,34 +903,39 @@ public:
|
|||||||
|
|
||||||
/// \returns VGPR allocation granularity supported by the subtarget.
|
/// \returns VGPR allocation granularity supported by the subtarget.
|
||||||
unsigned getVGPRAllocGranule() const {
|
unsigned getVGPRAllocGranule() const {
|
||||||
return AMDGPU::IsaInfo::getVGPRAllocGranule(this);
|
return AMDGPU::IsaInfo::getVGPRAllocGranule(
|
||||||
|
MCSubtargetInfo::getFeatureBits());
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \returns VGPR encoding granularity supported by the subtarget.
|
/// \returns VGPR encoding granularity supported by the subtarget.
|
||||||
unsigned getVGPREncodingGranule() const {
|
unsigned getVGPREncodingGranule() const {
|
||||||
return AMDGPU::IsaInfo::getVGPREncodingGranule(this);
|
return AMDGPU::IsaInfo::getVGPREncodingGranule(
|
||||||
|
MCSubtargetInfo::getFeatureBits());
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \returns Total number of VGPRs supported by the subtarget.
|
/// \returns Total number of VGPRs supported by the subtarget.
|
||||||
unsigned getTotalNumVGPRs() const {
|
unsigned getTotalNumVGPRs() const {
|
||||||
return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
|
return AMDGPU::IsaInfo::getTotalNumVGPRs(MCSubtargetInfo::getFeatureBits());
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \returns Addressable number of VGPRs supported by the subtarget.
|
/// \returns Addressable number of VGPRs supported by the subtarget.
|
||||||
unsigned getAddressableNumVGPRs() const {
|
unsigned getAddressableNumVGPRs() const {
|
||||||
return AMDGPU::IsaInfo::getAddressableNumVGPRs(this);
|
return AMDGPU::IsaInfo::getAddressableNumVGPRs(
|
||||||
|
MCSubtargetInfo::getFeatureBits());
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \returns Minimum number of VGPRs that meets given number of waves per
|
/// \returns Minimum number of VGPRs that meets given number of waves per
|
||||||
/// execution unit requirement supported by the subtarget.
|
/// execution unit requirement supported by the subtarget.
|
||||||
unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
|
unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
|
||||||
return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
|
return AMDGPU::IsaInfo::getMinNumVGPRs(MCSubtargetInfo::getFeatureBits(),
|
||||||
|
WavesPerEU);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \returns Maximum number of VGPRs that meets given number of waves per
|
/// \returns Maximum number of VGPRs that meets given number of waves per
|
||||||
/// execution unit requirement supported by the subtarget.
|
/// execution unit requirement supported by the subtarget.
|
||||||
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
|
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
|
||||||
return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
|
return AMDGPU::IsaInfo::getMaxNumVGPRs(MCSubtargetInfo::getFeatureBits(),
|
||||||
|
WavesPerEU);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \returns Maximum number of VGPRs that meets number of waves per execution
|
/// \returns Maximum number of VGPRs that meets number of waves per execution
|
||||||
@ -926,34 +951,6 @@ public:
|
|||||||
void getPostRAMutations(
|
void getPostRAMutations(
|
||||||
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
|
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
|
||||||
const override;
|
const override;
|
||||||
|
|
||||||
/// \returns Maximum number of work groups per compute unit supported by the
|
|
||||||
/// subtarget and limited by given \p FlatWorkGroupSize.
|
|
||||||
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
|
|
||||||
return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \returns Minimum flat work group size supported by the subtarget.
|
|
||||||
unsigned getMinFlatWorkGroupSize() const override {
|
|
||||||
return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \returns Maximum flat work group size supported by the subtarget.
|
|
||||||
unsigned getMaxFlatWorkGroupSize() const override {
|
|
||||||
return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \returns Maximum number of waves per execution unit supported by the
|
|
||||||
/// subtarget and limited by given \p FlatWorkGroupSize.
|
|
||||||
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
|
|
||||||
return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \returns Minimum number of waves per execution unit supported by the
|
|
||||||
/// subtarget.
|
|
||||||
unsigned getMinWavesPerEU() const override {
|
|
||||||
return AMDGPU::IsaInfo::getMinWavesPerEU(this);
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class R600Subtarget final : public R600GenSubtargetInfo,
|
class R600Subtarget final : public R600GenSubtargetInfo,
|
||||||
@ -1064,34 +1061,6 @@ public:
|
|||||||
bool enableSubRegLiveness() const override {
|
bool enableSubRegLiveness() const override {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \returns Maximum number of work groups per compute unit supported by the
|
|
||||||
/// subtarget and limited by given \p FlatWorkGroupSize.
|
|
||||||
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
|
|
||||||
return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \returns Minimum flat work group size supported by the subtarget.
|
|
||||||
unsigned getMinFlatWorkGroupSize() const override {
|
|
||||||
return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \returns Maximum flat work group size supported by the subtarget.
|
|
||||||
unsigned getMaxFlatWorkGroupSize() const override {
|
|
||||||
return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \returns Maximum number of waves per execution unit supported by the
|
|
||||||
/// subtarget and limited by given \p FlatWorkGroupSize.
|
|
||||||
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
|
|
||||||
return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \returns Minimum number of waves per execution unit supported by the
|
|
||||||
/// subtarget.
|
|
||||||
unsigned getMinWavesPerEU() const override {
|
|
||||||
return AMDGPU::IsaInfo::getMinWavesPerEU(this);
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // end namespace llvm
|
} // end namespace llvm
|
||||||
|
@ -49,7 +49,6 @@
|
|||||||
#include "llvm/Support/MachineValueType.h"
|
#include "llvm/Support/MachineValueType.h"
|
||||||
#include "llvm/Support/MathExtras.h"
|
#include "llvm/Support/MathExtras.h"
|
||||||
#include "llvm/Support/SMLoc.h"
|
#include "llvm/Support/SMLoc.h"
|
||||||
#include "llvm/Support/TargetParser.h"
|
|
||||||
#include "llvm/Support/TargetRegistry.h"
|
#include "llvm/Support/TargetRegistry.h"
|
||||||
#include "llvm/Support/raw_ostream.h"
|
#include "llvm/Support/raw_ostream.h"
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
@ -918,7 +917,8 @@ public:
|
|||||||
// Currently there is none suitable machinery in the core llvm-mc for this.
|
// Currently there is none suitable machinery in the core llvm-mc for this.
|
||||||
// MCSymbol::isRedefinable is intended for another purpose, and
|
// MCSymbol::isRedefinable is intended for another purpose, and
|
||||||
// AsmParser::parseDirectiveSet() cannot be specialized for specific target.
|
// AsmParser::parseDirectiveSet() cannot be specialized for specific target.
|
||||||
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
|
AMDGPU::IsaInfo::IsaVersion ISA =
|
||||||
|
AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
|
||||||
MCContext &Ctx = getContext();
|
MCContext &Ctx = getContext();
|
||||||
if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
|
if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
|
||||||
MCSymbol *Sym =
|
MCSymbol *Sym =
|
||||||
@ -1826,7 +1826,7 @@ bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
|
|||||||
unsigned DwordRegIndex,
|
unsigned DwordRegIndex,
|
||||||
unsigned RegWidth) {
|
unsigned RegWidth) {
|
||||||
// Symbols are only defined for GCN targets
|
// Symbols are only defined for GCN targets
|
||||||
if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
|
if (AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()).Major < 6)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
auto SymbolName = getGprCountSymbolName(RegKind);
|
auto SymbolName = getGprCountSymbolName(RegKind);
|
||||||
@ -2637,18 +2637,18 @@ bool AMDGPUAsmParser::calculateGPRBlocks(
|
|||||||
unsigned &SGPRBlocks) {
|
unsigned &SGPRBlocks) {
|
||||||
// TODO(scott.linder): These calculations are duplicated from
|
// TODO(scott.linder): These calculations are duplicated from
|
||||||
// AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
|
// AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
|
||||||
IsaVersion Version = getIsaVersion(getSTI().getCPU());
|
IsaInfo::IsaVersion Version = IsaInfo::getIsaVersion(Features);
|
||||||
|
|
||||||
unsigned NumVGPRs = NextFreeVGPR;
|
unsigned NumVGPRs = NextFreeVGPR;
|
||||||
unsigned NumSGPRs = NextFreeSGPR;
|
unsigned NumSGPRs = NextFreeSGPR;
|
||||||
unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI());
|
unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(Features);
|
||||||
|
|
||||||
if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
|
if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
|
||||||
NumSGPRs > MaxAddressableNumSGPRs)
|
NumSGPRs > MaxAddressableNumSGPRs)
|
||||||
return OutOfRangeError(SGPRRange);
|
return OutOfRangeError(SGPRRange);
|
||||||
|
|
||||||
NumSGPRs +=
|
NumSGPRs +=
|
||||||
IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
|
IsaInfo::getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed, XNACKUsed);
|
||||||
|
|
||||||
if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
|
if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
|
||||||
NumSGPRs > MaxAddressableNumSGPRs)
|
NumSGPRs > MaxAddressableNumSGPRs)
|
||||||
@ -2657,8 +2657,8 @@ bool AMDGPUAsmParser::calculateGPRBlocks(
|
|||||||
if (Features.test(FeatureSGPRInitBug))
|
if (Features.test(FeatureSGPRInitBug))
|
||||||
NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
|
NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
|
||||||
|
|
||||||
VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
|
VGPRBlocks = IsaInfo::getNumVGPRBlocks(Features, NumVGPRs);
|
||||||
SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
|
SGPRBlocks = IsaInfo::getNumSGPRBlocks(Features, NumSGPRs);
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -2678,7 +2678,8 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
|
|||||||
|
|
||||||
StringSet<> Seen;
|
StringSet<> Seen;
|
||||||
|
|
||||||
IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
|
IsaInfo::IsaVersion IVersion =
|
||||||
|
IsaInfo::getIsaVersion(getSTI().getFeatureBits());
|
||||||
|
|
||||||
SMRange VGPRRange;
|
SMRange VGPRRange;
|
||||||
uint64_t NextFreeVGPR = 0;
|
uint64_t NextFreeVGPR = 0;
|
||||||
@ -2937,7 +2938,8 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
|
|||||||
// If this directive has no arguments, then use the ISA version for the
|
// If this directive has no arguments, then use the ISA version for the
|
||||||
// targeted GPU.
|
// targeted GPU.
|
||||||
if (getLexer().is(AsmToken::EndOfStatement)) {
|
if (getLexer().is(AsmToken::EndOfStatement)) {
|
||||||
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
|
AMDGPU::IsaInfo::IsaVersion ISA =
|
||||||
|
AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
|
||||||
getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
|
getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
|
||||||
ISA.Stepping,
|
ISA.Stepping,
|
||||||
"AMD", "AMDGPU");
|
"AMD", "AMDGPU");
|
||||||
@ -2999,7 +3001,7 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
|
|||||||
|
|
||||||
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
|
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
|
||||||
amd_kernel_code_t Header;
|
amd_kernel_code_t Header;
|
||||||
AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
|
AMDGPU::initDefaultAMDKernelCodeT(Header, getFeatureBits());
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
// Lex EndOfStatement. This is in a while loop, because lexing a comment
|
// Lex EndOfStatement. This is in a while loop, because lexing a comment
|
||||||
@ -3677,12 +3679,12 @@ void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
|
|||||||
|
|
||||||
static bool
|
static bool
|
||||||
encodeCnt(
|
encodeCnt(
|
||||||
const AMDGPU::IsaVersion ISA,
|
const AMDGPU::IsaInfo::IsaVersion ISA,
|
||||||
int64_t &IntVal,
|
int64_t &IntVal,
|
||||||
int64_t CntVal,
|
int64_t CntVal,
|
||||||
bool Saturate,
|
bool Saturate,
|
||||||
unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
|
unsigned (*encode)(const IsaInfo::IsaVersion &Version, unsigned, unsigned),
|
||||||
unsigned (*decode)(const IsaVersion &Version, unsigned))
|
unsigned (*decode)(const IsaInfo::IsaVersion &Version, unsigned))
|
||||||
{
|
{
|
||||||
bool Failed = false;
|
bool Failed = false;
|
||||||
|
|
||||||
@ -3713,7 +3715,8 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
|
|||||||
if (getParser().parseAbsoluteExpression(CntVal))
|
if (getParser().parseAbsoluteExpression(CntVal))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
|
AMDGPU::IsaInfo::IsaVersion ISA =
|
||||||
|
AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
|
||||||
|
|
||||||
bool Failed = true;
|
bool Failed = true;
|
||||||
bool Sat = CntName.endswith("_sat");
|
bool Sat = CntName.endswith("_sat");
|
||||||
@ -3748,7 +3751,8 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
|
|||||||
|
|
||||||
OperandMatchResultTy
|
OperandMatchResultTy
|
||||||
AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
|
AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
|
||||||
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
|
AMDGPU::IsaInfo::IsaVersion ISA =
|
||||||
|
AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
|
||||||
int64_t Waitcnt = getWaitcntBitMask(ISA);
|
int64_t Waitcnt = getWaitcntBitMask(ISA);
|
||||||
SMLoc S = Parser.getTok().getLoc();
|
SMLoc S = Parser.getTok().getLoc();
|
||||||
|
|
||||||
|
@ -1155,7 +1155,8 @@ void AMDGPUInstPrinter::printSwizzle(const MCInst *MI, unsigned OpNo,
|
|||||||
void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
|
void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
|
||||||
const MCSubtargetInfo &STI,
|
const MCSubtargetInfo &STI,
|
||||||
raw_ostream &O) {
|
raw_ostream &O) {
|
||||||
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI.getCPU());
|
AMDGPU::IsaInfo::IsaVersion ISA =
|
||||||
|
AMDGPU::IsaInfo::getIsaVersion(STI.getFeatureBits());
|
||||||
|
|
||||||
unsigned SImm16 = MI->getOperand(OpNo).getImm();
|
unsigned SImm16 = MI->getOperand(OpNo).getImm();
|
||||||
unsigned Vmcnt, Expcnt, Lgkmcnt;
|
unsigned Vmcnt, Expcnt, Lgkmcnt;
|
||||||
|
@ -27,7 +27,6 @@
|
|||||||
#include "llvm/MC/MCObjectFileInfo.h"
|
#include "llvm/MC/MCObjectFileInfo.h"
|
||||||
#include "llvm/MC/MCSectionELF.h"
|
#include "llvm/MC/MCSectionELF.h"
|
||||||
#include "llvm/Support/FormattedStream.h"
|
#include "llvm/Support/FormattedStream.h"
|
||||||
#include "llvm/Support/TargetParser.h"
|
|
||||||
|
|
||||||
namespace llvm {
|
namespace llvm {
|
||||||
#include "AMDGPUPTNote.h"
|
#include "AMDGPUPTNote.h"
|
||||||
@ -40,6 +39,84 @@ using namespace llvm::AMDGPU;
|
|||||||
// AMDGPUTargetStreamer
|
// AMDGPUTargetStreamer
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
static const struct {
|
||||||
|
const char *Name;
|
||||||
|
unsigned Mach;
|
||||||
|
} MachTable[] = {
|
||||||
|
// Radeon HD 2000/3000 Series (R600).
|
||||||
|
{ "r600", ELF::EF_AMDGPU_MACH_R600_R600 },
|
||||||
|
{ "r630", ELF::EF_AMDGPU_MACH_R600_R630 },
|
||||||
|
{ "rs880", ELF::EF_AMDGPU_MACH_R600_RS880 },
|
||||||
|
{ "rv670", ELF::EF_AMDGPU_MACH_R600_RV670 },
|
||||||
|
// Radeon HD 4000 Series (R700).
|
||||||
|
{ "rv710", ELF::EF_AMDGPU_MACH_R600_RV710 },
|
||||||
|
{ "rv730", ELF::EF_AMDGPU_MACH_R600_RV730 },
|
||||||
|
{ "rv770", ELF::EF_AMDGPU_MACH_R600_RV770 },
|
||||||
|
// Radeon HD 5000 Series (Evergreen).
|
||||||
|
{ "cedar", ELF::EF_AMDGPU_MACH_R600_CEDAR },
|
||||||
|
{ "cypress", ELF::EF_AMDGPU_MACH_R600_CYPRESS },
|
||||||
|
{ "juniper", ELF::EF_AMDGPU_MACH_R600_JUNIPER },
|
||||||
|
{ "redwood", ELF::EF_AMDGPU_MACH_R600_REDWOOD },
|
||||||
|
{ "sumo", ELF::EF_AMDGPU_MACH_R600_SUMO },
|
||||||
|
// Radeon HD 6000 Series (Northern Islands).
|
||||||
|
{ "barts", ELF::EF_AMDGPU_MACH_R600_BARTS },
|
||||||
|
{ "caicos", ELF::EF_AMDGPU_MACH_R600_CAICOS },
|
||||||
|
{ "cayman", ELF::EF_AMDGPU_MACH_R600_CAYMAN },
|
||||||
|
{ "turks", ELF::EF_AMDGPU_MACH_R600_TURKS },
|
||||||
|
// AMDGCN GFX6.
|
||||||
|
{ "gfx600", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 },
|
||||||
|
{ "tahiti", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 },
|
||||||
|
{ "gfx601", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
|
||||||
|
{ "hainan", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
|
||||||
|
{ "oland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
|
||||||
|
{ "pitcairn", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
|
||||||
|
{ "verde", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
|
||||||
|
// AMDGCN GFX7.
|
||||||
|
{ "gfx700", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 },
|
||||||
|
{ "kaveri", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 },
|
||||||
|
{ "gfx701", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 },
|
||||||
|
{ "hawaii", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 },
|
||||||
|
{ "gfx702", ELF::EF_AMDGPU_MACH_AMDGCN_GFX702 },
|
||||||
|
{ "gfx703", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
|
||||||
|
{ "kabini", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
|
||||||
|
{ "mullins", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
|
||||||
|
{ "gfx704", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 },
|
||||||
|
{ "bonaire", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 },
|
||||||
|
// AMDGCN GFX8.
|
||||||
|
{ "gfx801", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 },
|
||||||
|
{ "carrizo", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 },
|
||||||
|
{ "gfx802", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
|
||||||
|
{ "iceland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
|
||||||
|
{ "tonga", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
|
||||||
|
{ "gfx803", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
|
||||||
|
{ "fiji", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
|
||||||
|
{ "polaris10", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
|
||||||
|
{ "polaris11", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
|
||||||
|
{ "gfx810", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 },
|
||||||
|
{ "stoney", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 },
|
||||||
|
// AMDGCN GFX9.
|
||||||
|
{ "gfx900", ELF::EF_AMDGPU_MACH_AMDGCN_GFX900 },
|
||||||
|
{ "gfx902", ELF::EF_AMDGPU_MACH_AMDGCN_GFX902 },
|
||||||
|
{ "gfx904", ELF::EF_AMDGPU_MACH_AMDGCN_GFX904 },
|
||||||
|
{ "gfx906", ELF::EF_AMDGPU_MACH_AMDGCN_GFX906 },
|
||||||
|
// Not specified processor.
|
||||||
|
{ nullptr, ELF::EF_AMDGPU_MACH_NONE }
|
||||||
|
};
|
||||||
|
|
||||||
|
unsigned AMDGPUTargetStreamer::getMACH(StringRef GPU) const {
|
||||||
|
auto Entry = MachTable;
|
||||||
|
for (; Entry->Name && GPU != Entry->Name; ++Entry)
|
||||||
|
;
|
||||||
|
return Entry->Mach;
|
||||||
|
}
|
||||||
|
|
||||||
|
const char *AMDGPUTargetStreamer::getMachName(unsigned Mach) {
|
||||||
|
auto Entry = MachTable;
|
||||||
|
for (; Entry->Name && Mach != Entry->Mach; ++Entry)
|
||||||
|
;
|
||||||
|
return Entry->Name;
|
||||||
|
}
|
||||||
|
|
||||||
bool AMDGPUTargetStreamer::EmitHSAMetadata(StringRef HSAMetadataString) {
|
bool AMDGPUTargetStreamer::EmitHSAMetadata(StringRef HSAMetadataString) {
|
||||||
HSAMD::Metadata HSAMetadata;
|
HSAMD::Metadata HSAMetadata;
|
||||||
if (HSAMD::fromString(HSAMetadataString, HSAMetadata))
|
if (HSAMD::fromString(HSAMetadataString, HSAMetadata))
|
||||||
@ -128,7 +205,7 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
|
|||||||
bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK) {
|
bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK) {
|
||||||
amdhsa::kernel_descriptor_t DefaultKD = getDefaultAmdhsaKernelDescriptor();
|
amdhsa::kernel_descriptor_t DefaultKD = getDefaultAmdhsaKernelDescriptor();
|
||||||
|
|
||||||
IsaVersion IVersion = getIsaVersion(STI.getCPU());
|
IsaInfo::IsaVersion IVersion = IsaInfo::getIsaVersion(STI.getFeatureBits());
|
||||||
|
|
||||||
OS << "\t.amdhsa_kernel " << KernelName << '\n';
|
OS << "\t.amdhsa_kernel " << KernelName << '\n';
|
||||||
|
|
||||||
@ -265,7 +342,7 @@ AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(
|
|||||||
unsigned EFlags = MCA.getELFHeaderEFlags();
|
unsigned EFlags = MCA.getELFHeaderEFlags();
|
||||||
|
|
||||||
EFlags &= ~ELF::EF_AMDGPU_MACH;
|
EFlags &= ~ELF::EF_AMDGPU_MACH;
|
||||||
EFlags |= getElfMach(STI.getCPU());
|
EFlags |= getMACH(STI.getCPU());
|
||||||
|
|
||||||
EFlags &= ~ELF::EF_AMDGPU_XNACK;
|
EFlags &= ~ELF::EF_AMDGPU_XNACK;
|
||||||
if (AMDGPU::hasXNACK(STI))
|
if (AMDGPU::hasXNACK(STI))
|
||||||
|
@ -31,7 +31,13 @@ class AMDGPUTargetStreamer : public MCTargetStreamer {
|
|||||||
protected:
|
protected:
|
||||||
MCContext &getContext() const { return Streamer.getContext(); }
|
MCContext &getContext() const { return Streamer.getContext(); }
|
||||||
|
|
||||||
|
/// \returns Equivalent EF_AMDGPU_MACH_* value for given \p GPU name.
|
||||||
|
unsigned getMACH(StringRef GPU) const;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
/// \returns Equivalent GPU name for an EF_AMDGPU_MACH_* value.
|
||||||
|
static const char *getMachName(unsigned Mach);
|
||||||
|
|
||||||
AMDGPUTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
|
AMDGPUTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
|
||||||
|
|
||||||
virtual void EmitDirectiveAMDGCNTarget(StringRef Target) = 0;
|
virtual void EmitDirectiveAMDGCNTarget(StringRef Target) = 0;
|
||||||
|
@ -369,7 +369,7 @@ private:
|
|||||||
const SIRegisterInfo *TRI = nullptr;
|
const SIRegisterInfo *TRI = nullptr;
|
||||||
const MachineRegisterInfo *MRI = nullptr;
|
const MachineRegisterInfo *MRI = nullptr;
|
||||||
const MachineLoopInfo *MLI = nullptr;
|
const MachineLoopInfo *MLI = nullptr;
|
||||||
AMDGPU::IsaVersion IV;
|
AMDGPU::IsaInfo::IsaVersion IV;
|
||||||
|
|
||||||
DenseSet<MachineBasicBlock *> BlockVisitedSet;
|
DenseSet<MachineBasicBlock *> BlockVisitedSet;
|
||||||
DenseSet<MachineInstr *> TrackedWaitcntSet;
|
DenseSet<MachineInstr *> TrackedWaitcntSet;
|
||||||
@ -1841,7 +1841,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
|
|||||||
TRI = &TII->getRegisterInfo();
|
TRI = &TII->getRegisterInfo();
|
||||||
MRI = &MF.getRegInfo();
|
MRI = &MF.getRegInfo();
|
||||||
MLI = &getAnalysis<MachineLoopInfo>();
|
MLI = &getAnalysis<MachineLoopInfo>();
|
||||||
IV = AMDGPU::getIsaVersion(ST->getCPU());
|
IV = AMDGPU::IsaInfo::getIsaVersion(ST->getFeatureBits());
|
||||||
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||||
|
|
||||||
ForceEmitZeroWaitcnts = ForceEmitZeroFlag;
|
ForceEmitZeroWaitcnts = ForceEmitZeroFlag;
|
||||||
|
@ -253,7 +253,7 @@ protected:
|
|||||||
/// Instruction info.
|
/// Instruction info.
|
||||||
const SIInstrInfo *TII = nullptr;
|
const SIInstrInfo *TII = nullptr;
|
||||||
|
|
||||||
IsaVersion IV;
|
IsaInfo::IsaVersion IV;
|
||||||
|
|
||||||
SICacheControl(const GCNSubtarget &ST);
|
SICacheControl(const GCNSubtarget &ST);
|
||||||
|
|
||||||
@ -605,7 +605,7 @@ Optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
|
|||||||
|
|
||||||
SICacheControl::SICacheControl(const GCNSubtarget &ST) {
|
SICacheControl::SICacheControl(const GCNSubtarget &ST) {
|
||||||
TII = ST.getInstrInfo();
|
TII = ST.getInstrInfo();
|
||||||
IV = getIsaVersion(ST.getCPU());
|
IV = IsaInfo::getIsaVersion(ST.getFeatureBits());
|
||||||
}
|
}
|
||||||
|
|
||||||
/* static */
|
/* static */
|
||||||
|
@ -137,18 +137,68 @@ int getMCOpcode(uint16_t Opcode, unsigned Gen) {
|
|||||||
|
|
||||||
namespace IsaInfo {
|
namespace IsaInfo {
|
||||||
|
|
||||||
|
IsaVersion getIsaVersion(const FeatureBitset &Features) {
|
||||||
|
// GCN GFX6 (Southern Islands (SI)).
|
||||||
|
if (Features.test(FeatureISAVersion6_0_0))
|
||||||
|
return {6, 0, 0};
|
||||||
|
if (Features.test(FeatureISAVersion6_0_1))
|
||||||
|
return {6, 0, 1};
|
||||||
|
|
||||||
|
// GCN GFX7 (Sea Islands (CI)).
|
||||||
|
if (Features.test(FeatureISAVersion7_0_0))
|
||||||
|
return {7, 0, 0};
|
||||||
|
if (Features.test(FeatureISAVersion7_0_1))
|
||||||
|
return {7, 0, 1};
|
||||||
|
if (Features.test(FeatureISAVersion7_0_2))
|
||||||
|
return {7, 0, 2};
|
||||||
|
if (Features.test(FeatureISAVersion7_0_3))
|
||||||
|
return {7, 0, 3};
|
||||||
|
if (Features.test(FeatureISAVersion7_0_4))
|
||||||
|
return {7, 0, 4};
|
||||||
|
if (Features.test(FeatureSeaIslands))
|
||||||
|
return {7, 0, 0};
|
||||||
|
|
||||||
|
// GCN GFX8 (Volcanic Islands (VI)).
|
||||||
|
if (Features.test(FeatureISAVersion8_0_1))
|
||||||
|
return {8, 0, 1};
|
||||||
|
if (Features.test(FeatureISAVersion8_0_2))
|
||||||
|
return {8, 0, 2};
|
||||||
|
if (Features.test(FeatureISAVersion8_0_3))
|
||||||
|
return {8, 0, 3};
|
||||||
|
if (Features.test(FeatureISAVersion8_1_0))
|
||||||
|
return {8, 1, 0};
|
||||||
|
if (Features.test(FeatureVolcanicIslands))
|
||||||
|
return {8, 0, 0};
|
||||||
|
|
||||||
|
// GCN GFX9.
|
||||||
|
if (Features.test(FeatureISAVersion9_0_0))
|
||||||
|
return {9, 0, 0};
|
||||||
|
if (Features.test(FeatureISAVersion9_0_2))
|
||||||
|
return {9, 0, 2};
|
||||||
|
if (Features.test(FeatureISAVersion9_0_4))
|
||||||
|
return {9, 0, 4};
|
||||||
|
if (Features.test(FeatureISAVersion9_0_6))
|
||||||
|
return {9, 0, 6};
|
||||||
|
if (Features.test(FeatureGFX9))
|
||||||
|
return {9, 0, 0};
|
||||||
|
|
||||||
|
if (Features.test(FeatureSouthernIslands))
|
||||||
|
return {0, 0, 0};
|
||||||
|
return {7, 0, 0};
|
||||||
|
}
|
||||||
|
|
||||||
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
|
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
|
||||||
auto TargetTriple = STI->getTargetTriple();
|
auto TargetTriple = STI->getTargetTriple();
|
||||||
auto Version = getIsaVersion(STI->getCPU());
|
auto ISAVersion = IsaInfo::getIsaVersion(STI->getFeatureBits());
|
||||||
|
|
||||||
Stream << TargetTriple.getArchName() << '-'
|
Stream << TargetTriple.getArchName() << '-'
|
||||||
<< TargetTriple.getVendorName() << '-'
|
<< TargetTriple.getVendorName() << '-'
|
||||||
<< TargetTriple.getOSName() << '-'
|
<< TargetTriple.getOSName() << '-'
|
||||||
<< TargetTriple.getEnvironmentName() << '-'
|
<< TargetTriple.getEnvironmentName() << '-'
|
||||||
<< "gfx"
|
<< "gfx"
|
||||||
<< Version.Major
|
<< ISAVersion.Major
|
||||||
<< Version.Minor
|
<< ISAVersion.Minor
|
||||||
<< Version.Stepping;
|
<< ISAVersion.Stepping;
|
||||||
|
|
||||||
if (hasXNACK(*STI))
|
if (hasXNACK(*STI))
|
||||||
Stream << "+xnack";
|
Stream << "+xnack";
|
||||||
@ -160,49 +210,49 @@ bool hasCodeObjectV3(const MCSubtargetInfo *STI) {
|
|||||||
return STI->getFeatureBits().test(FeatureCodeObjectV3);
|
return STI->getFeatureBits().test(FeatureCodeObjectV3);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
|
unsigned getWavefrontSize(const FeatureBitset &Features) {
|
||||||
if (STI->getFeatureBits().test(FeatureWavefrontSize16))
|
if (Features.test(FeatureWavefrontSize16))
|
||||||
return 16;
|
return 16;
|
||||||
if (STI->getFeatureBits().test(FeatureWavefrontSize32))
|
if (Features.test(FeatureWavefrontSize32))
|
||||||
return 32;
|
return 32;
|
||||||
|
|
||||||
return 64;
|
return 64;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
|
unsigned getLocalMemorySize(const FeatureBitset &Features) {
|
||||||
if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
|
if (Features.test(FeatureLocalMemorySize32768))
|
||||||
return 32768;
|
return 32768;
|
||||||
if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
|
if (Features.test(FeatureLocalMemorySize65536))
|
||||||
return 65536;
|
return 65536;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
|
unsigned getEUsPerCU(const FeatureBitset &Features) {
|
||||||
return 4;
|
return 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
|
unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
|
||||||
unsigned FlatWorkGroupSize) {
|
unsigned FlatWorkGroupSize) {
|
||||||
if (!STI->getFeatureBits().test(FeatureGCN))
|
if (!Features.test(FeatureGCN))
|
||||||
return 8;
|
return 8;
|
||||||
unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
|
unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
|
||||||
if (N == 1)
|
if (N == 1)
|
||||||
return 40;
|
return 40;
|
||||||
N = 40 / N;
|
N = 40 / N;
|
||||||
return std::min(N, 16u);
|
return std::min(N, 16u);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) {
|
unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
|
||||||
return getMaxWavesPerEU() * getEUsPerCU(STI);
|
return getMaxWavesPerEU() * getEUsPerCU(Features);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
|
unsigned getMaxWavesPerCU(const FeatureBitset &Features,
|
||||||
unsigned FlatWorkGroupSize) {
|
unsigned FlatWorkGroupSize) {
|
||||||
return getWavesPerWorkGroup(STI, FlatWorkGroupSize);
|
return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
|
unsigned getMinWavesPerEU(const FeatureBitset &Features) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -211,89 +261,89 @@ unsigned getMaxWavesPerEU() {
|
|||||||
return 10;
|
return 10;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
|
unsigned getMaxWavesPerEU(const FeatureBitset &Features,
|
||||||
unsigned FlatWorkGroupSize) {
|
unsigned FlatWorkGroupSize) {
|
||||||
return alignTo(getMaxWavesPerCU(STI, FlatWorkGroupSize),
|
return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
|
||||||
getEUsPerCU(STI)) / getEUsPerCU(STI);
|
getEUsPerCU(Features)) / getEUsPerCU(Features);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
|
unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
|
unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
|
||||||
return 2048;
|
return 2048;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
|
unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
|
||||||
unsigned FlatWorkGroupSize) {
|
unsigned FlatWorkGroupSize) {
|
||||||
return alignTo(FlatWorkGroupSize, getWavefrontSize(STI)) /
|
return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
|
||||||
getWavefrontSize(STI);
|
getWavefrontSize(Features);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
|
unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
|
||||||
IsaVersion Version = getIsaVersion(STI->getCPU());
|
IsaVersion Version = getIsaVersion(Features);
|
||||||
if (Version.Major >= 8)
|
if (Version.Major >= 8)
|
||||||
return 16;
|
return 16;
|
||||||
return 8;
|
return 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
|
unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
|
||||||
return 8;
|
return 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
|
unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
|
||||||
IsaVersion Version = getIsaVersion(STI->getCPU());
|
IsaVersion Version = getIsaVersion(Features);
|
||||||
if (Version.Major >= 8)
|
if (Version.Major >= 8)
|
||||||
return 800;
|
return 800;
|
||||||
return 512;
|
return 512;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
|
unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
|
||||||
if (STI->getFeatureBits().test(FeatureSGPRInitBug))
|
if (Features.test(FeatureSGPRInitBug))
|
||||||
return FIXED_NUM_SGPRS_FOR_INIT_BUG;
|
return FIXED_NUM_SGPRS_FOR_INIT_BUG;
|
||||||
|
|
||||||
IsaVersion Version = getIsaVersion(STI->getCPU());
|
IsaVersion Version = getIsaVersion(Features);
|
||||||
if (Version.Major >= 8)
|
if (Version.Major >= 8)
|
||||||
return 102;
|
return 102;
|
||||||
return 104;
|
return 104;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
|
unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
|
||||||
assert(WavesPerEU != 0);
|
assert(WavesPerEU != 0);
|
||||||
|
|
||||||
if (WavesPerEU >= getMaxWavesPerEU())
|
if (WavesPerEU >= getMaxWavesPerEU())
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
|
unsigned MinNumSGPRs = getTotalNumSGPRs(Features) / (WavesPerEU + 1);
|
||||||
if (STI->getFeatureBits().test(FeatureTrapHandler))
|
if (Features.test(FeatureTrapHandler))
|
||||||
MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
|
MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
|
||||||
MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
|
MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(Features)) + 1;
|
||||||
return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
|
return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
|
unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
|
||||||
bool Addressable) {
|
bool Addressable) {
|
||||||
assert(WavesPerEU != 0);
|
assert(WavesPerEU != 0);
|
||||||
|
|
||||||
IsaVersion Version = getIsaVersion(STI->getCPU());
|
IsaVersion Version = getIsaVersion(Features);
|
||||||
unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
|
unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
|
||||||
if (Version.Major >= 8 && !Addressable)
|
if (Version.Major >= 8 && !Addressable)
|
||||||
AddressableNumSGPRs = 112;
|
AddressableNumSGPRs = 112;
|
||||||
unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
|
unsigned MaxNumSGPRs = getTotalNumSGPRs(Features) / WavesPerEU;
|
||||||
if (STI->getFeatureBits().test(FeatureTrapHandler))
|
if (Features.test(FeatureTrapHandler))
|
||||||
MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
|
MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
|
||||||
MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
|
MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(Features));
|
||||||
return std::min(MaxNumSGPRs, AddressableNumSGPRs);
|
return std::min(MaxNumSGPRs, AddressableNumSGPRs);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
|
unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
|
||||||
bool FlatScrUsed, bool XNACKUsed) {
|
bool FlatScrUsed, bool XNACKUsed) {
|
||||||
unsigned ExtraSGPRs = 0;
|
unsigned ExtraSGPRs = 0;
|
||||||
if (VCCUsed)
|
if (VCCUsed)
|
||||||
ExtraSGPRs = 2;
|
ExtraSGPRs = 2;
|
||||||
|
|
||||||
IsaVersion Version = getIsaVersion(STI->getCPU());
|
IsaVersion Version = getIsaVersion(Features);
|
||||||
if (Version.Major < 8) {
|
if (Version.Major < 8) {
|
||||||
if (FlatScrUsed)
|
if (FlatScrUsed)
|
||||||
ExtraSGPRs = 4;
|
ExtraSGPRs = 4;
|
||||||
@ -308,74 +358,74 @@ unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
|
|||||||
return ExtraSGPRs;
|
return ExtraSGPRs;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
|
unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
|
||||||
bool FlatScrUsed) {
|
bool FlatScrUsed) {
|
||||||
return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
|
return getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed,
|
||||||
STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
|
Features[AMDGPU::FeatureXNACK]);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
|
unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs) {
|
||||||
NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
|
NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(Features));
|
||||||
// SGPRBlocks is actual number of SGPR blocks minus 1.
|
// SGPRBlocks is actual number of SGPR blocks minus 1.
|
||||||
return NumSGPRs / getSGPREncodingGranule(STI) - 1;
|
return NumSGPRs / getSGPREncodingGranule(Features) - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI) {
|
unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
|
||||||
return 4;
|
return 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI) {
|
unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
|
||||||
return getVGPRAllocGranule(STI);
|
return getVGPRAllocGranule(Features);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
|
unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
|
||||||
return 256;
|
return 256;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
|
unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
|
||||||
return getTotalNumVGPRs(STI);
|
return getTotalNumVGPRs(Features);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
|
unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
|
||||||
assert(WavesPerEU != 0);
|
assert(WavesPerEU != 0);
|
||||||
|
|
||||||
if (WavesPerEU >= getMaxWavesPerEU())
|
if (WavesPerEU >= getMaxWavesPerEU())
|
||||||
return 0;
|
return 0;
|
||||||
unsigned MinNumVGPRs =
|
unsigned MinNumVGPRs =
|
||||||
alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1),
|
alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
|
||||||
getVGPRAllocGranule(STI)) + 1;
|
getVGPRAllocGranule(Features)) + 1;
|
||||||
return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI));
|
return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
|
unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
|
||||||
assert(WavesPerEU != 0);
|
assert(WavesPerEU != 0);
|
||||||
|
|
||||||
unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
|
unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
|
||||||
getVGPRAllocGranule(STI));
|
getVGPRAllocGranule(Features));
|
||||||
unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
|
unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
|
||||||
return std::min(MaxNumVGPRs, AddressableNumVGPRs);
|
return std::min(MaxNumVGPRs, AddressableNumVGPRs);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs) {
|
unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumVGPRs) {
|
||||||
NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(STI));
|
NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(Features));
|
||||||
// VGPRBlocks is actual number of VGPR blocks minus 1.
|
// VGPRBlocks is actual number of VGPR blocks minus 1.
|
||||||
return NumVGPRs / getVGPREncodingGranule(STI) - 1;
|
return NumVGPRs / getVGPREncodingGranule(Features) - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // end namespace IsaInfo
|
} // end namespace IsaInfo
|
||||||
|
|
||||||
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
|
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
|
||||||
const MCSubtargetInfo *STI) {
|
const FeatureBitset &Features) {
|
||||||
IsaVersion Version = getIsaVersion(STI->getCPU());
|
IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);
|
||||||
|
|
||||||
memset(&Header, 0, sizeof(Header));
|
memset(&Header, 0, sizeof(Header));
|
||||||
|
|
||||||
Header.amd_kernel_code_version_major = 1;
|
Header.amd_kernel_code_version_major = 1;
|
||||||
Header.amd_kernel_code_version_minor = 2;
|
Header.amd_kernel_code_version_minor = 2;
|
||||||
Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
|
Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
|
||||||
Header.amd_machine_version_major = Version.Major;
|
Header.amd_machine_version_major = ISA.Major;
|
||||||
Header.amd_machine_version_minor = Version.Minor;
|
Header.amd_machine_version_minor = ISA.Minor;
|
||||||
Header.amd_machine_version_stepping = Version.Stepping;
|
Header.amd_machine_version_stepping = ISA.Stepping;
|
||||||
Header.kernel_code_entry_byte_offset = sizeof(Header);
|
Header.kernel_code_entry_byte_offset = sizeof(Header);
|
||||||
// wavefront_size is specified as a power of 2: 2^6 = 64 threads.
|
// wavefront_size is specified as a power of 2: 2^6 = 64 threads.
|
||||||
Header.wavefront_size = 6;
|
Header.wavefront_size = 6;
|
||||||
@ -463,7 +513,7 @@ std::pair<int, int> getIntegerPairAttribute(const Function &F,
|
|||||||
return Ints;
|
return Ints;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getVmcntBitMask(const IsaVersion &Version) {
|
unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
|
||||||
unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
|
unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
|
||||||
if (Version.Major < 9)
|
if (Version.Major < 9)
|
||||||
return VmcntLo;
|
return VmcntLo;
|
||||||
@ -472,15 +522,15 @@ unsigned getVmcntBitMask(const IsaVersion &Version) {
|
|||||||
return VmcntLo | VmcntHi;
|
return VmcntLo | VmcntHi;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getExpcntBitMask(const IsaVersion &Version) {
|
unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
|
||||||
return (1 << getExpcntBitWidth()) - 1;
|
return (1 << getExpcntBitWidth()) - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getLgkmcntBitMask(const IsaVersion &Version) {
|
unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
|
||||||
return (1 << getLgkmcntBitWidth()) - 1;
|
return (1 << getLgkmcntBitWidth()) - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getWaitcntBitMask(const IsaVersion &Version) {
|
unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
|
||||||
unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
|
unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
|
||||||
unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
|
unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
|
||||||
unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
|
unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
|
||||||
@ -492,7 +542,7 @@ unsigned getWaitcntBitMask(const IsaVersion &Version) {
|
|||||||
return Waitcnt | VmcntHi;
|
return Waitcnt | VmcntHi;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
|
unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
|
||||||
unsigned VmcntLo =
|
unsigned VmcntLo =
|
||||||
unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
|
unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
|
||||||
if (Version.Major < 9)
|
if (Version.Major < 9)
|
||||||
@ -504,22 +554,22 @@ unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
|
|||||||
return VmcntLo | VmcntHi;
|
return VmcntLo | VmcntHi;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
|
unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
|
||||||
return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
|
return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
|
unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
|
||||||
return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
|
return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
|
||||||
}
|
}
|
||||||
|
|
||||||
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
|
void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
|
||||||
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
|
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
|
||||||
Vmcnt = decodeVmcnt(Version, Waitcnt);
|
Vmcnt = decodeVmcnt(Version, Waitcnt);
|
||||||
Expcnt = decodeExpcnt(Version, Waitcnt);
|
Expcnt = decodeExpcnt(Version, Waitcnt);
|
||||||
Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
|
Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
|
unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
|
||||||
unsigned Vmcnt) {
|
unsigned Vmcnt) {
|
||||||
Waitcnt =
|
Waitcnt =
|
||||||
packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
|
packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
|
||||||
@ -530,17 +580,17 @@ unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
|
|||||||
return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
|
return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
|
unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
|
||||||
unsigned Expcnt) {
|
unsigned Expcnt) {
|
||||||
return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
|
return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
|
unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
|
||||||
unsigned Lgkmcnt) {
|
unsigned Lgkmcnt) {
|
||||||
return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
|
return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned encodeWaitcnt(const IsaVersion &Version,
|
unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
|
||||||
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
|
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
|
||||||
unsigned Waitcnt = getWaitcntBitMask(Version);
|
unsigned Waitcnt = getWaitcntBitMask(Version);
|
||||||
Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
|
Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
|
||||||
|
@ -19,7 +19,6 @@
|
|||||||
#include "llvm/Support/AMDHSAKernelDescriptor.h"
|
#include "llvm/Support/AMDHSAKernelDescriptor.h"
|
||||||
#include "llvm/Support/Compiler.h"
|
#include "llvm/Support/Compiler.h"
|
||||||
#include "llvm/Support/ErrorHandling.h"
|
#include "llvm/Support/ErrorHandling.h"
|
||||||
#include "llvm/Support/TargetParser.h"
|
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
@ -57,6 +56,16 @@ enum {
|
|||||||
TRAP_NUM_SGPRS = 16
|
TRAP_NUM_SGPRS = 16
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Instruction set architecture version.
|
||||||
|
struct IsaVersion {
|
||||||
|
unsigned Major;
|
||||||
|
unsigned Minor;
|
||||||
|
unsigned Stepping;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// \returns Isa version for given subtarget \p Features.
|
||||||
|
IsaVersion getIsaVersion(const FeatureBitset &Features);
|
||||||
|
|
||||||
/// Streams isa version string for given subtarget \p STI into \p Stream.
|
/// Streams isa version string for given subtarget \p STI into \p Stream.
|
||||||
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
|
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
|
||||||
|
|
||||||
@ -64,114 +73,114 @@ void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
|
|||||||
/// false otherwise.
|
/// false otherwise.
|
||||||
bool hasCodeObjectV3(const MCSubtargetInfo *STI);
|
bool hasCodeObjectV3(const MCSubtargetInfo *STI);
|
||||||
|
|
||||||
/// \returns Wavefront size for given subtarget \p STI.
|
/// \returns Wavefront size for given subtarget \p Features.
|
||||||
unsigned getWavefrontSize(const MCSubtargetInfo *STI);
|
unsigned getWavefrontSize(const FeatureBitset &Features);
|
||||||
|
|
||||||
/// \returns Local memory size in bytes for given subtarget \p STI.
|
/// \returns Local memory size in bytes for given subtarget \p Features.
|
||||||
unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
|
unsigned getLocalMemorySize(const FeatureBitset &Features);
|
||||||
|
|
||||||
/// \returns Number of execution units per compute unit for given subtarget \p
|
/// \returns Number of execution units per compute unit for given subtarget \p
|
||||||
/// STI.
|
/// Features.
|
||||||
unsigned getEUsPerCU(const MCSubtargetInfo *STI);
|
unsigned getEUsPerCU(const FeatureBitset &Features);
|
||||||
|
|
||||||
/// \returns Maximum number of work groups per compute unit for given subtarget
|
/// \returns Maximum number of work groups per compute unit for given subtarget
|
||||||
/// \p STI and limited by given \p FlatWorkGroupSize.
|
/// \p Features and limited by given \p FlatWorkGroupSize.
|
||||||
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
|
unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
|
||||||
unsigned FlatWorkGroupSize);
|
unsigned FlatWorkGroupSize);
|
||||||
|
|
||||||
/// \returns Maximum number of waves per compute unit for given subtarget \p
|
/// \returns Maximum number of waves per compute unit for given subtarget \p
|
||||||
/// STI without any kind of limitation.
|
/// Features without any kind of limitation.
|
||||||
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI);
|
unsigned getMaxWavesPerCU(const FeatureBitset &Features);
|
||||||
|
|
||||||
/// \returns Maximum number of waves per compute unit for given subtarget \p
|
/// \returns Maximum number of waves per compute unit for given subtarget \p
|
||||||
/// STI and limited by given \p FlatWorkGroupSize.
|
/// Features and limited by given \p FlatWorkGroupSize.
|
||||||
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
|
unsigned getMaxWavesPerCU(const FeatureBitset &Features,
|
||||||
unsigned FlatWorkGroupSize);
|
unsigned FlatWorkGroupSize);
|
||||||
|
|
||||||
/// \returns Minimum number of waves per execution unit for given subtarget \p
|
/// \returns Minimum number of waves per execution unit for given subtarget \p
|
||||||
/// STI.
|
/// Features.
|
||||||
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
|
unsigned getMinWavesPerEU(const FeatureBitset &Features);
|
||||||
|
|
||||||
/// \returns Maximum number of waves per execution unit for given subtarget \p
|
/// \returns Maximum number of waves per execution unit for given subtarget \p
|
||||||
/// STI without any kind of limitation.
|
/// Features without any kind of limitation.
|
||||||
unsigned getMaxWavesPerEU();
|
unsigned getMaxWavesPerEU();
|
||||||
|
|
||||||
/// \returns Maximum number of waves per execution unit for given subtarget \p
|
/// \returns Maximum number of waves per execution unit for given subtarget \p
|
||||||
/// STI and limited by given \p FlatWorkGroupSize.
|
/// Features and limited by given \p FlatWorkGroupSize.
|
||||||
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
|
unsigned getMaxWavesPerEU(const FeatureBitset &Features,
|
||||||
unsigned FlatWorkGroupSize);
|
unsigned FlatWorkGroupSize);
|
||||||
|
|
||||||
/// \returns Minimum flat work group size for given subtarget \p STI.
|
/// \returns Minimum flat work group size for given subtarget \p Features.
|
||||||
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
|
unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features);
|
||||||
|
|
||||||
/// \returns Maximum flat work group size for given subtarget \p STI.
|
/// \returns Maximum flat work group size for given subtarget \p Features.
|
||||||
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
|
unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features);
|
||||||
|
|
||||||
/// \returns Number of waves per work group for given subtarget \p STI and
|
/// \returns Number of waves per work group for given subtarget \p Features and
|
||||||
/// limited by given \p FlatWorkGroupSize.
|
/// limited by given \p FlatWorkGroupSize.
|
||||||
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
|
unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
|
||||||
unsigned FlatWorkGroupSize);
|
unsigned FlatWorkGroupSize);
|
||||||
|
|
||||||
/// \returns SGPR allocation granularity for given subtarget \p STI.
|
/// \returns SGPR allocation granularity for given subtarget \p Features.
|
||||||
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
|
unsigned getSGPRAllocGranule(const FeatureBitset &Features);
|
||||||
|
|
||||||
/// \returns SGPR encoding granularity for given subtarget \p STI.
|
/// \returns SGPR encoding granularity for given subtarget \p Features.
|
||||||
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
|
unsigned getSGPREncodingGranule(const FeatureBitset &Features);
|
||||||
|
|
||||||
/// \returns Total number of SGPRs for given subtarget \p STI.
|
/// \returns Total number of SGPRs for given subtarget \p Features.
|
||||||
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
|
unsigned getTotalNumSGPRs(const FeatureBitset &Features);
|
||||||
|
|
||||||
/// \returns Addressable number of SGPRs for given subtarget \p STI.
|
/// \returns Addressable number of SGPRs for given subtarget \p Features.
|
||||||
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
|
unsigned getAddressableNumSGPRs(const FeatureBitset &Features);
|
||||||
|
|
||||||
/// \returns Minimum number of SGPRs that meets the given number of waves per
|
/// \returns Minimum number of SGPRs that meets the given number of waves per
|
||||||
/// execution unit requirement for given subtarget \p STI.
|
/// execution unit requirement for given subtarget \p Features.
|
||||||
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
|
unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
|
||||||
|
|
||||||
/// \returns Maximum number of SGPRs that meets the given number of waves per
|
/// \returns Maximum number of SGPRs that meets the given number of waves per
|
||||||
/// execution unit requirement for given subtarget \p STI.
|
/// execution unit requirement for given subtarget \p Features.
|
||||||
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
|
unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
|
||||||
bool Addressable);
|
bool Addressable);
|
||||||
|
|
||||||
/// \returns Number of extra SGPRs implicitly required by given subtarget \p
|
/// \returns Number of extra SGPRs implicitly required by given subtarget \p
|
||||||
/// STI when the given special registers are used.
|
/// Features when the given special registers are used.
|
||||||
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
|
unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
|
||||||
bool FlatScrUsed, bool XNACKUsed);
|
bool FlatScrUsed, bool XNACKUsed);
|
||||||
|
|
||||||
/// \returns Number of extra SGPRs implicitly required by given subtarget \p
|
/// \returns Number of extra SGPRs implicitly required by given subtarget \p
|
||||||
/// STI when the given special registers are used. XNACK is inferred from
|
/// Features when the given special registers are used. XNACK is inferred from
|
||||||
/// \p STI.
|
/// \p Features.
|
||||||
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
|
unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
|
||||||
bool FlatScrUsed);
|
bool FlatScrUsed);
|
||||||
|
|
||||||
/// \returns Number of SGPR blocks needed for given subtarget \p STI when
|
/// \returns Number of SGPR blocks needed for given subtarget \p Features when
|
||||||
/// \p NumSGPRs are used. \p NumSGPRs should already include any special
|
/// \p NumSGPRs are used. \p NumSGPRs should already include any special
|
||||||
/// register counts.
|
/// register counts.
|
||||||
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
|
unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs);
|
||||||
|
|
||||||
/// \returns VGPR allocation granularity for given subtarget \p STI.
|
/// \returns VGPR allocation granularity for given subtarget \p Features.
|
||||||
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI);
|
unsigned getVGPRAllocGranule(const FeatureBitset &Features);
|
||||||
|
|
||||||
/// \returns VGPR encoding granularity for given subtarget \p STI.
|
/// \returns VGPR encoding granularity for given subtarget \p Features.
|
||||||
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI);
|
unsigned getVGPREncodingGranule(const FeatureBitset &Features);
|
||||||
|
|
||||||
/// \returns Total number of VGPRs for given subtarget \p STI.
|
/// \returns Total number of VGPRs for given subtarget \p Features.
|
||||||
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
|
unsigned getTotalNumVGPRs(const FeatureBitset &Features);
|
||||||
|
|
||||||
/// \returns Addressable number of VGPRs for given subtarget \p STI.
|
/// \returns Addressable number of VGPRs for given subtarget \p Features.
|
||||||
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
|
unsigned getAddressableNumVGPRs(const FeatureBitset &Features);
|
||||||
|
|
||||||
/// \returns Minimum number of VGPRs that meets given number of waves per
|
/// \returns Minimum number of VGPRs that meets given number of waves per
|
||||||
/// execution unit requirement for given subtarget \p STI.
|
/// execution unit requirement for given subtarget \p Features.
|
||||||
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
|
unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
|
||||||
|
|
||||||
/// \returns Maximum number of VGPRs that meets given number of waves per
|
/// \returns Maximum number of VGPRs that meets given number of waves per
|
||||||
/// execution unit requirement for given subtarget \p STI.
|
/// execution unit requirement for given subtarget \p Features.
|
||||||
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
|
unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
|
||||||
|
|
||||||
/// \returns Number of VGPR blocks needed for given subtarget \p STI when
|
/// \returns Number of VGPR blocks needed for given subtarget \p Features when
|
||||||
/// \p NumVGPRs are used.
|
/// \p NumVGPRs are used.
|
||||||
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
|
unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs);
|
||||||
|
|
||||||
} // end namespace IsaInfo
|
} // end namespace IsaInfo
|
||||||
|
|
||||||
@ -224,7 +233,7 @@ LLVM_READONLY
|
|||||||
int getMCOpcode(uint16_t Opcode, unsigned Gen);
|
int getMCOpcode(uint16_t Opcode, unsigned Gen);
|
||||||
|
|
||||||
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
|
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
|
||||||
const MCSubtargetInfo *STI);
|
const FeatureBitset &Features);
|
||||||
|
|
||||||
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor();
|
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor();
|
||||||
|
|
||||||
@ -259,25 +268,25 @@ std::pair<int, int> getIntegerPairAttribute(const Function &F,
|
|||||||
bool OnlyFirstRequired = false);
|
bool OnlyFirstRequired = false);
|
||||||
|
|
||||||
/// \returns Vmcnt bit mask for given isa \p Version.
|
/// \returns Vmcnt bit mask for given isa \p Version.
|
||||||
unsigned getVmcntBitMask(const IsaVersion &Version);
|
unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version);
|
||||||
|
|
||||||
/// \returns Expcnt bit mask for given isa \p Version.
|
/// \returns Expcnt bit mask for given isa \p Version.
|
||||||
unsigned getExpcntBitMask(const IsaVersion &Version);
|
unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version);
|
||||||
|
|
||||||
/// \returns Lgkmcnt bit mask for given isa \p Version.
|
/// \returns Lgkmcnt bit mask for given isa \p Version.
|
||||||
unsigned getLgkmcntBitMask(const IsaVersion &Version);
|
unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version);
|
||||||
|
|
||||||
/// \returns Waitcnt bit mask for given isa \p Version.
|
/// \returns Waitcnt bit mask for given isa \p Version.
|
||||||
unsigned getWaitcntBitMask(const IsaVersion &Version);
|
unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version);
|
||||||
|
|
||||||
/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
|
/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
|
||||||
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
|
unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
|
||||||
|
|
||||||
/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
|
/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
|
||||||
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
|
unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
|
||||||
|
|
||||||
/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
|
/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
|
||||||
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
|
unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
|
||||||
|
|
||||||
/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
|
/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
|
||||||
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
|
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
|
||||||
@ -288,19 +297,19 @@ unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
|
|||||||
/// \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14] (gfx9+ only)
|
/// \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14] (gfx9+ only)
|
||||||
/// \p Expcnt = \p Waitcnt[6:4]
|
/// \p Expcnt = \p Waitcnt[6:4]
|
||||||
/// \p Lgkmcnt = \p Waitcnt[11:8]
|
/// \p Lgkmcnt = \p Waitcnt[11:8]
|
||||||
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
|
void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
|
||||||
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
|
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
|
||||||
|
|
||||||
/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
|
/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
|
||||||
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
|
unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
|
||||||
unsigned Vmcnt);
|
unsigned Vmcnt);
|
||||||
|
|
||||||
/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
|
/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
|
||||||
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
|
unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
|
||||||
unsigned Expcnt);
|
unsigned Expcnt);
|
||||||
|
|
||||||
/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
|
/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
|
||||||
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
|
unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
|
||||||
unsigned Lgkmcnt);
|
unsigned Lgkmcnt);
|
||||||
|
|
||||||
/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
|
/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
|
||||||
@ -315,7 +324,7 @@ unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
|
|||||||
///
|
///
|
||||||
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
|
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
|
||||||
/// isa \p Version.
|
/// isa \p Version.
|
||||||
unsigned encodeWaitcnt(const IsaVersion &Version,
|
unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
|
||||||
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
|
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
|
||||||
|
|
||||||
unsigned getInitialPSInputAddr(const Function &F);
|
unsigned getInitialPSInputAddr(const Function &F);
|
||||||
|
@ -1,8 +0,0 @@
|
|||||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=-xnack < %s | FileCheck %s
|
|
||||||
|
|
||||||
; CHECK: .hsa_code_object_isa 9,0,2,"AMD","AMDGPU"
|
|
||||||
define amdgpu_kernel void @test_kernel(float addrspace(1)* %out0, double addrspace(1)* %out1) nounwind {
|
|
||||||
store float 0.0, float addrspace(1)* %out0
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
@ -2,5 +2,5 @@
|
|||||||
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -mattr=-mad-mix-insts -show-encoding %s | FileCheck --check-prefix=GFX9 %s
|
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -mattr=-mad-mix-insts -show-encoding %s | FileCheck --check-prefix=GFX9 %s
|
||||||
|
|
||||||
.hsa_code_object_isa
|
.hsa_code_object_isa
|
||||||
// GFX8: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
|
// GFX8: .hsa_code_object_isa 8,0,0,"AMD","AMDGPU"
|
||||||
// GFX9: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"
|
// GFX9: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"
|
||||||
|
Loading…
Reference in New Issue
Block a user