1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-21 18:22:53 +01:00

AMDGPU: Add target id and code object v4 support

- Add target id support (https://clang.llvm.org/docs/ClangOffloadBundler.html#target-id)
  - Add code object v4 support (https://llvm.org/docs/AMDGPUUsage.html#elf-code-object)
    - Add kernarg_size to kernel descriptor
    - Change trap handler ABI to no longer move queue pointer into s[0:1]
  - Cleanup ELF definitions
    - Add V2, V3, V4 suffixes to make a clear distinction for code object version
    - Consolidate note names

Differential Revision: https://reviews.llvm.org/D95638
This commit is contained in:
Konstantin Zhuravlyov 2021-03-24 11:52:10 -04:00
parent 7a8a7cbb9c
commit a76ecb87cf
95 changed files with 3761 additions and 702 deletions

View File

@ -370,6 +370,7 @@ enum {
// was never defined for V1.
ELFABIVERSION_AMDGPU_HSA_V2 = 0,
ELFABIVERSION_AMDGPU_HSA_V3 = 1,
ELFABIVERSION_AMDGPU_HSA_V4 = 2
};
#define ELF_RELOC(name, value) name = value,
@ -742,10 +743,51 @@ enum : unsigned {
// Indicates if the "xnack" target feature is enabled for all code contained
// in the object.
EF_AMDGPU_XNACK = 0x100,
// Indicates if the "sram-ecc" target feature is enabled for all code
//
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V2.
EF_AMDGPU_FEATURE_XNACK_V2 = 0x01,
// Indicates if the trap handler is enabled for all code contained
// in the object.
//
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V2.
EF_AMDGPU_FEATURE_TRAP_HANDLER_V2 = 0x02,
// Indicates if the "xnack" target feature is enabled for all code contained
// in the object.
//
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V3.
EF_AMDGPU_FEATURE_XNACK_V3 = 0x100,
// Indicates if the "sramecc" target feature is enabled for all code
// contained in the object.
EF_AMDGPU_SRAM_ECC = 0x200,
//
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V3.
EF_AMDGPU_FEATURE_SRAMECC_V3 = 0x200,
// XNACK selection mask for EF_AMDGPU_FEATURE_XNACK_* values.
//
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4.
EF_AMDGPU_FEATURE_XNACK_V4 = 0x300,
// XNACK is not supported.
EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4 = 0x000,
// XNACK is any/default/unspecified.
EF_AMDGPU_FEATURE_XNACK_ANY_V4 = 0x100,
// XNACK is off.
EF_AMDGPU_FEATURE_XNACK_OFF_V4 = 0x200,
// XNACK is on.
EF_AMDGPU_FEATURE_XNACK_ON_V4 = 0x300,
// SRAMECC selection mask for EF_AMDGPU_FEATURE_SRAMECC_* values.
//
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4.
EF_AMDGPU_FEATURE_SRAMECC_V4 = 0xc00,
// SRAMECC is not supported.
EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4 = 0x000,
// SRAMECC is any/default/unspecified.
EF_AMDGPU_FEATURE_SRAMECC_ANY_V4 = 0x400,
// SRAMECC is off.
EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 = 0x800,
// SRAMECC is on.
EF_AMDGPU_FEATURE_SRAMECC_ON_V4 = 0xc00,
};
// ELF Relocation types for AMDGPU
@ -1547,15 +1589,18 @@ enum {
SHN_AMDGPU_LDS = 0xff00, // Variable in LDS; symbol encoded like SHN_COMMON
};
// AMD specific notes. (Code Object V2)
// AMD vendor specific notes. (Code Object V2)
enum {
// Note types with values between 0 and 9 (inclusive) are reserved.
NT_AMD_AMDGPU_HSA_METADATA = 10,
NT_AMD_AMDGPU_ISA = 11,
NT_AMD_AMDGPU_PAL_METADATA = 12
NT_AMD_HSA_CODE_OBJECT_VERSION = 1,
NT_AMD_HSA_HSAIL = 2,
NT_AMD_HSA_ISA_VERSION = 3,
// Note types with values between 4 and 9 (inclusive) are reserved.
NT_AMD_HSA_METADATA = 10,
NT_AMD_HSA_ISA_NAME = 11,
NT_AMD_PAL_METADATA = 12
};
// AMDGPU specific notes. (Code Object V3)
// AMDGPU vendor specific notes. (Code Object V3)
enum {
// Note types with values between 0 and 31 (inclusive) are reserved.
NT_AMDGPU_METADATA = 32

View File

@ -491,6 +491,9 @@ public:
return nullptr;
}
// For any initialization at the beginning of parsing.
virtual void onBeginOfFile() {}
// For any checks or cleanups at the end of parsing.
virtual void onEndOfFile() {}
};

View File

@ -89,6 +89,7 @@ class MCSubtargetInfo {
const unsigned *OperandCycles; // Itinerary operand cycles
const unsigned *ForwardingPaths;
FeatureBitset FeatureBits; // Feature bits for current CPU + FS
std::string FeatureString; // Feature string
public:
MCSubtargetInfo(const MCSubtargetInfo &) = default;
@ -112,6 +113,8 @@ public:
FeatureBits = FeatureBits_;
}
StringRef getFeatureString() const { return FeatureString; }
bool hasFeature(unsigned Feature) const {
return FeatureBits[Feature];
}

View File

@ -29,10 +29,20 @@ namespace AMDGPU {
//===----------------------------------------------------------------------===//
namespace HSAMD {
/// HSA metadata major version.
constexpr uint32_t VersionMajor = 1;
/// HSA metadata minor version.
constexpr uint32_t VersionMinor = 0;
/// HSA metadata major version for code object V2.
constexpr uint32_t VersionMajorV2 = 1;
/// HSA metadata minor version for code object V2.
constexpr uint32_t VersionMinorV2 = 0;
/// HSA metadata major version for code object V3.
constexpr uint32_t VersionMajorV3 = 1;
/// HSA metadata minor version for code object V3.
constexpr uint32_t VersionMinorV3 = 0;
/// HSA metadata major version for code object V4.
constexpr uint32_t VersionMajorV4 = 1;
/// HSA metadata minor version for code object V4.
constexpr uint32_t VersionMinorV4 = 1;
/// HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveBegin[] = ".amd_amdgpu_hsa_metadata";

View File

@ -165,7 +165,8 @@ enum : int32_t {
struct kernel_descriptor_t {
uint32_t group_segment_fixed_size;
uint32_t private_segment_fixed_size;
uint8_t reserved0[8];
uint32_t kernarg_size;
uint8_t reserved0[4];
int64_t kernel_code_entry_byte_offset;
uint8_t reserved1[20];
uint32_t compute_pgm_rsrc3; // GFX10+ and GFX90A+
@ -178,7 +179,8 @@ struct kernel_descriptor_t {
enum : uint32_t {
GROUP_SEGMENT_FIXED_SIZE_OFFSET = 0,
PRIVATE_SEGMENT_FIXED_SIZE_OFFSET = 4,
RESERVED0_OFFSET = 8,
KERNARG_SIZE_OFFSET = 8,
RESERVED0_OFFSET = 12,
KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET = 16,
RESERVED1_OFFSET = 24,
COMPUTE_PGM_RSRC3_OFFSET = 44,
@ -197,6 +199,9 @@ static_assert(offsetof(kernel_descriptor_t, group_segment_fixed_size) ==
static_assert(offsetof(kernel_descriptor_t, private_segment_fixed_size) ==
PRIVATE_SEGMENT_FIXED_SIZE_OFFSET,
"invalid offset for private_segment_fixed_size");
static_assert(offsetof(kernel_descriptor_t, kernarg_size) ==
KERNARG_SIZE_OFFSET,
"invalid offset for kernarg_size");
static_assert(offsetof(kernel_descriptor_t, reserved0) == RESERVED0_OFFSET,
"invalid offset for reserved0");
static_assert(offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) ==

View File

@ -932,6 +932,8 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
(void)InsertResult;
}
getTargetParser().onBeginOfFile();
// While we have input, parse each statement.
while (Lexer.isNot(AsmToken::Eof)) {
ParseStatementInfo Info(&AsmStrRewrites);

View File

@ -1235,6 +1235,8 @@ bool MasmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
(void)InsertResult;
}
getTargetParser().onBeginOfFile();
// While we have input, parse each statement.
while (Lexer.isNot(AsmToken::Eof) ||
SrcMgr.getParentIncludeLoc(CurBuffer) != SMLoc()) {

View File

@ -208,6 +208,8 @@ static FeatureBitset getFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS,
void MCSubtargetInfo::InitMCProcessorInfo(StringRef CPU, StringRef TuneCPU,
StringRef FS) {
FeatureBits = getFeatures(CPU, TuneCPU, FS, ProcDesc, ProcFeatures);
FeatureString = std::string(FS);
if (!TuneCPU.empty())
CPUSchedModel = &getSchedModelForCPU(TuneCPU);
else
@ -217,6 +219,7 @@ void MCSubtargetInfo::InitMCProcessorInfo(StringRef CPU, StringRef TuneCPU,
void MCSubtargetInfo::setDefaultFeatures(StringRef CPU, StringRef TuneCPU,
StringRef FS) {
FeatureBits = getFeatures(CPU, TuneCPU, FS, ProcDesc, ProcFeatures);
FeatureString = std::string(FS);
}
MCSubtargetInfo::MCSubtargetInfo(const Triple &TT, StringRef C, StringRef TC,

View File

@ -155,9 +155,9 @@ void ScalarEnumerationTraits<ELFYAML::ELF_NT>::enumeration(
ECase(NT_FREEBSD_PROCSTAT_PSSTRINGS);
ECase(NT_FREEBSD_PROCSTAT_AUXV);
// AMD specific notes. (Code Object V2)
ECase(NT_AMD_AMDGPU_HSA_METADATA);
ECase(NT_AMD_AMDGPU_ISA);
ECase(NT_AMD_AMDGPU_PAL_METADATA);
ECase(NT_AMD_HSA_METADATA);
ECase(NT_AMD_HSA_ISA_NAME);
ECase(NT_AMD_PAL_METADATA);
// AMDGPU specific notes. (Code Object V3)
ECase(NT_AMDGPU_METADATA);
#undef ECase
@ -546,8 +546,33 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1031, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1032, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1033, EF_AMDGPU_MACH);
BCase(EF_AMDGPU_XNACK);
BCase(EF_AMDGPU_SRAM_ECC);
switch (Object->Header.ABIVersion) {
default:
// ELFOSABI_AMDGPU_PAL, ELFOSABI_AMDGPU_MESA3D support *_V3 flags.
LLVM_FALLTHROUGH;
case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
BCase(EF_AMDGPU_FEATURE_XNACK_V3);
BCase(EF_AMDGPU_FEATURE_SRAMECC_V3);
break;
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
BCaseMask(EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4,
EF_AMDGPU_FEATURE_XNACK_V4);
BCaseMask(EF_AMDGPU_FEATURE_XNACK_ANY_V4,
EF_AMDGPU_FEATURE_XNACK_V4);
BCaseMask(EF_AMDGPU_FEATURE_XNACK_OFF_V4,
EF_AMDGPU_FEATURE_XNACK_V4);
BCaseMask(EF_AMDGPU_FEATURE_XNACK_ON_V4,
EF_AMDGPU_FEATURE_XNACK_V4);
BCaseMask(EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4,
EF_AMDGPU_FEATURE_SRAMECC_V4);
BCaseMask(EF_AMDGPU_FEATURE_SRAMECC_ANY_V4,
EF_AMDGPU_FEATURE_SRAMECC_V4);
BCaseMask(EF_AMDGPU_FEATURE_SRAMECC_OFF_V4,
EF_AMDGPU_FEATURE_SRAMECC_V4);
BCaseMask(EF_AMDGPU_FEATURE_SRAMECC_ON_V4,
EF_AMDGPU_FEATURE_SRAMECC_V4);
break;
}
break;
default:
break;

View File

@ -97,12 +97,14 @@ extern "C" void LLVM_EXTERNAL_VISIBILITY LLVMInitializeAMDGPUAsmPrinter() {
AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)) {
: AsmPrinter(TM, std::move(Streamer)) {
if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
if (isHsaAbiVersion2(getGlobalSTI())) {
HSAMetadataStream.reset(new HSAMD::MetadataStreamerV2());
} else {
} else if (isHsaAbiVersion3(getGlobalSTI())) {
HSAMetadataStream.reset(new HSAMD::MetadataStreamerV3());
} else {
HSAMetadataStream.reset(new HSAMD::MetadataStreamerV4());
}
}
}
@ -122,34 +124,34 @@ AMDGPUTargetStreamer* AMDGPUAsmPrinter::getTargetStreamer() const {
}
void AMDGPUAsmPrinter::emitStartOfAsmFile(Module &M) {
if (isHsaAbiVersion3(getGlobalSTI())) {
std::string ExpectedTarget;
raw_string_ostream ExpectedTargetOS(ExpectedTarget);
IsaInfo::streamIsaVersion(getGlobalSTI(), ExpectedTargetOS);
getTargetStreamer()->EmitDirectiveAMDGCNTarget(ExpectedTarget);
}
// TODO: Which one is called first, emitStartOfAsmFile or
// emitFunctionBodyStart?
if (getTargetStreamer() && !getTargetStreamer()->getTargetID())
initializeTargetID(M);
if (TM.getTargetTriple().getOS() != Triple::AMDHSA &&
TM.getTargetTriple().getOS() != Triple::AMDPAL)
return;
if (isHsaAbiVersion3Or4(getGlobalSTI()))
getTargetStreamer()->EmitDirectiveAMDGCNTarget();
if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
HSAMetadataStream->begin(M);
HSAMetadataStream->begin(M, *getTargetStreamer()->getTargetID());
if (TM.getTargetTriple().getOS() == Triple::AMDPAL)
getTargetStreamer()->getPALMetadata()->readFromIR(M);
if (isHsaAbiVersion3(getGlobalSTI()))
if (isHsaAbiVersion3Or4(getGlobalSTI()))
return;
// HSA emits NT_AMDGPU_HSA_CODE_OBJECT_VERSION for code objects v2.
// HSA emits NT_AMD_HSA_CODE_OBJECT_VERSION for code objects v2.
if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1);
// HSA and PAL emit NT_AMDGPU_HSA_ISA for code objects v2.
// HSA and PAL emit NT_AMD_HSA_ISA_VERSION for code objects v2.
IsaVersion Version = getIsaVersion(getGlobalSTI()->getCPU());
getTargetStreamer()->EmitDirectiveHSACodeObjectISA(
getTargetStreamer()->EmitDirectiveHSACodeObjectISAV2(
Version.Major, Version.Minor, Version.Stepping, "AMD", "AMDGPU");
}
@ -159,15 +161,11 @@ void AMDGPUAsmPrinter::emitEndOfAsmFile(Module &M) {
return;
if (TM.getTargetTriple().getOS() != Triple::AMDHSA ||
isHsaAbiVersion2(getGlobalSTI())) {
// Emit ISA Version (NT_AMD_AMDGPU_ISA).
std::string ISAVersionString;
raw_string_ostream ISAVersionStream(ISAVersionString);
IsaInfo::streamIsaVersion(getGlobalSTI(), ISAVersionStream);
getTargetStreamer()->EmitISAVersion(ISAVersionStream.str());
}
isHsaAbiVersion2(getGlobalSTI()))
getTargetStreamer()->EmitISAVersion();
// Emit HSA Metadata (NT_AMD_AMDGPU_HSA_METADATA).
// Emit HSA Metadata (NT_AMD_HSA_METADATA).
if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
HSAMetadataStream->end();
bool Success = HSAMetadataStream->emitTo(*getTargetStreamer());
@ -192,11 +190,37 @@ bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
void AMDGPUAsmPrinter::emitFunctionBodyStart() {
const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
const Function &F = MF->getFunction();
// TODO: Which one is called first, emitStartOfAsmFile or
// emitFunctionBodyStart?
if (getTargetStreamer() && !getTargetStreamer()->getTargetID())
initializeTargetID(*F.getParent());
const auto &FunctionTargetID = STM.getTargetID();
// Make sure function's xnack settings are compatible with module's
// xnack settings.
if (FunctionTargetID.isXnackSupported() &&
FunctionTargetID.getXnackSetting() != IsaInfo::TargetIDSetting::Any &&
FunctionTargetID.getXnackSetting() != getTargetStreamer()->getTargetID()->getXnackSetting()) {
OutContext.reportError({}, "xnack setting of '" + Twine(MF->getName()) +
"' function does not match module xnack setting");
return;
}
// Make sure function's sramecc settings are compatible with module's
// sramecc settings.
if (FunctionTargetID.isSramEccSupported() &&
FunctionTargetID.getSramEccSetting() != IsaInfo::TargetIDSetting::Any &&
FunctionTargetID.getSramEccSetting() != getTargetStreamer()->getTargetID()->getSramEccSetting()) {
OutContext.reportError({}, "sramecc setting of '" + Twine(MF->getName()) +
"' function does not match module sramecc setting");
return;
}
if (!MFI.isEntryFunction())
return;
const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
const Function &F = MF->getFunction();
if ((STM.isMesaKernel(F) || isHsaAbiVersion2(getGlobalSTI())) &&
(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
F.getCallingConv() == CallingConv::SPIR_KERNEL)) {
@ -232,26 +256,25 @@ void AMDGPUAsmPrinter::emitFunctionBodyEnd() {
if (ReadOnlySection.getAlignment() < 64)
ReadOnlySection.setAlignment(Align(64));
const MCSubtargetInfo &STI = MF->getSubtarget();
const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
SmallString<128> KernelName;
getNameWithPrefix(KernelName, &MF->getFunction());
getTargetStreamer()->EmitAmdhsaKernelDescriptor(
STI, KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
STM, KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
CurrentProgramInfo.NumVGPRsForWavesPerEU,
CurrentProgramInfo.NumSGPRsForWavesPerEU -
IsaInfo::getNumExtraSGPRs(&STI,
IsaInfo::getNumExtraSGPRs(&STM,
CurrentProgramInfo.VCCUsed,
CurrentProgramInfo.FlatUsed),
CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
hasXNACK(STI));
CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed);
Streamer.PopSection();
}
void AMDGPUAsmPrinter::emitFunctionEntryLabel() {
if (TM.getTargetTriple().getOS() == Triple::AMDHSA &&
isHsaAbiVersion3(getGlobalSTI())) {
isHsaAbiVersion3Or4(getGlobalSTI())) {
AsmPrinter::emitFunctionEntryLabel();
return;
}
@ -401,6 +424,8 @@ amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(
const MachineFunction &MF,
const SIProgramInfo &PI) const {
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
const Function &F = MF.getFunction();
amdhsa::kernel_descriptor_t KernelDescriptor;
memset(&KernelDescriptor, 0x0, sizeof(KernelDescriptor));
@ -410,6 +435,10 @@ amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(
KernelDescriptor.group_segment_fixed_size = PI.LDSSize;
KernelDescriptor.private_segment_fixed_size = PI.ScratchSize;
Align MaxKernArgAlign;
KernelDescriptor.kernarg_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
KernelDescriptor.compute_pgm_rsrc1 = PI.getComputePGMRSrc1();
KernelDescriptor.compute_pgm_rsrc2 = PI.ComputePGMRSrc2;
KernelDescriptor.kernel_code_properties = getAmdhsaKernelCodeProperties(MF);
@ -598,6 +627,36 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
return false;
}
// TODO: Fold this into emitFunctionBodyStart.
void AMDGPUAsmPrinter::initializeTargetID(const Module &M) {
// In the beginning all features are either 'Any' or 'NotSupported',
// depending on global target features. This will cover empty modules.
getTargetStreamer()->initializeTargetID(
*getGlobalSTI(), getGlobalSTI()->getFeatureString());
// If module is empty, we are done.
if (M.empty())
return;
// If module is not empty, need to find first 'Off' or 'On' feature
// setting per feature from functions in module.
for (auto &F : M) {
auto &TSTargetID = getTargetStreamer()->getTargetID();
if ((!TSTargetID->isXnackSupported() || TSTargetID->isXnackOnOrOff()) &&
(!TSTargetID->isSramEccSupported() || TSTargetID->isSramEccOnOrOff()))
break;
const GCNSubtarget &STM = TM.getSubtarget<GCNSubtarget>(F);
const IsaInfo::AMDGPUTargetID &STMTargetID = STM.getTargetID();
if (TSTargetID->isXnackSupported())
if (TSTargetID->getXnackSetting() == IsaInfo::TargetIDSetting::Any)
TSTargetID->setXnackSetting(STMTargetID.getXnackSetting());
if (TSTargetID->isSramEccSupported())
if (TSTargetID->getSramEccSetting() == IsaInfo::TargetIDSetting::Any)
TSTargetID->setSramEccSetting(STMTargetID.getSramEccSetting());
}
}
uint64_t AMDGPUAsmPrinter::getFunctionCodeSize(const MachineFunction &MF) const {
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = STM.getInstrInfo();
@ -632,8 +691,8 @@ static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI,
int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs(
const GCNSubtarget &ST) const {
return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(&ST,
UsesVCC, UsesFlatScratch);
return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(
&ST, UsesVCC, UsesFlatScratch, ST.getTargetID().isXnackOnOrAny());
}
int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumVGPRs(

View File

@ -56,6 +56,8 @@ private:
int32_t getTotalNumVGPRs(const GCNSubtarget &ST) const;
};
void initializeTargetID(const Module &M);
SIProgramInfo CurrentProgramInfo;
DenseMap<const Function *, SIFunctionResourceInfo> CallGraphResourceInfo;

View File

@ -226,8 +226,8 @@ MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF,
void MetadataStreamerV2::emitVersion() {
auto &Version = HSAMetadata.mVersion;
Version.push_back(VersionMajor);
Version.push_back(VersionMinor);
Version.push_back(VersionMajorV2);
Version.push_back(VersionMinorV2);
}
void MetadataStreamerV2::emitPrintf(const Module &Mod) {
@ -435,7 +435,8 @@ bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
}
void MetadataStreamerV2::begin(const Module &Mod) {
void MetadataStreamerV2::begin(const Module &Mod,
const IsaInfo::AMDGPUTargetID &TargetID) {
emitVersion();
emitPrintf(Mod);
}
@ -608,8 +609,8 @@ MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const {
void MetadataStreamerV3::emitVersion() {
auto Version = HSAMetadataDoc->getArrayNode();
Version.push_back(Version.getDocument()->getNode(VersionMajor));
Version.push_back(Version.getDocument()->getNode(VersionMinor));
Version.push_back(Version.getDocument()->getNode(VersionMajorV3));
Version.push_back(Version.getDocument()->getNode(VersionMinorV3));
getRootMetadata("amdhsa.version") = Version;
}
@ -881,7 +882,8 @@ bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
}
void MetadataStreamerV3::begin(const Module &Mod) {
void MetadataStreamerV3::begin(const Module &Mod,
const IsaInfo::AMDGPUTargetID &TargetID) {
emitVersion();
emitPrintf(Mod);
getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
@ -921,6 +923,30 @@ void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
Kernels.push_back(Kern);
}
//===----------------------------------------------------------------------===//
// HSAMetadataStreamerV4
//===----------------------------------------------------------------------===//
void MetadataStreamerV4::emitVersion() {
auto Version = HSAMetadataDoc->getArrayNode();
Version.push_back(Version.getDocument()->getNode(VersionMajorV4));
Version.push_back(Version.getDocument()->getNode(VersionMinorV4));
getRootMetadata("amdhsa.version") = Version;
}
void MetadataStreamerV4::emitTargetID(const IsaInfo::AMDGPUTargetID &TargetID) {
getRootMetadata("amdhsa.target") =
HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true);
}
void MetadataStreamerV4::begin(const Module &Mod,
const IsaInfo::AMDGPUTargetID &TargetID) {
emitVersion();
emitTargetID(TargetID);
emitPrintf(Mod);
getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
}
} // end namespace HSAMD
} // end namespace AMDGPU
} // end namespace llvm

View File

@ -15,6 +15,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUHSAMETADATASTREAMER_H
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUHSAMETADATASTREAMER_H
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/BinaryFormat/MsgPackDocument.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/Alignment.h"
@ -40,7 +41,8 @@ public:
virtual bool emitTo(AMDGPUTargetStreamer &TargetStreamer) = 0;
virtual void begin(const Module &Mod) = 0;
virtual void begin(const Module &Mod,
const IsaInfo::AMDGPUTargetID &TargetID) = 0;
virtual void end() = 0;
@ -48,8 +50,9 @@ public:
const SIProgramInfo &ProgramInfo) = 0;
};
class MetadataStreamerV3 final : public MetadataStreamer {
private:
// TODO: Rename MetadataStreamerV3 -> MetadataStreamerMsgPackV3.
class MetadataStreamerV3 : public MetadataStreamer {
protected:
std::unique_ptr<msgpack::Document> HSAMetadataDoc =
std::make_unique<msgpack::Document>();
@ -108,7 +111,8 @@ public:
bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override;
void begin(const Module &Mod) override;
void begin(const Module &Mod,
const IsaInfo::AMDGPUTargetID &TargetID) override;
void end() override;
@ -116,6 +120,21 @@ public:
const SIProgramInfo &ProgramInfo) override;
};
// TODO: Rename MetadataStreamerV4 -> MetadataStreamerMsgPackV4.
class MetadataStreamerV4 final : public MetadataStreamerV3 {
void emitVersion();
void emitTargetID(const IsaInfo::AMDGPUTargetID &TargetID);
public:
MetadataStreamerV4() = default;
~MetadataStreamerV4() = default;
void begin(const Module &Mod,
const IsaInfo::AMDGPUTargetID &TargetID) override;
};
// TODO: Rename MetadataStreamerV2 -> MetadataStreamerYamlV2.
class MetadataStreamerV2 final : public MetadataStreamer {
private:
Metadata HSAMetadata;
@ -172,7 +191,8 @@ public:
bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override;
void begin(const Module &Mod) override;
void begin(const Module &Mod,
const IsaInfo::AMDGPUTargetID &TargetID) override;
void end() override;

View File

@ -18,7 +18,9 @@
#include "AMDGPUInstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
@ -4537,27 +4539,55 @@ bool AMDGPULegalizerInfo::legalizeSBufferLoad(
bool AMDGPULegalizerInfo::legalizeTrapIntrinsic(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
// Is non-HSA path or trap-handler disabled? then, insert s_endpgm instruction
if (ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbiHsa ||
!ST.isTrapHandlerEnabled()) {
B.buildInstr(AMDGPU::S_ENDPGM).addImm(0);
} else {
// Pass queue pointer to trap handler as input, and insert trap instruction
// Reference: https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
MachineRegisterInfo &MRI = *B.getMRI();
if (!ST.isTrapHandlerEnabled() ||
ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA)
return legalizeTrapEndpgm(MI, MRI, B);
Register LiveIn =
MRI.createGenericVirtualRegister(LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
if (!loadInputValue(LiveIn, B, AMDGPUFunctionArgInfo::QUEUE_PTR))
return false;
Register SGPR01(AMDGPU::SGPR0_SGPR1);
B.buildCopy(SGPR01, LiveIn);
B.buildInstr(AMDGPU::S_TRAP)
.addImm(GCNSubtarget::TrapIDLLVMTrap)
.addReg(SGPR01, RegState::Implicit);
if (Optional<uint8_t> HsaAbiVer = AMDGPU::getHsaAbiVersion(&ST)) {
switch (*HsaAbiVer) {
case ELF::ELFABIVERSION_AMDGPU_HSA_V2:
case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
return legalizeTrapHsaQueuePtr(MI, MRI, B);
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
return ST.supportsGetDoorbellID() ?
legalizeTrapHsa(MI, MRI, B) :
legalizeTrapHsaQueuePtr(MI, MRI, B);
}
}
llvm_unreachable("Unknown trap handler");
}
bool AMDGPULegalizerInfo::legalizeTrapEndpgm(
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const {
B.buildInstr(AMDGPU::S_ENDPGM).addImm(0);
MI.eraseFromParent();
return true;
}
bool AMDGPULegalizerInfo::legalizeTrapHsaQueuePtr(
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const {
// Pass queue pointer to trap handler as input, and insert trap instruction
// Reference: https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
Register LiveIn =
MRI.createGenericVirtualRegister(LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
if (!loadInputValue(LiveIn, B, AMDGPUFunctionArgInfo::QUEUE_PTR))
return false;
Register SGPR01(AMDGPU::SGPR0_SGPR1);
B.buildCopy(SGPR01, LiveIn);
B.buildInstr(AMDGPU::S_TRAP)
.addImm(static_cast<unsigned>(GCNSubtarget::TrapID::LLVMAMDHSATrap))
.addReg(SGPR01, RegState::Implicit);
MI.eraseFromParent();
return true;
}
bool AMDGPULegalizerInfo::legalizeTrapHsa(
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const {
B.buildInstr(AMDGPU::S_TRAP)
.addImm(static_cast<unsigned>(GCNSubtarget::TrapID::LLVMAMDHSATrap));
MI.eraseFromParent();
return true;
}
@ -4566,8 +4596,8 @@ bool AMDGPULegalizerInfo::legalizeDebugTrapIntrinsic(
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const {
// Is non-HSA path or trap-handler disabled? then, report a warning
// accordingly
if (ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbiHsa ||
!ST.isTrapHandlerEnabled()) {
if (!ST.isTrapHandlerEnabled() ||
ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA) {
DiagnosticInfoUnsupported NoTrap(B.getMF().getFunction(),
"debugtrap handler not supported",
MI.getDebugLoc(), DS_Warning);
@ -4575,7 +4605,8 @@ bool AMDGPULegalizerInfo::legalizeDebugTrapIntrinsic(
Ctx.diagnose(NoTrap);
} else {
// Insert debug-trap instruction
B.buildInstr(AMDGPU::S_TRAP).addImm(GCNSubtarget::TrapIDLLVMDebugTrap);
B.buildInstr(AMDGPU::S_TRAP)
.addImm(static_cast<unsigned>(GCNSubtarget::TrapID::LLVMAMDHSADebugTrap));
}
MI.eraseFromParent();

View File

@ -183,6 +183,12 @@ public:
bool legalizeTrapIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
bool legalizeTrapEndpgm(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
bool legalizeTrapHsaQueuePtr(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
bool legalizeTrapHsa(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
bool legalizeDebugTrapIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;

View File

@ -26,22 +26,6 @@ const char SectionName[] = ".note";
const char NoteNameV2[] = "AMD";
const char NoteNameV3[] = "AMDGPU";
// TODO: Remove this file once we drop code object v2.
enum NoteType{
NT_AMDGPU_HSA_RESERVED_0 = 0,
NT_AMDGPU_HSA_CODE_OBJECT_VERSION = 1,
NT_AMDGPU_HSA_HSAIL = 2,
NT_AMDGPU_HSA_ISA = 3,
NT_AMDGPU_HSA_PRODUCER = 4,
NT_AMDGPU_HSA_PRODUCER_OPTIONS = 5,
NT_AMDGPU_HSA_EXTENSION = 6,
NT_AMDGPU_HSA_RESERVED_7 = 7,
NT_AMDGPU_HSA_RESERVED_8 = 8,
NT_AMDGPU_HSA_RESERVED_9 = 9,
NT_AMDGPU_HSA_HLDEBUG_DEBUG = 101,
NT_AMDGPU_HSA_HLDEBUG_TARGET = 102
};
} // End namespace ElfNote
} // End namespace AMDGPU
} // End namespace llvm

View File

@ -1212,7 +1212,8 @@ private:
bool ParseDirectiveHSACodeObjectISA();
bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
bool ParseDirectiveAMDKernelCodeT();
bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
// TODO: Possibly make subtargetHasRegister const.
bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
bool ParseDirectiveAMDGPUHsaKernel();
bool ParseDirectiveISAVersion();
@ -1291,7 +1292,7 @@ public:
// AsmParser::parseDirectiveSet() cannot be specialized for specific target.
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
MCContext &Ctx = getContext();
if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
MCSymbol *Sym =
Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
@ -1308,7 +1309,7 @@ public:
Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
}
if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
initializeGprCountSymbol(IS_VGPR);
initializeGprCountSymbol(IS_SGPR);
} else
@ -1316,10 +1317,6 @@ public:
}
}
bool hasXNACK() const {
return AMDGPU::hasXNACK(getSTI());
}
bool hasMIMG_R128() const {
return AMDGPU::hasMIMG_R128(getSTI());
}
@ -1580,6 +1577,8 @@ private:
void lex();
public:
void onBeginOfFile() override;
OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
@ -2711,7 +2710,7 @@ AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
return nullptr;
}
if (isHsaAbiVersion3(&getSTI())) {
if (isHsaAbiVersion3Or4(&getSTI())) {
if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
return nullptr;
} else
@ -4386,21 +4385,19 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
return TokError("directive only supported for amdgcn architecture");
std::string Target;
SMLoc TargetStart = getLoc();
if (getParser().parseEscapedString(Target))
std::string TargetIDDirective;
SMLoc TargetStart = getTok().getLoc();
if (getParser().parseEscapedString(TargetIDDirective))
return true;
SMRange TargetRange = SMRange(TargetStart, getLoc());
std::string ExpectedTarget;
raw_string_ostream ExpectedTargetOS(ExpectedTarget);
IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
return getParser().Error(TargetRange.Start,
(Twine(".amdgcn_target directive's target id ") +
Twine(TargetIDDirective) +
Twine(" does not match the specified target id ") +
Twine(getTargetStreamer().getTargetID()->toString())).str());
if (Target != ExpectedTargetOS.str())
return Error(TargetRange.Start, "target must match options", TargetRange);
getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
return false;
}
@ -4473,7 +4470,6 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
unsigned UserSGPRCount = 0;
bool ReserveVCC = true;
bool ReserveFlatScr = true;
bool ReserveXNACK = hasXNACK();
Optional<bool> EnableWavefrontSize32;
while (true) {
@ -4516,6 +4512,10 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
return OutOfRangeError(ValRange);
KD.private_segment_fixed_size = Val;
} else if (ID == ".amdhsa_kernarg_size") {
if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
return OutOfRangeError(ValRange);
KD.kernarg_size = Val;
} else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
@ -4615,7 +4615,9 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
return Error(IDRange.Start, "directive requires gfx8+", IDRange);
if (!isUInt<1>(Val))
return OutOfRangeError(ValRange);
ReserveXNACK = Val;
if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
IDRange);
} else if (ID == ".amdhsa_float_round_mode_32") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
@ -4706,7 +4708,8 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
unsigned VGPRBlocks;
unsigned SGPRBlocks;
if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
getTargetStreamer().getTargetID()->isXnackOnOrAny(),
EnableWavefrontSize32, NextFreeVGPR,
VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
SGPRBlocks))
return true;
@ -4743,7 +4746,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
getTargetStreamer().EmitAmdhsaKernelDescriptor(
getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
ReserveFlatScr, ReserveXNACK);
ReserveFlatScr);
return false;
}
@ -4769,9 +4772,9 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
// targeted GPU.
if (isToken(AsmToken::EndOfStatement)) {
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
ISA.Stepping,
"AMD", "AMDGPU");
getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
ISA.Stepping,
"AMD", "AMDGPU");
return false;
}
@ -4796,8 +4799,8 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
if (!parseString(ArchName, "invalid arch name"))
return true;
getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
VendorName, ArchName);
getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
VendorName, ArchName);
return false;
}
@ -4906,19 +4909,11 @@ bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
"architectures");
}
auto ISAVersionStringFromASM = getToken().getStringContents();
auto TargetIDDirective = getLexer().getTok().getStringContents();
if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
return Error(getParser().getTok().getLoc(), "target id must match options");
std::string ISAVersionStringFromSTI;
raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
return Error(getLoc(),
".amd_amdgpu_isa directive does not match triple and/or mcpu "
"arguments specified through the command line");
}
getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
getTargetStreamer().EmitISAVersion();
Lex();
return false;
@ -4928,7 +4923,7 @@ bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
const char *AssemblerDirectiveBegin;
const char *AssemblerDirectiveEnd;
std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
isHsaAbiVersion3(&getSTI())
isHsaAbiVersion3Or4(&getSTI())
? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
HSAMD::V3::AssemblerDirectiveEnd)
: std::make_tuple(HSAMD::AssemblerDirectiveBegin,
@ -4945,7 +4940,7 @@ bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
HSAMetadataString))
return true;
if (isHsaAbiVersion3(&getSTI())) {
if (isHsaAbiVersion3Or4(&getSTI())) {
if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
return Error(getLoc(), "invalid HSA metadata");
} else {
@ -5095,12 +5090,9 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getString();
if (isHsaAbiVersion3(&getSTI())) {
if (IDVal == ".amdgcn_target")
return ParseDirectiveAMDGCNTarget();
if (isHsaAbiVersion3Or4(&getSTI())) {
if (IDVal == ".amdhsa_kernel")
return ParseDirectiveAMDHSAKernel();
return ParseDirectiveAMDHSAKernel();
// TODO: Restructure/combine with PAL metadata directive.
if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
@ -5125,6 +5117,9 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
return ParseDirectiveHSAMetadata();
}
if (IDVal == ".amdgcn_target")
return ParseDirectiveAMDGCNTarget();
if (IDVal == ".amdgpu_lds")
return ParseDirectiveAMDGPULDS();
@ -5138,7 +5133,7 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
}
bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
unsigned RegNo) const {
unsigned RegNo) {
for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
R.isValid(); ++R) {
@ -5170,7 +5165,7 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
case AMDGPU::XNACK_MASK:
case AMDGPU::XNACK_MASK_LO:
case AMDGPU::XNACK_MASK_HI:
return (isVI() || isGFX9()) && hasXNACK();
return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
case AMDGPU::SGPR_NULL:
return isGFX10Plus();
default:
@ -7261,6 +7256,18 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
{"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
};
void AMDGPUAsmParser::onBeginOfFile() {
if (!getParser().getStreamer().getTargetStreamer() ||
getSTI().getTargetTriple().getArch() == Triple::r600)
return;
if (!getTargetStreamer().getTargetID())
getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
if (isHsaAbiVersion3Or4(&getSTI()))
getTargetStreamer().EmitDirectiveAMDGCNTarget();
}
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
OperandMatchResultTy res = parseOptionalOpr(Operands);

View File

@ -1620,7 +1620,6 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective(
uint16_t TwoByteBuffer = 0;
uint32_t FourByteBuffer = 0;
uint64_t EightByteBuffer = 0;
StringRef ReservedBytes;
StringRef Indent = "\t";
@ -1641,11 +1640,19 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective(
<< FourByteBuffer << '\n';
return MCDisassembler::Success;
case amdhsa::KERNARG_SIZE_OFFSET:
FourByteBuffer = DE.getU32(Cursor);
KdStream << Indent << ".amdhsa_kernarg_size "
<< FourByteBuffer << '\n';
return MCDisassembler::Success;
case amdhsa::RESERVED0_OFFSET:
// 8 reserved bytes, must be 0.
EightByteBuffer = DE.getU64(Cursor);
if (EightByteBuffer) {
return MCDisassembler::Fail;
// 4 reserved bytes, must be 0.
ReservedBytes = DE.getBytes(Cursor, 4);
for (int I = 0; I < 4; ++I) {
if (ReservedBytes[I] != 0) {
return MCDisassembler::Fail;
}
}
return MCDisassembler::Success;

View File

@ -41,24 +41,16 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
using AMDGPUSubtarget::getMaxWavesPerEU;
public:
enum TrapHandlerAbi {
TrapHandlerAbiNone = 0,
TrapHandlerAbiHsa = 1
// Following 2 enums are documented at:
// - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
enum class TrapHandlerAbi {
NONE = 0x00,
AMDHSA = 0x01,
};
enum TrapID {
TrapIDHardwareReserved = 0,
TrapIDHSADebugTrap = 1,
TrapIDLLVMTrap = 2,
TrapIDLLVMDebugTrap = 3,
TrapIDDebugBreakpoint = 7,
TrapIDDebugReserved8 = 8,
TrapIDDebugReservedFE = 0xfe,
TrapIDDebugReservedFF = 0xff
};
enum TrapRegValues {
LLVMTrapHandlerRegValue = 1
enum class TrapID {
LLVMAMDHSATrap = 0x02,
LLVMAMDHSADebugTrap = 0x03,
};
private:
@ -255,6 +247,10 @@ public:
return RegBankInfo.get();
}
const AMDGPU::IsaInfo::AMDGPUTargetID &getTargetID() const {
return TargetID;
}
// Nothing implemented, just prevent crashes on use.
const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
return &TSInfo;
@ -388,7 +384,12 @@ public:
}
TrapHandlerAbi getTrapHandlerAbi() const {
return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
return isAmdHsaOS() ? TrapHandlerAbi::AMDHSA : TrapHandlerAbi::NONE;
}
bool supportsGetDoorbellID() const {
// The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
return getGeneration() >= GFX9;
}
/// True if the offset field of DS instructions works as expected. On SI, the

View File

@ -31,6 +31,20 @@ using namespace llvm::AMDGPU;
// AMDGPUTargetStreamer
//===----------------------------------------------------------------------===//
static void convertIsaVersionV2(uint32_t &Major, uint32_t &Minor,
uint32_t &Stepping, bool Sramecc, bool Xnack) {
if (Major == 9 && Minor == 0) {
switch (Stepping) {
case 0:
case 2:
case 4:
case 6:
if (Xnack)
Stepping++;
}
}
}
bool AMDGPUTargetStreamer::EmitHSAMetadataV2(StringRef HSAMetadataString) {
HSAMD::Metadata HSAMetadata;
if (HSAMD::fromString(HSAMetadataString, HSAMetadata))
@ -182,8 +196,8 @@ void AMDGPUTargetAsmStreamer::finish() {
getPALMetadata()->reset();
}
void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {
OS << "\t.amdgcn_target \"" << Target << "\"\n";
void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget() {
OS << "\t.amdgcn_target \"" << getTargetID()->toString() << "\"\n";
}
void AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion(
@ -193,15 +207,14 @@ void AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion(
}
void
AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
uint32_t Minor,
uint32_t Stepping,
StringRef VendorName,
StringRef ArchName) {
OS << "\t.hsa_code_object_isa " <<
Twine(Major) << "," << Twine(Minor) << "," << Twine(Stepping) <<
",\"" << VendorName << "\",\"" << ArchName << "\"\n";
AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISAV2(uint32_t Major,
uint32_t Minor,
uint32_t Stepping,
StringRef VendorName,
StringRef ArchName) {
convertIsaVersionV2(Major, Minor, Stepping, TargetID->isSramEccOnOrAny(), TargetID->isXnackOnOrAny());
OS << "\t.hsa_code_object_isa " << Twine(Major) << "," << Twine(Minor) << ","
<< Twine(Stepping) << ",\"" << VendorName << "\",\"" << ArchName << "\"\n";
}
void
@ -227,8 +240,8 @@ void AMDGPUTargetAsmStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
<< Alignment.value() << '\n';
}
bool AMDGPUTargetAsmStreamer::EmitISAVersion(StringRef IsaVersionString) {
OS << "\t.amd_amdgpu_isa \"" << IsaVersionString << "\"\n";
bool AMDGPUTargetAsmStreamer::EmitISAVersion() {
OS << "\t.amd_amdgpu_isa \"" << getTargetID()->toString() << "\"\n";
return true;
}
@ -285,7 +298,7 @@ bool AMDGPUTargetAsmStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR,
bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK) {
bool ReserveVCC, bool ReserveFlatScr) {
IsaVersion IVersion = getIsaVersion(STI.getCPU());
OS << "\t.amdhsa_kernel " << KernelName << '\n';
@ -298,6 +311,7 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
<< '\n';
OS << "\t\t.amdhsa_private_segment_fixed_size "
<< KD.private_segment_fixed_size << '\n';
OS << "\t\t.amdhsa_kernarg_size " << KD.kernarg_size << '\n';
PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_buffer", KD,
kernel_code_properties,
@ -358,8 +372,20 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
OS << "\t\t.amdhsa_reserve_vcc " << ReserveVCC << '\n';
if (IVersion.Major >= 7 && !ReserveFlatScr)
OS << "\t\t.amdhsa_reserve_flat_scratch " << ReserveFlatScr << '\n';
if (IVersion.Major >= 8 && ReserveXNACK != hasXNACK(STI))
OS << "\t\t.amdhsa_reserve_xnack_mask " << ReserveXNACK << '\n';
if (Optional<uint8_t> HsaAbiVer = getHsaAbiVersion(&STI)) {
switch (*HsaAbiVer) {
default:
break;
case ELF::ELFABIVERSION_AMDGPU_HSA_V2:
break;
case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
if (getTargetID()->isXnackSupported())
OS << "\t\t.amdhsa_reserve_xnack_mask " << getTargetID()->isXnackOnOrAny() << '\n';
break;
}
}
PRINT_FIELD(OS, ".amdhsa_float_round_mode_32", KD,
compute_pgm_rsrc1,
@ -432,23 +458,7 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S,
const MCSubtargetInfo &STI)
: AMDGPUTargetStreamer(S), Streamer(S), Os(STI.getTargetTriple().getOS()) {
MCAssembler &MCA = getStreamer().getAssembler();
unsigned EFlags = MCA.getELFHeaderEFlags();
EFlags &= ~ELF::EF_AMDGPU_MACH;
EFlags |= getElfMach(STI.getCPU());
EFlags &= ~ELF::EF_AMDGPU_XNACK;
if (AMDGPU::hasXNACK(STI))
EFlags |= ELF::EF_AMDGPU_XNACK;
EFlags &= ~ELF::EF_AMDGPU_SRAM_ECC;
if (AMDGPU::hasSRAMECC(STI))
EFlags |= ELF::EF_AMDGPU_SRAM_ECC;
MCA.setELFHeaderEFlags(EFlags);
}
: AMDGPUTargetStreamer(S), STI(STI), Streamer(S) {}
MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
return static_cast<MCELFStreamer &>(Streamer);
@ -458,6 +468,9 @@ MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
// We use it for emitting the accumulated PAL metadata as a .note record.
// The PAL metadata is reset after it is emitted.
void AMDGPUTargetELFStreamer::finish() {
MCAssembler &MCA = getStreamer().getAssembler();
MCA.setELFHeaderEFlags(getEFlags());
std::string Blob;
const char *Vendor = getPALMetadata()->getVendor();
unsigned Type = getPALMetadata()->getType();
@ -483,7 +496,7 @@ void AMDGPUTargetELFStreamer::EmitNote(
unsigned NoteFlags = 0;
// TODO Apparently, this is currently needed for OpenCL as mentioned in
// https://reviews.llvm.org/D74995
if (Os == Triple::AMDHSA)
if (STI.getTargetTriple().getOS() == Triple::AMDHSA)
NoteFlags = ELF::SHF_ALLOC;
S.PushSection();
@ -499,24 +512,150 @@ void AMDGPUTargetELFStreamer::EmitNote(
S.PopSection();
}
void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {}
unsigned AMDGPUTargetELFStreamer::getEFlags() {
switch (STI.getTargetTriple().getArch()) {
default:
llvm_unreachable("Unsupported Arch");
case Triple::r600:
return getEFlagsR600();
case Triple::amdgcn:
return getEFlagsAMDGCN();
}
}
unsigned AMDGPUTargetELFStreamer::getEFlagsR600() {
assert(STI.getTargetTriple().getArch() == Triple::r600);
return getElfMach(STI.getCPU());
}
unsigned AMDGPUTargetELFStreamer::getEFlagsAMDGCN() {
assert(STI.getTargetTriple().getArch() == Triple::amdgcn);
switch (STI.getTargetTriple().getOS()) {
default:
// TODO: Why are some tests have "mingw" listed as OS?
// llvm_unreachable("Unsupported OS");
case Triple::UnknownOS:
return getEFlagsUnknownOS();
case Triple::AMDHSA:
return getEFlagsAMDHSA();
case Triple::AMDPAL:
return getEFlagsAMDPAL();
case Triple::Mesa3D:
return getEFlagsMesa3D();
}
}
unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() {
// TODO: Why are some tests have "mingw" listed as OS?
// assert(STI.getTargetTriple().getOS() == Triple::UnknownOS);
return getEFlagsV3();
}
unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() {
assert(STI.getTargetTriple().getOS() == Triple::AMDHSA);
if (Optional<uint8_t> HsaAbiVer = getHsaAbiVersion(&STI)) {
switch (*HsaAbiVer) {
case ELF::ELFABIVERSION_AMDGPU_HSA_V2:
case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
return getEFlagsV3();
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
return getEFlagsV4();
}
}
llvm_unreachable("HSA OS ABI Version identification must be defined");
}
unsigned AMDGPUTargetELFStreamer::getEFlagsAMDPAL() {
assert(STI.getTargetTriple().getOS() == Triple::AMDPAL);
return getEFlagsV3();
}
unsigned AMDGPUTargetELFStreamer::getEFlagsMesa3D() {
assert(STI.getTargetTriple().getOS() == Triple::Mesa3D);
return getEFlagsV3();
}
unsigned AMDGPUTargetELFStreamer::getEFlagsV3() {
unsigned EFlagsV3 = 0;
// mach.
EFlagsV3 |= getElfMach(STI.getCPU());
// xnack.
if (getTargetID()->isXnackOnOrAny())
EFlagsV3 |= ELF::EF_AMDGPU_FEATURE_XNACK_V3;
// sramecc.
if (getTargetID()->isSramEccOnOrAny())
EFlagsV3 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_V3;
return EFlagsV3;
}
unsigned AMDGPUTargetELFStreamer::getEFlagsV4() {
unsigned EFlagsV4 = 0;
// mach.
EFlagsV4 |= getElfMach(STI.getCPU());
// xnack.
switch (getTargetID()->getXnackSetting()) {
case AMDGPU::IsaInfo::TargetIDSetting::Unsupported:
EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4;
break;
case AMDGPU::IsaInfo::TargetIDSetting::Any:
EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_ANY_V4;
break;
case AMDGPU::IsaInfo::TargetIDSetting::Off:
EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_OFF_V4;
break;
case AMDGPU::IsaInfo::TargetIDSetting::On:
EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_ON_V4;
break;
}
// sramecc.
switch (getTargetID()->getSramEccSetting()) {
case AMDGPU::IsaInfo::TargetIDSetting::Unsupported:
EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4;
break;
case AMDGPU::IsaInfo::TargetIDSetting::Any:
EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_ANY_V4;
break;
case AMDGPU::IsaInfo::TargetIDSetting::Off:
EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_OFF_V4;
break;
case AMDGPU::IsaInfo::TargetIDSetting::On:
EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_ON_V4;
break;
}
return EFlagsV4;
}
void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget() {}
void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(
uint32_t Major, uint32_t Minor) {
EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(8, getContext()),
ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION, [&](MCELFStreamer &OS) {
ELF::NT_AMD_HSA_CODE_OBJECT_VERSION, [&](MCELFStreamer &OS) {
OS.emitInt32(Major);
OS.emitInt32(Minor);
});
}
void
AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
uint32_t Minor,
uint32_t Stepping,
StringRef VendorName,
StringRef ArchName) {
AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISAV2(uint32_t Major,
uint32_t Minor,
uint32_t Stepping,
StringRef VendorName,
StringRef ArchName) {
uint16_t VendorNameSize = VendorName.size() + 1;
uint16_t ArchNameSize = ArchName.size() + 1;
@ -524,8 +663,9 @@ AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
VendorNameSize + ArchNameSize;
convertIsaVersionV2(Major, Minor, Stepping, TargetID->isSramEccOnOrAny(), TargetID->isXnackOnOrAny());
EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(DescSZ, getContext()),
ElfNote::NT_AMDGPU_HSA_ISA, [&](MCELFStreamer &OS) {
ELF::NT_AMD_HSA_ISA_VERSION, [&](MCELFStreamer &OS) {
OS.emitInt16(VendorNameSize);
OS.emitInt16(ArchNameSize);
OS.emitInt32(Major);
@ -573,7 +713,7 @@ void AMDGPUTargetELFStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
SymbolELF->setSize(MCConstantExpr::create(Size, getContext()));
}
bool AMDGPUTargetELFStreamer::EmitISAVersion(StringRef IsaVersionString) {
bool AMDGPUTargetELFStreamer::EmitISAVersion() {
// Create two labels to mark the beginning and end of the desc field
// and a MCExpr to calculate the size of the desc field.
auto &Context = getContext();
@ -583,10 +723,10 @@ bool AMDGPUTargetELFStreamer::EmitISAVersion(StringRef IsaVersionString) {
MCSymbolRefExpr::create(DescEnd, Context),
MCSymbolRefExpr::create(DescBegin, Context), Context);
EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_ISA,
EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_HSA_ISA_NAME,
[&](MCELFStreamer &OS) {
OS.emitLabel(DescBegin);
OS.emitBytes(IsaVersionString);
OS.emitBytes(getTargetID()->toString());
OS.emitLabel(DescEnd);
});
return true;
@ -634,7 +774,7 @@ bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
MCSymbolRefExpr::create(DescEnd, Context),
MCSymbolRefExpr::create(DescBegin, Context), Context);
EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_HSA_METADATA,
EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_HSA_METADATA,
[&](MCELFStreamer &OS) {
OS.emitLabel(DescBegin);
OS.emitBytes(HSAMetadataString);
@ -672,8 +812,7 @@ bool AMDGPUTargetELFStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr,
bool ReserveXNACK) {
uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) {
auto &Streamer = getStreamer();
auto &Context = Streamer.getContext();
@ -700,8 +839,11 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
Streamer.emitLabel(KernelDescriptorSymbol);
Streamer.emitInt32(KernelDescriptor.group_segment_fixed_size);
Streamer.emitInt32(KernelDescriptor.private_segment_fixed_size);
Streamer.emitInt32(KernelDescriptor.kernarg_size);
for (uint8_t Res : KernelDescriptor.reserved0)
Streamer.emitInt8(Res);
// FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The
// expression being created is:
// (start of kernel code) - (start of kernel descriptor)

View File

@ -9,6 +9,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDGPUPALMetadata.h"
#include "llvm/MC/MCStreamer.h"
@ -39,6 +40,9 @@ class AMDGPUTargetStreamer : public MCTargetStreamer {
AMDGPUPALMetadata PALMetadata;
protected:
// TODO: Move HSAMetadataStream to AMDGPUTargetStreamer.
Optional<AMDGPU::IsaInfo::AMDGPUTargetID> TargetID;
MCContext &getContext() const { return Streamer.getContext(); }
public:
@ -46,15 +50,15 @@ public:
AMDGPUPALMetadata *getPALMetadata() { return &PALMetadata; }
virtual void EmitDirectiveAMDGCNTarget(StringRef Target) = 0;
virtual void EmitDirectiveAMDGCNTarget() = 0;
virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major,
uint32_t Minor) = 0;
virtual void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor,
uint32_t Stepping,
StringRef VendorName,
StringRef ArchName) = 0;
virtual void EmitDirectiveHSACodeObjectISAV2(uint32_t Major, uint32_t Minor,
uint32_t Stepping,
StringRef VendorName,
StringRef ArchName) = 0;
virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) = 0;
@ -64,7 +68,7 @@ public:
Align Alignment) = 0;
/// \returns True on success, false on failure.
virtual bool EmitISAVersion(StringRef IsaVersionString) = 0;
virtual bool EmitISAVersion() = 0;
/// \returns True on success, false on failure.
virtual bool EmitHSAMetadataV2(StringRef HSAMetadataString);
@ -90,11 +94,27 @@ public:
virtual void EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr,
bool ReserveXNACK) = 0;
uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) = 0;
static StringRef getArchNameFromElfMach(unsigned ElfMach);
static unsigned getElfMach(StringRef GPU);
const Optional<AMDGPU::IsaInfo::AMDGPUTargetID> &getTargetID() const {
return TargetID;
}
Optional<AMDGPU::IsaInfo::AMDGPUTargetID> &getTargetID() {
return TargetID;
}
void initializeTargetID(const MCSubtargetInfo &STI) {
assert(TargetID == None && "TargetID can only be initialized once");
TargetID.emplace(STI);
}
void initializeTargetID(const MCSubtargetInfo &STI, StringRef FeatureString) {
initializeTargetID(STI);
assert(getTargetID() != None && "TargetID is None");
getTargetID()->setTargetIDFromFeaturesString(FeatureString);
}
};
class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer {
@ -104,14 +124,14 @@ public:
void finish() override;
void EmitDirectiveAMDGCNTarget(StringRef Target) override;
void EmitDirectiveAMDGCNTarget() override;
void EmitDirectiveHSACodeObjectVersion(uint32_t Major,
uint32_t Minor) override;
void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor,
uint32_t Stepping, StringRef VendorName,
StringRef ArchName) override;
void EmitDirectiveHSACodeObjectISAV2(uint32_t Major, uint32_t Minor,
uint32_t Stepping, StringRef VendorName,
StringRef ArchName) override;
void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
@ -120,7 +140,7 @@ public:
void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, Align Alignment) override;
/// \returns True on success, false on failure.
bool EmitISAVersion(StringRef IsaVersionString) override;
bool EmitISAVersion() override;
/// \returns True on success, false on failure.
bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict) override;
@ -134,17 +154,29 @@ public:
void EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr,
bool ReserveXNACK) override;
uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) override;
};
class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {
const MCSubtargetInfo &STI;
MCStreamer &Streamer;
Triple::OSType Os;
void EmitNote(StringRef Name, const MCExpr *DescSize, unsigned NoteType,
function_ref<void(MCELFStreamer &)> EmitDesc);
unsigned getEFlags();
unsigned getEFlagsR600();
unsigned getEFlagsAMDGCN();
unsigned getEFlagsUnknownOS();
unsigned getEFlagsAMDHSA();
unsigned getEFlagsAMDPAL();
unsigned getEFlagsMesa3D();
unsigned getEFlagsV3();
unsigned getEFlagsV4();
public:
AMDGPUTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI);
@ -152,14 +184,14 @@ public:
void finish() override;
void EmitDirectiveAMDGCNTarget(StringRef Target) override;
void EmitDirectiveAMDGCNTarget() override;
void EmitDirectiveHSACodeObjectVersion(uint32_t Major,
uint32_t Minor) override;
void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor,
uint32_t Stepping, StringRef VendorName,
StringRef ArchName) override;
void EmitDirectiveHSACodeObjectISAV2(uint32_t Major, uint32_t Minor,
uint32_t Stepping, StringRef VendorName,
StringRef ArchName) override;
void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
@ -168,7 +200,7 @@ public:
void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, Align Alignment) override;
/// \returns True on success, false on failure.
bool EmitISAVersion(StringRef IsaVersionString) override;
bool EmitISAVersion() override;
/// \returns True on success, false on failure.
bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict) override;
@ -182,8 +214,7 @@ public:
void EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr,
bool ReserveXNACK) override;
uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) override;
};
}

View File

@ -19,6 +19,7 @@
#include "SIRegisterInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
@ -5157,12 +5158,35 @@ SDValue SITargetLowering::lowerXMULO(SDValue Op, SelectionDAG &DAG) const {
}
SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const {
if (!Subtarget->isTrapHandlerEnabled() ||
Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA)
return lowerTrapEndpgm(Op, DAG);
if (Optional<uint8_t> HsaAbiVer = AMDGPU::getHsaAbiVersion(Subtarget)) {
switch (*HsaAbiVer) {
case ELF::ELFABIVERSION_AMDGPU_HSA_V2:
case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
return lowerTrapHsaQueuePtr(Op, DAG);
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
return Subtarget->supportsGetDoorbellID() ?
lowerTrapHsa(Op, DAG) : lowerTrapHsaQueuePtr(Op, DAG);
}
}
llvm_unreachable("Unknown trap handler");
}
SDValue SITargetLowering::lowerTrapEndpgm(
SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Chain = Op.getOperand(0);
return DAG.getNode(AMDGPUISD::ENDPGM, SL, MVT::Other, Chain);
}
if (Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbiHsa ||
!Subtarget->isTrapHandlerEnabled())
return DAG.getNode(AMDGPUISD::ENDPGM, SL, MVT::Other, Chain);
SDValue SITargetLowering::lowerTrapHsaQueuePtr(
SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Chain = Op.getOperand(0);
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
@ -5173,22 +5197,37 @@ SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const {
SDValue SGPR01 = DAG.getRegister(AMDGPU::SGPR0_SGPR1, MVT::i64);
SDValue ToReg = DAG.getCopyToReg(Chain, SL, SGPR01,
QueuePtr, SDValue());
uint64_t TrapID = static_cast<uint64_t>(GCNSubtarget::TrapID::LLVMAMDHSATrap);
SDValue Ops[] = {
ToReg,
DAG.getTargetConstant(GCNSubtarget::TrapIDLLVMTrap, SL, MVT::i16),
DAG.getTargetConstant(TrapID, SL, MVT::i16),
SGPR01,
ToReg.getValue(1)
};
return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops);
}
SDValue SITargetLowering::lowerTrapHsa(
SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Chain = Op.getOperand(0);
uint64_t TrapID = static_cast<uint64_t>(GCNSubtarget::TrapID::LLVMAMDHSATrap);
SDValue Ops[] = {
Chain,
DAG.getTargetConstant(TrapID, SL, MVT::i16)
};
return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops);
}
SDValue SITargetLowering::lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Chain = Op.getOperand(0);
MachineFunction &MF = DAG.getMachineFunction();
if (Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbiHsa ||
!Subtarget->isTrapHandlerEnabled()) {
if (!Subtarget->isTrapHandlerEnabled() ||
Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA) {
DiagnosticInfoUnsupported NoTrap(MF.getFunction(),
"debugtrap handler not supported",
Op.getDebugLoc(),
@ -5198,9 +5237,10 @@ SDValue SITargetLowering::lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const {
return Chain;
}
uint64_t TrapID = static_cast<uint64_t>(GCNSubtarget::TrapID::LLVMAMDHSADebugTrap);
SDValue Ops[] = {
Chain,
DAG.getTargetConstant(GCNSubtarget::TrapIDLLVMDebugTrap, SL, MVT::i16)
DAG.getTargetConstant(TrapID, SL, MVT::i16)
};
return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops);
}

View File

@ -144,7 +144,11 @@ private:
SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerTRAP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerTrapEndpgm(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerTrapHsaQueuePtr(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerTrapHsa(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const;
SDNode *adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;

View File

@ -1373,11 +1373,6 @@ def DSTOMOD {
int NONE = 0;
}
def TRAPID{
int LLVM_TRAP = 2;
int LLVM_DEBUG_TRAP = 3;
}
def HWREG {
int MODE = 1;
int STATUS = 2;

View File

@ -30,7 +30,8 @@
static llvm::cl::opt<unsigned> AmdhsaCodeObjectVersion(
"amdhsa-code-object-version", llvm::cl::Hidden,
llvm::cl::desc("AMDHSA Code Object Version"), llvm::cl::init(3));
llvm::cl::desc("AMDHSA Code Object Version"), llvm::cl::init(4),
llvm::cl::ZeroOrMore);
namespace {
@ -96,23 +97,36 @@ Optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI) {
return ELF::ELFABIVERSION_AMDGPU_HSA_V2;
case 3:
return ELF::ELFABIVERSION_AMDGPU_HSA_V3;
case 4:
return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
default:
return ELF::ELFABIVERSION_AMDGPU_HSA_V3;
report_fatal_error(Twine("Unsupported AMDHSA Code Object Version ") +
Twine(AmdhsaCodeObjectVersion));
}
}
bool isHsaAbiVersion2(const MCSubtargetInfo *STI) {
if (const auto &&HsaAbiVer = getHsaAbiVersion(STI))
return HsaAbiVer.getValue() == ELF::ELFABIVERSION_AMDGPU_HSA_V2;
if (Optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V2;
return false;
}
bool isHsaAbiVersion3(const MCSubtargetInfo *STI) {
if (const auto &&HsaAbiVer = getHsaAbiVersion(STI))
return HsaAbiVer.getValue() == ELF::ELFABIVERSION_AMDGPU_HSA_V3;
if (Optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V3;
return false;
}
bool isHsaAbiVersion4(const MCSubtargetInfo *STI) {
if (Optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V4;
return false;
}
bool isHsaAbiVersion3Or4(const MCSubtargetInfo *STI) {
return isHsaAbiVersion3(STI) || isHsaAbiVersion4(STI);
}
#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
@ -247,7 +261,8 @@ int getMCOpcode(uint16_t Opcode, unsigned Gen) {
namespace IsaInfo {
AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
: XnackSetting(TargetIDSetting::Any), SramEccSetting(TargetIDSetting::Any) {
: STI(STI), XnackSetting(TargetIDSetting::Any),
SramEccSetting(TargetIDSetting::Any) {
if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
XnackSetting = TargetIDSetting::Unsupported;
if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
@ -334,25 +349,104 @@ void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
}
}
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
auto TargetTriple = STI->getTargetTriple();
auto Version = getIsaVersion(STI->getCPU());
std::string AMDGPUTargetID::toString() const {
std::string StringRep = "";
raw_string_ostream StreamRep(StringRep);
Stream << TargetTriple.getArchName() << '-'
<< TargetTriple.getVendorName() << '-'
<< TargetTriple.getOSName() << '-'
<< TargetTriple.getEnvironmentName() << '-'
<< "gfx"
<< Version.Major
<< Version.Minor
<< hexdigit(Version.Stepping, true);
auto TargetTriple = STI.getTargetTriple();
auto Version = getIsaVersion(STI.getCPU());
if (hasXNACK(*STI))
Stream << "+xnack";
if (hasSRAMECC(*STI))
Stream << "+sramecc";
StreamRep << TargetTriple.getArchName() << '-'
<< TargetTriple.getVendorName() << '-'
<< TargetTriple.getOSName() << '-'
<< TargetTriple.getEnvironmentName() << '-';
Stream.flush();
std::string Processor = "";
// TODO: Following else statement is present here because we used various
// alias names for GPUs up until GFX9 (e.g. 'fiji' is same as 'gfx803').
// Remove once all aliases are removed from GCNProcessors.td.
if (Version.Major >= 9)
Processor = STI.getCPU().str();
else
Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
Twine(Version.Stepping))
.str();
std::string Features = "";
if (Optional<uint8_t> HsaAbiVersion = getHsaAbiVersion(&STI)) {
switch (*HsaAbiVersion) {
case ELF::ELFABIVERSION_AMDGPU_HSA_V2:
// Code object V2 only supported specific processors and had fixed
// settings for the XNACK.
if (Processor == "gfx600") {
} else if (Processor == "gfx601") {
} else if (Processor == "gfx602") {
} else if (Processor == "gfx700") {
} else if (Processor == "gfx701") {
} else if (Processor == "gfx702") {
} else if (Processor == "gfx703") {
} else if (Processor == "gfx704") {
} else if (Processor == "gfx705") {
} else if (Processor == "gfx801") {
if (!isXnackOnOrAny())
report_fatal_error(
"AMD GPU code object V2 does not support processor " + Processor +
" without XNACK");
} else if (Processor == "gfx802") {
} else if (Processor == "gfx803") {
} else if (Processor == "gfx805") {
} else if (Processor == "gfx810") {
if (!isXnackOnOrAny())
report_fatal_error(
"AMD GPU code object V2 does not support processor " + Processor +
" without XNACK");
} else if (Processor == "gfx900") {
if (isXnackOnOrAny())
Processor = "gfx901";
} else if (Processor == "gfx902") {
if (isXnackOnOrAny())
Processor = "gfx903";
} else if (Processor == "gfx904") {
if (isXnackOnOrAny())
Processor = "gfx905";
} else if (Processor == "gfx906") {
if (isXnackOnOrAny())
Processor = "gfx907";
} else {
report_fatal_error(
"AMD GPU code object V2 does not support processor " + Processor);
}
break;
case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
// xnack.
if (isXnackOnOrAny())
Features += "+xnack";
// In code object v2 and v3, "sramecc" feature was spelled with a
// hyphen ("sram-ecc").
if (isSramEccOnOrAny())
Features += "+sram-ecc";
break;
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
// sramecc.
if (getSramEccSetting() == TargetIDSetting::Off)
Features += ":sramecc-";
else if (getSramEccSetting() == TargetIDSetting::On)
Features += ":sramecc+";
// xnack.
if (getXnackSetting() == TargetIDSetting::Off)
Features += ":xnack-";
else if (getXnackSetting() == TargetIDSetting::On)
Features += ":xnack+";
break;
default:
break;
}
}
StreamRep << Processor << Features;
StreamRep.flush();
return StringRep;
}
unsigned getWavefrontSize(const MCSubtargetInfo *STI) {

View File

@ -44,6 +44,12 @@ bool isHsaAbiVersion2(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 3,
/// false otherwise.
bool isHsaAbiVersion3(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 4,
/// false otherwise.
bool isHsaAbiVersion4(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 3 or 4,
/// false otherwise.
bool isHsaAbiVersion3Or4(const MCSubtargetInfo *STI);
struct GcnBufferFormatInfo {
unsigned Format;
@ -78,6 +84,7 @@ enum class TargetIDSetting {
class AMDGPUTargetID {
private:
const MCSubtargetInfo &STI;
TargetIDSetting XnackSetting;
TargetIDSetting SramEccSetting;
@ -145,10 +152,10 @@ public:
void setTargetIDFromFeaturesString(StringRef FS);
void setTargetIDFromTargetIDStream(StringRef TargetID);
};
/// Streams isa version string for given subtarget \p STI into \p Stream.
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
/// \returns String representation of an object.
std::string toString() const;
};
/// \returns Wavefront size for given subtarget \p STI.
unsigned getWavefrontSize(const MCSubtargetInfo *STI);

View File

@ -41,7 +41,7 @@ void AMDGPUPALMetadata::readFromIR(Module &M) {
}
return;
}
BlobType = ELF::NT_AMD_AMDGPU_PAL_METADATA;
BlobType = ELF::NT_AMD_PAL_METADATA;
NamedMD = M.getNamedMetadata("amdgpu.pal.metadata");
if (!NamedMD || !NamedMD->getNumOperands()) {
// Emit msgpack metadata by default
@ -69,7 +69,7 @@ void AMDGPUPALMetadata::readFromIR(Module &M) {
// Metadata.
bool AMDGPUPALMetadata::setFromBlob(unsigned Type, StringRef Blob) {
BlobType = Type;
if (Type == ELF::NT_AMD_AMDGPU_PAL_METADATA)
if (Type == ELF::NT_AMD_PAL_METADATA)
return setFromLegacyBlob(Blob);
return setFromMsgPackBlob(Blob);
}
@ -688,7 +688,7 @@ void AMDGPUPALMetadata::toString(std::string &String) {
// a .note record of the specified AMD type. Returns an empty blob if
// there is no PAL metadata,
void AMDGPUPALMetadata::toBlob(unsigned Type, std::string &Blob) {
if (Type == ELF::NT_AMD_AMDGPU_PAL_METADATA)
if (Type == ELF::NT_AMD_PAL_METADATA)
toLegacyBlob(Blob);
else if (Type)
toMsgPackBlob(Blob);
@ -825,7 +825,7 @@ const char *AMDGPUPALMetadata::getVendor() const {
}
// Get .note record type of metadata blob to be emitted:
// ELF::NT_AMD_AMDGPU_PAL_METADATA (legacy key=val format), or
// ELF::NT_AMD_PAL_METADATA (legacy key=val format), or
// ELF::NT_AMDGPU_METADATA (MsgPack format), or
// 0 (no PAL metadata).
unsigned AMDGPUPALMetadata::getType() const {
@ -834,12 +834,12 @@ unsigned AMDGPUPALMetadata::getType() const {
// Return whether the blob type is legacy PAL metadata.
bool AMDGPUPALMetadata::isLegacy() const {
return BlobType == ELF::NT_AMD_AMDGPU_PAL_METADATA;
return BlobType == ELF::NT_AMD_PAL_METADATA;
}
// Set legacy PAL metadata format.
void AMDGPUPALMetadata::setLegacy() {
BlobType = ELF::NT_AMD_AMDGPU_PAL_METADATA;
BlobType = ELF::NT_AMD_PAL_METADATA;
}
// Erase all PAL metadata.

View File

@ -95,7 +95,7 @@ public:
const char *getVendor() const;
// Get .note record type of metadata blob to be emitted:
// ELF::NT_AMD_AMDGPU_PAL_METADATA (legacy key=val format), or
// ELF::NT_AMD_PAL_METADATA (legacy key=val format), or
// ELF::NT_AMDGPU_METADATA (MsgPack format), or
// 0 (no PAL metadata).
unsigned getType() const;

View File

@ -24,7 +24,6 @@ define void @func_use_lds_global() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_mov_b64 s[0:1], s[6:7]
; GFX9-NEXT: s_trap 2
; GFX9-NEXT: ds_write_b32 v0, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
@ -47,7 +46,6 @@ define void @func_use_lds_global_constexpr_cast() {
; GFX9-LABEL: func_use_lds_global_constexpr_cast:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b64 s[0:1], s[6:7]
; GFX9-NEXT: s_trap 2
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)

View File

@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global,-xnack -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare i32 @llvm.amdgcn.workitem.id.x() #0

View File

@ -1,5 +1,5 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -amdgpu-verify-hsa-metadata -filetype=obj -o /dev/null < %s 2>&1 | FileCheck --check-prefix=PARSER %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=3 -amdgpu-verify-hsa-metadata -filetype=obj -o /dev/null < %s 2>&1 | FileCheck --check-prefix=PARSER %s
; CHECK-LABEL: {{^}}min_64_max_64:
; CHECK: SGPRBlocks: 0

View File

@ -1,4 +1,4 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 < %s | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=HSAMD %s
; CHECK-LABEL: {{^}}min_64_max_64:
@ -129,7 +129,7 @@ define amdgpu_kernel void @min_1024_max_1024() #3 {
}
attributes #3 = {"amdgpu-flat-work-group-size"="1024,1024"}
; HSAMD: NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata)
; HSAMD: NT_AMD_HSA_METADATA (AMD HSA Metadata)
; HSAMD: Version: [ 1, 0 ]
; HSAMD: Kernels:
; HSAMD: - Name: min_64_max_64

View File

@ -1,5 +1,5 @@
# RUN: llc -march=amdgcn -mcpu=carrizo -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,XNACK %s
# RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck --check-prefix=GCN %s
# RUN: llc -march=amdgcn -mcpu=fiji -mattr=-xnack -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN %s
---
# Trivial clause at beginning of program

View File

@ -1,4 +1,4 @@
# RUN: llc -march=amdgcn -mcpu=tonga -run-pass post-RA-sched -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
# RUN: llc -march=amdgcn -mcpu=tonga -mattr=-xnack -run-pass post-RA-sched -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
# GCN: FLAT_LOAD_DWORD
# GCN-NEXT: FLAT_LOAD_DWORD

View File

@ -1,3 +1,90 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx600 < %s | FileCheck --check-prefixes=V3-GFX600 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=tahiti < %s | FileCheck --check-prefixes=V3-GFX600 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx601 < %s | FileCheck --check-prefixes=V3-GFX601 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=pitcairn < %s | FileCheck --check-prefixes=V3-GFX601 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=verde < %s | FileCheck --check-prefixes=V3-GFX601 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx602 < %s | FileCheck --check-prefixes=V3-GFX602 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=hainan < %s | FileCheck --check-prefixes=V3-GFX602 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=oland < %s | FileCheck --check-prefixes=V3-GFX602 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx700 < %s | FileCheck --check-prefixes=V3-GFX700 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=kaveri < %s | FileCheck --check-prefixes=V3-GFX700 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx701 < %s | FileCheck --check-prefixes=V3-GFX701 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=hawaii < %s | FileCheck --check-prefixes=V3-GFX701 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx702 < %s | FileCheck --check-prefixes=V3-GFX702 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx703 < %s | FileCheck --check-prefixes=V3-GFX703 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=kabini < %s | FileCheck --check-prefixes=V3-GFX703 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=mullins < %s | FileCheck --check-prefixes=V3-GFX703 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx704 < %s | FileCheck --check-prefixes=V3-GFX704 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=bonaire < %s | FileCheck --check-prefixes=V3-GFX704 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx705 < %s | FileCheck --check-prefixes=V3-GFX705 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx801 < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx801 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX801-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx801 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=carrizo < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=carrizo -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX801-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=carrizo -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx802 < %s | FileCheck --check-prefixes=V3-GFX802 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=iceland < %s | FileCheck --check-prefixes=V3-GFX802 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=tonga < %s | FileCheck --check-prefixes=V3-GFX802 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx803 < %s | FileCheck --check-prefixes=V3-GFX803 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=fiji < %s | FileCheck --check-prefixes=V3-GFX803 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=polaris10 < %s | FileCheck --check-prefixes=V3-GFX803 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=polaris11 < %s | FileCheck --check-prefixes=V3-GFX803 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx805 < %s | FileCheck --check-prefixes=V3-GFX805 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=tongapro < %s | FileCheck --check-prefixes=V3-GFX805 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx810 < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx810 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX810-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx810 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=stoney < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=stoney -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX810-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=stoney -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx900 < %s | FileCheck --check-prefixes=V3-GFX900-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx900 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX900-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx900 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX900-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx902 < %s | FileCheck --check-prefixes=V3-GFX902-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx902 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX902-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx902 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX902-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx904 < %s | FileCheck --check-prefixes=V3-GFX904-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx904 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX904-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx904 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX904-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 -mattr=-sramecc < %s | FileCheck --check-prefixes=V3-GFX906-NOSRAMECC-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 -mattr=+sramecc < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 -mattr=-sramecc,-xnack < %s | FileCheck --check-prefixes=V3-GFX906-NOSRAMECC-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 -mattr=+sramecc,-xnack < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 -mattr=-sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX906-NOSRAMECC-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 -mattr=+sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 -mattr=-sramecc < %s | FileCheck --check-prefixes=V3-GFX908-NOSRAMECC-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 -mattr=+sramecc < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 -mattr=-sramecc,-xnack < %s | FileCheck --check-prefixes=V3-GFX908-NOSRAMECC-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 -mattr=+sramecc,-xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 -mattr=-sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX908-NOSRAMECC-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 -mattr=+sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx909 < %s | FileCheck --check-prefixes=V3-GFX909-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx909 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX909-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx909 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX909-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx90c < %s | FileCheck --check-prefixes=V3-GFX90C-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx90c -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX90C-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx90c -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX90C-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1010 < %s | FileCheck --check-prefixes=V3-GFX1010-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1010 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1010-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1010 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1010-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1011 < %s | FileCheck --check-prefixes=V3-GFX1011-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1011 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1011-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1011 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1011-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1012 < %s | FileCheck --check-prefixes=V3-GFX1012-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1012 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1012-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1012 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1012-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1030 < %s | FileCheck --check-prefixes=V3-GFX1030 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1031 < %s | FileCheck --check-prefixes=V3-GFX1031 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1032 < %s | FileCheck --check-prefixes=V3-GFX1032 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1033 < %s | FileCheck --check-prefixes=V3-GFX1033 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 < %s | FileCheck --check-prefixes=GFX600 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti < %s | FileCheck --check-prefixes=GFX600 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx601 < %s | FileCheck --check-prefixes=GFX601 %s
@ -17,8 +104,12 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx704 < %s | FileCheck --check-prefixes=GFX704 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire < %s | FileCheck --check-prefixes=GFX704 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx705 < %s | FileCheck --check-prefixes=GFX705 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx801 < %s | FileCheck --check-prefixes=GFX801 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=carrizo < %s | FileCheck --check-prefixes=GFX801 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 < %s | FileCheck --check-prefixes=GFX801 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX801-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX801-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=carrizo < %s | FileCheck --check-prefixes=GFX801 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=carrizo -mattr=-xnack < %s | FileCheck --check-prefixes=GFX801-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=carrizo -mattr=+xnack < %s | FileCheck --check-prefixes=GFX801-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 < %s | FileCheck --check-prefixes=GFX802 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland < %s | FileCheck --check-prefixes=GFX802 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga < %s | FileCheck --check-prefixes=GFX802 %s
@ -29,23 +120,102 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx805 < %s | FileCheck --check-prefixes=GFX805 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tongapro < %s | FileCheck --check-prefixes=GFX805 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 < %s | FileCheck --check-prefixes=GFX810 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX810-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX810-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=stoney < %s | FileCheck --check-prefixes=GFX810 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=stoney -mattr=-xnack < %s | FileCheck --check-prefixes=GFX810-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=stoney -mattr=+xnack < %s | FileCheck --check-prefixes=GFX810-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefixes=GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX900-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX900-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 < %s | FileCheck --check-prefixes=GFX902 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX902-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX902-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 < %s | FileCheck --check-prefixes=GFX904 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX904-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX904-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 < %s | FileCheck --check-prefixes=GFX906 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=-sramecc < %s | FileCheck --check-prefixes=GFX906-NOSRAMECC %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+sramecc < %s | FileCheck --check-prefixes=GFX906-SRAMECC %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX906-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX906-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=-sramecc,-xnack < %s | FileCheck --check-prefixes=GFX906-NOSRAMECC-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+sramecc,-xnack < %s | FileCheck --check-prefixes=GFX906-SRAMECC-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=-sramecc,+xnack < %s | FileCheck --check-prefixes=GFX906-NOSRAMECC-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+sramecc,+xnack < %s | FileCheck --check-prefixes=GFX906-SRAMECC-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX908 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-sramecc < %s | FileCheck --check-prefixes=GFX908-NOSRAMECC %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=+sramecc < %s | FileCheck --check-prefixes=GFX908-SRAMECC %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX908-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX908-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-sramecc,-xnack < %s | FileCheck --check-prefixes=GFX908-NOSRAMECC-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=+sramecc,-xnack < %s | FileCheck --check-prefixes=GFX908-SRAMECC-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-sramecc,+xnack < %s | FileCheck --check-prefixes=GFX908-NOSRAMECC-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=+sramecc,+xnack < %s | FileCheck --check-prefixes=GFX908-SRAMECC-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx909 < %s | FileCheck --check-prefixes=GFX909 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx909 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX909-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx909 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX909-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c < %s | FileCheck --check-prefixes=GFX90C %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c -mattr=-xnack < %s | FileCheck --check-prefixes=GFX90C-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c -mattr=+xnack < %s | FileCheck --check-prefixes=GFX90C-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX1010 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX1010-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX1010-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1011 < %s | FileCheck --check-prefixes=GFX1011 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1011 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX1011-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1011 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX1011-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1012 < %s | FileCheck --check-prefixes=GFX1012 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1012 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX1012-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1012 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX1012-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck --check-prefixes=GFX1030 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX1031 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1032 < %s | FileCheck --check-prefixes=GFX1032 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1033 < %s | FileCheck --check-prefixes=GFX1033 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+xnack < %s | FileCheck --check-prefixes=XNACK-GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=-xnack < %s | FileCheck --check-prefixes=NO-XNACK-GFX902 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+sramecc < %s | FileCheck --check-prefixes=SRAM-ECC-GFX904 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+sramecc < %s | FileCheck --check-prefixes=SRAM-ECC-GFX906 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+sramecc,+xnack < %s | FileCheck --check-prefixes=SRAM-ECC-XNACK-GFX904 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+sramecc,+xnack < %s | FileCheck --check-prefixes=SRAM-ECC-XNACK-GFX906 %s
; FIXME: With the default attributes these directives are not accurate for
; xnack and sramecc. Subsequent Target-ID patches will address this.
; V3-GFX600: .amdgcn_target "amdgcn-amd-amdhsa--gfx600"
; V3-GFX601: .amdgcn_target "amdgcn-amd-amdhsa--gfx601"
; V3-GFX602: .amdgcn_target "amdgcn-amd-amdhsa--gfx602"
; V3-GFX700: .amdgcn_target "amdgcn-amd-amdhsa--gfx700"
; V3-GFX701: .amdgcn_target "amdgcn-amd-amdhsa--gfx701"
; V3-GFX702: .amdgcn_target "amdgcn-amd-amdhsa--gfx702"
; V3-GFX703: .amdgcn_target "amdgcn-amd-amdhsa--gfx703"
; V3-GFX704: .amdgcn_target "amdgcn-amd-amdhsa--gfx704"
; V3-GFX705: .amdgcn_target "amdgcn-amd-amdhsa--gfx705"
; V3-GFX801-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx801"
; V3-GFX801-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx801+xnack"
; V3-GFX802: .amdgcn_target "amdgcn-amd-amdhsa--gfx802"
; V3-GFX803: .amdgcn_target "amdgcn-amd-amdhsa--gfx803"
; V3-GFX805: .amdgcn_target "amdgcn-amd-amdhsa--gfx805"
; V3-GFX810-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx810"
; V3-GFX810-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx810+xnack"
; V3-GFX900-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx900"
; V3-GFX900-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx900+xnack"
; V3-GFX902-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx902"
; V3-GFX902-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx902+xnack"
; V3-GFX904-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx904"
; V3-GFX904-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack"
; V3-GFX906-NOSRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906"
; V3-GFX906-SRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+sram-ecc"
; V3-GFX906-NOSRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+xnack"
; V3-GFX906-SRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+xnack+sram-ecc"
; V3-GFX908-NOSRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908"
; V3-GFX908-SRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908+sram-ecc"
; V3-GFX908-NOSRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908+xnack"
; V3-GFX908-SRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908+xnack+sram-ecc"
; V3-GFX909-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx909"
; V3-GFX909-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx909+xnack"
; V3-GFX90C-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c"
; V3-GFX90C-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c+xnack"
; V3-GFX1010-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010"
; V3-GFX1010-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010+xnack"
; V3-GFX1011-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1011"
; V3-GFX1011-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1011+xnack"
; V3-GFX1012-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012"
; V3-GFX1012-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012+xnack"
; V3-GFX1030: .amdgcn_target "amdgcn-amd-amdhsa--gfx1030"
; V3-GFX1031: .amdgcn_target "amdgcn-amd-amdhsa--gfx1031"
; V3-GFX1032: .amdgcn_target "amdgcn-amd-amdhsa--gfx1032"
; V3-GFX1033: .amdgcn_target "amdgcn-amd-amdhsa--gfx1033"
; GFX600: .amdgcn_target "amdgcn-amd-amdhsa--gfx600"
; GFX601: .amdgcn_target "amdgcn-amd-amdhsa--gfx601"
@ -57,23 +227,60 @@
; GFX704: .amdgcn_target "amdgcn-amd-amdhsa--gfx704"
; GFX705: .amdgcn_target "amdgcn-amd-amdhsa--gfx705"
; GFX801: .amdgcn_target "amdgcn-amd-amdhsa--gfx801"
; GFX801-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx801:xnack-"
; GFX801-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx801:xnack+"
; GFX802: .amdgcn_target "amdgcn-amd-amdhsa--gfx802"
; GFX803: .amdgcn_target "amdgcn-amd-amdhsa--gfx803"
; GFX805: .amdgcn_target "amdgcn-amd-amdhsa--gfx805"
; GFX810: .amdgcn_target "amdgcn-amd-amdhsa--gfx810"
; GFX810-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx810:xnack-"
; GFX810-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx810:xnack+"
; GFX900: .amdgcn_target "amdgcn-amd-amdhsa--gfx900"
; GFX900-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-"
; GFX900-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+"
; GFX902: .amdgcn_target "amdgcn-amd-amdhsa--gfx902"
; GFX902-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx902:xnack-"
; GFX902-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx902:xnack+"
; GFX904: .amdgcn_target "amdgcn-amd-amdhsa--gfx904"
; GFX904-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx904:xnack-"
; GFX904-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx904:xnack+"
; GFX906: .amdgcn_target "amdgcn-amd-amdhsa--gfx906"
; XNACK-GFX900: .amdgcn_target "amdgcn-amd-amdhsa--gfx900+xnack"
; NO-XNACK-GFX902: .amdgcn_target "amdgcn-amd-amdhsa--gfx902"
; SRAM-ECC-GFX904: .amdgcn_target "amdgcn-amd-amdhsa--gfx904+sramecc"
; SRAM-ECC-GFX906: "amdgcn-amd-amdhsa--gfx906+sramecc"
; SRAM-ECC-XNACK-GFX904: .amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack+sramecc"
; SRAM-ECC-XNACK-GFX906: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+xnack+sramecc"
; GFX906-NOSRAMECC: .amdgcn_target "amdgcn-amd-amdhsa--gfx906:sramecc-"
; GFX906-SRAMECC: .amdgcn_target "amdgcn-amd-amdhsa--gfx906:sramecc+"
; GFX906-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906:xnack-"
; GFX906-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906:xnack+"
; GFX906-NOSRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906:sramecc-:xnack-"
; GFX906-SRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906:sramecc+:xnack-"
; GFX906-NOSRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906:sramecc-:xnack+"
; GFX906-SRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906:sramecc+:xnack+"
; GFX908: .amdgcn_target "amdgcn-amd-amdhsa--gfx908"
; GFX908-NOSRAMECC: .amdgcn_target "amdgcn-amd-amdhsa--gfx908:sramecc-"
; GFX908-SRAMECC: .amdgcn_target "amdgcn-amd-amdhsa--gfx908:sramecc+"
; GFX908-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908:xnack-"
; GFX908-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908:xnack+"
; GFX908-NOSRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908:sramecc-:xnack-"
; GFX908-SRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908:sramecc+:xnack-"
; GFX908-NOSRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908:sramecc-:xnack+"
; GFX908-SRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908:sramecc+:xnack+"
; GFX909: .amdgcn_target "amdgcn-amd-amdhsa--gfx909"
; GFX909-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx909:xnack-"
; GFX909-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx909:xnack+"
; GFX90C: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c"
; GFX90C-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c:xnack-"
; GFX90C-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c:xnack+"
; GFX1010: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010"
; GFX1010-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010:xnack-"
; GFX1010-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010:xnack+"
; GFX1011: .amdgcn_target "amdgcn-amd-amdhsa--gfx1011"
; GFX1011-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1011:xnack-"
; GFX1011-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1011:xnack+"
; GFX1012: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012"
; GFX1012-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012:xnack-"
; GFX1012-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012:xnack+"
; GFX1030: .amdgcn_target "amdgcn-amd-amdhsa--gfx1030"
; GFX1031: .amdgcn_target "amdgcn-amd-amdhsa--gfx1031"
; GFX1032: .amdgcn_target "amdgcn-amd-amdhsa--gfx1032"
; GFX1033: .amdgcn_target "amdgcn-amd-amdhsa--gfx1033"
define amdgpu_kernel void @directive_amdgcn_target() {
ret void

View File

@ -1,30 +1,41 @@
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=NO-SRAM-ECC-GFX906 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=SRAM-ECC-GFX906 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 -mattr=-sramecc < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=NO-SRAM-ECC-GFX906 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 -mattr=+sramecc < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=SRAM-ECC-GFX906 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 -mattr=+sramecc,+xnack < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=SRAM-ECC-XNACK-GFX906 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx908 < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=SRAM-ECC-GFX908 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx908 -mattr=+sramecc < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=SRAM-ECC-GFX908 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx90a < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=SRAM-ECC-GFX90A %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx90a < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=SRAM-ECC-GFX90A %s
; NO-SRAM-ECC-GFX906: Flags [
; NO-SRAM-ECC-GFX906-NEXT: EF_AMDGPU_FEATURE_XNACK_V3 (0x100)
; NO-SRAM-ECC-GFX906-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F)
; NO-SRAM-ECC-GFX906-NEXT: ]
; SRAM-ECC-GFX906: Flags [
; SRAM-ECC-GFX906-NEXT: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200)
; SRAM-ECC-GFX906-NEXT: EF_AMDGPU_FEATURE_XNACK_V3 (0x100)
; SRAM-ECC-GFX906-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F)
; SRAM-ECC-GFX906-NEXT: EF_AMDGPU_SRAM_ECC (0x200)
; SRAM-ECC-GFX906-NEXT: ]
; SRAM-ECC-XNACK-GFX906: Flags [
; SRAM-ECC-XNACK-GFX906-NEXT: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200)
; SRAM-ECC-XNACK-GFX906-NEXT: EF_AMDGPU_FEATURE_XNACK_V3 (0x100)
; SRAM-ECC-XNACK-GFX906-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F)
; SRAM-ECC-XNACK-GFX906-NEXT: EF_AMDGPU_SRAM_ECC (0x200)
; SRAM-ECC-XNACK-GFX906-NEXT: EF_AMDGPU_XNACK (0x100)
; SRAM-ECC-XNACK-GFX906-NEXT: ]
; SRAM-ECC-GFX908: Flags [ (0x230)
; SRAM-ECC-GFX908: Flags [
; SRAM-ECC-GFX908: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200)
; SRAM-ECC-GFX908: EF_AMDGPU_MACH_AMDGCN_GFX908 (0x30)
; SRAM-ECC-GFX908: EF_AMDGPU_SRAM_ECC (0x200)
; SRAM-ECC-GFX908: ]
; SRAM-ECC-GFX90A: Flags [
; SRAM-ECC-GFX90A: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200)
; SRAM-ECC-GFX90A: EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)
; SRAM-ECC-GFX90A: ]
define amdgpu_kernel void @elf_header() {
ret void
}

View File

@ -1,14 +1,16 @@
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx801 -mattr=-xnack < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=NO-XNACK-GFX801 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx802 -mattr=+xnack < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=XNACK-GFX802 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx801 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=XNACK-GFX801 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx801 -mattr=+xnack < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=XNACK-GFX801 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx802 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=NO-XNACK-GFX802 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx802 -mattr=-xnack < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=NO-XNACK-GFX802 %s
; NO-XNACK-GFX801: Flags [
; NO-XNACK-GFX801-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX801 (0x28)
; NO-XNACK-GFX801-NEXT: ]
; XNACK-GFX801: Flags [
; XNACK-GFX801-NEXT: EF_AMDGPU_FEATURE_XNACK_V3 (0x100)
; XNACK-GFX801-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX801 (0x28)
; XNACK-GFX801-NEXT: ]
; XNACK-GFX802: Flags [
; XNACK-GFX802-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX802 (0x29)
; XNACK-GFX802-NEXT: EF_AMDGPU_XNACK (0x100)
; XNACK-GFX802-NEXT: ]
; NO-XNACK-GFX802: Flags [
; NO-XNACK-GFX802-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX802 (0x29)
; NO-XNACK-GFX802-NEXT: ]
define amdgpu_kernel void @elf_header() {
ret void

View File

@ -13,11 +13,11 @@
; NONE: OS/ABI: SystemV (0x0)
; HSA: OS/ABI: AMDGPU_HSA (0x40)
; HSA: ABIVersion: 1
; HSA: ABIVersion: 2
; PAL: OS/ABI: AMDGPU_PAL (0x41)
; PAL: ABIVersion: 0
; MESA3D: OS/ABI: AMDGPU_MESA3D (0x42)
; MESA3D: ABIVersion: 0
; MESA3D: ABIVersion: 0
define amdgpu_kernel void @elf_header() {
ret void

View File

@ -16,13 +16,13 @@
; OSABI-UNK-NOT: .amd_amdgpu_pal_metadata
; OSABI-UNK-ELF-NOT: Unknown note type
; OSABI-UNK-ELF: NT_AMD_AMDGPU_ISA (ISA Version)
; OSABI-UNK-ELF: ISA Version:
; OSABI-UNK-ELF: NT_AMD_HSA_ISA_NAME (AMD HSA ISA Name)
; OSABI-UNK-ELF: AMD HSA ISA Name:
; OSABI-UNK-ELF: amdgcn-amd-unknown--gfx802
; OSABI-UNK-ELF-NOT: Unknown note type
; OSABI-UNK-ELF-NOT: NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata)
; OSABI-UNK-ELF-NOT: NT_AMD_HSA_METADATA (AMD HSA Metadata)
; OSABI-UNK-ELF-NOT: Unknown note type
; OSABI-UNK-ELF-NOT: NT_AMD_AMDGPU_PAL_METADATA (PAL Metadata)
; OSABI-UNK-ELF-NOT: NT_AMD_PAL_METADATA (AMD PAL Metadata)
; OSABI-UNK-ELF-NOT: Unknown note type
; OSABI-HSA: .hsa_code_object_version
@ -31,12 +31,12 @@
; OSABI-HSA: .amd_amdgpu_hsa_metadata
; OSABI-HSA-NOT: .amd_amdgpu_pal_metadata
; OSABI-HSA-ELF: Unknown note type: (0x00000001)
; OSABI-HSA-ELF: Unknown note type: (0x00000003)
; OSABI-HSA-ELF: NT_AMD_AMDGPU_ISA (ISA Version)
; OSABI-HSA-ELF: ISA Version:
; OSABI-HSA-ELF: NT_AMD_HSA_CODE_OBJECT_VERSION (AMD HSA Code Object Version)
; OSABI-HSA-ELF: NT_AMD_HSA_ISA_VERSION (AMD HSA ISA Version)
; OSABI-HSA-ELF: NT_AMD_HSA_ISA_NAME (AMD HSA ISA Name)
; OSABI-HSA-ELF: AMD HSA ISA Name:
; OSABI-HSA-ELF: amdgcn-amd-amdhsa--gfx802
; OSABI-HSA-ELF: NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata)
; OSABI-HSA-ELF: NT_AMD_HSA_METADATA (AMD HSA Metadata)
; OSABI-HSA-ELF: HSA Metadata:
; OSABI-HSA-ELF: ---
; OSABI-HSA-ELF: Version: [ 1, 0 ]
@ -51,18 +51,18 @@
; OSABI-HSA-ELF: WavefrontSize: 64
; OSABI-HSA-ELF: NumSGPRs: 96
; OSABI-HSA-ELF: ...
; OSABI-HSA-ELF-NOT: NT_AMD_AMDGPU_PAL_METADATA (PAL Metadata)
; OSABI-HSA-ELF-NOT: NT_AMD_PAL_METADATA (AMD PAL Metadata)
; OSABI-PAL-NOT: .hsa_code_object_version
; OSABI-PAL: .hsa_code_object_isa
; OSABI-PAL: .amd_amdgpu_isa "amdgcn-amd-amdpal--gfx802"
; OSABI-PAL-NOT: .amd_amdgpu_hsa_metadata
; OSABI-PAL-ELF: Unknown note type: (0x00000003)
; OSABI-PAL-ELF: NT_AMD_AMDGPU_ISA (ISA Version)
; OSABI-PAL-ELF: ISA Version:
; OSABI-PAL-ELF: NT_AMD_HSA_ISA_VERSION (AMD HSA ISA Version)
; OSABI-PAL-ELF: NT_AMD_HSA_ISA_NAME (AMD HSA ISA Name)
; OSABI-PAL-ELF: AMD HSA ISA Name:
; OSABI-PAL-ELF: amdgcn-amd-amdpal--gfx802
; OSABI-PAL-ELF-NOT: NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata)
; OSABI-PAL-ELF-NOT: NT_AMD_HSA_METADATA (AMD HSA Metadata)
; OSABI-PAL-ELF: NT_AMDGPU_METADATA (AMDGPU Metadata)
; OSABI-PAL-ELF: AMDGPU Metadata:
; OSABI-PAL-ELF: amdpal.pipelines:

View File

@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global,-xnack -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s

View File

@ -1,20 +1,27 @@
; RUN: llc -march=amdgcn -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefix=VI-NOXNACK -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=carrizo -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefix=VI-NOXNACK -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=stoney -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefix=VI-NOXNACK -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=carrizo -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-NOXNACK,GCN %s
; RUN: llc -march=amdgcn -mcpu=stoney -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-NOXNACK,GCN %s
; RUN: llc -march=amdgcn -mcpu=carrizo -mattr=+xnack -verify-machineinstrs < %s | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=stoney -mattr=+xnack -verify-machineinstrs < %s | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=HSA-CI -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo --amdhsa-code-object-version=2 -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefix=HSA-VI-NOXNACK -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo --amdhsa-code-object-version=2 -mattr=+xnack -verify-machineinstrs < %s | FileCheck -check-prefix=HSA-VI-XNACK -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,HSA-CI-V2,GCN %s
; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo --amdhsa-code-object-version=2 -mattr=+xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-XNACK,HSA-VI-XNACK-V2,GCN %s
; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-NOXNACK,HSA-VI-NOXNACK,GCN %s
; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=+xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-XNACK,HSA-VI-XNACK,GCN %s
; GCN-LABEL: {{^}}no_vcc_no_flat:
; HSA-CI: is_xnack_enabled = 0
; HSA-VI-NOXNACK: is_xnack_enabled = 0
; HSA-VI-XNACK: is_xnack_enabled = 1
; HSA-CI-V2: is_xnack_enabled = 0
; HSA-VI-XNACK-V2: is_xnack_enabled = 1
; NOT-HSA-CI: .amdhsa_reserve_xnack_mask
; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0
; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1
; CI: ; NumSgprs: 8
; VI-NOXNACK: ; NumSgprs: 8
@ -26,9 +33,13 @@ entry:
}
; GCN-LABEL: {{^}}vcc_no_flat:
; HSA-CI: is_xnack_enabled = 0
; HSA-VI-NOXNACK: is_xnack_enabled = 0
; HSA-VI-XNACK: is_xnack_enabled = 1
; HSA-CI-V2: is_xnack_enabled = 0
; HSA-VI-XNACK-V2: is_xnack_enabled = 1
; NOT-HSA-CI: .amdhsa_reserve_xnack_mask
; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0
; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1
; CI: ; NumSgprs: 10
; VI-NOXNACK: ; NumSgprs: 10
@ -40,16 +51,17 @@ entry:
}
; GCN-LABEL: {{^}}no_vcc_flat:
; HSA-CI: is_xnack_enabled = 0
; HSA-VI-NOXNACK: is_xnack_enabled = 0
; HSA-VI-XNACK: is_xnack_enabled = 1
; HSA-CI-V2: is_xnack_enabled = 0
; HSA-VI-XNACK-V2: is_xnack_enabled = 1
; NOT-HSA-CI: .amdhsa_reserve_xnack_mask
; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0
; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1
; CI: ; NumSgprs: 12
; VI-NOXNACK: ; NumSgprs: 14
; VI-XNACK: ; NumSgprs: 14
; HSA-CI: ; NumSgprs: 12
; HSA-VI-NOXNACK: ; NumSgprs: 14
; HSA-VI-XNACK: ; NumSgprs: 14
define amdgpu_kernel void @no_vcc_flat() {
entry:
call void asm sideeffect "", "~{s7},~{flat_scratch}"()
@ -57,15 +69,17 @@ entry:
}
; GCN-LABEL: {{^}}vcc_flat:
; HSA-NOXNACK: is_xnack_enabled = 0
; HSA-XNACK: is_xnack_enabled = 1
; HSA-CI-V2: is_xnack_enabled = 0
; HSA-VI-XNACK-V2: is_xnack_enabled = 1
; NOT-HSA-CI: .amdhsa_reserve_xnack_mask
; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0
; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1
; CI: ; NumSgprs: 12
; VI-NOXNACK: ; NumSgprs: 14
; VI-XNACK: ; NumSgprs: 14
; HSA-CI: ; NumSgprs: 12
; HSA-VI-NOXNACK: ; NumSgprs: 14
; HSA-VI-XNACK: ; NumSgprs: 14
define amdgpu_kernel void @vcc_flat() {
entry:
call void asm sideeffect "", "~{s7},~{vcc},~{flat_scratch}"()
@ -76,6 +90,14 @@ entry:
; scratch usage and implicit flat uses.
; GCN-LABEL: {{^}}use_flat_scr:
; HSA-CI-V2: is_xnack_enabled = 0
; HSA-VI-XNACK-V2: is_xnack_enabled = 1
; NOT-HSA-CI: .amdhsa_reserve_xnack_mask
; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0
; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1
; CI: NumSgprs: 4
; VI-NOXNACK: NumSgprs: 6
; VI-XNACK: NumSgprs: 6
@ -86,6 +108,14 @@ entry:
}
; GCN-LABEL: {{^}}use_flat_scr_lo:
; HSA-CI-V2: is_xnack_enabled = 0
; HSA-VI-XNACK-V2: is_xnack_enabled = 1
; NOT-HSA-CI: .amdhsa_reserve_xnack_mask
; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0
; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1
; CI: NumSgprs: 4
; VI-NOXNACK: NumSgprs: 6
; VI-XNACK: NumSgprs: 6
@ -96,6 +126,14 @@ entry:
}
; GCN-LABEL: {{^}}use_flat_scr_hi:
; HSA-CI-V2: is_xnack_enabled = 0
; HSA-VI-XNACK-V2: is_xnack_enabled = 1
; NOT-HSA-CI: .amdhsa_reserve_xnack_mask
; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0
; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1
; CI: NumSgprs: 4
; VI-NOXNACK: NumSgprs: 6
; VI-XNACK: NumSgprs: 6

View File

@ -1,5 +1,5 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
; CHECK: ---
; CHECK: amdhsa.kernels:

View File

@ -1,9 +1,9 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
%struct.A = type { i8, float }
%opencl.image1d_t = type opaque

View File

@ -1,6 +1,6 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
; CHECK: ---
; CHECK: amdhsa.kernels:

View File

@ -1,5 +1,5 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
; CHECK: ---
; CHECK: amdhsa.kernels:

View File

@ -1,5 +1,5 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
; CHECK: ---
; CHECK: amdhsa.kernels:

View File

@ -1,6 +1,6 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
%opencl.image1d_t = type opaque
%opencl.image1d_array_t = type opaque

View File

@ -1,4 +1,4 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
; Make sure llc does not crash for invalid opencl version metadata.

View File

@ -1,4 +1,4 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
; Make sure llc does not crash for invalid opencl version metadata.

View File

@ -1,4 +1,4 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
; Make sure llc does not crash for invalid opencl version metadata.

View File

@ -1,13 +1,10 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,GFX10-32 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,GFX10-64 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,GFX10-32 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,GFX10-64 %s
; GCN: ---
; GCN: Kernels:
; GCN: - Name: wavefrontsize
; GCN: CodeProps:
; GFX10-32: WavefrontSize: 32
; GFX10-64: WavefrontSize: 64
; GCN: ...
; GCN: amdhsa.kernels:
; GCN: .name: wavefrontsize
; GFX10-32: .wavefront_size: 32
; GFX10-64: .wavefront_size: 64
define amdgpu_kernel void @wavefrontsize() {
entry:
ret void

View File

@ -1,60 +1,59 @@
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx600 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-SI600 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx601 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-SI601 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI700 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx701 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI701 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx702 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI702 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx703 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx704 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI704 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=bonaire --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI704 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=mullins --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=hawaii --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI701 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kabini --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI700 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefix=HSA --check-prefix=HSA-VI801 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=tonga --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefix=HSA --check-prefix=HSA-VI802 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefix=HSA --check-prefix=HSA-VI803 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=polaris10 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI803 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=polaris11 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI803 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx801 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI801 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx802 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI802 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI803 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx810 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI810 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX900 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX902 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx904 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX904 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX906 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx908 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX908 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx909 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX909 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1010 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1011 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1011 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1012 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1012 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1030 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1030 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1031 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1031 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1032 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1032 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1033 --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1033 %s
; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx600 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=NONHSA-SI600 %s
; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx601 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=NONHSA-SI601 %s
; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx602 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=NONHSA-SI602 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI700 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI700 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx701 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI701 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=hawaii --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI701 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx702 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI702 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx703 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI703 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kabini --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI703 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=mullins --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI703 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx704 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI704 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=bonaire --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI704 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx705 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI705 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx801 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI801 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI801 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx802 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI802 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=iceland --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI802 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=tonga --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI802 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI803 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI803 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=polaris10 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI803 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=polaris11 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI803 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx805 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI805 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=tongapro --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI805 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx810 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI810 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=stoney --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI810 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX900 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-GFX901 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 --amdhsa-code-object-version=2 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX902 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-GFX903 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx904 --amdhsa-code-object-version=2 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX904 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx904 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-GFX905 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 --amdhsa-code-object-version=2 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX906 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-GFX907 %s
; HSA: .hsa_code_object_version 2,1
; HSA-SI600: .hsa_code_object_isa 6,0,0,"AMD","AMDGPU"
; HSA-SI601: .hsa_code_object_isa 6,0,1,"AMD","AMDGPU"
; NONHSA-SI600: .amd_amdgpu_isa "amdgcn-unknown-unknown--gfx600"
; NONHSA-SI601: .amd_amdgpu_isa "amdgcn-unknown-unknown--gfx601"
; NONHSA-SI602: .amd_amdgpu_isa "amdgcn-unknown-unknown--gfx602"
; HSA-CI700: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
; HSA-CI701: .hsa_code_object_isa 7,0,1,"AMD","AMDGPU"
; HSA-CI702: .hsa_code_object_isa 7,0,2,"AMD","AMDGPU"
; HSA-CI703: .hsa_code_object_isa 7,0,3,"AMD","AMDGPU"
; HSA-CI704: .hsa_code_object_isa 7,0,4,"AMD","AMDGPU"
; HSA-CI705: .hsa_code_object_isa 7,0,5,"AMD","AMDGPU"
; HSA-VI801: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
; HSA-VI802: .hsa_code_object_isa 8,0,2,"AMD","AMDGPU"
; HSA-VI803: .hsa_code_object_isa 8,0,3,"AMD","AMDGPU"
; HSA-VI805: .hsa_code_object_isa 8,0,5,"AMD","AMDGPU"
; HSA-VI810: .hsa_code_object_isa 8,1,0,"AMD","AMDGPU"
; HSA-GFX900: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"
; HSA-GFX901: .hsa_code_object_isa 9,0,1,"AMD","AMDGPU"
; HSA-GFX902: .hsa_code_object_isa 9,0,2,"AMD","AMDGPU"
; HSA-GFX903: .hsa_code_object_isa 9,0,3,"AMD","AMDGPU"
; HSA-GFX904: .hsa_code_object_isa 9,0,4,"AMD","AMDGPU"
; HSA-GFX905: .hsa_code_object_isa 9,0,5,"AMD","AMDGPU"
; HSA-GFX906: .hsa_code_object_isa 9,0,6,"AMD","AMDGPU"
; HSA-GFX908: .hsa_code_object_isa 9,0,8,"AMD","AMDGPU"
; HSA-GFX909: .hsa_code_object_isa 9,0,9,"AMD","AMDGPU"
; HSA-GFX1010: .hsa_code_object_isa 10,1,0,"AMD","AMDGPU"
; HSA-GFX1011: .hsa_code_object_isa 10,1,1,"AMD","AMDGPU"
; HSA-GFX1012: .hsa_code_object_isa 10,1,2,"AMD","AMDGPU"
; HSA-GFX1030: .hsa_code_object_isa 10,3,0,"AMD","AMDGPU"
; HSA-GFX1031: .hsa_code_object_isa 10,3,1,"AMD","AMDGPU"
; HSA-GFX1032: .hsa_code_object_isa 10,3,2,"AMD","AMDGPU"
; HSA-GFX1033: .hsa_code_object_isa 10,3,3,"AMD","AMDGPU"
; HSA-GFX907: .hsa_code_object_isa 9,0,7,"AMD","AMDGPU"

View File

@ -4,8 +4,8 @@
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefix=HSA-VI %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj --amdhsa-code-object-version=2 | llvm-readobj -symbols -s -sd - | FileCheck --check-prefix=ELF %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 | llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 | llvm-readobj -symbols -s -sd - | FileCheck %s --check-prefix=ELF
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 -mattr=+wavefrontsize32,-wavefrontsize64 | FileCheck --check-prefix=HSA --check-prefix=GFX10 --check-prefix=GFX10-W32 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 -mattr=-wavefrontsize32,+wavefrontsize64 | FileCheck --check-prefix=HSA --check-prefix=GFX10 --check-prefix=GFX10-W64 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W32 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W64 %s
; The SHT_NOTE section contains the output from the .hsa_code_object_*
; directives.
@ -49,12 +49,10 @@
; HSA: enable_sgpr_kernarg_segment_ptr = 1
; PRE-GFX10: enable_wavefront_size32 = 0
; GFX10-W32: enable_wavefront_size32 = 1
; GFX10-W64: enable_wavefront_size32 = 0
; GFX10-W32: .amdhsa_wavefront_size32 1
; GFX10-W64: .amdhsa_wavefront_size32 0
; PRE-GFX10: wavefront_size = 6
; GFX10-W32: wavefront_size = 5
; GFX10-W64: wavefront_size = 6
; HSA: call_convention = -1
; HSA: .end_amd_kernel_code_t
@ -66,7 +64,7 @@
; HSA-VI: s_mov_b32 s[[HI:[0-9]]], 0x1100f000
; Make sure we generate flat store for HSA
; PRE-GFX10: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
; GFX10: global_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}
; GFX10: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, off
; HSA: .Lfunc_end0:
; HSA: .size simple, .Lfunc_end0-simple

View File

@ -0,0 +1,21 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefix=HSA %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefix=HSA %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefix=HSA %s
declare void @llvm.trap() #0
declare void @llvm.debugtrap() #1
; HSA: .amdhsa_kernel trap
; HSA-NEXT: .amdhsa_group_segment_fixed_size 0
; HSA-NEXT: .amdhsa_private_segment_fixed_size 0
; HSA-NEXT: .amdhsa_kernarg_size 8
; HSA-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; HSA: .end_amdhsa_kernel
define amdgpu_kernel void @trap(i32 addrspace(1)* nocapture readonly %arg0) {
store volatile i32 1, i32 addrspace(1)* %arg0
call void @llvm.trap()
unreachable
store volatile i32 2, i32 addrspace(1)* %arg0
ret void
}

View File

@ -1,9 +1,9 @@
; RUN: llc -march=amdgcn -mcpu=bonaire -show-mc-encoding < %s | FileCheck --check-prefixes=GCN,CI,ALL %s
; RUN: llc -march=amdgcn -mcpu=carrizo --show-mc-encoding < %s | FileCheck --check-prefixes=GCN,VI,ALL %s
; RUN: llc -march=amdgcn -mcpu=gfx900 --show-mc-encoding < %s | FileCheck --check-prefixes=GCN,GFX9,ALL %s
; RUN: llc -march=amdgcn -mcpu=bonaire -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 < %s -mattr=-flat-for-global | FileCheck --check-prefixes=GCNHSA,ALL %s
; RUN: llc -march=amdgcn -mcpu=carrizo -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCNHSA,ALL %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCNHSA,GFX10HSA,ALL %s
; RUN: llc -march=amdgcn -mcpu=bonaire -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=4 < %s -mattr=-flat-for-global | FileCheck --check-prefixes=GCNHSA,ALL %s
; RUN: llc -march=amdgcn -mcpu=carrizo -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=4 -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCNHSA,ALL %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=4 -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCNHSA,GFX10HSA,ALL %s
; FIXME: align on alloca seems to be ignored for private_segment_alignment
@ -19,30 +19,6 @@
; GFX9-DAG: s_mov_b32 s{{[0-9]+}}, 0xe00000
; GCNHSA: .amd_kernel_code_t
; GCNHSA: enable_sgpr_private_segment_wave_byte_offset = 1
; GCNHSA: user_sgpr_count = 8
; GCNHSA: enable_sgpr_workgroup_id_x = 1
; GCNHSA: enable_sgpr_workgroup_id_y = 0
; GCNHSA: enable_sgpr_workgroup_id_z = 0
; GCNHSA: enable_sgpr_workgroup_info = 0
; GCNHSA: enable_vgpr_workitem_id = 0
; GCNHSA: enable_sgpr_private_segment_buffer = 1
; GCNHSA: enable_sgpr_dispatch_ptr = 0
; GCNHSA: enable_sgpr_queue_ptr = 0
; GCNHSA: enable_sgpr_kernarg_segment_ptr = 1
; GCNHSA: enable_sgpr_dispatch_id = 0
; GCNHSA: enable_sgpr_flat_scratch_init = 1
; GCNHSA: enable_sgpr_private_segment_size = 0
; GCNHSA: enable_sgpr_grid_workgroup_count_x = 0
; GCNHSA: enable_sgpr_grid_workgroup_count_y = 0
; GCNHSA: enable_sgpr_grid_workgroup_count_z = 0
; GCNHSA: workitem_private_segment_byte_size = 32772
; GCNHSA: private_segment_alignment = 4
; GCNHSA: .end_amd_kernel_code_t
; GFX10HSA: s_add_u32 [[FLAT_SCR_LO:s[0-9]+]], s{{[0-9]+}}, s{{[0-9]+}}
; GFX10HSA-DAG: s_addc_u32 [[FLAT_SCR_HI:s[0-9]+]], s{{[0-9]+}}, 0
; GFX10HSA-DAG: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), [[FLAT_SCR_LO]]
@ -51,6 +27,39 @@
; GCNHSA: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[0:3], 0 offen
; GCNHSA: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[0:3], 0 offen
; GCNHSA: .amdhsa_kernel large_alloca_compute_shader
; GCNHSA: .amdhsa_group_segment_fixed_size 0
; GCNHSA: .amdhsa_private_segment_fixed_size 32772
; GCNHSA: .amdhsa_user_sgpr_private_segment_buffer 1
; GCNHSA: .amdhsa_user_sgpr_dispatch_ptr 0
; GCNHSA: .amdhsa_user_sgpr_queue_ptr 0
; GCNHSA: .amdhsa_user_sgpr_kernarg_segment_ptr 1
; GCNHSA: .amdhsa_user_sgpr_dispatch_id 0
; GCNHSA: .amdhsa_user_sgpr_flat_scratch_init 1
; GCNHSA: .amdhsa_user_sgpr_private_segment_size 0
; GCNHSA: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; GCNHSA: .amdhsa_system_sgpr_workgroup_id_x 1
; GCNHSA: .amdhsa_system_sgpr_workgroup_id_y 0
; GCNHSA: .amdhsa_system_sgpr_workgroup_id_z 0
; GCNHSA: .amdhsa_system_sgpr_workgroup_info 0
; GCNHSA: .amdhsa_system_vgpr_workitem_id 0
; GCNHSA: .amdhsa_next_free_vgpr 3
; GCNHSA: .amdhsa_next_free_sgpr 10
; GCNHSA: .amdhsa_float_round_mode_32 0
; GCNHSA: .amdhsa_float_round_mode_16_64 0
; GCNHSA: .amdhsa_float_denorm_mode_32 3
; GCNHSA: .amdhsa_float_denorm_mode_16_64 3
; GCNHSA: .amdhsa_dx10_clamp 1
; GCNHSA: .amdhsa_ieee_mode 1
; GCNHSA: .amdhsa_exception_fp_ieee_invalid_op 0
; GCNHSA: .amdhsa_exception_fp_denorm_src 0
; GCNHSA: .amdhsa_exception_fp_ieee_div_zero 0
; GCNHSA: .amdhsa_exception_fp_ieee_overflow 0
; GCNHSA: .amdhsa_exception_fp_ieee_underflow 0
; GCNHSA: .amdhsa_exception_fp_ieee_inexact 0
; GCNHSA: .amdhsa_exception_int_div_zero 0
; GCNHSA: .end_amdhsa_kernel
; Scratch size = alloca size + emergency stack slot, align {{.*}}, addrspace(5)
; ALL: ; ScratchSize: 32772
define amdgpu_kernel void @large_alloca_compute_shader(i32 %x, i32 %y) #0 {

View File

@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -o - -amdgpu-disable-lower-module-lds=true %s 2> %t | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -o - -amdgpu-disable-lower-module-lds=true %s 2> %t | FileCheck -check-prefixes=GFX8 %s
; RUN: FileCheck -check-prefix=ERR %s < %t
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - -amdgpu-disable-lower-module-lds=true %s 2> %t | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - -amdgpu-disable-lower-module-lds=true %s 2> %t | FileCheck -check-prefixes=GFX9 %s
; RUN: FileCheck -check-prefix=ERR %s < %t
@lds = internal addrspace(3) global float undef, align 4
@ -25,7 +25,6 @@ define void @func_use_lds_global() {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: ds_write_b32 v0, v0
; GFX9-NEXT: s_mov_b64 s[0:1], s[6:7]
; GFX9-NEXT: s_trap 2
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
@ -35,12 +34,18 @@ define void @func_use_lds_global() {
; ERR: warning: <unknown>:0:0: in function func_use_lds_global_constexpr_cast void (): local memory global used by non-kernel function
define void @func_use_lds_global_constexpr_cast() {
; GCN-LABEL: func_use_lds_global_constexpr_cast:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[0:1], s[6:7]
; GCN-NEXT: s_trap 2
; GCN-NEXT: s_setpc_b64 s[30:31]
; GFX8-LABEL: func_use_lds_global_constexpr_cast:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b64 s[0:1], s[6:7]
; GFX8-NEXT: s_trap 2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_use_lds_global_constexpr_cast:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_trap 2
; GFX9-NEXT: s_setpc_b64 s[30:31]
store i32 ptrtoint (float addrspace(3)* @lds to i32), i32 addrspace(1)* undef, align 4
ret void
}

View File

@ -2,8 +2,6 @@
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX9 %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=VI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=CI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=VI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=CI %s
define amdgpu_kernel void @s_lshr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %lhs, <2 x i16> %rhs) #0 {
; GFX9-LABEL: s_lshr_v2i16:

View File

@ -1,5 +1,5 @@
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -mattr=-flat-for-global,-xnack -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; TODO: Some of those tests fail with OS == amdhsa due to unreasonable register
; allocation differences.

View File

@ -1,5 +1,5 @@
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -mattr=-flat-for-global,-xnack -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; SI-LABEL: {{^}}s_mulk_i32_k0:
; SI: s_load_dword [[VAL:s[0-9]+]]

View File

@ -1,11 +1,13 @@
; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+sramecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-sramecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
; RUN: llc -march=amdgcn -mcpu=gfx902 -mattr=+sramecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
; RUN: llc -march=amdgcn -mcpu=gfx904 -mattr=+sramecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=+sramecc < %s | FileCheck -check-prefixes=GCN,ECC %s
; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=-sramecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
; Make sure the correct set of targets are marked with
; FeatureDoesNotSupportSRAMECC, and +sram-ecc is ignored if it's never
; FeatureDoesNotSupportSRAMECC, and +sramecc is ignored if it's never
; supported.
; GCN-LABEL: {{^}}load_global_hi_v2i16_reglo_vreg:

View File

@ -1,5 +1,5 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefix=VI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji --amdhsa-code-object-version=3 < %s | FileCheck -check-prefix=VI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 < %s | FileCheck -check-prefix=GFX9 %s
; Make sure the stack is never realigned for entry functions.
@ -20,6 +20,7 @@ define amdgpu_kernel void @max_alignment_128() #0 {
; VI-NEXT: .amdhsa_kernel max_alignment_128
; VI-NEXT: .amdhsa_group_segment_fixed_size 0
; VI-NEXT: .amdhsa_private_segment_fixed_size 256
; VI-NEXT: .amdhsa_kernarg_size 0
; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 0
@ -67,6 +68,7 @@ define amdgpu_kernel void @max_alignment_128() #0 {
; GFX9-NEXT: .amdhsa_kernel max_alignment_128
; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0
; GFX9-NEXT: .amdhsa_private_segment_fixed_size 256
; GFX9-NEXT: .amdhsa_kernarg_size 0
; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 0
@ -83,6 +85,7 @@ define amdgpu_kernel void @max_alignment_128() #0 {
; GFX9-NEXT: .amdhsa_next_free_vgpr 1
; GFX9-NEXT: .amdhsa_next_free_sgpr 8
; GFX9-NEXT: .amdhsa_reserve_vcc 0
; GFX9-NEXT: .amdhsa_reserve_xnack_mask 1
; GFX9-NEXT: .amdhsa_float_round_mode_32 0
; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0
; GFX9-NEXT: .amdhsa_float_denorm_mode_32 3
@ -121,6 +124,7 @@ define amdgpu_kernel void @stackrealign_attr() #1 {
; VI-NEXT: .amdhsa_kernel stackrealign_attr
; VI-NEXT: .amdhsa_group_segment_fixed_size 0
; VI-NEXT: .amdhsa_private_segment_fixed_size 8
; VI-NEXT: .amdhsa_kernarg_size 0
; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 0
@ -168,6 +172,7 @@ define amdgpu_kernel void @stackrealign_attr() #1 {
; GFX9-NEXT: .amdhsa_kernel stackrealign_attr
; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0
; GFX9-NEXT: .amdhsa_private_segment_fixed_size 8
; GFX9-NEXT: .amdhsa_kernarg_size 0
; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 0
@ -184,6 +189,7 @@ define amdgpu_kernel void @stackrealign_attr() #1 {
; GFX9-NEXT: .amdhsa_next_free_vgpr 1
; GFX9-NEXT: .amdhsa_next_free_sgpr 8
; GFX9-NEXT: .amdhsa_reserve_vcc 0
; GFX9-NEXT: .amdhsa_reserve_xnack_mask 1
; GFX9-NEXT: .amdhsa_float_round_mode_32 0
; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0
; GFX9-NEXT: .amdhsa_float_denorm_mode_32 3
@ -222,6 +228,7 @@ define amdgpu_kernel void @alignstack_attr() #2 {
; VI-NEXT: .amdhsa_kernel alignstack_attr
; VI-NEXT: .amdhsa_group_segment_fixed_size 0
; VI-NEXT: .amdhsa_private_segment_fixed_size 128
; VI-NEXT: .amdhsa_kernarg_size 0
; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 0
@ -269,6 +276,7 @@ define amdgpu_kernel void @alignstack_attr() #2 {
; GFX9-NEXT: .amdhsa_kernel alignstack_attr
; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0
; GFX9-NEXT: .amdhsa_private_segment_fixed_size 128
; GFX9-NEXT: .amdhsa_kernarg_size 0
; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 0
@ -285,6 +293,7 @@ define amdgpu_kernel void @alignstack_attr() #2 {
; GFX9-NEXT: .amdhsa_next_free_vgpr 1
; GFX9-NEXT: .amdhsa_next_free_sgpr 8
; GFX9-NEXT: .amdhsa_reserve_vcc 0
; GFX9-NEXT: .amdhsa_reserve_xnack_mask 1
; GFX9-NEXT: .amdhsa_float_round_mode_32 0
; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0
; GFX9-NEXT: .amdhsa_float_denorm_mode_32 3

View File

@ -0,0 +1,30 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900"
; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx900
; ASM: amdhsa.version:
; ASM: - 1
; ASM: - 1
; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF: ABIVersion: 2
; ELF: Flags [ (0x12C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ANY_V4 (0x100)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
; ELF-NEXT: ]
define void @func0() {
entry:
ret void
}
define void @func1() {
entry:
ret void
}
define void @func2() {
entry:
ret void
}

View File

@ -0,0 +1,29 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx700"
; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx700
; ASM: amdhsa.version:
; ASM: - 1
; ASM: - 1
; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF: ABIVersion: 2
; ELF: Flags [ (0x22)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX700 (0x22)
; ELF-NEXT: ]
define void @func0() {
entry:
ret void
}
define void @func1() {
entry:
ret void
}
define void @func2() {
entry:
ret void
}

View File

@ -0,0 +1,32 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-"
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-'
; ASM: amdhsa.version:
; ASM: - 1
; ASM: - 1
; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF: ABIVersion: 2
; ELF: Flags [ (0x22C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_OFF_V4 (0x200)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
; ELF-NEXT: ]
define void @func0() #0 {
entry:
ret void
}
define void @func1() #0 {
entry:
ret void
}
define void @func2() #0 {
entry:
ret void
}
attributes #0 = { "target-features"="-xnack" }

View File

@ -0,0 +1,32 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+"
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+'
; ASM: amdhsa.version:
; ASM: - 1
; ASM: - 1
; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF: ABIVersion: 2
; ELF: Flags [ (0x32C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ON_V4 (0x300)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
; ELF-NEXT: ]
define void @func0() #0 {
entry:
ret void
}
define void @func1() #0 {
entry:
ret void
}
define void @func2() #0 {
entry:
ret void
}
attributes #0 = { "target-features"="+xnack" }

View File

@ -0,0 +1,32 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-"
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-'
; ASM: amdhsa.version:
; ASM: - 1
; ASM: - 1
; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF: ABIVersion: 2
; ELF: Flags [ (0x22C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_OFF_V4 (0x200)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
; ELF-NEXT: ]
define void @func0() {
entry:
ret void
}
define void @func1() #0 {
entry:
ret void
}
define void @func2() {
entry:
ret void
}
attributes #0 = { "target-features"="-xnack" }

View File

@ -0,0 +1,32 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-"
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-'
; ASM: amdhsa.version:
; ASM: - 1
; ASM: - 1
; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF: ABIVersion: 2
; ELF: Flags [ (0x22C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_OFF_V4 (0x200)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
; ELF-NEXT: ]
define void @func0() #0 {
entry:
ret void
}
define void @func1() {
entry:
ret void
}
define void @func2() {
entry:
ret void
}
attributes #0 = { "target-features"="-xnack" }

View File

@ -0,0 +1,32 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+"
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+'
; ASM: amdhsa.version:
; ASM: - 1
; ASM: - 1
; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF: ABIVersion: 2
; ELF: Flags [ (0x32C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ON_V4 (0x300)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
; ELF-NEXT: ]
define void @func0() {
entry:
ret void
}
define void @func1() #0 {
entry:
ret void
}
define void @func2() {
entry:
ret void
}
attributes #0 = { "target-features"="+xnack" }

View File

@ -0,0 +1,32 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+"
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+'
; ASM: amdhsa.version:
; ASM: - 1
; ASM: - 1
; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF: ABIVersion: 2
; ELF: Flags [ (0x32C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ON_V4 (0x300)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
; ELF-NEXT: ]
define void @func0() #0 {
entry:
ret void
}
define void @func1() {
entry:
ret void
}
define void @func2() {
entry:
ret void
}
attributes #0 = { "target-features"="+xnack" }

View File

@ -0,0 +1,21 @@
; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s 2>&1 | FileCheck --check-prefixes=ERR %s
; ERR: error: xnack setting of 'func2' function does not match module xnack setting
define void @func0() {
entry:
ret void
}
define void @func1() #0 {
entry:
ret void
}
define void @func2() #1 {
entry:
ret void
}
attributes #0 = { "target-features"="-xnack" }
attributes #1 = { "target-features"="+xnack" }

View File

@ -0,0 +1,20 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900"
; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx900
; ASM: amdhsa.version:
; ASM: - 1
; ASM: - 1
; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF: ABIVersion: 2
; ELF: Flags [ (0x12C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ANY_V4 (0x100)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
; ELF-NEXT: ]
define void @func0() {
entry:
ret void
}

View File

@ -0,0 +1,19 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx700"
; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx700
; ASM: amdhsa.version:
; ASM: - 1
; ASM: - 1
; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF: ABIVersion: 2
; ELF: Flags [ (0x22)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX700 (0x22)
; ELF-NEXT: ]
define void @func0() {
entry:
ret void
}

View File

@ -0,0 +1,22 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-"
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-'
; ASM: amdhsa.version:
; ASM: - 1
; ASM: - 1
; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF: ABIVersion: 2
; ELF: Flags [ (0x22C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_OFF_V4 (0x200)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
; ELF-NEXT: ]
define void @func0() #0 {
entry:
ret void
}
attributes #0 = { "target-features"="-xnack" }

View File

@ -0,0 +1,22 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+"
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+'
; ASM: amdhsa.version:
; ASM: - 1
; ASM: - 1
; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF: ABIVersion: 2
; ELF: Flags [ (0x32C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ON_V4 (0x300)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
; ELF-NEXT: ]
define void @func0() #0 {
entry:
ret void
}
attributes #0 = { "target-features"="+xnack" }

File diff suppressed because it is too large Load Diff

View File

@ -1,16 +1,16 @@
// RUN: not llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx803 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX8,NONGFX10,AMDHSA
// RUN: not llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX10,AMDHSA
// RUN: not llvm-mc -triple amdgcn-amd- -mcpu=gfx803 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,NONAMDHSA
// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX8,NONGFX10,AMDHSA
// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX10,AMDHSA
// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd- -mcpu=gfx810 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,NONAMDHSA
// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GFX90A,NONGFX10,AMDHSA,ALL
.text
// GCN-LABEL: warning: test_target
// GFX8-NOT: error:
// GFX10: error: target must match options
// NONAMDHSA: error: unknown directive
// GFX10: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810+xnack does not match the specified target id amdgcn-amd-amdhsa--gfx1010+xnack
// NONAMDHSA: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810+xnack does not match the specified target id amdgcn-amd-unknown--gfx810
.warning "test_target"
.amdgcn_target "amdgcn-amd-amdhsa--gfx803+xnack"
.amdgcn_target "amdgcn-amd-amdhsa--gfx810+xnack"
// GCN-LABEL: warning: test_amdhsa_kernel_no_name
// GCN: error: unknown directive

View File

@ -1,5 +1,5 @@
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack -filetype=obj < %s > %t
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=3 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=3 -mattr=+xnack -filetype=obj < %s > %t
// RUN: llvm-readobj -elf-output-style=GNU -sections -symbols -relocations %t | FileCheck --check-prefix=READOBJ %s
// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
@ -28,7 +28,7 @@
// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0030 0000ac60 80000000 00000000 00000000
// complete
// OBJDUMP-NEXT: 0040 01000000 01000000 00000000 00000000
// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000
// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0070 015001e4 1f0f007f 7f040000 00000000
@ -80,6 +80,7 @@ special_sgpr:
.amdhsa_kernel complete
.amdhsa_group_segment_fixed_size 1
.amdhsa_private_segment_fixed_size 1
.amdhsa_kernarg_size 8
.amdhsa_user_sgpr_private_segment_buffer 1
.amdhsa_user_sgpr_dispatch_ptr 1
.amdhsa_user_sgpr_queue_ptr 1
@ -98,7 +99,7 @@ special_sgpr:
.amdhsa_next_free_sgpr 27
.amdhsa_reserve_vcc 0
.amdhsa_reserve_flat_scratch 0
.amdhsa_reserve_xnack_mask 0
.amdhsa_reserve_xnack_mask 1
.amdhsa_float_round_mode_32 1
.amdhsa_float_round_mode_16_64 1
.amdhsa_float_denorm_mode_32 1
@ -121,6 +122,7 @@ special_sgpr:
// ASM: .amdhsa_kernel complete
// ASM-NEXT: .amdhsa_group_segment_fixed_size 1
// ASM-NEXT: .amdhsa_private_segment_fixed_size 1
// ASM-NEXT: .amdhsa_kernarg_size 8
// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
@ -139,7 +141,7 @@ special_sgpr:
// ASM-NEXT: .amdhsa_next_free_sgpr 27
// ASM-NEXT: .amdhsa_reserve_vcc 0
// ASM-NEXT: .amdhsa_reserve_flat_scratch 0
// ASM-NEXT: .amdhsa_reserve_xnack_mask 0
// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
// ASM-NEXT: .amdhsa_float_round_mode_32 1
// ASM-NEXT: .amdhsa_float_round_mode_16_64 1
// ASM-NEXT: .amdhsa_float_denorm_mode_32 1
@ -169,7 +171,7 @@ special_sgpr:
.amdhsa_reserve_flat_scratch 1
.amdhsa_reserve_vcc 0
.amdhsa_reserve_xnack_mask 0
.amdhsa_reserve_xnack_mask 1
.amdhsa_float_denorm_mode_16_64 0
.amdhsa_dx10_clamp 0
@ -181,7 +183,7 @@ special_sgpr:
// ASM: .amdhsa_next_free_vgpr 0
// ASM-NEXT: .amdhsa_next_free_sgpr 27
// ASM-NEXT: .amdhsa_reserve_vcc 0
// ASM-NEXT: .amdhsa_reserve_xnack_mask 0
// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
// ASM: .amdhsa_float_denorm_mode_16_64 0
// ASM-NEXT: .amdhsa_dx10_clamp 0
// ASM-NEXT: .amdhsa_ieee_mode 0

View File

@ -1,5 +1,5 @@
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+xnack -filetype=obj < %s > %t
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=3 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=3 -mattr=+xnack -filetype=obj < %s > %t
// RUN: llvm-readelf -sections -symbols -relocations %t | FileCheck --check-prefix=READOBJ %s
// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
@ -31,7 +31,7 @@
// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0030 0000ac00 80000000 00000000 00000000
// complete
// OBJDUMP-NEXT: 0040 01000000 01000000 00000000 00000000
// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000
// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0070 c2500104 1f0f007f 7f000000 00000000
@ -93,6 +93,7 @@ disabled_user_sgpr:
.amdhsa_kernel complete
.amdhsa_group_segment_fixed_size 1
.amdhsa_private_segment_fixed_size 1
.amdhsa_kernarg_size 8
.amdhsa_user_sgpr_private_segment_buffer 1
.amdhsa_user_sgpr_dispatch_ptr 1
.amdhsa_user_sgpr_queue_ptr 1
@ -110,7 +111,7 @@ disabled_user_sgpr:
.amdhsa_next_free_sgpr 27
.amdhsa_reserve_vcc 0
.amdhsa_reserve_flat_scratch 0
.amdhsa_reserve_xnack_mask 0
.amdhsa_reserve_xnack_mask 1
.amdhsa_float_round_mode_32 1
.amdhsa_float_round_mode_16_64 1
.amdhsa_float_denorm_mode_32 1
@ -130,6 +131,7 @@ disabled_user_sgpr:
// ASM: .amdhsa_kernel complete
// ASM-NEXT: .amdhsa_group_segment_fixed_size 1
// ASM-NEXT: .amdhsa_private_segment_fixed_size 1
// ASM-NEXT: .amdhsa_kernarg_size 8
// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
@ -147,7 +149,7 @@ disabled_user_sgpr:
// ASM-NEXT: .amdhsa_next_free_sgpr 27
// ASM-NEXT: .amdhsa_reserve_vcc 0
// ASM-NEXT: .amdhsa_reserve_flat_scratch 0
// ASM-NEXT: .amdhsa_reserve_xnack_mask 0
// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
// ASM-NEXT: .amdhsa_float_round_mode_32 1
// ASM-NEXT: .amdhsa_float_round_mode_16_64 1
// ASM-NEXT: .amdhsa_float_denorm_mode_32 1
@ -174,7 +176,7 @@ disabled_user_sgpr:
.amdhsa_reserve_flat_scratch 1
.amdhsa_reserve_vcc 0
.amdhsa_reserve_xnack_mask 0
.amdhsa_reserve_xnack_mask 1
.amdhsa_float_denorm_mode_16_64 0
.amdhsa_dx10_clamp 0
@ -186,7 +188,7 @@ disabled_user_sgpr:
// ASM: .amdhsa_next_free_vgpr 0
// ASM-NEXT: .amdhsa_next_free_sgpr 27
// ASM-NEXT: .amdhsa_reserve_vcc 0
// ASM-NEXT: .amdhsa_reserve_xnack_mask 0
// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
// ASM: .amdhsa_float_denorm_mode_16_64 0
// ASM-NEXT: .amdhsa_dx10_clamp 0
// ASM-NEXT: .amdhsa_ieee_mode 0

303
test/MC/AMDGPU/hsa-v4.s Normal file
View File

@ -0,0 +1,303 @@
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=4 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=4 -mattr=+xnack -filetype=obj < %s > %t
// RUN: llvm-readelf -sections -symbols -relocations %t | FileCheck --check-prefix=READOBJ %s
// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
// READOBJ: Section Headers
// READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256
// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000100 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64
// READOBJ: Relocation section '.rela.rodata' at offset
// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10
// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110
// READOBJ: 0000000000000090 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 210
// READOBJ: 00000000000000d0 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 310
// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries:
// READOBJ: 0000000000000000 0 FUNC LOCAL PROTECTED 2 minimal
// READOBJ-NEXT: 0000000000000100 0 FUNC LOCAL PROTECTED 2 complete
// READOBJ-NEXT: 0000000000000200 0 FUNC LOCAL PROTECTED 2 special_sgpr
// READOBJ-NEXT: 0000000000000300 0 FUNC LOCAL PROTECTED 2 disabled_user_sgpr
// READOBJ-NEXT: 0000000000000000 64 OBJECT LOCAL DEFAULT 3 minimal.kd
// READOBJ-NEXT: 0000000000000040 64 OBJECT LOCAL DEFAULT 3 complete.kd
// READOBJ-NEXT: 0000000000000080 64 OBJECT LOCAL DEFAULT 3 special_sgpr.kd
// READOBJ-NEXT: 00000000000000c0 64 OBJECT LOCAL DEFAULT 3 disabled_user_sgpr.kd
// OBJDUMP: Contents of section .rodata
// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here.
// minimal
// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0030 0000ac00 80000000 00000000 00000000
// complete
// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000
// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0070 c2500104 1f0f007f 7f000000 00000000
// special_sgpr
// OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 00b0 00010000 80000000 00000000 00000000
// disabled_user_sgpr
// OBJDUMP-NEXT: 00c0 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 00d0 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 00e0 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 00f0 0000ac00 80000000 00000000 00000000
.text
// ASM: .text
.amdgcn_target "amdgcn-amd-amdhsa--gfx904:xnack+"
// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx904:xnack+"
.p2align 8
.type minimal,@function
minimal:
s_endpgm
.p2align 8
.type complete,@function
complete:
s_endpgm
.p2align 8
.type special_sgpr,@function
special_sgpr:
s_endpgm
.p2align 8
.type disabled_user_sgpr,@function
disabled_user_sgpr:
s_endpgm
.rodata
// ASM: .rodata
// Test that only specifying required directives is allowed, and that defaulted
// values are omitted.
.p2align 6
.amdhsa_kernel minimal
.amdhsa_next_free_vgpr 0
.amdhsa_next_free_sgpr 0
.end_amdhsa_kernel
// ASM: .amdhsa_kernel minimal
// ASM: .amdhsa_next_free_vgpr 0
// ASM-NEXT: .amdhsa_next_free_sgpr 0
// ASM: .end_amdhsa_kernel
// Test that we can specify all available directives with non-default values.
.p2align 6
.amdhsa_kernel complete
.amdhsa_group_segment_fixed_size 1
.amdhsa_private_segment_fixed_size 1
.amdhsa_kernarg_size 8
.amdhsa_user_sgpr_private_segment_buffer 1
.amdhsa_user_sgpr_dispatch_ptr 1
.amdhsa_user_sgpr_queue_ptr 1
.amdhsa_user_sgpr_kernarg_segment_ptr 1
.amdhsa_user_sgpr_dispatch_id 1
.amdhsa_user_sgpr_flat_scratch_init 1
.amdhsa_user_sgpr_private_segment_size 1
.amdhsa_system_sgpr_private_segment_wavefront_offset 1
.amdhsa_system_sgpr_workgroup_id_x 0
.amdhsa_system_sgpr_workgroup_id_y 1
.amdhsa_system_sgpr_workgroup_id_z 1
.amdhsa_system_sgpr_workgroup_info 1
.amdhsa_system_vgpr_workitem_id 1
.amdhsa_next_free_vgpr 9
.amdhsa_next_free_sgpr 27
.amdhsa_reserve_vcc 0
.amdhsa_reserve_flat_scratch 0
.amdhsa_reserve_xnack_mask 1
.amdhsa_float_round_mode_32 1
.amdhsa_float_round_mode_16_64 1
.amdhsa_float_denorm_mode_32 1
.amdhsa_float_denorm_mode_16_64 0
.amdhsa_dx10_clamp 0
.amdhsa_ieee_mode 0
.amdhsa_fp16_overflow 1
.amdhsa_exception_fp_ieee_invalid_op 1
.amdhsa_exception_fp_denorm_src 1
.amdhsa_exception_fp_ieee_div_zero 1
.amdhsa_exception_fp_ieee_overflow 1
.amdhsa_exception_fp_ieee_underflow 1
.amdhsa_exception_fp_ieee_inexact 1
.amdhsa_exception_int_div_zero 1
.end_amdhsa_kernel
// ASM: .amdhsa_kernel complete
// ASM-NEXT: .amdhsa_group_segment_fixed_size 1
// ASM-NEXT: .amdhsa_private_segment_fixed_size 1
// ASM-NEXT: .amdhsa_kernarg_size 8
// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1
// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1
// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1
// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1
// ASM-NEXT: .amdhsa_next_free_vgpr 9
// ASM-NEXT: .amdhsa_next_free_sgpr 27
// ASM-NEXT: .amdhsa_reserve_vcc 0
// ASM-NEXT: .amdhsa_reserve_flat_scratch 0
// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
// ASM-NEXT: .amdhsa_float_round_mode_32 1
// ASM-NEXT: .amdhsa_float_round_mode_16_64 1
// ASM-NEXT: .amdhsa_float_denorm_mode_32 1
// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0
// ASM-NEXT: .amdhsa_dx10_clamp 0
// ASM-NEXT: .amdhsa_ieee_mode 0
// ASM-NEXT: .amdhsa_fp16_overflow 1
// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1
// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1
// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1
// ASM-NEXT: .amdhsa_exception_int_div_zero 1
// ASM-NEXT: .end_amdhsa_kernel
// Test that we are including special SGPR usage in the granulated count.
.p2align 6
.amdhsa_kernel special_sgpr
// Same next_free_sgpr as "complete", but...
.amdhsa_next_free_sgpr 27
// ...on GFX9 this should require an additional 6 SGPRs, pushing us from
// 3 granules to 4
.amdhsa_reserve_flat_scratch 1
.amdhsa_reserve_vcc 0
.amdhsa_reserve_xnack_mask 1
.amdhsa_float_denorm_mode_16_64 0
.amdhsa_dx10_clamp 0
.amdhsa_ieee_mode 0
.amdhsa_next_free_vgpr 0
.end_amdhsa_kernel
// ASM: .amdhsa_kernel special_sgpr
// ASM: .amdhsa_next_free_vgpr 0
// ASM-NEXT: .amdhsa_next_free_sgpr 27
// ASM-NEXT: .amdhsa_reserve_vcc 0
// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
// ASM: .amdhsa_float_denorm_mode_16_64 0
// ASM-NEXT: .amdhsa_dx10_clamp 0
// ASM-NEXT: .amdhsa_ieee_mode 0
// ASM: .end_amdhsa_kernel
// Test that explicitly disabling user_sgpr's does not affect the user_sgpr
// count, i.e. this should produce the same descriptor as minimal.
.p2align 6
.amdhsa_kernel disabled_user_sgpr
.amdhsa_user_sgpr_private_segment_buffer 0
.amdhsa_next_free_vgpr 0
.amdhsa_next_free_sgpr 0
.end_amdhsa_kernel
// ASM: .amdhsa_kernel disabled_user_sgpr
// ASM: .amdhsa_next_free_vgpr 0
// ASM-NEXT: .amdhsa_next_free_sgpr 0
// ASM: .end_amdhsa_kernel
.section .foo
.byte .amdgcn.gfx_generation_number
// ASM: .byte 9
.byte .amdgcn.gfx_generation_minor
// ASM: .byte 0
.byte .amdgcn.gfx_generation_stepping
// ASM: .byte 4
.byte .amdgcn.next_free_vgpr
// ASM: .byte 0
.byte .amdgcn.next_free_sgpr
// ASM: .byte 0
v_mov_b32_e32 v7, s10
.byte .amdgcn.next_free_vgpr
// ASM: .byte 8
.byte .amdgcn.next_free_sgpr
// ASM: .byte 11
.set .amdgcn.next_free_vgpr, 0
.set .amdgcn.next_free_sgpr, 0
.byte .amdgcn.next_free_vgpr
// ASM: .byte 0
.byte .amdgcn.next_free_sgpr
// ASM: .byte 0
v_mov_b32_e32 v16, s3
.byte .amdgcn.next_free_vgpr
// ASM: .byte 17
.byte .amdgcn.next_free_sgpr
// ASM: .byte 4
// Metadata
.amdgpu_metadata
amdhsa.version:
- 3
- 0
amdhsa.kernels:
- .name: amd_kernel_code_t_test_all
.symbol: amd_kernel_code_t_test_all@kd
.kernarg_segment_size: 8
.group_segment_fixed_size: 16
.private_segment_fixed_size: 32
.kernarg_segment_align: 64
.wavefront_size: 128
.sgpr_count: 14
.vgpr_count: 40
.max_flat_workgroup_size: 256
- .name: amd_kernel_code_t_minimal
.symbol: amd_kernel_code_t_minimal@kd
.kernarg_segment_size: 8
.group_segment_fixed_size: 16
.private_segment_fixed_size: 32
.kernarg_segment_align: 64
.wavefront_size: 128
.sgpr_count: 14
.vgpr_count: 40
.max_flat_workgroup_size: 256
.end_amdgpu_metadata
// ASM: .amdgpu_metadata
// ASM: amdhsa.kernels:
// ASM: - .group_segment_fixed_size: 16
// ASM: .kernarg_segment_align: 64
// ASM: .kernarg_segment_size: 8
// ASM: .max_flat_workgroup_size: 256
// ASM: .name: amd_kernel_code_t_test_all
// ASM: .private_segment_fixed_size: 32
// ASM: .sgpr_count: 14
// ASM: .symbol: 'amd_kernel_code_t_test_all@kd'
// ASM: .vgpr_count: 40
// ASM: .wavefront_size: 128
// ASM: - .group_segment_fixed_size: 16
// ASM: .kernarg_segment_align: 64
// ASM: .kernarg_segment_size: 8
// ASM: .max_flat_workgroup_size: 256
// ASM: .name: amd_kernel_code_t_minimal
// ASM: .private_segment_fixed_size: 32
// ASM: .sgpr_count: 14
// ASM: .symbol: 'amd_kernel_code_t_minimal@kd'
// ASM: .vgpr_count: 40
// ASM: .wavefront_size: 128
// ASM: amdhsa.version:
// ASM-NEXT: - 3
// ASM-NEXT: - 0
// ASM: .end_amdgpu_metadata

View File

@ -1,8 +1,6 @@
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx801 -mattr=-fast-fmaf -show-encoding %s | FileCheck --check-prefix=GFX8 %s
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -mattr=-mad-mix-insts -show-encoding %s | FileCheck --check-prefix=GFX9 %s
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32 -show-encoding %s | FileCheck --check-prefix=GFX10 %s
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -mattr=-mad-mix-insts,-xnack -show-encoding %s | FileCheck --check-prefix=GFX9 %s
.hsa_code_object_isa
// GFX8: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
// GFX9: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"
// GFX10: .hsa_code_object_isa 10,1,0,"AMD","AMDGPU"

View File

@ -7,7 +7,7 @@
// RUN: not llvm-mc -triple amdgcn-amd-amdpal -mcpu=iceland %s 2>&1 | FileCheck --check-prefix=OSABI-PAL-ERR %s
// OSABI-HSA: .amd_amdgpu_isa "amdgcn-amd-amdhsa--gfx802"
// OSABI-UNK-ERR: error: .amd_amdgpu_isa directive does not match triple and/or mcpu arguments specified through the command line
// OSABI-HSA-ERR: error: .amd_amdgpu_isa directive does not match triple and/or mcpu arguments specified through the command line
// OSABI-PAL-ERR: error: .amd_amdgpu_isa directive does not match triple and/or mcpu arguments specified through the command line
// OSABI-UNK-ERR: error: target id must match options
// OSABI-HSA-ERR: error: target id must match options
// OSABI-PAL-ERR: error: target id must match options
.amd_amdgpu_isa "amdgcn-amd-amdhsa--gfx802"

View File

@ -7,7 +7,7 @@
// RUN: not llvm-mc -triple amdgcn-amd-unknown -mcpu=gfx802 %s 2>&1 | FileCheck --check-prefix=OSABI-UNK-ERR %s
// OSABI-PAL: .amd_amdgpu_isa "amdgcn-amd-amdpal--gfx802"
// OSABI-UNK-ERR: error: .amd_amdgpu_isa directive does not match triple and/or mcpu arguments specified through the command line
// OSABI-HSA-ERR: error: .amd_amdgpu_isa directive does not match triple and/or mcpu arguments specified through the command line
// OSABI-PAL-ERR: error: .amd_amdgpu_isa directive does not match triple and/or mcpu arguments specified through the command line
// OSABI-UNK-ERR: error: target id must match options
// OSABI-HSA-ERR: error: target id must match options
// OSABI-PAL-ERR: error: target id must match options
.amd_amdgpu_isa "amdgcn-amd-amdpal--gfx802"

View File

@ -7,7 +7,7 @@
// RUN: not llvm-mc -triple amdgcn-amd-amdpal -mcpu=iceland %s 2>&1 | FileCheck --check-prefix=OSABI-PAL-ERR %s
// OSABI-UNK: .amd_amdgpu_isa "amdgcn-amd-unknown--gfx802"
// OSABI-UNK-ERR: error: .amd_amdgpu_isa directive does not match triple and/or mcpu arguments specified through the command line
// OSABI-HSA-ERR: error: .amd_amdgpu_isa directive does not match triple and/or mcpu arguments specified through the command line
// OSABI-PAL-ERR: error: .amd_amdgpu_isa directive does not match triple and/or mcpu arguments specified through the command line
// OSABI-UNK-ERR: error: target id must match options
// OSABI-HSA-ERR: error: target id must match options
// OSABI-PAL-ERR: error: target id must match options
.amd_amdgpu_isa "amdgcn-amd-unknown--gfx802"

View File

@ -1,5 +1,5 @@
# RUN: llvm-mc -preserve-comments -triple amdgcn-amd-amdhsa %s >%t-1.s
# RUN: llvm-mc -preserve-comments -triple amdgcn-amd-amdhsa %t-1.s >%t-2.s
# RUN: llvm-mc -preserve-comments -triple amdgcn-amd- %s >%t-1.s
# RUN: llvm-mc -preserve-comments -triple amdgcn-amd- %t-1.s >%t-2.s
# RUN: diff %t-1.s %t-2.s
# Test that AMDGPU assembly round-trips when run through MC; the first

View File

@ -9,23 +9,23 @@
# RUN: obj2yaml %t.o.3 | FileCheck --check-prefixes=YAML-SRAM-ECC-XNACK-GFX900 %s
# ELF-SRAM-ECC-NONE: Flags [
# ELF-SRAM-ECC-NONE-NEXT: EF_AMDGPU_SRAM_ECC (0x200)
# ELF-SRAM-ECC-NONE-NEXT: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200)
# ELF-SRAM-ECC-NONE-NEXT: ]
# ELF-SRAM-ECC-GFX900: Flags [
# ELF-SRAM-ECC-GFX900-NEXT: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200)
# ELF-SRAM-ECC-GFX900-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
# ELF-SRAM-ECC-GFX900-NEXT: EF_AMDGPU_SRAM_ECC (0x200)
# ELF-SRAM-ECC-GFX900-NEXT: ]
# ELF-SRAM-ECC-XNACK-GFX900: Flags [
# ELF-SRAM-ECC-XNACK-GFX900-NEXT: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200)
# ELF-SRAM-ECC-XNACK-GFX900-NEXT: EF_AMDGPU_FEATURE_XNACK_V3 (0x100)
# ELF-SRAM-ECC-XNACK-GFX900-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
# ELF-SRAM-ECC-XNACK-GFX900-NEXT: EF_AMDGPU_SRAM_ECC (0x200)
# ELF-SRAM-ECC-XNACK-GFX900-NEXT: EF_AMDGPU_XNACK (0x100)
# ELF-SRAM-ECC-XNACK-GFX900-NEXT: ]
# YAML-SRAM-ECC-NONE: Flags: [ EF_AMDGPU_MACH_NONE, EF_AMDGPU_SRAM_ECC ]
# YAML-SRAM-ECC-GFX900: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_SRAM_ECC ]
# YAML-SRAM-ECC-XNACK-GFX900: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_XNACK, EF_AMDGPU_SRAM_ECC ]
# YAML-SRAM-ECC-NONE: Flags: [ EF_AMDGPU_MACH_NONE, EF_AMDGPU_FEATURE_SRAMECC_V3 ]
# YAML-SRAM-ECC-GFX900: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_FEATURE_SRAMECC_V3 ]
# YAML-SRAM-ECC-XNACK-GFX900: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_FEATURE_XNACK_V3, EF_AMDGPU_FEATURE_SRAMECC_V3 ]
# Doc1
--- !ELF
@ -35,7 +35,7 @@ FileHeader:
OSABI: ELFOSABI_NONE
Type: ET_REL
Machine: EM_AMDGPU
Flags: [ EF_AMDGPU_SRAM_ECC ]
Flags: [ EF_AMDGPU_FEATURE_SRAMECC_V3 ]
...
# Doc2
@ -46,7 +46,7 @@ FileHeader:
OSABI: ELFOSABI_NONE
Type: ET_REL
Machine: EM_AMDGPU
Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_SRAM_ECC ]
Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_FEATURE_SRAMECC_V3 ]
...
# Doc3
@ -57,5 +57,5 @@ FileHeader:
OSABI: ELFOSABI_NONE
Type: ET_REL
Machine: EM_AMDGPU
Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_XNACK, EF_AMDGPU_SRAM_ECC ]
Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_FEATURE_XNACK_V3, EF_AMDGPU_FEATURE_SRAMECC_V3 ]
...

View File

@ -6,13 +6,13 @@
# RUN: obj2yaml %t.o.2 | FileCheck --check-prefixes=YAML-XNACK-GFX801 %s
# ELF-ALL: Flags [
# ELF-XNACK-NONE: EF_AMDGPU_XNACK (0x100)
# ELF-XNACK-NONE: EF_AMDGPU_FEATURE_XNACK_V3 (0x100)
# ELF-XNACK-GFX801: EF_AMDGPU_FEATURE_XNACK_V3 (0x100)
# ELF-XNACK-GFX801: EF_AMDGPU_MACH_AMDGCN_GFX801 (0x28)
# ELF-XNACK-GFX801: EF_AMDGPU_XNACK (0x100)
# ELF-ALL: ]
# YAML-XNACK-NONE: Flags: [ EF_AMDGPU_MACH_NONE, EF_AMDGPU_XNACK ]
# YAML-XNACK-GFX801: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX801, EF_AMDGPU_XNACK ]
# YAML-XNACK-NONE: Flags: [ EF_AMDGPU_MACH_NONE, EF_AMDGPU_FEATURE_XNACK_V3 ]
# YAML-XNACK-GFX801: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX801, EF_AMDGPU_FEATURE_XNACK_V3 ]
# Doc1
--- !ELF
@ -22,7 +22,7 @@ FileHeader:
OSABI: ELFOSABI_NONE
Type: ET_REL
Machine: EM_AMDGPU
Flags: [ EF_AMDGPU_XNACK ]
Flags: [ EF_AMDGPU_FEATURE_XNACK_V3 ]
...
# Doc2
@ -33,5 +33,5 @@ FileHeader:
OSABI: ELFOSABI_NONE
Type: ET_REL
Machine: EM_AMDGPU
Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX801, EF_AMDGPU_XNACK ]
Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX801, EF_AMDGPU_FEATURE_XNACK_V3 ]
...

View File

@ -19,11 +19,12 @@
my_kernel.kd:
.long 0x00000000 ;; group_segment_fixed_size
.long 0x00000000 ;; private_segment_fixed_size
.quad 0x00FF000000000000 ;; reserved bytes.
.long 0x00000000 ;; kernarg_segment_size.
.long 0x00000000 ;; reserved bytes.
.quad 0x0000000000000000 ;; kernel_code_entry_byte_offset, any value works.
;; 20 reserved bytes.
.quad 0x0000000000000000
.quad 0x00FF000000000000 ;; reserved bytes.
.quad 0x0000000000000000
.long 0x00000000

View File

@ -2,19 +2,19 @@
; RUN: split-file %s %t.dir
; RUN: llvm-mc %t.dir/1.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1
; RUN: llvm-mc %t.dir/1.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1
; RUN: llvm-objdump --disassemble-symbols=my_kernel_1.kd %t1 | tail -n +8 \
; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1-re-assemble
; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1-re-assemble
; RUN: diff %t1 %t1-re-assemble
; RUN: llvm-mc %t.dir/2.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2
; RUN: llvm-mc %t.dir/2.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2
; RUN: llvm-objdump --disassemble-symbols=my_kernel_2.kd %t2 | tail -n +8 \
; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2-re-assemble
; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2-re-assemble
; RUN: diff %t2 %t2-re-assemble
; RUN: llvm-mc %t.dir/3.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3
; RUN: llvm-mc %t.dir/3.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t3
; RUN: llvm-objdump --disassemble-symbols=my_kernel_3.kd %t3 | tail -n +8 \
; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3-re-assemble
; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t3-re-assemble
; RUN: diff %t3 %t3-re-assemble
@ -34,7 +34,7 @@
.amdhsa_next_free_vgpr 0
.amdhsa_next_free_sgpr 0
.amdhsa_reserve_flat_scratch 1
.amdhsa_reserve_xnack_mask 1
.amdhsa_reserve_xnack_mask 0
.amdhsa_reserve_vcc 1
.end_amdhsa_kernel
@ -44,6 +44,6 @@
.amdhsa_next_free_vgpr 0
.amdhsa_next_free_sgpr 35
.amdhsa_reserve_flat_scratch 1
.amdhsa_reserve_xnack_mask 1
.amdhsa_reserve_xnack_mask 0
.amdhsa_reserve_vcc 1
.end_amdhsa_kernel

View File

@ -2,19 +2,19 @@
; RUN: split-file %s %t.dir
; RUN: llvm-mc %t.dir/1.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1
; RUN: llvm-mc %t.dir/1.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1
; RUN: llvm-objdump --disassemble-symbols=my_kernel_1.kd %t1 | tail -n +8 \
; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1-re-assemble
; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1-re-assemble
; RUN: diff %t1 %t1-re-assemble
; RUN: llvm-mc %t.dir/2.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2
; RUN: llvm-mc %t.dir/2.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2
; RUN: llvm-objdump --disassemble-symbols=my_kernel_2.kd %t2 | tail -n +8 \
; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2-re-assemble
; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2-re-assemble
; RUN: diff %t2 %t2-re-assemble
; RUN: llvm-mc %t.dir/3.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3
; RUN: llvm-mc %t.dir/3.s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t3
; RUN: llvm-objdump --disassemble-symbols=my_kernel_3.kd %t3 | tail -n +8 \
; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3-re-assemble
; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t3-re-assemble
; RUN: diff %t3 %t3-re-assemble
;--- 1.s

View File

@ -1,6 +1,6 @@
;; Entirely zeroed kernel descriptor (for GFX10).
; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -filetype=obj -o %t
; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-xnack -filetype=obj -o %t
; RUN: llvm-objdump -s -j .text %t | FileCheck --check-prefix=OBJDUMP %s
;; TODO:

View File

@ -1,8 +1,8 @@
;; Entirely zeroed kernel descriptor (for GFX9).
; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1
; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1
; RUN: llvm-objdump --disassemble-symbols=my_kernel.kd %t1 \
; RUN: | tail -n +8 | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2
; RUN: | tail -n +8 | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2
; RUN: diff %t1 %t2
; RUN: llvm-objdump -s -j .text %t1 | FileCheck --check-prefix=OBJDUMP %s

View File

@ -1,6 +1,6 @@
; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1
; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t1
; RUN: llvm-objdump --disassemble-symbols=my_kernel.kd %t1 \
; RUN: | tail -n +8 | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2
; RUN: | tail -n +8 | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack -filetype=obj -o %t2
; RUN: llvm-objdump -s -j .text %t2 | FileCheck --check-prefix=OBJDUMP %s
;; Not running lit-test over gfx10 (see kd-zeroed-gfx10.s for details).

View File

@ -1,127 +1,338 @@
# RUN: yaml2obj %s -o %t -DCPU=GFX600
# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX600 -DFLAGS=0x20
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX600
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX600 -DFLAG_VALUE=0x20
# RUN: yaml2obj %s -o %t -DCPU=GFX601
# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX601 -DFLAGS=0x21
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX600
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX600 -DFLAG_VALUE=0x20
# RUN: yaml2obj %s -o %t -DCPU=GFX602
# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX602 -DFLAGS=0x3A
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX600
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX600 -DFLAG_VALUE=0x20
# RUN: yaml2obj %s -o %t -DCPU=GFX700
# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX700 -DFLAGS=0x22
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX601
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX601 -DFLAG_VALUE=0x21
# RUN: yaml2obj %s -o %t -DCPU=GFX701
# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX701 -DFLAGS=0x23
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX601
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX601 -DFLAG_VALUE=0x21
# RUN: yaml2obj %s -o %t -DCPU=GFX702
# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX702 -DFLAGS=0x24
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX601
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX601 -DFLAG_VALUE=0x21
# RUN: yaml2obj %s -o %t -DCPU=GFX703
# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX703 -DFLAGS=0x25
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX602
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX602 -DFLAG_VALUE=0x3A
# RUN: yaml2obj %s -o %t -DCPU=GFX704
# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX704 -DFLAGS=0x26
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX602
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX602 -DFLAG_VALUE=0x3A
# RUN: yaml2obj %s -o %t -DCPU=GFX705
# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX705 -DFLAGS=0x3B
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX602
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX602 -DFLAG_VALUE=0x3A
# RUN: yaml2obj %s -o %t -DCPU=GFX801
# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX801 -DFLAGS=0x28
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX700
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX700 -DFLAG_VALUE=0x22
# RUN: yaml2obj %s -o %t -DCPU=GFX802
# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX802 -DFLAGS=0x29
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX700
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX700 -DFLAG_VALUE=0x22
# RUN: yaml2obj %s -o %t -DCPU=GFX803
# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX803 -DFLAGS=0x2A
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX700
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX700 -DFLAG_VALUE=0x22
# RUN: yaml2obj %s -o %t -DCPU=GFX805
# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX805 -DFLAGS=0x3C
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX701
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX701 -DFLAG_VALUE=0x23
# RUN: yaml2obj %s -o %t -DCPU=GFX810
# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX810 -DFLAGS=0x2B
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX701
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX701 -DFLAG_VALUE=0x23
# RUN: yaml2obj %s -o %t -DCPU=GFX900
# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX900 -DFLAGS=0x2C
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX701
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX701 -DFLAG_VALUE=0x23
# RUN: yaml2obj %s -o %t -DCPU=GFX902
# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX902 -DFLAGS=0x2D
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX702
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX702 -DFLAG_VALUE=0x24
# RUN: yaml2obj %s -o %t -DCPU=GFX904
# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX904 -DFLAGS=0x2E
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX702
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX702 -DFLAG_VALUE=0x24
# RUN: yaml2obj %s -o %t -DCPU=GFX906
# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX906 -DFLAGS=0x2F
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX702
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX702 -DFLAG_VALUE=0x24
# RUN: yaml2obj %s -o %t -DCPU=GFX908
# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX908 -DFLAGS=0x30
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX703
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX703 -DFLAG_VALUE=0x25
# RUN: yaml2obj %s -o %t -DCPU=GFX909
# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX909 -DFLAGS=0x31
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX703
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX703 -DFLAG_VALUE=0x25
# RUN: yaml2obj %s -o %t -DCPU=GFX90A
# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX90A -DFLAGS=0x3F
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX703
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX703 -DFLAG_VALUE=0x25
# RUN: yaml2obj %s -o %t -DCPU=GFX90C
# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX90C -DFLAGS=0x32
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX704
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX704 -DFLAG_VALUE=0x26
# RUN: yaml2obj %s -o %t -DCPU=GFX1010
# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX1010 -DFLAGS=0x33
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX704
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX704 -DFLAG_VALUE=0x26
# RUN: yaml2obj %s -o %t -DCPU=GFX1011
# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX1011 -DFLAGS=0x34
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX704
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX704 -DFLAG_VALUE=0x26
# RUN: yaml2obj %s -o %t -DCPU=GFX1012
# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX1012 -DFLAGS=0x35
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX705
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX705 -DFLAG_VALUE=0x3B
# RUN: yaml2obj %s -o %t -DCPU=GFX1030
# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX1030 -DFLAGS=0x36
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX705
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX705 -DFLAG_VALUE=0x3B
# RUN: yaml2obj %s -o %t -DCPU=GFX1031
# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX1031 -DFLAGS=0x37
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX705
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX705 -DFLAG_VALUE=0x3B
# RUN: yaml2obj %s -o %t -DCPU=GFX1032
# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX1032 -DFLAGS=0x38
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX801
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX801 -DFLAG_VALUE=0x28
# RUN: yaml2obj %s -o %t -DCPU=GFX1033
# RUN: llvm-readobj -h %t | FileCheck %s --match-full-lines -DFILE=%t -DCPU=GFX1033 -DFLAGS=0x39
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX801
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX801 -DFLAG_VALUE=0x28
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX801
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX801 -DFLAG_VALUE=0x28
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX802
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX802 -DFLAG_VALUE=0x29
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX802
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX802 -DFLAG_VALUE=0x29
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX802
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX802 -DFLAG_VALUE=0x29
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX803
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX803 -DFLAG_VALUE=0x2A
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX803
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX803 -DFLAG_VALUE=0x2A
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX803
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX803 -DFLAG_VALUE=0x2A
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX805
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX805 -DFLAG_VALUE=0x3C
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX805
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX805 -DFLAG_VALUE=0x3C
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX805
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX805 -DFLAG_VALUE=0x3C
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX810
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX810 -DFLAG_VALUE=0x2B
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX810
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX810 -DFLAG_VALUE=0x2B
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX810
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX810 -DFLAG_VALUE=0x2B
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX900
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX900 -DFLAG_VALUE=0x2C
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX900
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX900 -DFLAG_VALUE=0x2C
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX900
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX900 -DFLAG_VALUE=0x2C
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX902
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX902 -DFLAG_VALUE=0x2D
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX902
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX902 -DFLAG_VALUE=0x2D
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX902
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX902 -DFLAG_VALUE=0x2D
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX904
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX904 -DFLAG_VALUE=0x2E
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX904
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX904 -DFLAG_VALUE=0x2E
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX904
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX904 -DFLAG_VALUE=0x2E
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX906
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX906 -DFLAG_VALUE=0x2F
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX906
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX906 -DFLAG_VALUE=0x2F
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX906
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX906 -DFLAG_VALUE=0x2F
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX908
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX908 -DFLAG_VALUE=0x30
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX908
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX908 -DFLAG_VALUE=0x30
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX908
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX908 -DFLAG_VALUE=0x30
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX909
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX909 -DFLAG_VALUE=0x31
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX909
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX909 -DFLAG_VALUE=0x31
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX909
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX909 -DFLAG_VALUE=0x31
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90A
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90A -DFLAG_VALUE=0x3F
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90A
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90A -DFLAG_VALUE=0x3F
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90A
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90A -DFLAG_VALUE=0x3F
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90C
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90C -DFLAG_VALUE=0x32
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90C
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90C -DFLAG_VALUE=0x32
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90C
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90C -DFLAG_VALUE=0x32
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010 -DFLAG_VALUE=0x33
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010 -DFLAG_VALUE=0x33
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010 -DFLAG_VALUE=0x33
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1011
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1011 -DFLAG_VALUE=0x34
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1011
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1011 -DFLAG_VALUE=0x34
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1011
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1011 -DFLAG_VALUE=0x34
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1012
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1012 -DFLAG_VALUE=0x35
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1012
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1012 -DFLAG_VALUE=0x35
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1012
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1012 -DFLAG_VALUE=0x35
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1030
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1030 -DFLAG_VALUE=0x36
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1030
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1030 -DFLAG_VALUE=0x36
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1030
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1030 -DFLAG_VALUE=0x36
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1031
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1031 -DFLAG_VALUE=0x37
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1031
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1031 -DFLAG_VALUE=0x37
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1031
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1031 -DFLAG_VALUE=0x37
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1032
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1032 -DFLAG_VALUE=0x38
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1032
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1032 -DFLAG_VALUE=0x38
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1032
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1032 -DFLAG_VALUE=0x38
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1033
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1033 -DFLAG_VALUE=0x39
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1033
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1033 -DFLAG_VALUE=0x39
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1033
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1033 -DFLAG_VALUE=0x39
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_XNACK_V3"
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_XNACK_V3 (0x100)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0x13F
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_XNACK_V3"
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_XNACK_V3 (0x100)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0x13F
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_SRAMECC_V3"
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0x23F
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_SRAMECC_V3"
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0x23F
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_XNACK_ANY_V4"
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_XNACK_ANY_V4 (0x100)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0x13F
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_XNACK_OFF_V4"
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_XNACK_OFF_V4 (0x200)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0x23F
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_XNACK_ON_V4"
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_XNACK_ON_V4 (0x300)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0x33F
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_SRAMECC_ANY_V4"
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_SRAMECC_ANY_V4 (0x400)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0x43F
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_SRAMECC_OFF_V4"
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 (0x800)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0x83F
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_SRAMECC_ON_V4"
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_SRAMECC_ON_V4 (0xC00)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0xC3F
# RUN: yaml2obj %s -o %t -DABI_VERSION=16 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90A
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,UNKNOWN-ABI-VERSION --match-full-lines -DABI_VERSION=16 -DFILE=%t -DFLAG_VALUE=0x3F
--- !ELF
FileHeader:
Class: ELFCLASS64
Data: ELFDATA2LSB
OSABI: ELFOSABI_AMDGPU_HSA
ABIVersion: [[ABI_VERSION]]
Type: ET_REL
Machine: EM_AMDGPU
Flags: [ EF_AMDGPU_MACH_AMDGCN_[[CPU]] ]
Flags: [ [[FLAG_NAME]] ]
# CHECK: File: [[FILE]]
# CHECK-NEXT: Format: elf64-amdgpu
# CHECK-NEXT: Arch: amdgcn
# CHECK-NEXT: AddressSize: 64bit
# CHECK-NEXT: LoadName: <Not found>
# CHECK-NEXT: ElfHeader {
# CHECK-NEXT: Ident {
# CHECK-NEXT: Magic: (7F 45 4C 46)
# CHECK-NEXT: Class: 64-bit (0x2)
# CHECK-NEXT: DataEncoding: LittleEndian (0x1)
# CHECK-NEXT: FileVersion: 1
# CHECK-NEXT: OS/ABI: AMDGPU_HSA (0x40)
# CHECK-NEXT: ABIVersion: 0
# CHECK-NEXT: Unused: (00 00 00 00 00 00 00)
# CHECK-NEXT: }
# CHECK-NEXT: Type: Relocatable (0x1)
# CHECK-NEXT: Machine: EM_AMDGPU (0xE0)
# CHECK-NEXT: Version: 1
# CHECK-NEXT: Entry: 0x0
# CHECK-NEXT: ProgramHeaderOffset: 0x0
# CHECK-NEXT: SectionHeaderOffset: 0x58
# CHECK-NEXT: Flags [ ([[FLAGS]])
# CHECK-NEXT: EF_AMDGPU_MACH_AMDGCN_[[CPU]] ([[FLAGS]])
# CHECK-NEXT: ]
# CHECK-NEXT: HeaderSize: 64
# CHECK-NEXT: ProgramHeaderEntrySize: 0
# CHECK-NEXT: ProgramHeaderCount: 0
# CHECK-NEXT: SectionHeaderEntrySize: 64
# CHECK-NEXT: SectionHeaderCount: 3
# CHECK-NEXT: StringTableSectionIndex: 2
# CHECK-NEXT: }
# ALL: File: [[FILE]]
# ALL-NEXT: Format: elf64-amdgpu
# ALL-NEXT: Arch: amdgcn
# ALL-NEXT: AddressSize: 64bit
# ALL-NEXT: LoadName: <Not found>
# ALL-NEXT: ElfHeader {
# ALL-NEXT: Ident {
# ALL-NEXT: Magic: (7F 45 4C 46)
# ALL-NEXT: Class: 64-bit (0x2)
# ALL-NEXT: DataEncoding: LittleEndian (0x1)
# ALL-NEXT: FileVersion: 1
# ALL-NEXT: OS/ABI: AMDGPU_HSA (0x40)
# ALL-NEXT: ABIVersion: [[ABI_VERSION]]
# ALL-NEXT: Unused: (00 00 00 00 00 00 00)
# ALL-NEXT: }
# ALL-NEXT: Type: Relocatable (0x1)
# ALL-NEXT: Machine: EM_AMDGPU (0xE0)
# ALL-NEXT: Version: 1
# ALL-NEXT: Entry: 0x0
# ALL-NEXT: ProgramHeaderOffset: 0x0
# ALL-NEXT: SectionHeaderOffset: 0x58
# KNOWN-ABI-VERSION-NEXT: Flags [ ([[FLAG_VALUE]])
# SINGLE-FLAG-NEXT: [[FLAG_NAME]] ([[FLAG_VALUE]])
# DOUBLE-FLAG-NEXT: [[FLAG_0]]
# DOUBLE-FLAG-NEXT: [[FLAG_1]]
# KNOWN-ABI-VERSION-NEXT: ]
# UNKNOWN-ABI-VERSION-NEXT: Flags: [[FLAG_VALUE]]
# ALL-NEXT: HeaderSize: 64
# ALL-NEXT: ProgramHeaderEntrySize: 0
# ALL-NEXT: ProgramHeaderCount: 0
# ALL-NEXT: SectionHeaderEntrySize: 64
# ALL-NEXT: SectionHeaderCount: 3
# ALL-NEXT: StringTableSectionIndex: 2
# ALL-NEXT: }

View File

@ -6,25 +6,27 @@
// GNU: Displaying notes found in: .note.no.desc
// GNU-NEXT: Owner Data size Description
// GNU-NEXT: AMD 0x00000000 NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata)
// GNU-NEXT: HSA Metadata:
// GNU-NEXT: AMD 0x00000000 NT_AMD_HSA_METADATA (AMD HSA Metadata)
// GNU-NEXT: AMD HSA Metadata:
// GNU-NEXT: {{^ $}}
// GNU-NEXT: AMD 0x00000000 NT_AMD_AMDGPU_ISA (ISA Version)
// GNU-NEXT: ISA Version:
// GNU-NEXT: AMD 0x00000000 NT_AMD_HSA_ISA_NAME (AMD HSA ISA Name)
// GNU-NEXT: AMD HSA ISA Name:
// GNU-NEXT: {{^ $}}
// GNU-EMPTY:
// GNU-NEXT: Displaying notes found in: .note.desc
// GNU-NEXT: Owner Data size Description
// GNU-NEXT: AMD 0x0000000a NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata)
// GNU-NEXT: HSA Metadata:
// GNU-NEXT: AMD 0x0000000a NT_AMD_HSA_METADATA (AMD HSA Metadata)
// GNU-NEXT: AMD HSA Metadata:
// GNU-NEXT: meta_blah
// GNU-NEXT: AMD 0x00000009 NT_AMD_AMDGPU_ISA (ISA Version)
// GNU-NEXT: ISA Version:
// GNU-NEXT: AMD 0x00000009 NT_AMD_HSA_ISA_NAME (AMD HSA ISA Name)
// GNU-NEXT: AMD HSA ISA Name:
// GNU-NEXT: isa_blah
// GNU-EMPTY:
// GNU-NEXT: Displaying notes found in: .note.other
// GNU-NEXT: Owner Data size Description
// GNU-NEXT: AMD 0x00000000 NT_AMD_AMDGPU_PAL_METADATA (PAL Metadata)
// GNU-NEXT: AMD 0x00000000 NT_AMD_PAL_METADATA (AMD PAL Metadata)
// GNU-NEXT: AMD PAL Metadata:
// GNU-NEXT: {{^ $}}
// GNU-EMPTY:
// GNU-NEXT: Displaying notes found in: .note.unknown
// GNU-NEXT: Owner Data size Description
@ -40,14 +42,14 @@
// LLVM-NEXT: Note {
// LLVM-NEXT: Owner: AMD
// LLVM-NEXT: Data size: 0x0
// LLVM-NEXT: Type: NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata)
// LLVM-NEXT: HSA Metadata:
// LLVM-NEXT: Type: NT_AMD_HSA_METADATA (AMD HSA Metadata)
// LLVM-NEXT: AMD HSA Metadata:
// LLVM-NEXT: }
// LLVM-NEXT: Note {
// LLVM-NEXT: Owner: AMD
// LLVM-NEXT: Data size: 0x0
// LLVM-NEXT: Type: NT_AMD_AMDGPU_ISA (ISA Version)
// LLVM-NEXT: ISA Version:
// LLVM-NEXT: Type: NT_AMD_HSA_ISA_NAME (AMD HSA ISA Name)
// LLVM-NEXT: AMD HSA ISA Name:
// LLVM-NEXT: }
// LLVM-NEXT: }
// LLVM-NEXT: NoteSection {
@ -57,14 +59,14 @@
// LLVM-NEXT: Note {
// LLVM-NEXT: Owner: AMD
// LLVM-NEXT: Data size: 0xA
// LLVM-NEXT: Type: NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata)
// LLVM-NEXT: HSA Metadata: meta_blah
// LLVM-NEXT: Type: NT_AMD_HSA_METADATA (AMD HSA Metadata)
// LLVM-NEXT: AMD HSA Metadata: meta_blah
// LLVM-NEXT: }
// LLVM-NEXT: Note {
// LLVM-NEXT: Owner: AMD
// LLVM-NEXT: Data size: 0x9
// LLVM-NEXT: Type: NT_AMD_AMDGPU_ISA (ISA Version)
// LLVM-NEXT: ISA Version: isa_blah
// LLVM-NEXT: Type: NT_AMD_HSA_ISA_NAME (AMD HSA ISA Name)
// LLVM-NEXT: AMD HSA ISA Name: isa_blah
// LLVM-NEXT: }
// LLVM-NEXT: }
// LLVM-NEXT: NoteSection {
@ -74,7 +76,8 @@
// LLVM-NEXT: Note {
// LLVM-NEXT: Owner: AMD
// LLVM-NEXT: Data size: 0x0
// LLVM-NEXT: Type: NT_AMD_AMDGPU_PAL_METADATA (PAL Metadata)
// LLVM-NEXT: Type: NT_AMD_PAL_METADATA (AMD PAL Metadata)
// LLVM-NEXT: AMD PAL Metadata:
// LLVM-NEXT: }
// LLVM-NEXT: }
// LLVM-NEXT: NoteSection {
@ -96,17 +99,17 @@
.align 4
.long 4 /* namesz */
.long 0 /* descsz */
.long 10 /* type = NT_AMD_AMDGPU_HSA_METADATA */
.long 10 /* type = NT_AMD_HSA_METADATA */
.asciz "AMD"
.long 4 /* namesz */
.long 0 /* descsz */
.long 11 /* type = NT_AMD_AMDGPU_ISA */
.long 11 /* type = NT_AMD_HSA_ISA_NAME */
.asciz "AMD"
.section ".note.desc", "a"
.align 4
.long 4 /* namesz */
.long end.meta - begin.meta /* descsz */
.long 10 /* type = NT_AMD_AMDGPU_HSA_METADATA */
.long 10 /* type = NT_AMD_HSA_METADATA */
.asciz "AMD"
begin.meta:
.asciz "meta_blah"
@ -114,7 +117,7 @@ end.meta:
.align 4
.long 4 /* namesz */
.long end.isa - begin.isa /* descsz */
.long 11 /* type = NT_AMD_AMDGPU_ISA */
.long 11 /* type = NT_AMD_HSA_ISA_NAME */
.asciz "AMD"
begin.isa:
.asciz "isa_blah"
@ -124,7 +127,7 @@ end.isa:
.align 4
.long 4 /* namesz */
.long 0 /* descsz */
.long 12 /* type = NT_AMD_AMDGPU_PAL_METADATA */
.long 12 /* type = NT_AMD_PAL_METADATA */
.asciz "AMD"
.section ".note.unknown", "a"
.align 4

View File

@ -1430,7 +1430,7 @@ static const EnumEntry<unsigned> ElfHeaderMipsFlags[] = {
ENUM_ENT(EF_MIPS_ARCH_64R6, "mips64r6")
};
static const EnumEntry<unsigned> ElfHeaderAMDGPUFlags[] = {
static const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion3[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_NONE),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_R600),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_R630),
@ -1477,8 +1477,63 @@ static const EnumEntry<unsigned> ElfHeaderAMDGPUFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1031),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1032),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1033),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_XNACK),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_SRAM_ECC)
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_V3),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_SRAMECC_V3)
};
static const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion4[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_NONE),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_R600),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_R630),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_RS880),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_RV670),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_RV710),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_RV730),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_RV770),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_CEDAR),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_CYPRESS),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_JUNIPER),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_REDWOOD),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_SUMO),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_BARTS),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_CAICOS),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_CAYMAN),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_TURKS),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX600),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX601),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX602),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX700),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX701),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX702),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX703),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX704),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX705),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX801),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX802),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX803),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX805),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX810),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX900),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX902),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX904),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX906),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX908),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX909),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90A),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90C),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1010),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1011),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1012),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1030),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1031),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1032),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1033),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_ANY_V4),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_OFF_V4),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_ON_V4),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_SRAMECC_ANY_V4),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_SRAMECC_OFF_V4),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_SRAMECC_ON_V4)
};
static const EnumEntry<unsigned> ElfHeaderRISCVFlags[] = {
@ -4944,15 +4999,95 @@ static AMDNote getAMDNote(uint32_t NoteType, ArrayRef<uint8_t> Desc) {
switch (NoteType) {
default:
return {"", ""};
case ELF::NT_AMD_AMDGPU_HSA_METADATA:
case ELF::NT_AMD_HSA_CODE_OBJECT_VERSION: {
struct CodeObjectVersion {
uint32_t MajorVersion;
uint32_t MinorVersion;
};
if (Desc.size() != sizeof(CodeObjectVersion))
return {"AMD HSA Code Object Version",
"Invalid AMD HSA Code Object Version"};
std::string VersionString;
raw_string_ostream StrOS(VersionString);
auto Version = reinterpret_cast<const CodeObjectVersion *>(Desc.data());
StrOS << "[Major: " << Version->MajorVersion
<< ", Minor: " << Version->MinorVersion << "]";
return {"AMD HSA Code Object Version", VersionString};
}
case ELF::NT_AMD_HSA_HSAIL: {
struct HSAILProperties {
uint32_t HSAILMajorVersion;
uint32_t HSAILMinorVersion;
uint8_t Profile;
uint8_t MachineModel;
uint8_t DefaultFloatRound;
};
if (Desc.size() != sizeof(HSAILProperties))
return {"AMD HSA HSAIL Properties", "Invalid AMD HSA HSAIL Properties"};
auto Properties = reinterpret_cast<const HSAILProperties *>(Desc.data());
std::string HSAILPropetiesString;
raw_string_ostream StrOS(HSAILPropetiesString);
StrOS << "[HSAIL Major: " << Properties->HSAILMajorVersion
<< ", HSAIL Minor: " << Properties->HSAILMinorVersion
<< ", Profile: " << Properties->Profile
<< ", Machine Model: " << Properties->MachineModel
<< ", Default Float Round: " << Properties->DefaultFloatRound << "]";
return {"AMD HSA HSAIL Properties", HSAILPropetiesString};
}
case ELF::NT_AMD_HSA_ISA_VERSION: {
struct IsaVersion {
uint16_t VendorNameSize;
uint16_t ArchitectureNameSize;
uint32_t Major;
uint32_t Minor;
uint32_t Stepping;
};
if (Desc.size() < sizeof(IsaVersion))
return {"AMD HSA ISA Version", "Invalid AMD HSA ISA Version"};
auto Isa = reinterpret_cast<const IsaVersion *>(Desc.data());
if (Desc.size() < sizeof(IsaVersion) +
Isa->VendorNameSize + Isa->ArchitectureNameSize ||
Isa->VendorNameSize == 0 || Isa->ArchitectureNameSize == 0)
return {"AMD HSA ISA Version", "Invalid AMD HSA ISA Version"};
std::string IsaString;
raw_string_ostream StrOS(IsaString);
StrOS << "[Vendor: "
<< StringRef((const char*)Desc.data() + sizeof(IsaVersion), Isa->VendorNameSize - 1)
<< ", Architecture: "
<< StringRef((const char*)Desc.data() + sizeof(IsaVersion) + Isa->VendorNameSize,
Isa->ArchitectureNameSize - 1)
<< ", Major: " << Isa->Major << ", Minor: " << Isa->Minor
<< ", Stepping: " << Isa->Stepping << "]";
return {"AMD HSA ISA Version", IsaString};
}
case ELF::NT_AMD_HSA_METADATA: {
if (Desc.size() == 0)
return {"AMD HSA Metadata", ""};
return {
"HSA Metadata",
std::string(reinterpret_cast<const char *>(Desc.data()), Desc.size())};
case ELF::NT_AMD_AMDGPU_ISA:
"AMD HSA Metadata",
std::string(reinterpret_cast<const char *>(Desc.data()), Desc.size() - 1)};
}
case ELF::NT_AMD_HSA_ISA_NAME: {
if (Desc.size() == 0)
return {"AMD HSA ISA Name", ""};
return {
"ISA Version",
"AMD HSA ISA Name",
std::string(reinterpret_cast<const char *>(Desc.data()), Desc.size())};
}
case ELF::NT_AMD_PAL_METADATA: {
struct PALMetadata {
uint32_t Key;
uint32_t Value;
};
auto Isa = reinterpret_cast<const PALMetadata *>(Desc.data());
std::string MetadataString;
raw_string_ostream StrOS(MetadataString);
for (size_t I = 0, E = Desc.size() / sizeof(PALMetadata); I < E; ++E) {
StrOS << "[" << Isa[I].Key << ": " << Isa[I].Value << "]";
}
return {"AMD PAL Metadata", MetadataString};
}
}
}
struct AMDGPUNote {
@ -4973,11 +5108,11 @@ static AMDGPUNote getAMDGPUNote(uint32_t NoteType, ArrayRef<uint8_t> Desc) {
return {"", ""};
AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
std::string HSAMetadataString;
std::string MetadataString;
if (!Verifier.verify(MsgPackDoc.getRoot()))
HSAMetadataString = "Invalid AMDGPU Metadata\n";
MetadataString = "Invalid AMDGPU Metadata\n";
raw_string_ostream StrOS(HSAMetadataString);
raw_string_ostream StrOS(MetadataString);
if (MsgPackDoc.getRoot().isScalar()) {
// TODO: passing a scalar root to toYAML() asserts:
// (PolymorphicTraits<T>::getKind(Val) != NodeKind::Scalar &&
@ -5106,11 +5241,13 @@ static const NoteType FreeBSDNoteTypes[] = {
};
static const NoteType AMDNoteTypes[] = {
{ELF::NT_AMD_AMDGPU_HSA_METADATA,
"NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata)"},
{ELF::NT_AMD_AMDGPU_ISA, "NT_AMD_AMDGPU_ISA (ISA Version)"},
{ELF::NT_AMD_AMDGPU_PAL_METADATA,
"NT_AMD_AMDGPU_PAL_METADATA (PAL Metadata)"},
{ELF::NT_AMD_HSA_CODE_OBJECT_VERSION,
"NT_AMD_HSA_CODE_OBJECT_VERSION (AMD HSA Code Object Version)"},
{ELF::NT_AMD_HSA_HSAIL, "NT_AMD_HSA_HSAIL (AMD HSA HSAIL Properties)"},
{ELF::NT_AMD_HSA_ISA_VERSION, "NT_AMD_HSA_ISA_VERSION (AMD HSA ISA Version)"},
{ELF::NT_AMD_HSA_METADATA, "NT_AMD_HSA_METADATA (AMD HSA Metadata)"},
{ELF::NT_AMD_HSA_ISA_NAME, "NT_AMD_HSA_ISA_NAME (AMD HSA ISA Name)"},
{ELF::NT_AMD_PAL_METADATA, "NT_AMD_PAL_METADATA (AMD PAL Metadata)"},
};
static const NoteType AMDGPUNoteTypes[] = {
@ -6050,10 +6187,28 @@ template <class ELFT> void LLVMELFDumper<ELFT>::printFileHeaders() {
W.printFlags("Flags", E.e_flags, makeArrayRef(ElfHeaderMipsFlags),
unsigned(ELF::EF_MIPS_ARCH), unsigned(ELF::EF_MIPS_ABI),
unsigned(ELF::EF_MIPS_MACH));
else if (E.e_machine == EM_AMDGPU)
W.printFlags("Flags", E.e_flags, makeArrayRef(ElfHeaderAMDGPUFlags),
unsigned(ELF::EF_AMDGPU_MACH));
else if (E.e_machine == EM_RISCV)
else if (E.e_machine == EM_AMDGPU) {
switch (E.e_ident[ELF::EI_ABIVERSION]) {
default:
W.printHex("Flags", E.e_flags);
break;
case 0:
// ELFOSABI_AMDGPU_PAL, ELFOSABI_AMDGPU_MESA3D support *_V3 flags.
LLVM_FALLTHROUGH;
case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
W.printFlags("Flags", E.e_flags,
makeArrayRef(ElfHeaderAMDGPUFlagsABIVersion3),
unsigned(ELF::EF_AMDGPU_MACH));
break;
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
W.printFlags("Flags", E.e_flags,
makeArrayRef(ElfHeaderAMDGPUFlagsABIVersion4),
unsigned(ELF::EF_AMDGPU_MACH),
unsigned(ELF::EF_AMDGPU_FEATURE_XNACK_V4),
unsigned(ELF::EF_AMDGPU_FEATURE_SRAMECC_V4));
break;
}
} else if (E.e_machine == EM_RISCV)
W.printFlags("Flags", E.e_flags, makeArrayRef(ElfHeaderRISCVFlags));
else
W.printFlags("Flags", E.e_flags);