mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[AMDGPU] Emit MessagePack HSA Metadata for v3 code object
Continue to present HSA metadata as YAML in ASM and when output by tools (e.g. llvm-readobj), but encode it in Messagepack in the code object. Differential Revision: https://reviews.llvm.org/D48179 llvm-svn: 348963
This commit is contained in:
parent
50f1817976
commit
2405f803ac
70
include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h
Normal file
70
include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h
Normal file
@ -0,0 +1,70 @@
|
||||
//===- AMDGPUMetadataVerifier.h - MsgPack Types -----------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
/// This is a verifier for AMDGPU HSA metadata, which can verify both
|
||||
/// well-typed metadata and untyped metadata. When verifying in the non-strict
|
||||
/// mode, untyped metadata is coerced into the correct type if possible.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_BINARYFORMAT_AMDGPUMETADATAVERIFIER_H
|
||||
#define LLVM_BINARYFORMAT_AMDGPUMETADATAVERIFIER_H
|
||||
|
||||
#include "llvm/BinaryFormat/MsgPackTypes.h"
|
||||
|
||||
namespace llvm {
|
||||
namespace AMDGPU {
|
||||
namespace HSAMD {
|
||||
namespace V3 {
|
||||
|
||||
/// Verifier for AMDGPU HSA metadata.
|
||||
///
|
||||
/// Operates in two modes:
|
||||
///
|
||||
/// In strict mode, metadata must already be well-typed.
|
||||
///
|
||||
/// In non-strict mode, metadata is coerced into expected types when possible.
|
||||
class MetadataVerifier {
|
||||
bool Strict;
|
||||
|
||||
bool verifyScalar(msgpack::Node &Node, msgpack::ScalarNode::ScalarKind SKind,
|
||||
function_ref<bool(msgpack::ScalarNode &)> verifyValue = {});
|
||||
bool verifyInteger(msgpack::Node &Node);
|
||||
bool verifyArray(msgpack::Node &Node,
|
||||
function_ref<bool(msgpack::Node &)> verifyNode,
|
||||
Optional<size_t> Size = None);
|
||||
bool verifyEntry(msgpack::MapNode &MapNode, StringRef Key, bool Required,
|
||||
function_ref<bool(msgpack::Node &)> verifyNode);
|
||||
bool
|
||||
verifyScalarEntry(msgpack::MapNode &MapNode, StringRef Key, bool Required,
|
||||
msgpack::ScalarNode::ScalarKind SKind,
|
||||
function_ref<bool(msgpack::ScalarNode &)> verifyValue = {});
|
||||
bool verifyIntegerEntry(msgpack::MapNode &MapNode, StringRef Key,
|
||||
bool Required);
|
||||
bool verifyKernelArgs(msgpack::Node &Node);
|
||||
bool verifyKernel(msgpack::Node &Node);
|
||||
|
||||
public:
|
||||
/// Construct a MetadataVerifier, specifying whether it will operate in \p
|
||||
/// Strict mode.
|
||||
MetadataVerifier(bool Strict) : Strict(Strict) {}
|
||||
|
||||
/// Verify given HSA metadata.
|
||||
///
|
||||
/// \returns True when successful, false when metadata is invalid.
|
||||
bool verify(msgpack::Node &HSAMetadataRoot);
|
||||
};
|
||||
|
||||
} // end namespace V3
|
||||
} // end namespace HSAMD
|
||||
} // end namespace AMDGPU
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // LLVM_BINARYFORMAT_AMDGPUMETADATAVERIFIER_H
|
@ -1361,7 +1361,7 @@ enum {
|
||||
GNU_PROPERTY_X86_FEATURE_1_SHSTK = 1 << 1
|
||||
};
|
||||
|
||||
// AMDGPU specific notes.
|
||||
// AMD specific notes. (Code Object V2)
|
||||
enum {
|
||||
// Note types with values between 0 and 9 (inclusive) are reserved.
|
||||
NT_AMD_AMDGPU_HSA_METADATA = 10,
|
||||
@ -1369,6 +1369,12 @@ enum {
|
||||
NT_AMD_AMDGPU_PAL_METADATA = 12
|
||||
};
|
||||
|
||||
// AMDGPU specific notes. (Code Object V3)
|
||||
enum {
|
||||
// Note types with values between 0 and 31 (inclusive) are reserved.
|
||||
NT_AMDGPU_METADATA = 32
|
||||
};
|
||||
|
||||
enum {
|
||||
GNU_ABI_TAG_LINUX = 0,
|
||||
GNU_ABI_TAG_HURD = 1,
|
||||
|
@ -431,6 +431,21 @@ std::error_code fromString(std::string String, Metadata &HSAMetadata);
|
||||
/// Converts \p HSAMetadata to \p String.
|
||||
std::error_code toString(Metadata HSAMetadata, std::string &String);
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// HSA metadata for v3 code object.
|
||||
//===----------------------------------------------------------------------===//
|
||||
namespace V3 {
|
||||
/// HSA metadata major version.
|
||||
constexpr uint32_t VersionMajor = 1;
|
||||
/// HSA metadata minor version.
|
||||
constexpr uint32_t VersionMinor = 0;
|
||||
|
||||
/// HSA metadata beginning assembler directive.
|
||||
constexpr char AssemblerDirectiveBegin[] = ".amdgpu_metadata";
|
||||
/// HSA metadata ending assembler directive.
|
||||
constexpr char AssemblerDirectiveEnd[] = ".end_amdgpu_metadata";
|
||||
} // end namespace V3
|
||||
|
||||
} // end namespace HSAMD
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
324
lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
Normal file
324
lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
Normal file
@ -0,0 +1,324 @@
|
||||
//===- AMDGPUMetadataVerifier.cpp - MsgPack Types ---------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
/// Implements a verifier for AMDGPU HSA metadata.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
|
||||
#include "llvm/Support/AMDGPUMetadata.h"
|
||||
|
||||
namespace llvm {
|
||||
namespace AMDGPU {
|
||||
namespace HSAMD {
|
||||
namespace V3 {
|
||||
|
||||
bool MetadataVerifier::verifyScalar(
|
||||
msgpack::Node &Node, msgpack::ScalarNode::ScalarKind SKind,
|
||||
function_ref<bool(msgpack::ScalarNode &)> verifyValue) {
|
||||
auto ScalarPtr = dyn_cast<msgpack::ScalarNode>(&Node);
|
||||
if (!ScalarPtr)
|
||||
return false;
|
||||
auto &Scalar = *ScalarPtr;
|
||||
// Do not output extraneous tags for types we know from the spec.
|
||||
Scalar.IgnoreTag = true;
|
||||
if (Scalar.getScalarKind() != SKind) {
|
||||
if (Strict)
|
||||
return false;
|
||||
// If we are not strict, we interpret string values as "implicitly typed"
|
||||
// and attempt to coerce them to the expected type here.
|
||||
if (Scalar.getScalarKind() != msgpack::ScalarNode::SK_String)
|
||||
return false;
|
||||
std::string StringValue = Scalar.getString();
|
||||
Scalar.setScalarKind(SKind);
|
||||
if (Scalar.inputYAML(StringValue) != StringRef())
|
||||
return false;
|
||||
}
|
||||
if (verifyValue)
|
||||
return verifyValue(Scalar);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool MetadataVerifier::verifyInteger(msgpack::Node &Node) {
|
||||
if (!verifyScalar(Node, msgpack::ScalarNode::SK_UInt))
|
||||
if (!verifyScalar(Node, msgpack::ScalarNode::SK_Int))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool MetadataVerifier::verifyArray(
|
||||
msgpack::Node &Node, function_ref<bool(msgpack::Node &)> verifyNode,
|
||||
Optional<size_t> Size) {
|
||||
auto ArrayPtr = dyn_cast<msgpack::ArrayNode>(&Node);
|
||||
if (!ArrayPtr)
|
||||
return false;
|
||||
auto &Array = *ArrayPtr;
|
||||
if (Size && Array.size() != *Size)
|
||||
return false;
|
||||
for (auto &Item : Array)
|
||||
if (!verifyNode(*Item.get()))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool MetadataVerifier::verifyEntry(
|
||||
msgpack::MapNode &MapNode, StringRef Key, bool Required,
|
||||
function_ref<bool(msgpack::Node &)> verifyNode) {
|
||||
auto Entry = MapNode.find(Key);
|
||||
if (Entry == MapNode.end())
|
||||
return !Required;
|
||||
return verifyNode(*Entry->second.get());
|
||||
}
|
||||
|
||||
bool MetadataVerifier::verifyScalarEntry(
|
||||
msgpack::MapNode &MapNode, StringRef Key, bool Required,
|
||||
msgpack::ScalarNode::ScalarKind SKind,
|
||||
function_ref<bool(msgpack::ScalarNode &)> verifyValue) {
|
||||
return verifyEntry(MapNode, Key, Required, [=](msgpack::Node &Node) {
|
||||
return verifyScalar(Node, SKind, verifyValue);
|
||||
});
|
||||
}
|
||||
|
||||
bool MetadataVerifier::verifyIntegerEntry(msgpack::MapNode &MapNode,
|
||||
StringRef Key, bool Required) {
|
||||
return verifyEntry(MapNode, Key, Required, [this](msgpack::Node &Node) {
|
||||
return verifyInteger(Node);
|
||||
});
|
||||
}
|
||||
|
||||
bool MetadataVerifier::verifyKernelArgs(msgpack::Node &Node) {
|
||||
auto ArgsMapPtr = dyn_cast<msgpack::MapNode>(&Node);
|
||||
if (!ArgsMapPtr)
|
||||
return false;
|
||||
auto &ArgsMap = *ArgsMapPtr;
|
||||
|
||||
if (!verifyScalarEntry(ArgsMap, ".name", false,
|
||||
msgpack::ScalarNode::SK_String))
|
||||
return false;
|
||||
if (!verifyScalarEntry(ArgsMap, ".type_name", false,
|
||||
msgpack::ScalarNode::SK_String))
|
||||
return false;
|
||||
if (!verifyIntegerEntry(ArgsMap, ".size", true))
|
||||
return false;
|
||||
if (!verifyIntegerEntry(ArgsMap, ".offset", true))
|
||||
return false;
|
||||
if (!verifyScalarEntry(ArgsMap, ".value_kind", true,
|
||||
msgpack::ScalarNode::SK_String,
|
||||
[](msgpack::ScalarNode &SNode) {
|
||||
return StringSwitch<bool>(SNode.getString())
|
||||
.Case("by_value", true)
|
||||
.Case("global_buffer", true)
|
||||
.Case("dynamic_shared_pointer", true)
|
||||
.Case("sampler", true)
|
||||
.Case("image", true)
|
||||
.Case("pipe", true)
|
||||
.Case("queue", true)
|
||||
.Case("hidden_global_offset_x", true)
|
||||
.Case("hidden_global_offset_y", true)
|
||||
.Case("hidden_global_offset_z", true)
|
||||
.Case("hidden_none", true)
|
||||
.Case("hidden_printf_buffer", true)
|
||||
.Case("hidden_default_queue", true)
|
||||
.Case("hidden_completion_action", true)
|
||||
.Default(false);
|
||||
}))
|
||||
return false;
|
||||
if (!verifyScalarEntry(ArgsMap, ".value_type", true,
|
||||
msgpack::ScalarNode::SK_String,
|
||||
[](msgpack::ScalarNode &SNode) {
|
||||
return StringSwitch<bool>(SNode.getString())
|
||||
.Case("struct", true)
|
||||
.Case("i8", true)
|
||||
.Case("u8", true)
|
||||
.Case("i16", true)
|
||||
.Case("u16", true)
|
||||
.Case("f16", true)
|
||||
.Case("i32", true)
|
||||
.Case("u32", true)
|
||||
.Case("f32", true)
|
||||
.Case("i64", true)
|
||||
.Case("u64", true)
|
||||
.Case("f64", true)
|
||||
.Default(false);
|
||||
}))
|
||||
return false;
|
||||
if (!verifyIntegerEntry(ArgsMap, ".pointee_align", false))
|
||||
return false;
|
||||
if (!verifyScalarEntry(ArgsMap, ".address_space", false,
|
||||
msgpack::ScalarNode::SK_String,
|
||||
[](msgpack::ScalarNode &SNode) {
|
||||
return StringSwitch<bool>(SNode.getString())
|
||||
.Case("private", true)
|
||||
.Case("global", true)
|
||||
.Case("constant", true)
|
||||
.Case("local", true)
|
||||
.Case("generic", true)
|
||||
.Case("region", true)
|
||||
.Default(false);
|
||||
}))
|
||||
return false;
|
||||
if (!verifyScalarEntry(ArgsMap, ".access", false,
|
||||
msgpack::ScalarNode::SK_String,
|
||||
[](msgpack::ScalarNode &SNode) {
|
||||
return StringSwitch<bool>(SNode.getString())
|
||||
.Case("read_only", true)
|
||||
.Case("write_only", true)
|
||||
.Case("read_write", true)
|
||||
.Default(false);
|
||||
}))
|
||||
return false;
|
||||
if (!verifyScalarEntry(ArgsMap, ".actual_access", false,
|
||||
msgpack::ScalarNode::SK_String,
|
||||
[](msgpack::ScalarNode &SNode) {
|
||||
return StringSwitch<bool>(SNode.getString())
|
||||
.Case("read_only", true)
|
||||
.Case("write_only", true)
|
||||
.Case("read_write", true)
|
||||
.Default(false);
|
||||
}))
|
||||
return false;
|
||||
if (!verifyScalarEntry(ArgsMap, ".is_const", false,
|
||||
msgpack::ScalarNode::SK_Boolean))
|
||||
return false;
|
||||
if (!verifyScalarEntry(ArgsMap, ".is_restrict", false,
|
||||
msgpack::ScalarNode::SK_Boolean))
|
||||
return false;
|
||||
if (!verifyScalarEntry(ArgsMap, ".is_volatile", false,
|
||||
msgpack::ScalarNode::SK_Boolean))
|
||||
return false;
|
||||
if (!verifyScalarEntry(ArgsMap, ".is_pipe", false,
|
||||
msgpack::ScalarNode::SK_Boolean))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool MetadataVerifier::verifyKernel(msgpack::Node &Node) {
|
||||
auto KernelMapPtr = dyn_cast<msgpack::MapNode>(&Node);
|
||||
if (!KernelMapPtr)
|
||||
return false;
|
||||
auto &KernelMap = *KernelMapPtr;
|
||||
|
||||
if (!verifyScalarEntry(KernelMap, ".name", true,
|
||||
msgpack::ScalarNode::SK_String))
|
||||
return false;
|
||||
if (!verifyScalarEntry(KernelMap, ".symbol", true,
|
||||
msgpack::ScalarNode::SK_String))
|
||||
return false;
|
||||
if (!verifyScalarEntry(KernelMap, ".language", false,
|
||||
msgpack::ScalarNode::SK_String,
|
||||
[](msgpack::ScalarNode &SNode) {
|
||||
return StringSwitch<bool>(SNode.getString())
|
||||
.Case("OpenCL C", true)
|
||||
.Case("OpenCL C++", true)
|
||||
.Case("HCC", true)
|
||||
.Case("HIP", true)
|
||||
.Case("OpenMP", true)
|
||||
.Case("Assembler", true)
|
||||
.Default(false);
|
||||
}))
|
||||
return false;
|
||||
if (!verifyEntry(
|
||||
KernelMap, ".language_version", false, [this](msgpack::Node &Node) {
|
||||
return verifyArray(
|
||||
Node,
|
||||
[this](msgpack::Node &Node) { return verifyInteger(Node); }, 2);
|
||||
}))
|
||||
return false;
|
||||
if (!verifyEntry(KernelMap, ".args", false, [this](msgpack::Node &Node) {
|
||||
return verifyArray(Node, [this](msgpack::Node &Node) {
|
||||
return verifyKernelArgs(Node);
|
||||
});
|
||||
}))
|
||||
return false;
|
||||
if (!verifyEntry(KernelMap, ".reqd_workgroup_size", false,
|
||||
[this](msgpack::Node &Node) {
|
||||
return verifyArray(Node,
|
||||
[this](msgpack::Node &Node) {
|
||||
return verifyInteger(Node);
|
||||
},
|
||||
3);
|
||||
}))
|
||||
return false;
|
||||
if (!verifyEntry(KernelMap, ".workgroup_size_hint", false,
|
||||
[this](msgpack::Node &Node) {
|
||||
return verifyArray(Node,
|
||||
[this](msgpack::Node &Node) {
|
||||
return verifyInteger(Node);
|
||||
},
|
||||
3);
|
||||
}))
|
||||
return false;
|
||||
if (!verifyScalarEntry(KernelMap, ".vec_type_hint", false,
|
||||
msgpack::ScalarNode::SK_String))
|
||||
return false;
|
||||
if (!verifyScalarEntry(KernelMap, ".device_enqueue_symbol", false,
|
||||
msgpack::ScalarNode::SK_String))
|
||||
return false;
|
||||
if (!verifyIntegerEntry(KernelMap, ".kernarg_segment_size", true))
|
||||
return false;
|
||||
if (!verifyIntegerEntry(KernelMap, ".group_segment_fixed_size", true))
|
||||
return false;
|
||||
if (!verifyIntegerEntry(KernelMap, ".private_segment_fixed_size", true))
|
||||
return false;
|
||||
if (!verifyIntegerEntry(KernelMap, ".kernarg_segment_align", true))
|
||||
return false;
|
||||
if (!verifyIntegerEntry(KernelMap, ".wavefront_size", true))
|
||||
return false;
|
||||
if (!verifyIntegerEntry(KernelMap, ".sgpr_count", true))
|
||||
return false;
|
||||
if (!verifyIntegerEntry(KernelMap, ".vgpr_count", true))
|
||||
return false;
|
||||
if (!verifyIntegerEntry(KernelMap, ".max_flat_workgroup_size", true))
|
||||
return false;
|
||||
if (!verifyIntegerEntry(KernelMap, ".sgpr_spill_count", false))
|
||||
return false;
|
||||
if (!verifyIntegerEntry(KernelMap, ".vgpr_spill_count", false))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool MetadataVerifier::verify(msgpack::Node &HSAMetadataRoot) {
|
||||
auto RootMapPtr = dyn_cast<msgpack::MapNode>(&HSAMetadataRoot);
|
||||
if (!RootMapPtr)
|
||||
return false;
|
||||
auto &RootMap = *RootMapPtr;
|
||||
|
||||
if (!verifyEntry(
|
||||
RootMap, "amdhsa.version", true, [this](msgpack::Node &Node) {
|
||||
return verifyArray(
|
||||
Node,
|
||||
[this](msgpack::Node &Node) { return verifyInteger(Node); }, 2);
|
||||
}))
|
||||
return false;
|
||||
if (!verifyEntry(
|
||||
RootMap, "amdhsa.printf", false, [this](msgpack::Node &Node) {
|
||||
return verifyArray(Node, [this](msgpack::Node &Node) {
|
||||
return verifyScalar(Node, msgpack::ScalarNode::SK_String);
|
||||
});
|
||||
}))
|
||||
return false;
|
||||
if (!verifyEntry(RootMap, "amdhsa.kernels", true,
|
||||
[this](msgpack::Node &Node) {
|
||||
return verifyArray(Node, [this](msgpack::Node &Node) {
|
||||
return verifyKernel(Node);
|
||||
});
|
||||
}))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // end namespace V3
|
||||
} // end namespace HSAMD
|
||||
} // end namespace AMDGPU
|
||||
} // end namespace llvm
|
@ -1,4 +1,5 @@
|
||||
add_llvm_library(LLVMBinaryFormat
|
||||
AMDGPUMetadataVerifier.cpp
|
||||
Dwarf.cpp
|
||||
Magic.cpp
|
||||
MsgPackReader.cpp
|
||||
|
@ -46,6 +46,7 @@
|
||||
|
||||
using namespace llvm;
|
||||
using namespace llvm::AMDGPU;
|
||||
using namespace llvm::AMDGPU::HSAMD;
|
||||
|
||||
// TODO: This should get the default rounding mode from the kernel. We just set
|
||||
// the default here, but this could change if the OpenCL rounding mode pragmas
|
||||
@ -99,6 +100,10 @@ extern "C" void LLVMInitializeAMDGPUAsmPrinter() {
|
||||
AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
|
||||
std::unique_ptr<MCStreamer> Streamer)
|
||||
: AsmPrinter(TM, std::move(Streamer)) {
|
||||
if (IsaInfo::hasCodeObjectV3(getSTI()))
|
||||
HSAMetadataStream.reset(new MetadataStreamerV3());
|
||||
else
|
||||
HSAMetadataStream.reset(new MetadataStreamerV2());
|
||||
}
|
||||
|
||||
StringRef AMDGPUAsmPrinter::getPassName() const {
|
||||
@ -122,9 +127,6 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
|
||||
IsaInfo::streamIsaVersion(getSTI(), ExpectedTargetOS);
|
||||
|
||||
getTargetStreamer()->EmitDirectiveAMDGCNTarget(ExpectedTarget);
|
||||
|
||||
if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
|
||||
return;
|
||||
}
|
||||
|
||||
if (TM.getTargetTriple().getOS() != Triple::AMDHSA &&
|
||||
@ -132,11 +134,14 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
|
||||
return;
|
||||
|
||||
if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
|
||||
HSAMetadataStream.begin(M);
|
||||
HSAMetadataStream->begin(M);
|
||||
|
||||
if (TM.getTargetTriple().getOS() == Triple::AMDPAL)
|
||||
readPALMetadata(M);
|
||||
|
||||
if (IsaInfo::hasCodeObjectV3(getSTI()))
|
||||
return;
|
||||
|
||||
// HSA emits NT_AMDGPU_HSA_CODE_OBJECT_VERSION for code objects v2.
|
||||
if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
|
||||
getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1);
|
||||
@ -148,37 +153,38 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
|
||||
}
|
||||
|
||||
void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
|
||||
// TODO: Add metadata to code object v3.
|
||||
if (IsaInfo::hasCodeObjectV3(getSTI()) &&
|
||||
TM.getTargetTriple().getOS() == Triple::AMDHSA)
|
||||
return;
|
||||
|
||||
// Following code requires TargetStreamer to be present.
|
||||
if (!getTargetStreamer())
|
||||
return;
|
||||
|
||||
// Emit ISA Version (NT_AMD_AMDGPU_ISA).
|
||||
std::string ISAVersionString;
|
||||
raw_string_ostream ISAVersionStream(ISAVersionString);
|
||||
IsaInfo::streamIsaVersion(getSTI(), ISAVersionStream);
|
||||
getTargetStreamer()->EmitISAVersion(ISAVersionStream.str());
|
||||
if (!IsaInfo::hasCodeObjectV3(getSTI())) {
|
||||
// Emit ISA Version (NT_AMD_AMDGPU_ISA).
|
||||
std::string ISAVersionString;
|
||||
raw_string_ostream ISAVersionStream(ISAVersionString);
|
||||
IsaInfo::streamIsaVersion(getSTI(), ISAVersionStream);
|
||||
getTargetStreamer()->EmitISAVersion(ISAVersionStream.str());
|
||||
}
|
||||
|
||||
// Emit HSA Metadata (NT_AMD_AMDGPU_HSA_METADATA).
|
||||
if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
|
||||
HSAMetadataStream.end();
|
||||
getTargetStreamer()->EmitHSAMetadata(HSAMetadataStream.getHSAMetadata());
|
||||
HSAMetadataStream->end();
|
||||
bool Success = HSAMetadataStream->emitTo(*getTargetStreamer());
|
||||
(void)Success;
|
||||
assert(Success && "Malformed HSA Metadata");
|
||||
}
|
||||
|
||||
// Emit PAL Metadata (NT_AMD_AMDGPU_PAL_METADATA).
|
||||
if (TM.getTargetTriple().getOS() == Triple::AMDPAL) {
|
||||
// Copy the PAL metadata from the map where we collected it into a vector,
|
||||
// then write it as a .note.
|
||||
PALMD::Metadata PALMetadataVector;
|
||||
for (auto i : PALMetadataMap) {
|
||||
PALMetadataVector.push_back(i.first);
|
||||
PALMetadataVector.push_back(i.second);
|
||||
if (!IsaInfo::hasCodeObjectV3(getSTI())) {
|
||||
// Emit PAL Metadata (NT_AMD_AMDGPU_PAL_METADATA).
|
||||
if (TM.getTargetTriple().getOS() == Triple::AMDPAL) {
|
||||
// Copy the PAL metadata from the map where we collected it into a vector,
|
||||
// then write it as a .note.
|
||||
PALMD::Metadata PALMetadataVector;
|
||||
for (auto i : PALMetadataMap) {
|
||||
PALMetadataVector.push_back(i.first);
|
||||
PALMetadataVector.push_back(i.second);
|
||||
}
|
||||
getTargetStreamer()->EmitPALMetadata(PALMetadataVector);
|
||||
}
|
||||
getTargetStreamer()->EmitPALMetadata(PALMetadataVector);
|
||||
}
|
||||
}
|
||||
|
||||
@ -211,11 +217,8 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
|
||||
getTargetStreamer()->EmitAMDKernelCodeT(KernelCode);
|
||||
}
|
||||
|
||||
if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
|
||||
return;
|
||||
|
||||
if (!STM.hasCodeObjectV3() && STM.isAmdHsaOS())
|
||||
HSAMetadataStream.emitKernel(*MF, CurrentProgramInfo);
|
||||
if (STM.isAmdHsaOS())
|
||||
HSAMetadataStream->emitKernel(*MF, CurrentProgramInfo);
|
||||
}
|
||||
|
||||
void AMDGPUAsmPrinter::EmitFunctionBodyEnd() {
|
||||
|
@ -56,7 +56,7 @@ private:
|
||||
SIProgramInfo CurrentProgramInfo;
|
||||
DenseMap<const Function *, SIFunctionResourceInfo> CallGraphResourceInfo;
|
||||
|
||||
AMDGPU::HSAMD::MetadataStreamer HSAMetadataStream;
|
||||
std::unique_ptr<AMDGPU::HSAMD::MetadataStreamer> HSAMetadataStream;
|
||||
std::map<uint32_t, uint32_t> PALMetadataMap;
|
||||
|
||||
uint64_t getFunctionCodeSize(const MachineFunction &MF) const;
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "AMDGPUHSAMetadataStreamer.h"
|
||||
#include "AMDGPU.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
|
||||
#include "SIMachineFunctionInfo.h"
|
||||
#include "SIProgramInfo.h"
|
||||
#include "Utils/AMDGPUBaseInfo.h"
|
||||
@ -36,11 +37,14 @@ static cl::opt<bool> VerifyHSAMetadata(
|
||||
namespace AMDGPU {
|
||||
namespace HSAMD {
|
||||
|
||||
void MetadataStreamer::dump(StringRef HSAMetadataString) const {
|
||||
//===----------------------------------------------------------------------===//
|
||||
// HSAMetadataStreamerV2
|
||||
//===----------------------------------------------------------------------===//
|
||||
void MetadataStreamerV2::dump(StringRef HSAMetadataString) const {
|
||||
errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
|
||||
}
|
||||
|
||||
void MetadataStreamer::verify(StringRef HSAMetadataString) const {
|
||||
void MetadataStreamerV2::verify(StringRef HSAMetadataString) const {
|
||||
errs() << "AMDGPU HSA Metadata Parser Test: ";
|
||||
|
||||
HSAMD::Metadata FromHSAMetadataString;
|
||||
@ -63,7 +67,8 @@ void MetadataStreamer::verify(StringRef HSAMetadataString) const {
|
||||
}
|
||||
}
|
||||
|
||||
AccessQualifier MetadataStreamer::getAccessQualifier(StringRef AccQual) const {
|
||||
AccessQualifier
|
||||
MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const {
|
||||
if (AccQual.empty())
|
||||
return AccessQualifier::Unknown;
|
||||
|
||||
@ -74,7 +79,8 @@ AccessQualifier MetadataStreamer::getAccessQualifier(StringRef AccQual) const {
|
||||
.Default(AccessQualifier::Default);
|
||||
}
|
||||
|
||||
AddressSpaceQualifier MetadataStreamer::getAddressSpaceQualifer(
|
||||
AddressSpaceQualifier
|
||||
MetadataStreamerV2::getAddressSpaceQualifier(
|
||||
unsigned AddressSpace) const {
|
||||
switch (AddressSpace) {
|
||||
case AMDGPUAS::PRIVATE_ADDRESS:
|
||||
@ -94,8 +100,8 @@ AddressSpaceQualifier MetadataStreamer::getAddressSpaceQualifer(
|
||||
}
|
||||
}
|
||||
|
||||
ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual,
|
||||
StringRef BaseTypeName) const {
|
||||
ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual,
|
||||
StringRef BaseTypeName) const {
|
||||
if (TypeQual.find("pipe") != StringRef::npos)
|
||||
return ValueKind::Pipe;
|
||||
|
||||
@ -122,7 +128,7 @@ ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual,
|
||||
ValueKind::ByValue);
|
||||
}
|
||||
|
||||
ValueType MetadataStreamer::getValueType(Type *Ty, StringRef TypeName) const {
|
||||
ValueType MetadataStreamerV2::getValueType(Type *Ty, StringRef TypeName) const {
|
||||
switch (Ty->getTypeID()) {
|
||||
case Type::IntegerTyID: {
|
||||
auto Signed = !TypeName.startswith("u");
|
||||
@ -154,7 +160,7 @@ ValueType MetadataStreamer::getValueType(Type *Ty, StringRef TypeName) const {
|
||||
}
|
||||
}
|
||||
|
||||
std::string MetadataStreamer::getTypeName(Type *Ty, bool Signed) const {
|
||||
std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const {
|
||||
switch (Ty->getTypeID()) {
|
||||
case Type::IntegerTyID: {
|
||||
if (!Signed)
|
||||
@ -191,8 +197,8 @@ std::string MetadataStreamer::getTypeName(Type *Ty, bool Signed) const {
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<uint32_t> MetadataStreamer::getWorkGroupDimensions(
|
||||
MDNode *Node) const {
|
||||
std::vector<uint32_t>
|
||||
MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const {
|
||||
std::vector<uint32_t> Dims;
|
||||
if (Node->getNumOperands() != 3)
|
||||
return Dims;
|
||||
@ -202,9 +208,9 @@ std::vector<uint32_t> MetadataStreamer::getWorkGroupDimensions(
|
||||
return Dims;
|
||||
}
|
||||
|
||||
Kernel::CodeProps::Metadata MetadataStreamer::getHSACodeProps(
|
||||
const MachineFunction &MF,
|
||||
const SIProgramInfo &ProgramInfo) const {
|
||||
Kernel::CodeProps::Metadata
|
||||
MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
|
||||
const SIProgramInfo &ProgramInfo) const {
|
||||
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
|
||||
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
|
||||
HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
|
||||
@ -231,9 +237,9 @@ Kernel::CodeProps::Metadata MetadataStreamer::getHSACodeProps(
|
||||
return HSACodeProps;
|
||||
}
|
||||
|
||||
Kernel::DebugProps::Metadata MetadataStreamer::getHSADebugProps(
|
||||
const MachineFunction &MF,
|
||||
const SIProgramInfo &ProgramInfo) const {
|
||||
Kernel::DebugProps::Metadata
|
||||
MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF,
|
||||
const SIProgramInfo &ProgramInfo) const {
|
||||
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
|
||||
HSAMD::Kernel::DebugProps::Metadata HSADebugProps;
|
||||
|
||||
@ -253,14 +259,14 @@ Kernel::DebugProps::Metadata MetadataStreamer::getHSADebugProps(
|
||||
return HSADebugProps;
|
||||
}
|
||||
|
||||
void MetadataStreamer::emitVersion() {
|
||||
void MetadataStreamerV2::emitVersion() {
|
||||
auto &Version = HSAMetadata.mVersion;
|
||||
|
||||
Version.push_back(VersionMajor);
|
||||
Version.push_back(VersionMinor);
|
||||
}
|
||||
|
||||
void MetadataStreamer::emitPrintf(const Module &Mod) {
|
||||
void MetadataStreamerV2::emitPrintf(const Module &Mod) {
|
||||
auto &Printf = HSAMetadata.mPrintf;
|
||||
|
||||
auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
|
||||
@ -272,7 +278,7 @@ void MetadataStreamer::emitPrintf(const Module &Mod) {
|
||||
Printf.push_back(cast<MDString>(Op->getOperand(0))->getString());
|
||||
}
|
||||
|
||||
void MetadataStreamer::emitKernelLanguage(const Function &Func) {
|
||||
void MetadataStreamerV2::emitKernelLanguage(const Function &Func) {
|
||||
auto &Kernel = HSAMetadata.mKernels.back();
|
||||
|
||||
// TODO: What about other languages?
|
||||
@ -290,7 +296,7 @@ void MetadataStreamer::emitKernelLanguage(const Function &Func) {
|
||||
mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
|
||||
}
|
||||
|
||||
void MetadataStreamer::emitKernelAttrs(const Function &Func) {
|
||||
void MetadataStreamerV2::emitKernelAttrs(const Function &Func) {
|
||||
auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
|
||||
|
||||
if (auto Node = Func.getMetadata("reqd_work_group_size"))
|
||||
@ -308,14 +314,14 @@ void MetadataStreamer::emitKernelAttrs(const Function &Func) {
|
||||
}
|
||||
}
|
||||
|
||||
void MetadataStreamer::emitKernelArgs(const Function &Func) {
|
||||
void MetadataStreamerV2::emitKernelArgs(const Function &Func) {
|
||||
for (auto &Arg : Func.args())
|
||||
emitKernelArg(Arg);
|
||||
|
||||
emitHiddenKernelArgs(Func);
|
||||
}
|
||||
|
||||
void MetadataStreamer::emitKernelArg(const Argument &Arg) {
|
||||
void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
|
||||
auto Func = Arg.getParent();
|
||||
auto ArgNo = Arg.getArgNo();
|
||||
const MDNode *Node;
|
||||
@ -368,12 +374,12 @@ void MetadataStreamer::emitKernelArg(const Argument &Arg) {
|
||||
PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
|
||||
}
|
||||
|
||||
void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty,
|
||||
ValueKind ValueKind,
|
||||
unsigned PointeeAlign,
|
||||
StringRef Name,
|
||||
StringRef TypeName, StringRef BaseTypeName,
|
||||
StringRef AccQual, StringRef TypeQual) {
|
||||
void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty,
|
||||
ValueKind ValueKind,
|
||||
unsigned PointeeAlign, StringRef Name,
|
||||
StringRef TypeName,
|
||||
StringRef BaseTypeName,
|
||||
StringRef AccQual, StringRef TypeQual) {
|
||||
HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
|
||||
auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
|
||||
|
||||
@ -386,7 +392,7 @@ void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty,
|
||||
Arg.mPointeeAlign = PointeeAlign;
|
||||
|
||||
if (auto PtrTy = dyn_cast<PointerType>(Ty))
|
||||
Arg.mAddrSpaceQual = getAddressSpaceQualifer(PtrTy->getAddressSpace());
|
||||
Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
|
||||
|
||||
Arg.mAccQual = getAccessQualifier(AccQual);
|
||||
|
||||
@ -406,7 +412,7 @@ void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty,
|
||||
}
|
||||
}
|
||||
|
||||
void MetadataStreamer::emitHiddenKernelArgs(const Function &Func) {
|
||||
void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) {
|
||||
int HiddenArgNumBytes =
|
||||
getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
|
||||
|
||||
@ -448,12 +454,16 @@ void MetadataStreamer::emitHiddenKernelArgs(const Function &Func) {
|
||||
}
|
||||
}
|
||||
|
||||
void MetadataStreamer::begin(const Module &Mod) {
|
||||
bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
|
||||
return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
|
||||
}
|
||||
|
||||
void MetadataStreamerV2::begin(const Module &Mod) {
|
||||
emitVersion();
|
||||
emitPrintf(Mod);
|
||||
}
|
||||
|
||||
void MetadataStreamer::end() {
|
||||
void MetadataStreamerV2::end() {
|
||||
std::string HSAMetadataString;
|
||||
if (toString(HSAMetadata, HSAMetadataString))
|
||||
return;
|
||||
@ -464,7 +474,8 @@ void MetadataStreamer::end() {
|
||||
verify(HSAMetadataString);
|
||||
}
|
||||
|
||||
void MetadataStreamer::emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) {
|
||||
void MetadataStreamerV2::emitKernel(const MachineFunction &MF,
|
||||
const SIProgramInfo &ProgramInfo) {
|
||||
auto &Func = MF.getFunction();
|
||||
if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
|
||||
return;
|
||||
@ -484,6 +495,505 @@ void MetadataStreamer::emitKernel(const MachineFunction &MF, const SIProgramInfo
|
||||
HSAMetadata.mKernels.back().mDebugProps = DebugProps;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// HSAMetadataStreamerV3
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
void MetadataStreamerV3::dump(StringRef HSAMetadataString) const {
|
||||
errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
|
||||
}
|
||||
|
||||
void MetadataStreamerV3::verify(StringRef HSAMetadataString) const {
|
||||
errs() << "AMDGPU HSA Metadata Parser Test: ";
|
||||
|
||||
std::shared_ptr<msgpack::Node> FromHSAMetadataString =
|
||||
std::make_shared<msgpack::MapNode>();
|
||||
|
||||
yaml::Input YIn(HSAMetadataString);
|
||||
YIn >> FromHSAMetadataString;
|
||||
if (YIn.error()) {
|
||||
errs() << "FAIL\n";
|
||||
return;
|
||||
}
|
||||
|
||||
std::string ToHSAMetadataString;
|
||||
raw_string_ostream StrOS(ToHSAMetadataString);
|
||||
yaml::Output YOut(StrOS);
|
||||
YOut << FromHSAMetadataString;
|
||||
|
||||
errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
|
||||
if (HSAMetadataString != ToHSAMetadataString) {
|
||||
errs() << "Original input: " << HSAMetadataString << '\n'
|
||||
<< "Produced output: " << StrOS.str() << '\n';
|
||||
}
|
||||
}
|
||||
|
||||
Optional<StringRef>
|
||||
MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const {
|
||||
return StringSwitch<Optional<StringRef>>(AccQual)
|
||||
.Case("read_only", StringRef("read_only"))
|
||||
.Case("write_only", StringRef("write_only"))
|
||||
.Case("read_write", StringRef("read_write"))
|
||||
.Default(None);
|
||||
}
|
||||
|
||||
Optional<StringRef>
|
||||
MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const {
|
||||
switch (AddressSpace) {
|
||||
case AMDGPUAS::PRIVATE_ADDRESS:
|
||||
return StringRef("private");
|
||||
case AMDGPUAS::GLOBAL_ADDRESS:
|
||||
return StringRef("global");
|
||||
case AMDGPUAS::CONSTANT_ADDRESS:
|
||||
return StringRef("constant");
|
||||
case AMDGPUAS::LOCAL_ADDRESS:
|
||||
return StringRef("local");
|
||||
case AMDGPUAS::FLAT_ADDRESS:
|
||||
return StringRef("generic");
|
||||
case AMDGPUAS::REGION_ADDRESS:
|
||||
return StringRef("region");
|
||||
default:
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual,
|
||||
StringRef BaseTypeName) const {
|
||||
if (TypeQual.find("pipe") != StringRef::npos)
|
||||
return "pipe";
|
||||
|
||||
return StringSwitch<StringRef>(BaseTypeName)
|
||||
.Case("image1d_t", "image")
|
||||
.Case("image1d_array_t", "image")
|
||||
.Case("image1d_buffer_t", "image")
|
||||
.Case("image2d_t", "image")
|
||||
.Case("image2d_array_t", "image")
|
||||
.Case("image2d_array_depth_t", "image")
|
||||
.Case("image2d_array_msaa_t", "image")
|
||||
.Case("image2d_array_msaa_depth_t", "image")
|
||||
.Case("image2d_depth_t", "image")
|
||||
.Case("image2d_msaa_t", "image")
|
||||
.Case("image2d_msaa_depth_t", "image")
|
||||
.Case("image3d_t", "image")
|
||||
.Case("sampler_t", "sampler")
|
||||
.Case("queue_t", "queue")
|
||||
.Default(isa<PointerType>(Ty)
|
||||
? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
|
||||
? "dynamic_shared_pointer"
|
||||
: "global_buffer")
|
||||
: "by_value");
|
||||
}
|
||||
|
||||
StringRef MetadataStreamerV3::getValueType(Type *Ty, StringRef TypeName) const {
|
||||
switch (Ty->getTypeID()) {
|
||||
case Type::IntegerTyID: {
|
||||
auto Signed = !TypeName.startswith("u");
|
||||
switch (Ty->getIntegerBitWidth()) {
|
||||
case 8:
|
||||
return Signed ? "i8" : "u8";
|
||||
case 16:
|
||||
return Signed ? "i16" : "u16";
|
||||
case 32:
|
||||
return Signed ? "i32" : "u32";
|
||||
case 64:
|
||||
return Signed ? "i64" : "u64";
|
||||
default:
|
||||
return "struct";
|
||||
}
|
||||
}
|
||||
case Type::HalfTyID:
|
||||
return "f16";
|
||||
case Type::FloatTyID:
|
||||
return "f32";
|
||||
case Type::DoubleTyID:
|
||||
return "f64";
|
||||
case Type::PointerTyID:
|
||||
return getValueType(Ty->getPointerElementType(), TypeName);
|
||||
case Type::VectorTyID:
|
||||
return getValueType(Ty->getVectorElementType(), TypeName);
|
||||
default:
|
||||
return "struct";
|
||||
}
|
||||
}
|
||||
|
||||
std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const {
|
||||
switch (Ty->getTypeID()) {
|
||||
case Type::IntegerTyID: {
|
||||
if (!Signed)
|
||||
return (Twine('u') + getTypeName(Ty, true)).str();
|
||||
|
||||
auto BitWidth = Ty->getIntegerBitWidth();
|
||||
switch (BitWidth) {
|
||||
case 8:
|
||||
return "char";
|
||||
case 16:
|
||||
return "short";
|
||||
case 32:
|
||||
return "int";
|
||||
case 64:
|
||||
return "long";
|
||||
default:
|
||||
return (Twine('i') + Twine(BitWidth)).str();
|
||||
}
|
||||
}
|
||||
case Type::HalfTyID:
|
||||
return "half";
|
||||
case Type::FloatTyID:
|
||||
return "float";
|
||||
case Type::DoubleTyID:
|
||||
return "double";
|
||||
case Type::VectorTyID: {
|
||||
auto VecTy = cast<VectorType>(Ty);
|
||||
auto ElTy = VecTy->getElementType();
|
||||
auto NumElements = VecTy->getVectorNumElements();
|
||||
return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
|
||||
}
|
||||
default:
|
||||
return "unknown";
|
||||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<msgpack::ArrayNode>
|
||||
MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const {
|
||||
auto Dims = std::make_shared<msgpack::ArrayNode>();
|
||||
if (Node->getNumOperands() != 3)
|
||||
return Dims;
|
||||
|
||||
for (auto &Op : Node->operands())
|
||||
Dims->push_back(std::make_shared<msgpack::ScalarNode>(
|
||||
mdconst::extract<ConstantInt>(Op)->getZExtValue()));
|
||||
return Dims;
|
||||
}
|
||||
|
||||
void MetadataStreamerV3::emitVersion() {
|
||||
auto Version = std::make_shared<msgpack::ArrayNode>();
|
||||
Version->push_back(std::make_shared<msgpack::ScalarNode>(V3::VersionMajor));
|
||||
Version->push_back(std::make_shared<msgpack::ScalarNode>(V3::VersionMinor));
|
||||
getRootMetadata("amdhsa.version") = std::move(Version);
|
||||
}
|
||||
|
||||
void MetadataStreamerV3::emitPrintf(const Module &Mod) {
|
||||
auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
|
||||
if (!Node)
|
||||
return;
|
||||
|
||||
auto Printf = std::make_shared<msgpack::ArrayNode>();
|
||||
for (auto Op : Node->operands())
|
||||
if (Op->getNumOperands())
|
||||
Printf->push_back(std::make_shared<msgpack::ScalarNode>(
|
||||
cast<MDString>(Op->getOperand(0))->getString()));
|
||||
getRootMetadata("amdhsa.printf") = std::move(Printf);
|
||||
}
|
||||
|
||||
void MetadataStreamerV3::emitKernelLanguage(const Function &Func,
|
||||
msgpack::MapNode &Kern) {
|
||||
// TODO: What about other languages?
|
||||
auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
|
||||
if (!Node || !Node->getNumOperands())
|
||||
return;
|
||||
auto Op0 = Node->getOperand(0);
|
||||
if (Op0->getNumOperands() <= 1)
|
||||
return;
|
||||
|
||||
Kern[".language"] = std::make_shared<msgpack::ScalarNode>("OpenCL C");
|
||||
auto LanguageVersion = std::make_shared<msgpack::ArrayNode>();
|
||||
LanguageVersion->push_back(std::make_shared<msgpack::ScalarNode>(
|
||||
mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
|
||||
LanguageVersion->push_back(std::make_shared<msgpack::ScalarNode>(
|
||||
mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
|
||||
Kern[".language_version"] = std::move(LanguageVersion);
|
||||
}
|
||||
|
||||
void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
|
||||
msgpack::MapNode &Kern) {
|
||||
|
||||
if (auto Node = Func.getMetadata("reqd_work_group_size"))
|
||||
Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
|
||||
if (auto Node = Func.getMetadata("work_group_size_hint"))
|
||||
Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
|
||||
if (auto Node = Func.getMetadata("vec_type_hint")) {
|
||||
Kern[".vec_type_hint"] = std::make_shared<msgpack::ScalarNode>(getTypeName(
|
||||
cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
|
||||
mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()));
|
||||
}
|
||||
if (Func.hasFnAttribute("runtime-handle")) {
|
||||
Kern[".device_enqueue_symbol"] = std::make_shared<msgpack::ScalarNode>(
|
||||
Func.getFnAttribute("runtime-handle").getValueAsString().str());
|
||||
}
|
||||
}
|
||||
|
||||
void MetadataStreamerV3::emitKernelArgs(const Function &Func,
|
||||
msgpack::MapNode &Kern) {
|
||||
unsigned Offset = 0;
|
||||
auto Args = std::make_shared<msgpack::ArrayNode>();
|
||||
for (auto &Arg : Func.args())
|
||||
emitKernelArg(Arg, Offset, *Args);
|
||||
|
||||
emitHiddenKernelArgs(Func, Offset, *Args);
|
||||
|
||||
// TODO: What about other languages?
|
||||
if (Func.getParent()->getNamedMetadata("opencl.ocl.version")) {
|
||||
auto &DL = Func.getParent()->getDataLayout();
|
||||
auto Int64Ty = Type::getInt64Ty(Func.getContext());
|
||||
|
||||
emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, *Args);
|
||||
emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, *Args);
|
||||
emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, *Args);
|
||||
|
||||
auto Int8PtrTy =
|
||||
Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
|
||||
|
||||
// Emit "printf buffer" argument if printf is used, otherwise emit dummy
|
||||
// "none" argument.
|
||||
if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
|
||||
emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, *Args);
|
||||
else
|
||||
emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
|
||||
|
||||
// Emit "default queue" and "completion action" arguments if enqueue kernel
|
||||
// is used, otherwise emit dummy "none" arguments.
|
||||
if (Func.hasFnAttribute("calls-enqueue-kernel")) {
|
||||
emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, *Args);
|
||||
emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, *Args);
|
||||
} else {
|
||||
emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
|
||||
emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
|
||||
}
|
||||
}
|
||||
|
||||
Kern[".args"] = std::move(Args);
|
||||
}
|
||||
|
||||
void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
|
||||
msgpack::ArrayNode &Args) {
|
||||
auto Func = Arg.getParent();
|
||||
auto ArgNo = Arg.getArgNo();
|
||||
const MDNode *Node;
|
||||
|
||||
StringRef Name;
|
||||
Node = Func->getMetadata("kernel_arg_name");
|
||||
if (Node && ArgNo < Node->getNumOperands())
|
||||
Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
|
||||
else if (Arg.hasName())
|
||||
Name = Arg.getName();
|
||||
|
||||
StringRef TypeName;
|
||||
Node = Func->getMetadata("kernel_arg_type");
|
||||
if (Node && ArgNo < Node->getNumOperands())
|
||||
TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
|
||||
|
||||
StringRef BaseTypeName;
|
||||
Node = Func->getMetadata("kernel_arg_base_type");
|
||||
if (Node && ArgNo < Node->getNumOperands())
|
||||
BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
|
||||
|
||||
StringRef AccQual;
|
||||
if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
|
||||
Arg.hasNoAliasAttr()) {
|
||||
AccQual = "read_only";
|
||||
} else {
|
||||
Node = Func->getMetadata("kernel_arg_access_qual");
|
||||
if (Node && ArgNo < Node->getNumOperands())
|
||||
AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
|
||||
}
|
||||
|
||||
StringRef TypeQual;
|
||||
Node = Func->getMetadata("kernel_arg_type_qual");
|
||||
if (Node && ArgNo < Node->getNumOperands())
|
||||
TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
|
||||
|
||||
Type *Ty = Arg.getType();
|
||||
const DataLayout &DL = Func->getParent()->getDataLayout();
|
||||
|
||||
unsigned PointeeAlign = 0;
|
||||
if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
|
||||
if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
|
||||
PointeeAlign = Arg.getParamAlignment();
|
||||
if (PointeeAlign == 0)
|
||||
PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
|
||||
}
|
||||
}
|
||||
|
||||
emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(),
|
||||
getValueKind(Arg.getType(), TypeQual, BaseTypeName), Offset,
|
||||
Args, PointeeAlign, Name, TypeName, BaseTypeName, AccQual,
|
||||
TypeQual);
|
||||
}
|
||||
|
||||
void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty,
|
||||
StringRef ValueKind, unsigned &Offset,
|
||||
msgpack::ArrayNode &Args,
|
||||
unsigned PointeeAlign, StringRef Name,
|
||||
StringRef TypeName,
|
||||
StringRef BaseTypeName,
|
||||
StringRef AccQual, StringRef TypeQual) {
|
||||
auto ArgPtr = std::make_shared<msgpack::MapNode>();
|
||||
auto &Arg = *ArgPtr;
|
||||
|
||||
if (!Name.empty())
|
||||
Arg[".name"] = std::make_shared<msgpack::ScalarNode>(Name);
|
||||
if (!TypeName.empty())
|
||||
Arg[".type_name"] = std::make_shared<msgpack::ScalarNode>(TypeName);
|
||||
auto Size = DL.getTypeAllocSize(Ty);
|
||||
auto Align = DL.getABITypeAlignment(Ty);
|
||||
Arg[".size"] = std::make_shared<msgpack::ScalarNode>(Size);
|
||||
Offset = alignTo(Offset, Align);
|
||||
Arg[".offset"] = std::make_shared<msgpack::ScalarNode>(Offset);
|
||||
Offset += Size;
|
||||
Arg[".value_kind"] = std::make_shared<msgpack::ScalarNode>(ValueKind);
|
||||
Arg[".value_type"] =
|
||||
std::make_shared<msgpack::ScalarNode>(getValueType(Ty, BaseTypeName));
|
||||
if (PointeeAlign)
|
||||
Arg[".pointee_align"] = std::make_shared<msgpack::ScalarNode>(PointeeAlign);
|
||||
|
||||
if (auto PtrTy = dyn_cast<PointerType>(Ty))
|
||||
if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
|
||||
Arg[".address_space"] = std::make_shared<msgpack::ScalarNode>(*Qualifier);
|
||||
|
||||
if (auto AQ = getAccessQualifier(AccQual))
|
||||
Arg[".access"] = std::make_shared<msgpack::ScalarNode>(*AQ);
|
||||
|
||||
// TODO: Emit Arg[".actual_access"].
|
||||
|
||||
SmallVector<StringRef, 1> SplitTypeQuals;
|
||||
TypeQual.split(SplitTypeQuals, " ", -1, false);
|
||||
for (StringRef Key : SplitTypeQuals) {
|
||||
if (Key == "const")
|
||||
Arg[".is_const"] = std::make_shared<msgpack::ScalarNode>(true);
|
||||
else if (Key == "restrict")
|
||||
Arg[".is_restrict"] = std::make_shared<msgpack::ScalarNode>(true);
|
||||
else if (Key == "volatile")
|
||||
Arg[".is_volatile"] = std::make_shared<msgpack::ScalarNode>(true);
|
||||
else if (Key == "pipe")
|
||||
Arg[".is_pipe"] = std::make_shared<msgpack::ScalarNode>(true);
|
||||
}
|
||||
|
||||
Args.push_back(std::move(ArgPtr));
|
||||
}
|
||||
|
||||
void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
|
||||
unsigned &Offset,
|
||||
msgpack::ArrayNode &Args) {
|
||||
int HiddenArgNumBytes =
|
||||
getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
|
||||
|
||||
if (!HiddenArgNumBytes)
|
||||
return;
|
||||
|
||||
auto &DL = Func.getParent()->getDataLayout();
|
||||
auto Int64Ty = Type::getInt64Ty(Func.getContext());
|
||||
|
||||
if (HiddenArgNumBytes >= 8)
|
||||
emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, Args);
|
||||
if (HiddenArgNumBytes >= 16)
|
||||
emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, Args);
|
||||
if (HiddenArgNumBytes >= 24)
|
||||
emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, Args);
|
||||
|
||||
auto Int8PtrTy =
|
||||
Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
|
||||
|
||||
// Emit "printf buffer" argument if printf is used, otherwise emit dummy
|
||||
// "none" argument.
|
||||
if (HiddenArgNumBytes >= 32) {
|
||||
if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
|
||||
emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args);
|
||||
else
|
||||
emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
|
||||
}
|
||||
|
||||
// Emit "default queue" and "completion action" arguments if enqueue kernel is
|
||||
// used, otherwise emit dummy "none" arguments.
|
||||
if (HiddenArgNumBytes >= 48) {
|
||||
if (Func.hasFnAttribute("calls-enqueue-kernel")) {
|
||||
emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, Args);
|
||||
emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, Args);
|
||||
} else {
|
||||
emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
|
||||
emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<msgpack::MapNode>
|
||||
MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
|
||||
const SIProgramInfo &ProgramInfo) const {
|
||||
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
|
||||
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
|
||||
const Function &F = MF.getFunction();
|
||||
|
||||
auto HSAKernelProps = std::make_shared<msgpack::MapNode>();
|
||||
auto &Kern = *HSAKernelProps;
|
||||
|
||||
unsigned MaxKernArgAlign;
|
||||
Kern[".kernarg_segment_size"] = std::make_shared<msgpack::ScalarNode>(
|
||||
STM.getKernArgSegmentSize(F, MaxKernArgAlign));
|
||||
Kern[".group_segment_fixed_size"] =
|
||||
std::make_shared<msgpack::ScalarNode>(ProgramInfo.LDSSize);
|
||||
Kern[".private_segment_fixed_size"] =
|
||||
std::make_shared<msgpack::ScalarNode>(ProgramInfo.ScratchSize);
|
||||
Kern[".kernarg_segment_align"] =
|
||||
std::make_shared<msgpack::ScalarNode>(std::max(uint32_t(4), MaxKernArgAlign));
|
||||
Kern[".wavefront_size"] =
|
||||
std::make_shared<msgpack::ScalarNode>(STM.getWavefrontSize());
|
||||
Kern[".sgpr_count"] = std::make_shared<msgpack::ScalarNode>(ProgramInfo.NumSGPR);
|
||||
Kern[".vgpr_count"] = std::make_shared<msgpack::ScalarNode>(ProgramInfo.NumVGPR);
|
||||
Kern[".max_flat_workgroup_size"] =
|
||||
std::make_shared<msgpack::ScalarNode>(MFI.getMaxFlatWorkGroupSize());
|
||||
Kern[".sgpr_spill_count"] =
|
||||
std::make_shared<msgpack::ScalarNode>(MFI.getNumSpilledSGPRs());
|
||||
Kern[".vgpr_spill_count"] =
|
||||
std::make_shared<msgpack::ScalarNode>(MFI.getNumSpilledVGPRs());
|
||||
|
||||
return HSAKernelProps;
|
||||
}
|
||||
|
||||
bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
|
||||
return TargetStreamer.EmitHSAMetadata(getHSAMetadataRoot(), true);
|
||||
}
|
||||
|
||||
void MetadataStreamerV3::begin(const Module &Mod) {
|
||||
emitVersion();
|
||||
emitPrintf(Mod);
|
||||
getRootMetadata("amdhsa.kernels").reset(new msgpack::ArrayNode());
|
||||
}
|
||||
|
||||
void MetadataStreamerV3::end() {
|
||||
std::string HSAMetadataString;
|
||||
raw_string_ostream StrOS(HSAMetadataString);
|
||||
yaml::Output YOut(StrOS);
|
||||
YOut << HSAMetadataRoot;
|
||||
|
||||
if (DumpHSAMetadata)
|
||||
dump(StrOS.str());
|
||||
if (VerifyHSAMetadata)
|
||||
verify(StrOS.str());
|
||||
}
|
||||
|
||||
void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
|
||||
const SIProgramInfo &ProgramInfo) {
|
||||
auto &Func = MF.getFunction();
|
||||
auto KernelProps = getHSAKernelProps(MF, ProgramInfo);
|
||||
|
||||
assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
|
||||
Func.getCallingConv() == CallingConv::SPIR_KERNEL);
|
||||
|
||||
auto &KernelsNode = getRootMetadata("amdhsa.kernels");
|
||||
auto Kernels = cast<msgpack::ArrayNode>(KernelsNode.get());
|
||||
|
||||
{
|
||||
auto &Kern = *KernelProps;
|
||||
Kern[".name"] = std::make_shared<msgpack::ScalarNode>(Func.getName());
|
||||
Kern[".symbol"] = std::make_shared<msgpack::ScalarNode>(
|
||||
(Twine(Func.getName()) + Twine(".kd")).str());
|
||||
emitKernelLanguage(Func, Kern);
|
||||
emitKernelAttrs(Func, Kern);
|
||||
emitKernelArgs(Func, Kern);
|
||||
}
|
||||
|
||||
Kernels->push_back(std::move(KernelProps));
|
||||
}
|
||||
|
||||
} // end namespace HSAMD
|
||||
} // end namespace AMDGPU
|
||||
} // end namespace llvm
|
||||
|
@ -19,10 +19,12 @@
|
||||
#include "AMDGPU.h"
|
||||
#include "AMDKernelCodeT.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/BinaryFormat/MsgPackTypes.h"
|
||||
#include "llvm/Support/AMDGPUMetadata.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class AMDGPUTargetStreamer;
|
||||
class Argument;
|
||||
class DataLayout;
|
||||
class Function;
|
||||
@ -34,7 +36,92 @@ class Type;
|
||||
namespace AMDGPU {
|
||||
namespace HSAMD {
|
||||
|
||||
class MetadataStreamer final {
|
||||
class MetadataStreamer {
|
||||
public:
|
||||
virtual ~MetadataStreamer(){};
|
||||
|
||||
virtual bool emitTo(AMDGPUTargetStreamer &TargetStreamer) = 0;
|
||||
|
||||
virtual void begin(const Module &Mod) = 0;
|
||||
|
||||
virtual void end() = 0;
|
||||
|
||||
virtual void emitKernel(const MachineFunction &MF,
|
||||
const SIProgramInfo &ProgramInfo) = 0;
|
||||
};
|
||||
|
||||
class MetadataStreamerV3 final : public MetadataStreamer {
|
||||
private:
|
||||
std::shared_ptr<msgpack::Node> HSAMetadataRoot =
|
||||
std::make_shared<msgpack::MapNode>();
|
||||
|
||||
void dump(StringRef HSAMetadataString) const;
|
||||
|
||||
void verify(StringRef HSAMetadataString) const;
|
||||
|
||||
Optional<StringRef> getAccessQualifier(StringRef AccQual) const;
|
||||
|
||||
Optional<StringRef> getAddressSpaceQualifier(unsigned AddressSpace) const;
|
||||
|
||||
StringRef getValueKind(Type *Ty, StringRef TypeQual,
|
||||
StringRef BaseTypeName) const;
|
||||
|
||||
StringRef getValueType(Type *Ty, StringRef TypeName) const;
|
||||
|
||||
std::string getTypeName(Type *Ty, bool Signed) const;
|
||||
|
||||
std::shared_ptr<msgpack::ArrayNode>
|
||||
getWorkGroupDimensions(MDNode *Node) const;
|
||||
|
||||
std::shared_ptr<msgpack::MapNode>
|
||||
getHSAKernelProps(const MachineFunction &MF,
|
||||
const SIProgramInfo &ProgramInfo) const;
|
||||
|
||||
void emitVersion();
|
||||
|
||||
void emitPrintf(const Module &Mod);
|
||||
|
||||
void emitKernelLanguage(const Function &Func, msgpack::MapNode &Kern);
|
||||
|
||||
void emitKernelAttrs(const Function &Func, msgpack::MapNode &Kern);
|
||||
|
||||
void emitKernelArgs(const Function &Func, msgpack::MapNode &Kern);
|
||||
|
||||
void emitKernelArg(const Argument &Arg, unsigned &Offset,
|
||||
msgpack::ArrayNode &Args);
|
||||
|
||||
void emitKernelArg(const DataLayout &DL, Type *Ty, StringRef ValueKind,
|
||||
unsigned &Offset, msgpack::ArrayNode &Args,
|
||||
unsigned PointeeAlign = 0, StringRef Name = "",
|
||||
StringRef TypeName = "", StringRef BaseTypeName = "",
|
||||
StringRef AccQual = "", StringRef TypeQual = "");
|
||||
|
||||
void emitHiddenKernelArgs(const Function &Func, unsigned &Offset,
|
||||
msgpack::ArrayNode &Args);
|
||||
|
||||
std::shared_ptr<msgpack::Node> &getRootMetadata(StringRef Key) {
|
||||
return (*cast<msgpack::MapNode>(HSAMetadataRoot.get()))[Key];
|
||||
}
|
||||
|
||||
std::shared_ptr<msgpack::Node> &getHSAMetadataRoot() {
|
||||
return HSAMetadataRoot;
|
||||
}
|
||||
|
||||
public:
|
||||
MetadataStreamerV3() = default;
|
||||
~MetadataStreamerV3() = default;
|
||||
|
||||
bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override;
|
||||
|
||||
void begin(const Module &Mod) override;
|
||||
|
||||
void end() override;
|
||||
|
||||
void emitKernel(const MachineFunction &MF,
|
||||
const SIProgramInfo &ProgramInfo) override;
|
||||
};
|
||||
|
||||
class MetadataStreamerV2 final : public MetadataStreamer {
|
||||
private:
|
||||
Metadata HSAMetadata;
|
||||
|
||||
@ -44,7 +131,7 @@ private:
|
||||
|
||||
AccessQualifier getAccessQualifier(StringRef AccQual) const;
|
||||
|
||||
AddressSpaceQualifier getAddressSpaceQualifer(unsigned AddressSpace) const;
|
||||
AddressSpaceQualifier getAddressSpaceQualifier(unsigned AddressSpace) const;
|
||||
|
||||
ValueKind getValueKind(Type *Ty, StringRef TypeQual,
|
||||
StringRef BaseTypeName) const;
|
||||
@ -82,19 +169,22 @@ private:
|
||||
|
||||
void emitHiddenKernelArgs(const Function &Func);
|
||||
|
||||
public:
|
||||
MetadataStreamer() = default;
|
||||
~MetadataStreamer() = default;
|
||||
|
||||
const Metadata &getHSAMetadata() const {
|
||||
return HSAMetadata;
|
||||
}
|
||||
|
||||
void begin(const Module &Mod);
|
||||
public:
|
||||
MetadataStreamerV2() = default;
|
||||
~MetadataStreamerV2() = default;
|
||||
|
||||
void end();
|
||||
bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override;
|
||||
|
||||
void emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo);
|
||||
void begin(const Module &Mod) override;
|
||||
|
||||
void end() override;
|
||||
|
||||
void emitKernel(const MachineFunction &MF,
|
||||
const SIProgramInfo &ProgramInfo) override;
|
||||
};
|
||||
|
||||
} // end namespace HSAMD
|
||||
|
@ -23,7 +23,8 @@ namespace ElfNote {
|
||||
|
||||
const char SectionName[] = ".note";
|
||||
|
||||
const char NoteName[] = "AMD";
|
||||
const char NoteNameV2[] = "AMD";
|
||||
const char NoteNameV3[] = "AMDGPU";
|
||||
|
||||
// TODO: Remove this file once we drop code object v2.
|
||||
enum NoteType{
|
||||
|
@ -3065,9 +3065,18 @@ bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
|
||||
}
|
||||
|
||||
bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
|
||||
const char *AssemblerDirectiveBegin;
|
||||
const char *AssemblerDirectiveEnd;
|
||||
std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
|
||||
AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
|
||||
? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
|
||||
HSAMD::V3::AssemblerDirectiveEnd)
|
||||
: std::make_tuple(HSAMD::AssemblerDirectiveBegin,
|
||||
HSAMD::AssemblerDirectiveEnd);
|
||||
|
||||
if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
|
||||
return Error(getParser().getTok().getLoc(),
|
||||
(Twine(HSAMD::AssemblerDirectiveBegin) + Twine(" directive is "
|
||||
(Twine(AssemblerDirectiveBegin) + Twine(" directive is "
|
||||
"not available on non-amdhsa OSes")).str());
|
||||
}
|
||||
|
||||
@ -3085,7 +3094,7 @@ bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
|
||||
|
||||
if (getLexer().is(AsmToken::Identifier)) {
|
||||
StringRef ID = getLexer().getTok().getIdentifier();
|
||||
if (ID == AMDGPU::HSAMD::AssemblerDirectiveEnd) {
|
||||
if (ID == AssemblerDirectiveEnd) {
|
||||
Lex();
|
||||
FoundEnd = true;
|
||||
break;
|
||||
@ -3107,8 +3116,13 @@ bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
|
||||
|
||||
YamlStream.flush();
|
||||
|
||||
if (!getTargetStreamer().EmitHSAMetadata(HSAMetadataString))
|
||||
return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
|
||||
if (IsaInfo::hasCodeObjectV3(&getSTI())) {
|
||||
if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
|
||||
return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
|
||||
} else {
|
||||
if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
|
||||
return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
@ -3145,6 +3159,10 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
|
||||
|
||||
if (IDVal == ".amdhsa_kernel")
|
||||
return ParseDirectiveAMDHSAKernel();
|
||||
|
||||
// TODO: Restructure/combine with PAL metadata directive.
|
||||
if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
|
||||
return ParseDirectiveHSAMetadata();
|
||||
} else {
|
||||
if (IDVal == ".hsa_code_object_version")
|
||||
return ParseDirectiveHSACodeObjectVersion();
|
||||
@ -3160,10 +3178,10 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
|
||||
|
||||
if (IDVal == ".amd_amdgpu_isa")
|
||||
return ParseDirectiveISAVersion();
|
||||
}
|
||||
|
||||
if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
|
||||
return ParseDirectiveHSAMetadata();
|
||||
if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
|
||||
return ParseDirectiveHSAMetadata();
|
||||
}
|
||||
|
||||
if (IDVal == PALMD::AssemblerDirective)
|
||||
return ParseDirectivePALMetadata();
|
||||
|
@ -30,5 +30,5 @@ has_disassembler = 1
|
||||
type = Library
|
||||
name = AMDGPUCodeGen
|
||||
parent = AMDGPU
|
||||
required_libraries = Analysis AsmPrinter CodeGen Core IPO MC AMDGPUAsmPrinter AMDGPUDesc AMDGPUInfo AMDGPUUtils Scalar SelectionDAG Support Target TransformUtils Vectorize GlobalISel
|
||||
required_libraries = Analysis AsmPrinter CodeGen Core IPO MC AMDGPUAsmPrinter AMDGPUDesc AMDGPUInfo AMDGPUUtils Scalar SelectionDAG Support Target TransformUtils Vectorize GlobalISel BinaryFormat
|
||||
add_to_library_groups = AMDGPU
|
||||
|
@ -17,7 +17,9 @@
|
||||
#include "Utils/AMDGPUBaseInfo.h"
|
||||
#include "Utils/AMDKernelCodeTUtils.h"
|
||||
#include "llvm/ADT/Twine.h"
|
||||
#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
|
||||
#include "llvm/BinaryFormat/ELF.h"
|
||||
#include "llvm/BinaryFormat/MsgPackTypes.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/Metadata.h"
|
||||
@ -35,12 +37,13 @@ namespace llvm {
|
||||
|
||||
using namespace llvm;
|
||||
using namespace llvm::AMDGPU;
|
||||
using namespace llvm::AMDGPU::HSAMD;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AMDGPUTargetStreamer
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
bool AMDGPUTargetStreamer::EmitHSAMetadata(StringRef HSAMetadataString) {
|
||||
bool AMDGPUTargetStreamer::EmitHSAMetadataV2(StringRef HSAMetadataString) {
|
||||
HSAMD::Metadata HSAMetadata;
|
||||
if (HSAMD::fromString(HSAMetadataString, HSAMetadata))
|
||||
return false;
|
||||
@ -48,6 +51,15 @@ bool AMDGPUTargetStreamer::EmitHSAMetadata(StringRef HSAMetadataString) {
|
||||
return EmitHSAMetadata(HSAMetadata);
|
||||
}
|
||||
|
||||
bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) {
|
||||
std::shared_ptr<msgpack::Node> HSAMetadataRoot;
|
||||
yaml::Input YIn(HSAMetadataString);
|
||||
YIn >> HSAMetadataRoot;
|
||||
if (YIn.error())
|
||||
return false;
|
||||
return EmitHSAMetadata(HSAMetadataRoot, false);
|
||||
}
|
||||
|
||||
StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
|
||||
AMDGPU::GPUKind AK;
|
||||
|
||||
@ -195,9 +207,26 @@ bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
|
||||
if (HSAMD::toString(HSAMetadata, HSAMetadataString))
|
||||
return false;
|
||||
|
||||
OS << '\t' << HSAMD::AssemblerDirectiveBegin << '\n';
|
||||
OS << '\t' << AssemblerDirectiveBegin << '\n';
|
||||
OS << HSAMetadataString << '\n';
|
||||
OS << '\t' << HSAMD::AssemblerDirectiveEnd << '\n';
|
||||
OS << '\t' << AssemblerDirectiveEnd << '\n';
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
|
||||
std::shared_ptr<msgpack::Node> &HSAMetadataRoot, bool Strict) {
|
||||
V3::MetadataVerifier Verifier(Strict);
|
||||
if (!Verifier.verify(*HSAMetadataRoot))
|
||||
return false;
|
||||
|
||||
std::string HSAMetadataString;
|
||||
raw_string_ostream StrOS(HSAMetadataString);
|
||||
yaml::Output YOut(StrOS);
|
||||
YOut << HSAMetadataRoot;
|
||||
|
||||
OS << '\t' << V3::AssemblerDirectiveBegin << '\n';
|
||||
OS << StrOS.str() << '\n';
|
||||
OS << '\t' << V3::AssemblerDirectiveEnd << '\n';
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -358,13 +387,13 @@ MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
|
||||
return static_cast<MCELFStreamer &>(Streamer);
|
||||
}
|
||||
|
||||
void AMDGPUTargetELFStreamer::EmitAMDGPUNote(
|
||||
const MCExpr *DescSZ, unsigned NoteType,
|
||||
void AMDGPUTargetELFStreamer::EmitNote(
|
||||
StringRef Name, const MCExpr *DescSZ, unsigned NoteType,
|
||||
function_ref<void(MCELFStreamer &)> EmitDesc) {
|
||||
auto &S = getStreamer();
|
||||
auto &Context = S.getContext();
|
||||
|
||||
auto NameSZ = sizeof(ElfNote::NoteName);
|
||||
auto NameSZ = Name.size() + 1;
|
||||
|
||||
S.PushSection();
|
||||
S.SwitchSection(Context.getELFSection(
|
||||
@ -372,7 +401,7 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUNote(
|
||||
S.EmitIntValue(NameSZ, 4); // namesz
|
||||
S.EmitValue(DescSZ, 4); // descz
|
||||
S.EmitIntValue(NoteType, 4); // type
|
||||
S.EmitBytes(StringRef(ElfNote::NoteName, NameSZ)); // name
|
||||
S.EmitBytes(Name); // name
|
||||
S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
|
||||
EmitDesc(S); // desc
|
||||
S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
|
||||
@ -384,14 +413,11 @@ void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {}
|
||||
void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(
|
||||
uint32_t Major, uint32_t Minor) {
|
||||
|
||||
EmitAMDGPUNote(
|
||||
MCConstantExpr::create(8, getContext()),
|
||||
ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION,
|
||||
[&](MCELFStreamer &OS){
|
||||
OS.EmitIntValue(Major, 4);
|
||||
OS.EmitIntValue(Minor, 4);
|
||||
}
|
||||
);
|
||||
EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(8, getContext()),
|
||||
ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION, [&](MCELFStreamer &OS) {
|
||||
OS.EmitIntValue(Major, 4);
|
||||
OS.EmitIntValue(Minor, 4);
|
||||
});
|
||||
}
|
||||
|
||||
void
|
||||
@ -407,21 +433,18 @@ AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
|
||||
sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
|
||||
VendorNameSize + ArchNameSize;
|
||||
|
||||
EmitAMDGPUNote(
|
||||
MCConstantExpr::create(DescSZ, getContext()),
|
||||
ElfNote::NT_AMDGPU_HSA_ISA,
|
||||
[&](MCELFStreamer &OS) {
|
||||
OS.EmitIntValue(VendorNameSize, 2);
|
||||
OS.EmitIntValue(ArchNameSize, 2);
|
||||
OS.EmitIntValue(Major, 4);
|
||||
OS.EmitIntValue(Minor, 4);
|
||||
OS.EmitIntValue(Stepping, 4);
|
||||
OS.EmitBytes(VendorName);
|
||||
OS.EmitIntValue(0, 1); // NULL terminate VendorName
|
||||
OS.EmitBytes(ArchName);
|
||||
OS.EmitIntValue(0, 1); // NULL terminte ArchName
|
||||
}
|
||||
);
|
||||
EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(DescSZ, getContext()),
|
||||
ElfNote::NT_AMDGPU_HSA_ISA, [&](MCELFStreamer &OS) {
|
||||
OS.EmitIntValue(VendorNameSize, 2);
|
||||
OS.EmitIntValue(ArchNameSize, 2);
|
||||
OS.EmitIntValue(Major, 4);
|
||||
OS.EmitIntValue(Minor, 4);
|
||||
OS.EmitIntValue(Stepping, 4);
|
||||
OS.EmitBytes(VendorName);
|
||||
OS.EmitIntValue(0, 1); // NULL terminate VendorName
|
||||
OS.EmitBytes(ArchName);
|
||||
OS.EmitIntValue(0, 1); // NULL terminte ArchName
|
||||
});
|
||||
}
|
||||
|
||||
void
|
||||
@ -450,15 +473,41 @@ bool AMDGPUTargetELFStreamer::EmitISAVersion(StringRef IsaVersionString) {
|
||||
MCSymbolRefExpr::create(DescEnd, Context),
|
||||
MCSymbolRefExpr::create(DescBegin, Context), Context);
|
||||
|
||||
EmitAMDGPUNote(
|
||||
DescSZ,
|
||||
ELF::NT_AMD_AMDGPU_ISA,
|
||||
[&](MCELFStreamer &OS) {
|
||||
OS.EmitLabel(DescBegin);
|
||||
OS.EmitBytes(IsaVersionString);
|
||||
OS.EmitLabel(DescEnd);
|
||||
}
|
||||
);
|
||||
EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_ISA,
|
||||
[&](MCELFStreamer &OS) {
|
||||
OS.EmitLabel(DescBegin);
|
||||
OS.EmitBytes(IsaVersionString);
|
||||
OS.EmitLabel(DescEnd);
|
||||
});
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
|
||||
std::shared_ptr<msgpack::Node> &HSAMetadataRoot, bool Strict) {
|
||||
V3::MetadataVerifier Verifier(Strict);
|
||||
if (!Verifier.verify(*HSAMetadataRoot))
|
||||
return false;
|
||||
|
||||
std::string HSAMetadataString;
|
||||
raw_string_ostream StrOS(HSAMetadataString);
|
||||
msgpack::Writer MPWriter(StrOS);
|
||||
HSAMetadataRoot->write(MPWriter);
|
||||
|
||||
// Create two labels to mark the beginning and end of the desc field
|
||||
// and a MCExpr to calculate the size of the desc field.
|
||||
auto &Context = getContext();
|
||||
auto *DescBegin = Context.createTempSymbol();
|
||||
auto *DescEnd = Context.createTempSymbol();
|
||||
auto *DescSZ = MCBinaryExpr::createSub(
|
||||
MCSymbolRefExpr::create(DescEnd, Context),
|
||||
MCSymbolRefExpr::create(DescBegin, Context), Context);
|
||||
|
||||
EmitNote(ElfNote::NoteNameV3, DescSZ, ELF::NT_AMDGPU_METADATA,
|
||||
[&](MCELFStreamer &OS) {
|
||||
OS.EmitLabel(DescBegin);
|
||||
OS.EmitBytes(StrOS.str());
|
||||
OS.EmitLabel(DescEnd);
|
||||
});
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -477,28 +526,24 @@ bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
|
||||
MCSymbolRefExpr::create(DescEnd, Context),
|
||||
MCSymbolRefExpr::create(DescBegin, Context), Context);
|
||||
|
||||
EmitAMDGPUNote(
|
||||
DescSZ,
|
||||
ELF::NT_AMD_AMDGPU_HSA_METADATA,
|
||||
[&](MCELFStreamer &OS) {
|
||||
OS.EmitLabel(DescBegin);
|
||||
OS.EmitBytes(HSAMetadataString);
|
||||
OS.EmitLabel(DescEnd);
|
||||
}
|
||||
);
|
||||
EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_HSA_METADATA,
|
||||
[&](MCELFStreamer &OS) {
|
||||
OS.EmitLabel(DescBegin);
|
||||
OS.EmitBytes(HSAMetadataString);
|
||||
OS.EmitLabel(DescEnd);
|
||||
});
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AMDGPUTargetELFStreamer::EmitPALMetadata(
|
||||
const PALMD::Metadata &PALMetadata) {
|
||||
EmitAMDGPUNote(
|
||||
MCConstantExpr::create(PALMetadata.size() * sizeof(uint32_t), getContext()),
|
||||
ELF::NT_AMD_AMDGPU_PAL_METADATA,
|
||||
[&](MCELFStreamer &OS){
|
||||
for (auto I : PALMetadata)
|
||||
OS.EmitIntValue(I, sizeof(uint32_t));
|
||||
}
|
||||
);
|
||||
EmitNote(ElfNote::NoteNameV2,
|
||||
MCConstantExpr::create(PALMetadata.size() * sizeof(uint32_t),
|
||||
getContext()),
|
||||
ELF::NT_AMD_AMDGPU_PAL_METADATA, [&](MCELFStreamer &OS) {
|
||||
for (auto I : PALMetadata)
|
||||
OS.EmitIntValue(I, sizeof(uint32_t));
|
||||
});
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -11,6 +11,7 @@
|
||||
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H
|
||||
|
||||
#include "AMDKernelCodeT.h"
|
||||
#include "llvm/BinaryFormat/MsgPackTypes.h"
|
||||
#include "llvm/MC/MCStreamer.h"
|
||||
#include "llvm/MC/MCSubtargetInfo.h"
|
||||
#include "llvm/Support/AMDGPUMetadata.h"
|
||||
@ -52,7 +53,20 @@ public:
|
||||
virtual bool EmitISAVersion(StringRef IsaVersionString) = 0;
|
||||
|
||||
/// \returns True on success, false on failure.
|
||||
virtual bool EmitHSAMetadata(StringRef HSAMetadataString);
|
||||
virtual bool EmitHSAMetadataV2(StringRef HSAMetadataString);
|
||||
|
||||
/// \returns True on success, false on failure.
|
||||
virtual bool EmitHSAMetadataV3(StringRef HSAMetadataString);
|
||||
|
||||
/// Emit HSA Metadata
|
||||
///
|
||||
/// When \p Strict is true, known metadata elements must already be
|
||||
/// well-typed. When \p Strict is false, known types are inferred and
|
||||
/// the \p HSAMetadata structure is updated with the correct types.
|
||||
///
|
||||
/// \returns True on success, false on failure.
|
||||
virtual bool EmitHSAMetadata(std::shared_ptr<msgpack::Node> &HSAMetadata,
|
||||
bool Strict) = 0;
|
||||
|
||||
/// \returns True on success, false on failure.
|
||||
virtual bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) = 0;
|
||||
@ -91,6 +105,10 @@ public:
|
||||
/// \returns True on success, false on failure.
|
||||
bool EmitISAVersion(StringRef IsaVersionString) override;
|
||||
|
||||
/// \returns True on success, false on failure.
|
||||
bool EmitHSAMetadata(std::shared_ptr<msgpack::Node> &HSAMetadata,
|
||||
bool Strict) override;
|
||||
|
||||
/// \returns True on success, false on failure.
|
||||
bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override;
|
||||
|
||||
@ -107,8 +125,8 @@ public:
|
||||
class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {
|
||||
MCStreamer &Streamer;
|
||||
|
||||
void EmitAMDGPUNote(const MCExpr *DescSize, unsigned NoteType,
|
||||
function_ref<void(MCELFStreamer &)> EmitDesc);
|
||||
void EmitNote(StringRef Name, const MCExpr *DescSize, unsigned NoteType,
|
||||
function_ref<void(MCELFStreamer &)> EmitDesc);
|
||||
|
||||
public:
|
||||
AMDGPUTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI);
|
||||
@ -131,6 +149,10 @@ public:
|
||||
/// \returns True on success, false on failure.
|
||||
bool EmitISAVersion(StringRef IsaVersionString) override;
|
||||
|
||||
/// \returns True on success, false on failure.
|
||||
bool EmitHSAMetadata(std::shared_ptr<msgpack::Node> &HSAMetadata,
|
||||
bool Strict) override;
|
||||
|
||||
/// \returns True on success, false on failure.
|
||||
bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override;
|
||||
|
||||
|
@ -19,5 +19,5 @@
|
||||
type = Library
|
||||
name = AMDGPUDesc
|
||||
parent = AMDGPU
|
||||
required_libraries = Core MC AMDGPUAsmPrinter AMDGPUInfo AMDGPUUtils Support
|
||||
required_libraries = Core MC AMDGPUAsmPrinter AMDGPUInfo AMDGPUUtils Support BinaryFormat
|
||||
add_to_library_groups = AMDGPU
|
||||
|
145
test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll
Normal file
145
test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll
Normal file
@ -0,0 +1,145 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -mattr=+code-object-v3 < %s | FileCheck --check-prefix=CHECK %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -amdgpu-verify-hsa-metadata -filetype=obj -mattr=+code-object-v3 -o /dev/null < %s 2>&1 | FileCheck --check-prefix=PARSER %s
|
||||
|
||||
; CHECK-LABEL: {{^}}min_64_max_64:
|
||||
; CHECK: SGPRBlocks: 0
|
||||
; CHECK: VGPRBlocks: 0
|
||||
; CHECK: NumSGPRsForWavesPerEU: 1
|
||||
; CHECK: NumVGPRsForWavesPerEU: 1
|
||||
define amdgpu_kernel void @min_64_max_64() #0 {
|
||||
entry:
|
||||
ret void
|
||||
}
|
||||
attributes #0 = {"amdgpu-flat-work-group-size"="64,64"}
|
||||
|
||||
; CHECK-LABEL: {{^}}min_64_max_128:
|
||||
; CHECK: SGPRBlocks: 0
|
||||
; CHECK: VGPRBlocks: 0
|
||||
; CHECK: NumSGPRsForWavesPerEU: 1
|
||||
; CHECK: NumVGPRsForWavesPerEU: 1
|
||||
define amdgpu_kernel void @min_64_max_128() #1 {
|
||||
entry:
|
||||
ret void
|
||||
}
|
||||
attributes #1 = {"amdgpu-flat-work-group-size"="64,128"}
|
||||
|
||||
; CHECK-LABEL: {{^}}min_128_max_128:
|
||||
; CHECK: SGPRBlocks: 0
|
||||
; CHECK: VGPRBlocks: 0
|
||||
; CHECK: NumSGPRsForWavesPerEU: 1
|
||||
; CHECK: NumVGPRsForWavesPerEU: 1
|
||||
define amdgpu_kernel void @min_128_max_128() #2 {
|
||||
entry:
|
||||
ret void
|
||||
}
|
||||
attributes #2 = {"amdgpu-flat-work-group-size"="128,128"}
|
||||
|
||||
; CHECK-LABEL: {{^}}min_1024_max_2048
|
||||
; CHECK: SGPRBlocks: 1
|
||||
; CHECK: VGPRBlocks: 7
|
||||
; CHECK: NumSGPRsForWavesPerEU: 12
|
||||
; CHECK: NumVGPRsForWavesPerEU: 32
|
||||
@var = addrspace(1) global float 0.0
|
||||
define amdgpu_kernel void @min_1024_max_2048() #3 {
|
||||
%val0 = load volatile float, float addrspace(1)* @var
|
||||
%val1 = load volatile float, float addrspace(1)* @var
|
||||
%val2 = load volatile float, float addrspace(1)* @var
|
||||
%val3 = load volatile float, float addrspace(1)* @var
|
||||
%val4 = load volatile float, float addrspace(1)* @var
|
||||
%val5 = load volatile float, float addrspace(1)* @var
|
||||
%val6 = load volatile float, float addrspace(1)* @var
|
||||
%val7 = load volatile float, float addrspace(1)* @var
|
||||
%val8 = load volatile float, float addrspace(1)* @var
|
||||
%val9 = load volatile float, float addrspace(1)* @var
|
||||
%val10 = load volatile float, float addrspace(1)* @var
|
||||
%val11 = load volatile float, float addrspace(1)* @var
|
||||
%val12 = load volatile float, float addrspace(1)* @var
|
||||
%val13 = load volatile float, float addrspace(1)* @var
|
||||
%val14 = load volatile float, float addrspace(1)* @var
|
||||
%val15 = load volatile float, float addrspace(1)* @var
|
||||
%val16 = load volatile float, float addrspace(1)* @var
|
||||
%val17 = load volatile float, float addrspace(1)* @var
|
||||
%val18 = load volatile float, float addrspace(1)* @var
|
||||
%val19 = load volatile float, float addrspace(1)* @var
|
||||
%val20 = load volatile float, float addrspace(1)* @var
|
||||
%val21 = load volatile float, float addrspace(1)* @var
|
||||
%val22 = load volatile float, float addrspace(1)* @var
|
||||
%val23 = load volatile float, float addrspace(1)* @var
|
||||
%val24 = load volatile float, float addrspace(1)* @var
|
||||
%val25 = load volatile float, float addrspace(1)* @var
|
||||
%val26 = load volatile float, float addrspace(1)* @var
|
||||
%val27 = load volatile float, float addrspace(1)* @var
|
||||
%val28 = load volatile float, float addrspace(1)* @var
|
||||
%val29 = load volatile float, float addrspace(1)* @var
|
||||
%val30 = load volatile float, float addrspace(1)* @var
|
||||
%val31 = load volatile float, float addrspace(1)* @var
|
||||
%val32 = load volatile float, float addrspace(1)* @var
|
||||
%val33 = load volatile float, float addrspace(1)* @var
|
||||
%val34 = load volatile float, float addrspace(1)* @var
|
||||
%val35 = load volatile float, float addrspace(1)* @var
|
||||
%val36 = load volatile float, float addrspace(1)* @var
|
||||
%val37 = load volatile float, float addrspace(1)* @var
|
||||
%val38 = load volatile float, float addrspace(1)* @var
|
||||
%val39 = load volatile float, float addrspace(1)* @var
|
||||
%val40 = load volatile float, float addrspace(1)* @var
|
||||
|
||||
store volatile float %val0, float addrspace(1)* @var
|
||||
store volatile float %val1, float addrspace(1)* @var
|
||||
store volatile float %val2, float addrspace(1)* @var
|
||||
store volatile float %val3, float addrspace(1)* @var
|
||||
store volatile float %val4, float addrspace(1)* @var
|
||||
store volatile float %val5, float addrspace(1)* @var
|
||||
store volatile float %val6, float addrspace(1)* @var
|
||||
store volatile float %val7, float addrspace(1)* @var
|
||||
store volatile float %val8, float addrspace(1)* @var
|
||||
store volatile float %val9, float addrspace(1)* @var
|
||||
store volatile float %val10, float addrspace(1)* @var
|
||||
store volatile float %val11, float addrspace(1)* @var
|
||||
store volatile float %val12, float addrspace(1)* @var
|
||||
store volatile float %val13, float addrspace(1)* @var
|
||||
store volatile float %val14, float addrspace(1)* @var
|
||||
store volatile float %val15, float addrspace(1)* @var
|
||||
store volatile float %val16, float addrspace(1)* @var
|
||||
store volatile float %val17, float addrspace(1)* @var
|
||||
store volatile float %val18, float addrspace(1)* @var
|
||||
store volatile float %val19, float addrspace(1)* @var
|
||||
store volatile float %val20, float addrspace(1)* @var
|
||||
store volatile float %val21, float addrspace(1)* @var
|
||||
store volatile float %val22, float addrspace(1)* @var
|
||||
store volatile float %val23, float addrspace(1)* @var
|
||||
store volatile float %val24, float addrspace(1)* @var
|
||||
store volatile float %val25, float addrspace(1)* @var
|
||||
store volatile float %val26, float addrspace(1)* @var
|
||||
store volatile float %val27, float addrspace(1)* @var
|
||||
store volatile float %val28, float addrspace(1)* @var
|
||||
store volatile float %val29, float addrspace(1)* @var
|
||||
store volatile float %val30, float addrspace(1)* @var
|
||||
store volatile float %val31, float addrspace(1)* @var
|
||||
store volatile float %val32, float addrspace(1)* @var
|
||||
store volatile float %val33, float addrspace(1)* @var
|
||||
store volatile float %val34, float addrspace(1)* @var
|
||||
store volatile float %val35, float addrspace(1)* @var
|
||||
store volatile float %val36, float addrspace(1)* @var
|
||||
store volatile float %val37, float addrspace(1)* @var
|
||||
store volatile float %val38, float addrspace(1)* @var
|
||||
store volatile float %val39, float addrspace(1)* @var
|
||||
store volatile float %val40, float addrspace(1)* @var
|
||||
|
||||
ret void
|
||||
}
|
||||
attributes #3 = {"amdgpu-flat-work-group-size"="1024,2048"}
|
||||
|
||||
; CHECK: amdhsa.kernels:
|
||||
; CHECK: .max_flat_workgroup_size: 64
|
||||
; CHECK: .name: min_64_max_64
|
||||
; CHECK: .max_flat_workgroup_size: 128
|
||||
; CHECK: .name: min_64_max_128
|
||||
; CHECK: .max_flat_workgroup_size: 128
|
||||
; CHECK: .name: min_128_max_128
|
||||
; CHECK: .max_flat_workgroup_size: 2048
|
||||
; CHECK: .name: min_1024_max_2048
|
||||
; CHECK: amdhsa.version:
|
||||
; CHECK: - 1
|
||||
; CHECK: - 0
|
||||
|
||||
; PARSER: AMDGPU HSA Metadata Parser Test: PASS
|
@ -3,6 +3,8 @@
|
||||
|
||||
; ALL-ASM-LABEL: {{^}}fadd:
|
||||
|
||||
; OSABI-AMDHSA-ASM-NOT: .hsa_code_object_version
|
||||
; OSABI-AMDHSA-ASM-NOT: .hsa_code_object_isa
|
||||
; OSABI-AMDHSA-ASM-NOT: .amdgpu_hsa_kernel
|
||||
; OSABI-AMDHSA-ASM-NOT: .amd_kernel_code_t
|
||||
|
||||
@ -57,7 +59,8 @@
|
||||
; OSABI-AMDHSA-ELF: {{[0-9]+}}: 0000000000000000 64 OBJECT GLOBAL DEFAULT {{[0-9]+}} fadd.kd
|
||||
; OSABI-AMDHSA-ELF: {{[0-9]+}}: 0000000000000040 64 OBJECT GLOBAL DEFAULT {{[0-9]+}} fsub.kd
|
||||
|
||||
; OSABI-AMDHSA-ELF-NOT: Displaying notes found
|
||||
; OSABI-AMDHSA-ELF: Displaying notes found at file offset
|
||||
; OSABI-AMDHSA-ELF: AMDGPU 0x{{[0-9a-f]+}} NT_AMDGPU_METADATA (AMDGPU Metadata)
|
||||
|
||||
define amdgpu_kernel void @fadd(
|
||||
float addrspace(1)* %r,
|
||||
|
33
test/CodeGen/AMDGPU/hsa-metadata-deduce-ro-arg-v3.ll
Normal file
33
test/CodeGen/AMDGPU/hsa-metadata-deduce-ro-arg-v3.ll
Normal file
@ -0,0 +1,33 @@
|
||||
; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck %s
|
||||
|
||||
; CHECK: .symbol: test_ro_arg.kd
|
||||
; CHECK: .name: test_ro_arg
|
||||
; CHECK: .args:
|
||||
; CHECK-NEXT: - .type_name: 'float*'
|
||||
; CHECK-NEXT: .value_kind: global_buffer
|
||||
; CHECK-NEXT: .name: in
|
||||
; CHECK-NEXT: .access: read_only
|
||||
; CHECK-NEXT: .offset: 0
|
||||
; CHECK-NEXT: .is_const: true
|
||||
; CHECK-NEXT: .size: 8
|
||||
; CHECK-NEXT: .is_restrict: true
|
||||
; CHECK-NEXT: .value_type: f32
|
||||
; CHECK-NEXT: .address_space: global
|
||||
; CHECK-NEXT: - .type_name: 'float*'
|
||||
; CHECK-NEXT: .value_kind: global_buffer
|
||||
; CHECK-NEXT: .name: out
|
||||
; CHECK-NEXT: .offset: 8
|
||||
; CHECK-NEXT: .size: 8
|
||||
; CHECK-NEXT: .value_type: f32
|
||||
; CHECK-NEXT: .address_space: global
|
||||
|
||||
define amdgpu_kernel void @test_ro_arg(float addrspace(1)* noalias readonly %in, float addrspace(1)* %out)
|
||||
!kernel_arg_addr_space !0 !kernel_arg_access_qual !1 !kernel_arg_type !2
|
||||
!kernel_arg_base_type !2 !kernel_arg_type_qual !3 {
|
||||
ret void
|
||||
}
|
||||
|
||||
!0 = !{i32 1, i32 1}
|
||||
!1 = !{!"none", !"none"}
|
||||
!2 = !{!"float*", !"float*"}
|
||||
!3 = !{!"const restrict", !""}
|
101
test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel-v3.ll
Normal file
101
test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel-v3.ll
Normal file
@ -0,0 +1,101 @@
|
||||
; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
|
||||
; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
|
||||
|
||||
; CHECK: ---
|
||||
; CHECK: amdhsa.kernels:
|
||||
; CHECK: .symbol: test_non_enqueue_kernel_caller.kd
|
||||
; CHECK: .name: test_non_enqueue_kernel_caller
|
||||
; CHECK: .language: OpenCL C
|
||||
; CHECK: .language_version:
|
||||
; CHECK-NEXT: - 2
|
||||
; CHECK-NEXT: - 0
|
||||
; CHECK: .args:
|
||||
; CHECK-NEXT: - .type_name: char
|
||||
; CHECK-NEXT: .value_kind: by_value
|
||||
; CHECK-NEXT: .offset: 0
|
||||
; CHECK-NEXT: .size: 1
|
||||
; CHECK-NEXT: .value_type: i8
|
||||
; CHECK-NEXT: .name: a
|
||||
; CHECK-NEXT: - .value_kind: hidden_global_offset_x
|
||||
; CHECK-NEXT: .offset: 8
|
||||
; CHECK-NEXT: .size: 8
|
||||
; CHECK-NEXT: .value_type: i64
|
||||
; CHECK-NEXT: - .value_kind: hidden_global_offset_y
|
||||
; CHECK-NEXT: .offset: 16
|
||||
; CHECK-NEXT: .size: 8
|
||||
; CHECK-NEXT: .value_type: i64
|
||||
; CHECK-NEXT: - .value_kind: hidden_global_offset_z
|
||||
; CHECK-NEXT: .offset: 24
|
||||
; CHECK-NEXT: .size: 8
|
||||
; CHECK-NEXT: .value_type: i64
|
||||
; CHECK-NOT: .value_kind: hidden_default_queue
|
||||
; CHECK-NOT: .value_kind: hidden_completion_action
|
||||
define amdgpu_kernel void @test_non_enqueue_kernel_caller(i8 %a)
|
||||
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
|
||||
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: .symbol: test_enqueue_kernel_caller.kd
|
||||
; CHECK: .name: test_enqueue_kernel_caller
|
||||
; CHECK: .language: OpenCL C
|
||||
; CHECK: .language_version:
|
||||
; CHECK-NEXT: - 2
|
||||
; CHECK-NEXT: - 0
|
||||
; CHECK: .args:
|
||||
; CHECK-NEXT: - .type_name: char
|
||||
; CHECK-NEXT: .value_kind: by_value
|
||||
; CHECK-NEXT: .offset: 0
|
||||
; CHECK-NEXT: .size: 1
|
||||
; CHECK-NEXT: .value_type: i8
|
||||
; CHECK-NEXT: .name: a
|
||||
; CHECK-NEXT: - .value_kind: hidden_global_offset_x
|
||||
; CHECK-NEXT: .offset: 8
|
||||
; CHECK-NEXT: .size: 8
|
||||
; CHECK-NEXT: .value_type: i64
|
||||
; CHECK-NEXT: - .value_kind: hidden_global_offset_y
|
||||
; CHECK-NEXT: .offset: 16
|
||||
; CHECK-NEXT: .size: 8
|
||||
; CHECK-NEXT: .value_type: i64
|
||||
; CHECK-NEXT: - .value_kind: hidden_global_offset_z
|
||||
; CHECK-NEXT: .offset: 24
|
||||
; CHECK-NEXT: .size: 8
|
||||
; CHECK-NEXT: .value_type: i64
|
||||
; CHECK-NEXT: - .value_kind: hidden_none
|
||||
; CHECK-NEXT: .offset: 32
|
||||
; CHECK-NEXT: .size: 8
|
||||
; CHECK-NEXT: .value_type: i8
|
||||
; CHECK-NEXT: .address_space: global
|
||||
; CHECK-NEXT: - .value_kind: hidden_default_queue
|
||||
; CHECK-NEXT: .offset: 40
|
||||
; CHECK-NEXT: .size: 8
|
||||
; CHECK-NEXT: .value_type: i8
|
||||
; CHECK-NEXT: .address_space: global
|
||||
; CHECK-NEXT: - .value_kind: hidden_completion_action
|
||||
; CHECK-NEXT: .offset: 48
|
||||
; CHECK-NEXT: .size: 8
|
||||
; CHECK-NEXT: .value_type: i8
|
||||
; CHECK-NEXT: .address_space: global
|
||||
define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #0
|
||||
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
|
||||
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: amdhsa.version:
|
||||
; CHECK-NEXT: - 1
|
||||
; CHECK-NEXT: - 0
|
||||
; CHECK-NOT: amdhsa.printf:
|
||||
|
||||
attributes #0 = { "calls-enqueue-kernel" }
|
||||
|
||||
!1 = !{i32 0}
|
||||
!2 = !{!"none"}
|
||||
!3 = !{!"char"}
|
||||
!4 = !{!""}
|
||||
|
||||
!opencl.ocl.version = !{!90}
|
||||
!90 = !{i32 2, i32 0}
|
||||
|
||||
|
||||
; PARSER: AMDGPU HSA Metadata Parser Test: PASS
|
1453
test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll
Normal file
1453
test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll
Normal file
File diff suppressed because it is too large
Load Diff
72
test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll
Normal file
72
test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll
Normal file
@ -0,0 +1,72 @@
|
||||
; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s
|
||||
; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX803 --check-prefix=NOTES %s
|
||||
; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
|
||||
|
||||
; CHECK: ---
|
||||
; CHECK: amdhsa.kernels:
|
||||
; CHECK: .symbol: test.kd
|
||||
; CHECK: .name: test
|
||||
; CHECK: .args:
|
||||
; CHECK-NEXT: - .value_kind: global_buffer
|
||||
; CHECK-NEXT: .name: r
|
||||
; CHECK-NEXT: .offset: 0
|
||||
; CHECK-NEXT: .size: 8
|
||||
; CHECK-NEXT: .value_type: f16
|
||||
; CHECK-NEXT: .address_space: global
|
||||
; CHECK-NEXT: - .value_kind: global_buffer
|
||||
; CHECK-NEXT: .name: a
|
||||
; CHECK-NEXT: .offset: 8
|
||||
; CHECK-NEXT: .size: 8
|
||||
; CHECK-NEXT: .value_type: f16
|
||||
; CHECK-NEXT: .address_space: global
|
||||
; CHECK-NEXT: - .value_kind: global_buffer
|
||||
; CHECK-NEXT: .name: b
|
||||
; CHECK-NEXT: .offset: 16
|
||||
; CHECK-NEXT: .size: 8
|
||||
; CHECK-NEXT: .value_type: f16
|
||||
; CHECK-NEXT: .address_space: global
|
||||
; CHECK-NEXT: - .value_kind: hidden_global_offset_x
|
||||
; CHECK-NEXT: .offset: 24
|
||||
; CHECK-NEXT: .size: 8
|
||||
; CHECK-NEXT: .value_type: i64
|
||||
; CHECK-NEXT: - .value_kind: hidden_global_offset_y
|
||||
; CHECK-NEXT: .offset: 32
|
||||
; CHECK-NEXT: .size: 8
|
||||
; CHECK-NEXT: .value_type: i64
|
||||
; CHECK-NEXT: - .value_kind: hidden_global_offset_z
|
||||
; CHECK-NEXT: .offset: 40
|
||||
; CHECK-NEXT: .size: 8
|
||||
; CHECK-NEXT: .value_type: i64
|
||||
; CHECK-NEXT: - .value_kind: hidden_none
|
||||
; CHECK-NEXT: .offset: 48
|
||||
; CHECK-NEXT: .size: 8
|
||||
; CHECK-NEXT: .value_type: i8
|
||||
; CHECK-NEXT: .address_space: global
|
||||
; CHECK-NEXT: - .value_kind: hidden_none
|
||||
; CHECK-NEXT: .offset: 56
|
||||
; CHECK-NEXT: .size: 8
|
||||
; CHECK-NEXT: .value_type: i8
|
||||
; CHECK-NEXT: .address_space: global
|
||||
; CHECK-NEXT: - .value_kind: hidden_none
|
||||
; CHECK-NEXT: .offset: 64
|
||||
; CHECK-NEXT: .size: 8
|
||||
; CHECK-NEXT: .value_type: i8
|
||||
; CHECK-NEXT: .address_space: global
|
||||
define amdgpu_kernel void @test(
|
||||
half addrspace(1)* %r,
|
||||
half addrspace(1)* %a,
|
||||
half addrspace(1)* %b) {
|
||||
entry:
|
||||
%a.val = load half, half addrspace(1)* %a
|
||||
%b.val = load half, half addrspace(1)* %b
|
||||
%r.val = fadd half %a.val, %b.val
|
||||
store half %r.val, half addrspace(1)* %r
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: amdhsa.version:
|
||||
; CHECK-NEXT: - 1
|
||||
; CHECK-NEXT: - 0
|
||||
|
||||
!opencl.ocl.version = !{!0}
|
||||
!0 = !{i32 2, i32 0}
|
95
test/CodeGen/AMDGPU/hsa-metadata-images-v3.ll
Normal file
95
test/CodeGen/AMDGPU/hsa-metadata-images-v3.ll
Normal file
@ -0,0 +1,95 @@
|
||||
; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s
|
||||
; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX802 --check-prefix=NOTES %s
|
||||
; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
|
||||
|
||||
%opencl.image1d_t = type opaque
|
||||
%opencl.image1d_array_t = type opaque
|
||||
%opencl.image1d_buffer_t = type opaque
|
||||
%opencl.image2d_t = type opaque
|
||||
%opencl.image2d_array_t = type opaque
|
||||
%opencl.image2d_array_depth_t = type opaque
|
||||
%opencl.image2d_array_msaa_t = type opaque
|
||||
%opencl.image2d_array_msaa_depth_t = type opaque
|
||||
%opencl.image2d_depth_t = type opaque
|
||||
%opencl.image2d_msaa_t = type opaque
|
||||
%opencl.image2d_msaa_depth_t = type opaque
|
||||
%opencl.image3d_t = type opaque
|
||||
|
||||
; CHECK: ---
|
||||
; CHECK: amdhsa.kernels:
|
||||
; CHECK: .symbol: test.kd
|
||||
; CHECK: .name: test
|
||||
; CHECK: .args:
|
||||
; CHECK: - .type_name: image1d_t
|
||||
; CHECK: .value_kind: image
|
||||
; CHECK: .name: a
|
||||
; CHECK: .size: 8
|
||||
; CHECK: - .type_name: image1d_array_t
|
||||
; CHECK: .value_kind: image
|
||||
; CHECK: .name: b
|
||||
; CHECK: .size: 8
|
||||
; CHECK: - .type_name: image1d_buffer_t
|
||||
; CHECK: .value_kind: image
|
||||
; CHECK: .name: c
|
||||
; CHECK: .size: 8
|
||||
; CHECK: - .type_name: image2d_t
|
||||
; CHECK: .value_kind: image
|
||||
; CHECK: .name: d
|
||||
; CHECK: .size: 8
|
||||
; CHECK: - .type_name: image2d_array_t
|
||||
; CHECK: .value_kind: image
|
||||
; CHECK: .name: e
|
||||
; CHECK: .size: 8
|
||||
; CHECK: - .type_name: image2d_array_depth_t
|
||||
; CHECK: .value_kind: image
|
||||
; CHECK: .name: f
|
||||
; CHECK: .size: 8
|
||||
; CHECK: - .type_name: image2d_array_msaa_t
|
||||
; CHECK: .value_kind: image
|
||||
; CHECK: .name: g
|
||||
; CHECK: .size: 8
|
||||
; CHECK: - .type_name: image2d_array_msaa_depth_t
|
||||
; CHECK: .value_kind: image
|
||||
; CHECK: .name: h
|
||||
; CHECK: .size: 8
|
||||
; CHECK: - .type_name: image2d_depth_t
|
||||
; CHECK: .value_kind: image
|
||||
; CHECK: .name: i
|
||||
; CHECK: .size: 8
|
||||
; CHECK: - .type_name: image2d_msaa_t
|
||||
; CHECK: .value_kind: image
|
||||
; CHECK: .name: j
|
||||
; CHECK: .size: 8
|
||||
; CHECK: - .type_name: image2d_msaa_depth_t
|
||||
; CHECK: .value_kind: image
|
||||
; CHECK: .name: k
|
||||
; CHECK: .size: 8
|
||||
; CHECK: - .type_name: image3d_t
|
||||
; CHECK: .value_kind: image
|
||||
; CHECK: .name: l
|
||||
; CHECK: .size: 8
|
||||
define amdgpu_kernel void @test(%opencl.image1d_t addrspace(1)* %a,
|
||||
%opencl.image1d_array_t addrspace(1)* %b,
|
||||
%opencl.image1d_buffer_t addrspace(1)* %c,
|
||||
%opencl.image2d_t addrspace(1)* %d,
|
||||
%opencl.image2d_array_t addrspace(1)* %e,
|
||||
%opencl.image2d_array_depth_t addrspace(1)* %f,
|
||||
%opencl.image2d_array_msaa_t addrspace(1)* %g,
|
||||
%opencl.image2d_array_msaa_depth_t addrspace(1)* %h,
|
||||
%opencl.image2d_depth_t addrspace(1)* %i,
|
||||
%opencl.image2d_msaa_t addrspace(1)* %j,
|
||||
%opencl.image2d_msaa_depth_t addrspace(1)* %k,
|
||||
%opencl.image3d_t addrspace(1)* %l)
|
||||
!kernel_arg_type !1 !kernel_arg_base_type !1 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: amdhsa.version:
|
||||
; CHECK-NEXT: - 1
|
||||
; CHECK-NEXT: - 0
|
||||
|
||||
!1 = !{!"image1d_t", !"image1d_array_t", !"image1d_buffer_t",
|
||||
!"image2d_t", !"image2d_array_t", !"image2d_array_depth_t",
|
||||
!"image2d_array_msaa_t", !"image2d_array_msaa_depth_t",
|
||||
!"image2d_depth_t", !"image2d_msaa_t", !"image2d_msaa_depth_t",
|
||||
!"image3d_t"}
|
11
test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1-v3.ll
Normal file
11
test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1-v3.ll
Normal file
@ -0,0 +1,11 @@
|
||||
; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck %s
|
||||
|
||||
; Make sure llc does not crash for invalid opencl version metadata.
|
||||
|
||||
; CHECK: ---
|
||||
; CHECK: amdhsa.version:
|
||||
; CHECK-NEXT: - 1
|
||||
; CHECK-NEXT: - 0
|
||||
; CHECK: ...
|
||||
|
||||
!opencl.ocl.version = !{}
|
12
test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2-v3.ll
Normal file
12
test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2-v3.ll
Normal file
@ -0,0 +1,12 @@
|
||||
; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck %s
|
||||
|
||||
; Make sure llc does not crash for invalid opencl version metadata.
|
||||
|
||||
; CHECK: ---
|
||||
; CHECK: amdhsa.version:
|
||||
; CHECK-NEXT: - 1
|
||||
; CHECK-NEXT: - 0
|
||||
; CHECK: ...
|
||||
|
||||
!opencl.ocl.version = !{!0}
|
||||
!0 = !{}
|
12
test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3-v3.ll
Normal file
12
test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3-v3.ll
Normal file
@ -0,0 +1,12 @@
|
||||
; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck %s
|
||||
|
||||
; Make sure llc does not crash for invalid opencl version metadata.
|
||||
|
||||
; CHECK: ---
|
||||
; CHECK: amdhsa.version:
|
||||
; CHECK-NEXT: - 1
|
||||
; CHECK-NEXT: - 0
|
||||
; CHECK: ...
|
||||
|
||||
!opencl.ocl.version = !{!0}
|
||||
!0 = !{i32 1}
|
146
test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll
Normal file
146
test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll
Normal file
@ -0,0 +1,146 @@
|
||||
; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s
|
||||
; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX803 --check-prefix=NOTES %s
|
||||
; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
|
||||
|
||||
@var = addrspace(1) global float 0.0
|
||||
|
||||
; CHECK: ---
|
||||
; CHECK: amdhsa.kernels:
|
||||
|
||||
; CHECK: - .max_flat_workgroup_size: 256
|
||||
; CHECK: .kernarg_segment_size: 24
|
||||
; CHECK: .private_segment_fixed_size: 0
|
||||
; CHECK: .wavefront_size: 64
|
||||
; CHECK: .symbol: test.kd
|
||||
; CHECK: .name: test
|
||||
; CHECK: .sgpr_count: 8
|
||||
; CHECK: .kernarg_segment_align: 8
|
||||
; CHECK: .vgpr_count: 6
|
||||
; CHECK: .group_segment_fixed_size: 0
|
||||
define amdgpu_kernel void @test(
|
||||
half addrspace(1)* %r,
|
||||
half addrspace(1)* %a,
|
||||
half addrspace(1)* %b) {
|
||||
entry:
|
||||
%a.val = load half, half addrspace(1)* %a
|
||||
%b.val = load half, half addrspace(1)* %b
|
||||
%r.val = fadd half %a.val, %b.val
|
||||
store half %r.val, half addrspace(1)* %r
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: .symbol: num_spilled_sgprs.kd
|
||||
; CHECK: .name: num_spilled_sgprs
|
||||
; GFX700: .sgpr_spill_count: 40
|
||||
; GFX803: .sgpr_spill_count: 24
|
||||
; GFX900: .sgpr_spill_count: 24
|
||||
define amdgpu_kernel void @num_spilled_sgprs(
|
||||
i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, [8 x i32],
|
||||
i32 addrspace(1)* %out2, i32 addrspace(1)* %out3, [8 x i32],
|
||||
i32 addrspace(1)* %out4, i32 addrspace(1)* %out5, [8 x i32],
|
||||
i32 addrspace(1)* %out6, i32 addrspace(1)* %out7, [8 x i32],
|
||||
i32 addrspace(1)* %out8, i32 addrspace(1)* %out9, [8 x i32],
|
||||
i32 addrspace(1)* %outa, i32 addrspace(1)* %outb, [8 x i32],
|
||||
i32 addrspace(1)* %outc, i32 addrspace(1)* %outd, [8 x i32],
|
||||
i32 addrspace(1)* %oute, i32 addrspace(1)* %outf, [8 x i32],
|
||||
i32 %in0, i32 %in1, i32 %in2, i32 %in3, [8 x i32],
|
||||
i32 %in4, i32 %in5, i32 %in6, i32 %in7, [8 x i32],
|
||||
i32 %in8, i32 %in9, i32 %ina, i32 %inb, [8 x i32],
|
||||
i32 %inc, i32 %ind, i32 %ine, i32 %inf) #0 {
|
||||
entry:
|
||||
store i32 %in0, i32 addrspace(1)* %out0
|
||||
store i32 %in1, i32 addrspace(1)* %out1
|
||||
store i32 %in2, i32 addrspace(1)* %out2
|
||||
store i32 %in3, i32 addrspace(1)* %out3
|
||||
store i32 %in4, i32 addrspace(1)* %out4
|
||||
store i32 %in5, i32 addrspace(1)* %out5
|
||||
store i32 %in6, i32 addrspace(1)* %out6
|
||||
store i32 %in7, i32 addrspace(1)* %out7
|
||||
store i32 %in8, i32 addrspace(1)* %out8
|
||||
store i32 %in9, i32 addrspace(1)* %out9
|
||||
store i32 %ina, i32 addrspace(1)* %outa
|
||||
store i32 %inb, i32 addrspace(1)* %outb
|
||||
store i32 %inc, i32 addrspace(1)* %outc
|
||||
store i32 %ind, i32 addrspace(1)* %outd
|
||||
store i32 %ine, i32 addrspace(1)* %oute
|
||||
store i32 %inf, i32 addrspace(1)* %outf
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: .symbol: num_spilled_vgprs.kd
|
||||
; CHECK: .name: num_spilled_vgprs
|
||||
; CHECK: .vgpr_spill_count: 14
|
||||
define amdgpu_kernel void @num_spilled_vgprs() #1 {
|
||||
%val0 = load volatile float, float addrspace(1)* @var
|
||||
%val1 = load volatile float, float addrspace(1)* @var
|
||||
%val2 = load volatile float, float addrspace(1)* @var
|
||||
%val3 = load volatile float, float addrspace(1)* @var
|
||||
%val4 = load volatile float, float addrspace(1)* @var
|
||||
%val5 = load volatile float, float addrspace(1)* @var
|
||||
%val6 = load volatile float, float addrspace(1)* @var
|
||||
%val7 = load volatile float, float addrspace(1)* @var
|
||||
%val8 = load volatile float, float addrspace(1)* @var
|
||||
%val9 = load volatile float, float addrspace(1)* @var
|
||||
%val10 = load volatile float, float addrspace(1)* @var
|
||||
%val11 = load volatile float, float addrspace(1)* @var
|
||||
%val12 = load volatile float, float addrspace(1)* @var
|
||||
%val13 = load volatile float, float addrspace(1)* @var
|
||||
%val14 = load volatile float, float addrspace(1)* @var
|
||||
%val15 = load volatile float, float addrspace(1)* @var
|
||||
%val16 = load volatile float, float addrspace(1)* @var
|
||||
%val17 = load volatile float, float addrspace(1)* @var
|
||||
%val18 = load volatile float, float addrspace(1)* @var
|
||||
%val19 = load volatile float, float addrspace(1)* @var
|
||||
%val20 = load volatile float, float addrspace(1)* @var
|
||||
%val21 = load volatile float, float addrspace(1)* @var
|
||||
%val22 = load volatile float, float addrspace(1)* @var
|
||||
%val23 = load volatile float, float addrspace(1)* @var
|
||||
%val24 = load volatile float, float addrspace(1)* @var
|
||||
%val25 = load volatile float, float addrspace(1)* @var
|
||||
%val26 = load volatile float, float addrspace(1)* @var
|
||||
%val27 = load volatile float, float addrspace(1)* @var
|
||||
%val28 = load volatile float, float addrspace(1)* @var
|
||||
%val29 = load volatile float, float addrspace(1)* @var
|
||||
%val30 = load volatile float, float addrspace(1)* @var
|
||||
|
||||
store volatile float %val0, float addrspace(1)* @var
|
||||
store volatile float %val1, float addrspace(1)* @var
|
||||
store volatile float %val2, float addrspace(1)* @var
|
||||
store volatile float %val3, float addrspace(1)* @var
|
||||
store volatile float %val4, float addrspace(1)* @var
|
||||
store volatile float %val5, float addrspace(1)* @var
|
||||
store volatile float %val6, float addrspace(1)* @var
|
||||
store volatile float %val7, float addrspace(1)* @var
|
||||
store volatile float %val8, float addrspace(1)* @var
|
||||
store volatile float %val9, float addrspace(1)* @var
|
||||
store volatile float %val10, float addrspace(1)* @var
|
||||
store volatile float %val11, float addrspace(1)* @var
|
||||
store volatile float %val12, float addrspace(1)* @var
|
||||
store volatile float %val13, float addrspace(1)* @var
|
||||
store volatile float %val14, float addrspace(1)* @var
|
||||
store volatile float %val15, float addrspace(1)* @var
|
||||
store volatile float %val16, float addrspace(1)* @var
|
||||
store volatile float %val17, float addrspace(1)* @var
|
||||
store volatile float %val18, float addrspace(1)* @var
|
||||
store volatile float %val19, float addrspace(1)* @var
|
||||
store volatile float %val20, float addrspace(1)* @var
|
||||
store volatile float %val21, float addrspace(1)* @var
|
||||
store volatile float %val22, float addrspace(1)* @var
|
||||
store volatile float %val23, float addrspace(1)* @var
|
||||
store volatile float %val24, float addrspace(1)* @var
|
||||
store volatile float %val25, float addrspace(1)* @var
|
||||
store volatile float %val26, float addrspace(1)* @var
|
||||
store volatile float %val27, float addrspace(1)* @var
|
||||
store volatile float %val28, float addrspace(1)* @var
|
||||
store volatile float %val29, float addrspace(1)* @var
|
||||
store volatile float %val30, float addrspace(1)* @var
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: amdhsa.version:
|
||||
; CHECK-NEXT: - 1
|
||||
; CHECK-NEXT: - 0
|
||||
|
||||
attributes #0 = { "amdgpu-num-sgpr"="14" }
|
||||
attributes #1 = { "amdgpu-num-vgpr"="20" }
|
96
test/MC/AMDGPU/hsa-metadata-kernel-args-v3.s
Normal file
96
test/MC/AMDGPU/hsa-metadata-kernel-args-v3.s
Normal file
@ -0,0 +1,96 @@
|
||||
// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX700 %s
|
||||
// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX800 %s
|
||||
// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX900 %s
|
||||
|
||||
// CHECK: .amdgpu_metadata
|
||||
// CHECK: amdhsa.kernels:
|
||||
// CHECK-NEXT: - .max_flat_workgroup_size: 256
|
||||
// CHECK-NEXT: .wavefront_size: 128
|
||||
// CHECK-NEXT: .symbol: 'test_kernel@kd'
|
||||
// CHECK-NEXT: .kernarg_segment_size: 8
|
||||
// CHECK-NEXT: .private_segment_fixed_size: 32
|
||||
// CHECK-NEXT: .name: test_kernel
|
||||
// CHECK-NEXT: .language: OpenCL C
|
||||
// CHECK-NEXT: .sgpr_count: 14
|
||||
// CHECK-NEXT: .kernarg_segment_align: 64
|
||||
// CHECK-NEXT: .vgpr_count: 40
|
||||
// CHECK-NEXT: .group_segment_fixed_size: 16
|
||||
// CHECK-NEXT: .language_version:
|
||||
// CHECK-NEXT: - 2
|
||||
// CHECK-NEXT: - 0
|
||||
// CHECK-NEXT: .args:
|
||||
// CHECK-NEXT: - .type_name: char
|
||||
// CHECK-NEXT: .value_kind: by_value
|
||||
// CHECK-NEXT: .offset: 1
|
||||
// CHECK-NEXT: .size: 1
|
||||
// CHECK-NEXT: .value_type: i8
|
||||
// CHECK-NEXT: - .value_kind: hidden_global_offset_x
|
||||
// CHECK-NEXT: .offset: 8
|
||||
// CHECK-NEXT: .size: 8
|
||||
// CHECK-NEXT: .value_type: i64
|
||||
// CHECK-NEXT: - .value_kind: hidden_global_offset_y
|
||||
// CHECK-NEXT: .offset: 8
|
||||
// CHECK-NEXT: .size: 8
|
||||
// CHECK-NEXT: .value_type: i64
|
||||
// CHECK-NEXT: - .value_kind: hidden_global_offset_z
|
||||
// CHECK-NEXT: .offset: 8
|
||||
// CHECK-NEXT: .size: 8
|
||||
// CHECK-NEXT: .value_type: i64
|
||||
// CHECK-NEXT: - .value_kind: hidden_printf_buffer
|
||||
// CHECK-NEXT: .offset: 8
|
||||
// CHECK-NEXT: .size: 8
|
||||
// CHECK-NEXT: .value_type: i8
|
||||
// CHECK-NEXT: .address_space: global
|
||||
// CHECK: amdhsa.version:
|
||||
// CHECK-NEXT: - 1
|
||||
// CHECK-NEXT: - 0
|
||||
// CHECK: amdhsa.printf:
|
||||
// CHECK-NEXT: - '1:1:4:%d\n'
|
||||
// CHECK-NEXt: - '2:1:8:%g\n'
|
||||
// CHECK: .end_amdgpu_metadata
|
||||
.amdgpu_metadata
|
||||
amdhsa.version:
|
||||
- 1
|
||||
- 0
|
||||
amdhsa.printf:
|
||||
- '1:1:4:%d\n'
|
||||
- '2:1:8:%g\n'
|
||||
amdhsa.kernels:
|
||||
- .name: test_kernel
|
||||
.symbol: test_kernel@kd
|
||||
.language: OpenCL C
|
||||
.language_version:
|
||||
- 2
|
||||
- 0
|
||||
.kernarg_segment_size: 8
|
||||
.group_segment_fixed_size: 16
|
||||
.private_segment_fixed_size: 32
|
||||
.kernarg_segment_align: 64
|
||||
.wavefront_size: 128
|
||||
.sgpr_count: 14
|
||||
.vgpr_count: 40
|
||||
.max_flat_workgroup_size: 256
|
||||
.args:
|
||||
- .type_name: char
|
||||
.size: 1
|
||||
.offset: 1
|
||||
.value_kind: by_value
|
||||
.value_type: i8
|
||||
- .size: 8
|
||||
.offset: 8
|
||||
.value_kind: hidden_global_offset_x
|
||||
.value_type: i64
|
||||
- .size: 8
|
||||
.offset: 8
|
||||
.value_kind: hidden_global_offset_y
|
||||
.value_type: i64
|
||||
- .size: 8
|
||||
.offset: 8
|
||||
.value_kind: hidden_global_offset_z
|
||||
.value_type: i64
|
||||
- .size: 8
|
||||
.offset: 8
|
||||
.value_kind: hidden_printf_buffer
|
||||
.value_type: i8
|
||||
.address_space: global
|
||||
.end_amdgpu_metadata
|
67
test/MC/AMDGPU/hsa-metadata-kernel-attrs-v3.s
Normal file
67
test/MC/AMDGPU/hsa-metadata-kernel-attrs-v3.s
Normal file
@ -0,0 +1,67 @@
|
||||
// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX700 %s
|
||||
// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX800 %s
|
||||
// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX900 %s
|
||||
|
||||
// CHECK: .amdgpu_metadata
|
||||
// CHECK: amdhsa.kernels:
|
||||
// CHECK: - .max_flat_workgroup_size: 256
|
||||
// CHECK: .wavefront_size: 128
|
||||
// CHECK: .symbol: 'test_kernel@kd'
|
||||
// CHECK: .reqd_workgroup_size:
|
||||
// CHECK-NEXT: - 1
|
||||
// CHECK-NEXT: - 2
|
||||
// CHECK-NEXT: - 4
|
||||
// CHECK: .kernarg_segment_size: 8
|
||||
// CHECK: .private_segment_fixed_size: 32
|
||||
// CHECK: .workgroup_size_hint:
|
||||
// CHECK-NEXT: - 8
|
||||
// CHECK-NEXT: - 16
|
||||
// CHECK-NEXT: - 32
|
||||
// CHECK: .name: test_kernel
|
||||
// CHECK: .language: OpenCL C
|
||||
// CHECK: .sgpr_count: 14
|
||||
// CHECK: .kernarg_segment_align: 64
|
||||
// CHECK: .vgpr_count: 40
|
||||
// CHECK: .language_version:
|
||||
// CHECK-NEXT: - 2
|
||||
// CHECK-NEXT: - 0
|
||||
// CHECK: .vec_type_hint: int
|
||||
// CHECK: amdhsa.version:
|
||||
// CHECK-NEXT: - 1
|
||||
// CHECK-NEXT: - 0
|
||||
// CHECK: amdhsa.printf:
|
||||
// CHECK: - '1:1:4:%d\n'
|
||||
// CHECK: - '2:1:8:%g\n'
|
||||
// CHECK: .end_amdgpu_metadata
|
||||
.amdgpu_metadata
|
||||
amdhsa.version:
|
||||
- 1
|
||||
- 0
|
||||
amdhsa.printf:
|
||||
- '1:1:4:%d\n'
|
||||
- '2:1:8:%g\n'
|
||||
amdhsa.kernels:
|
||||
- .name: test_kernel
|
||||
.symbol: test_kernel@kd
|
||||
.language: OpenCL C
|
||||
.language_version:
|
||||
- 2
|
||||
- 0
|
||||
.kernarg_segment_size: 8
|
||||
.group_segment_fixed_size: 16
|
||||
.private_segment_fixed_size: 32
|
||||
.kernarg_segment_align: 64
|
||||
.wavefront_size: 128
|
||||
.sgpr_count: 14
|
||||
.vgpr_count: 40
|
||||
.max_flat_workgroup_size: 256
|
||||
.reqd_workgroup_size:
|
||||
- 1
|
||||
- 2
|
||||
- 4
|
||||
.workgroup_size_hint:
|
||||
- 8
|
||||
- 16
|
||||
- 32
|
||||
.vec_type_hint: int
|
||||
.end_amdgpu_metadata
|
42
test/MC/AMDGPU/hsa-metadata-kernel-code-props-v3.s
Normal file
42
test/MC/AMDGPU/hsa-metadata-kernel-code-props-v3.s
Normal file
@ -0,0 +1,42 @@
|
||||
// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX700 %s
|
||||
// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX800 %s
|
||||
// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX900 %s
|
||||
|
||||
// CHECK: .amdgpu_metadata
|
||||
// CHECK: amdhsa.kernels:
|
||||
// CHECK: - .sgpr_count: 40
|
||||
// CHECK: .max_flat_workgroup_size: 256
|
||||
// CHECK: .symbol: 'test_kernel@kd'
|
||||
// CHECK: .kernarg_segment_size: 24
|
||||
// CHECK: .group_segment_fixed_size: 24
|
||||
// CHECK: .private_segment_fixed_size: 16
|
||||
// CHECK: .vgpr_count: 14
|
||||
// CHECK: .vgpr_spill_count: 1
|
||||
// CHECK: .kernarg_segment_align: 16
|
||||
// CHECK: .sgpr_spill_count: 1
|
||||
// CHECK: .wavefront_size: 64
|
||||
// CHECK: .name: test_kernel
|
||||
// CHECK: amdhsa.version:
|
||||
// CHECK-NEXT: - 1
|
||||
// CHECK-NEXT: - 0
|
||||
.amdgpu_metadata
|
||||
amdhsa.version:
|
||||
- 1
|
||||
- 0
|
||||
amdhsa.printf:
|
||||
- '1:1:4:%d\n'
|
||||
- '2:1:8:%g\n'
|
||||
amdhsa.kernels:
|
||||
- .name: test_kernel
|
||||
.symbol: test_kernel@kd
|
||||
.kernarg_segment_size: 24
|
||||
.group_segment_fixed_size: 24
|
||||
.private_segment_fixed_size: 16
|
||||
.kernarg_segment_align: 16
|
||||
.wavefront_size: 64
|
||||
.max_flat_workgroup_size: 256
|
||||
.sgpr_count: 40
|
||||
.vgpr_count: 14
|
||||
.sgpr_spill_count: 1
|
||||
.vgpr_spill_count: 1
|
||||
.end_amdgpu_metadata
|
@ -213,3 +213,59 @@ v_mov_b32_e32 v16, s3
|
||||
// ASM: .byte 17
|
||||
.byte .amdgcn.next_free_sgpr
|
||||
// ASM: .byte 4
|
||||
|
||||
// Metadata
|
||||
|
||||
.amdgpu_metadata
|
||||
amdhsa.version:
|
||||
- 3
|
||||
- 0
|
||||
amdhsa.kernels:
|
||||
- .name: amd_kernel_code_t_test_all
|
||||
.symbol: amd_kernel_code_t_test_all@kd
|
||||
.kernarg_segment_size: 8
|
||||
.group_segment_fixed_size: 16
|
||||
.private_segment_fixed_size: 32
|
||||
.kernarg_segment_align: 64
|
||||
.wavefront_size: 128
|
||||
.sgpr_count: 14
|
||||
.vgpr_count: 40
|
||||
.max_flat_workgroup_size: 256
|
||||
- .name: amd_kernel_code_t_minimal
|
||||
.symbol: amd_kernel_code_t_minimal@kd
|
||||
.kernarg_segment_size: 8
|
||||
.group_segment_fixed_size: 16
|
||||
.private_segment_fixed_size: 32
|
||||
.kernarg_segment_align: 64
|
||||
.wavefront_size: 128
|
||||
.sgpr_count: 14
|
||||
.vgpr_count: 40
|
||||
.max_flat_workgroup_size: 256
|
||||
.end_amdgpu_metadata
|
||||
|
||||
// ASM: .amdgpu_metadata
|
||||
// ASM: amdhsa.kernels:
|
||||
// ASM: - .sgpr_count: 14
|
||||
// ASM: .max_flat_workgroup_size: 256
|
||||
// ASM: .symbol: 'amd_kernel_code_t_test_all@kd'
|
||||
// ASM: .kernarg_segment_size: 8
|
||||
// ASM: .group_segment_fixed_size: 16
|
||||
// ASM: .private_segment_fixed_size: 32
|
||||
// ASM: .vgpr_count: 40
|
||||
// ASM: .kernarg_segment_align: 64
|
||||
// ASM: .wavefront_size: 128
|
||||
// ASM: .name: amd_kernel_code_t_test_all
|
||||
// ASM: - .sgpr_count: 14
|
||||
// ASM: .max_flat_workgroup_size: 256
|
||||
// ASM: .symbol: 'amd_kernel_code_t_minimal@kd'
|
||||
// ASM: .kernarg_segment_size: 8
|
||||
// ASM: .group_segment_fixed_size: 16
|
||||
// ASM: .private_segment_fixed_size: 32
|
||||
// ASM: .vgpr_count: 40
|
||||
// ASM: .kernarg_segment_align: 64
|
||||
// ASM: .wavefront_size: 128
|
||||
// ASM: .name: amd_kernel_code_t_minimal
|
||||
// ASM: amdhsa.version:
|
||||
// ASM-NEXT: - 3
|
||||
// ASM-NEXT: - 0
|
||||
// ASM: .end_amdgpu_metadata
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/ADT/Twine.h"
|
||||
#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
|
||||
#include "llvm/BinaryFormat/ELF.h"
|
||||
#include "llvm/Object/ELF.h"
|
||||
#include "llvm/Object/ELFObjectFile.h"
|
||||
@ -3628,7 +3629,7 @@ static std::string getFreeBSDNoteTypeName(const uint32_t NT) {
|
||||
return OS.str();
|
||||
}
|
||||
|
||||
static std::string getAMDGPUNoteTypeName(const uint32_t NT) {
|
||||
static std::string getAMDNoteTypeName(const uint32_t NT) {
|
||||
static const struct {
|
||||
uint32_t ID;
|
||||
const char *Name;
|
||||
@ -3651,6 +3652,16 @@ static std::string getAMDGPUNoteTypeName(const uint32_t NT) {
|
||||
return OS.str();
|
||||
}
|
||||
|
||||
static std::string getAMDGPUNoteTypeName(const uint32_t NT) {
|
||||
if (NT == ELF::NT_AMDGPU_METADATA)
|
||||
return std::string("NT_AMDGPU_METADATA (AMDGPU Metadata)");
|
||||
|
||||
std::string string;
|
||||
raw_string_ostream OS(string);
|
||||
OS << format("Unknown note type (0x%08x)", NT);
|
||||
return OS.str();
|
||||
}
|
||||
|
||||
template <typename ELFT>
|
||||
static std::string getGNUProperty(uint32_t Type, uint32_t DataSize,
|
||||
ArrayRef<uint8_t> Data) {
|
||||
@ -3808,14 +3819,13 @@ static void printGNUNote(raw_ostream &OS, uint32_t NoteType,
|
||||
OS << '\n';
|
||||
}
|
||||
|
||||
struct AMDGPUNote {
|
||||
std::string type;
|
||||
std::string value;
|
||||
struct AMDNote {
|
||||
std::string Type;
|
||||
std::string Value;
|
||||
};
|
||||
|
||||
template <typename ELFT>
|
||||
static AMDGPUNote getAMDGPUNote(uint32_t NoteType,
|
||||
ArrayRef<uint8_t> Desc) {
|
||||
static AMDNote getAMDNote(uint32_t NoteType, ArrayRef<uint8_t> Desc) {
|
||||
switch (NoteType) {
|
||||
default:
|
||||
return {"", ""};
|
||||
@ -3841,6 +3851,41 @@ static AMDGPUNote getAMDGPUNote(uint32_t NoteType,
|
||||
}
|
||||
}
|
||||
|
||||
struct AMDGPUNote {
|
||||
std::string Type;
|
||||
std::string Value;
|
||||
};
|
||||
|
||||
template <typename ELFT>
|
||||
static AMDGPUNote getAMDGPUNote(uint32_t NoteType, ArrayRef<uint8_t> Desc) {
|
||||
switch (NoteType) {
|
||||
default:
|
||||
return {"", ""};
|
||||
case ELF::NT_AMDGPU_METADATA:
|
||||
auto MsgPackString =
|
||||
StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
|
||||
msgpack::Reader MsgPackReader(MsgPackString);
|
||||
auto OptMsgPackNodeOrErr = msgpack::Node::read(MsgPackReader);
|
||||
if (errorToBool(OptMsgPackNodeOrErr.takeError()))
|
||||
return {"AMDGPU Metadata", "Invalid AMDGPU Metadata"};
|
||||
auto &OptMsgPackNode = *OptMsgPackNodeOrErr;
|
||||
if (!OptMsgPackNode)
|
||||
return {"AMDGPU Metadata", "Invalid AMDGPU Metadata"};
|
||||
auto &MsgPackNode = *OptMsgPackNode;
|
||||
|
||||
AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
|
||||
if (!Verifier.verify(*MsgPackNode))
|
||||
return {"AMDGPU Metadata", "Invalid AMDGPU Metadata"};
|
||||
|
||||
std::string HSAMetadataString;
|
||||
raw_string_ostream StrOS(HSAMetadataString);
|
||||
yaml::Output YOut(StrOS);
|
||||
YOut << MsgPackNode;
|
||||
|
||||
return {"AMDGPU Metadata", StrOS.str()};
|
||||
}
|
||||
}
|
||||
|
||||
template <class ELFT>
|
||||
void GNUStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
|
||||
const Elf_Ehdr *e = Obj->getHeader();
|
||||
@ -3867,10 +3912,15 @@ void GNUStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
|
||||
} else if (Name == "FreeBSD") {
|
||||
OS << getFreeBSDNoteTypeName(Type) << '\n';
|
||||
} else if (Name == "AMD") {
|
||||
OS << getAMDNoteTypeName(Type) << '\n';
|
||||
const AMDNote N = getAMDNote<ELFT>(Type, Descriptor);
|
||||
if (!N.Type.empty())
|
||||
OS << " " << N.Type << ":\n " << N.Value << '\n';
|
||||
} else if (Name == "AMDGPU") {
|
||||
OS << getAMDGPUNoteTypeName(Type) << '\n';
|
||||
const AMDGPUNote N = getAMDGPUNote<ELFT>(Type, Descriptor);
|
||||
if (!N.type.empty())
|
||||
OS << " " << N.type << ":\n " << N.value << '\n';
|
||||
if (!N.Type.empty())
|
||||
OS << " " << N.Type << ":\n " << N.Value << '\n';
|
||||
} else {
|
||||
OS << "Unknown note type: (" << format_hex(Type, 10) << ')';
|
||||
}
|
||||
@ -4533,10 +4583,15 @@ void LLVMStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
|
||||
} else if (Name == "FreeBSD") {
|
||||
W.printString("Type", getFreeBSDNoteTypeName(Type));
|
||||
} else if (Name == "AMD") {
|
||||
W.printString("Type", getAMDNoteTypeName(Type));
|
||||
const AMDNote N = getAMDNote<ELFT>(Type, Descriptor);
|
||||
if (!N.Type.empty())
|
||||
W.printString(N.Type, N.Value);
|
||||
} else if (Name == "AMDGPU") {
|
||||
W.printString("Type", getAMDGPUNoteTypeName(Type));
|
||||
const AMDGPUNote N = getAMDGPUNote<ELFT>(Type, Descriptor);
|
||||
if (!N.type.empty())
|
||||
W.printString(N.type, N.value);
|
||||
if (!N.Type.empty())
|
||||
W.printString(N.Type, N.Value);
|
||||
} else {
|
||||
W.getOStream() << "Unknown note type: (" << format_hex(Type, 10) << ')';
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user