1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

[MC] Changes to help improve target specific symbol disassembly

Summary:
This commit slightly modifies the MCDisassembler, and llvm-objdump to
allow targets to also decode entire symbols.

WebAssembly uses the onSymbolStart hook it to decode preludes.
WebAssembly partially disassembles the symbol in its target specific
way; and then falls back to the normal flow of llvm-objdump.

AMDGPU needs it to decode kernel descriptors entirely, and move to the
next symbol.

This commit is to split the above task into 2.
- Changes to llvm-objdump and MC-layer without breaking WebAssembly code
  [ this commit ]
- AMDGPU's implementation of onSymbolStart that decodes kernel
  descriptors. [ https://reviews.llvm.org/D80713 ]

Reviewers: scott.linder, t-tye, sunfish, arsenm, jhenderson, MaskRay, aardappel

Reviewed By: scott.linder, jhenderson, aardappel

Subscribers: bcain, dschuff, wdng, tpr, sbc100, jgravelle-google, hiraditya, aheejin, MaskRay, rupprecht, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D80512
This commit is contained in:
Ronak Chauhan 2020-06-12 14:00:33 -04:00 committed by Scott Linder
parent 3be3719b08
commit 94e53ef1f1
4 changed files with 69 additions and 21 deletions

View File

@ -127,8 +127,13 @@ public:
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &CStream) const = 0;
/// May parse any prelude that precedes instructions after the start of a
/// symbol. Needed for some targets, e.g. WebAssembly.
/// Used to perform separate target specific disassembly for a particular
/// symbol. May parse any prelude that precedes instructions after the
/// start of a symbol, or the entire symbol.
/// This is used for example by WebAssembly to decode preludes.
///
/// Base implementation returns None. So all targets by default ignore to
/// treat symbols separately.
///
/// \param Name - The name of the symbol.
/// \param Size - The number of bytes consumed.
@ -136,11 +141,27 @@ public:
/// byte of the symbol.
/// \param Bytes - A reference to the actual bytes at the symbol location.
/// \param CStream - The stream to print comments and annotations on.
/// \return - MCDisassembler::Success if the bytes are valid,
/// MCDisassembler::Fail if the bytes were invalid.
virtual DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
/// \return - MCDisassembler::Success if bytes are decoded
/// successfully. Size must hold the number of bytes that
/// were decoded.
/// - MCDisassembler::Fail if the bytes are invalid. Size
/// must hold the number of bytes that were decoded before
/// failing. The target must print nothing. This can be
/// done by buffering the output if needed.
/// - None if the target doesn't want to handle the symbol
/// separately. Value of Size is ignored in this case.
virtual Optional<DecodeStatus> onSymbolStart(StringRef Name, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
raw_ostream &CStream) const;
// TODO:
// Implement similar hooks that can be used at other points during
// disassembly. Something along the following lines:
// - onBeforeInstructionDecode()
// - onAfterInstructionDecode()
// - onSymbolEnd()
// It should help move much of the target specific code from llvm-objdump to
// respective target disassemblers.
private:
MCContext &Ctx;

View File

@ -16,12 +16,11 @@ using namespace llvm;
MCDisassembler::~MCDisassembler() = default;
MCDisassembler::DecodeStatus
Optional<MCDisassembler::DecodeStatus>
MCDisassembler::onSymbolStart(StringRef Name, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &CStream) const {
Size = 0;
return MCDisassembler::Success;
return None;
}
bool MCDisassembler::tryAddingSymbolicOperand(MCInst &Inst, int64_t Value,

View File

@ -46,8 +46,9 @@ class WebAssemblyDisassembler final : public MCDisassembler {
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &CStream) const override;
DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
Optional<DecodeStatus> onSymbolStart(StringRef Name, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
raw_ostream &CStream) const override;
public:
@ -120,7 +121,7 @@ bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
return true;
}
MCDisassembler::DecodeStatus WebAssemblyDisassembler::onSymbolStart(
Optional<MCDisassembler::DecodeStatus> WebAssemblyDisassembler::onSymbolStart(
StringRef Name, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &CStream) const {
Size = 0;
@ -128,21 +129,21 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::onSymbolStart(
// Start of a code section: we're parsing only the function count.
int64_t FunctionCount;
if (!nextLEB(FunctionCount, Bytes, Size, false))
return MCDisassembler::Fail;
return None;
outs() << " # " << FunctionCount << " functions in section.";
} else {
// Parse the start of a single function.
int64_t BodySize, LocalEntryCount;
if (!nextLEB(BodySize, Bytes, Size, false) ||
!nextLEB(LocalEntryCount, Bytes, Size, false))
return MCDisassembler::Fail;
return None;
if (LocalEntryCount) {
outs() << " .local ";
for (int64_t I = 0; I < LocalEntryCount; I++) {
int64_t Count, Type;
if (!nextLEB(Count, Bytes, Size, false) ||
!nextLEB(Type, Bytes, Size, false))
return MCDisassembler::Fail;
return None;
for (int64_t J = 0; J < Count; J++) {
if (I || J)
outs() << ", ";

View File

@ -1429,10 +1429,37 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
continue;
}
// Some targets (like WebAssembly) have a special prelude at the start
// of each symbol.
DisAsm->onSymbolStart(SymbolName, Size, Bytes.slice(Start, End - Start),
auto Status = DisAsm->onSymbolStart(SymbolName, Size,
Bytes.slice(Start, End - Start),
SectionAddr + Start, CommentStream);
// To have round trippable disassembly, we fall back to decoding the
// remaining bytes as instructions.
//
// If there is a failure, we disassemble the failed region as bytes before
// falling back. The target is expected to print nothing in this case.
//
// If there is Success or SoftFail i.e no 'real' failure, we go ahead by
// Size bytes before falling back.
// So if the entire symbol is 'eaten' by the target:
// Start += Size // Now Start = End and we will never decode as
// // instructions
//
// Right now, most targets return None i.e ignore to treat a symbol
// separately. But WebAssembly decodes preludes for some symbols.
//
if (Status.hasValue()) {
if (Status.getValue() == MCDisassembler::Fail) {
outs() << "// Error in decoding " << SymbolName
<< " : Decoding failed region as bytes.\n";
for (uint64_t I = 0; I < Size; ++I) {
outs() << "\t.byte\t " << format_hex(Bytes[I], 1, /*Upper=*/true)
<< "\n";
}
}
} else {
Size = 0;
}
Start += Size;
Index = Start;