mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[MC] Changes to help improve target specific symbol disassembly
Summary: This commit slightly modifies the MCDisassembler and llvm-objdump to allow targets to also decode entire symbols. WebAssembly uses the onSymbolStart hook to decode preludes: it partially disassembles the symbol in its target-specific way and then falls back to the normal flow of llvm-objdump. AMDGPU needs the hook to decode kernel descriptors entirely and then move on to the next symbol. The work is split into two parts: - Changes to llvm-objdump and the MC layer without breaking WebAssembly code [ this commit ] - AMDGPU's implementation of onSymbolStart that decodes kernel descriptors. [ https://reviews.llvm.org/D80713 ] Reviewers: scott.linder, t-tye, sunfish, arsenm, jhenderson, MaskRay, aardappel Reviewed By: scott.linder, jhenderson, aardappel Subscribers: bcain, dschuff, wdng, tpr, sbc100, jgravelle-google, hiraditya, aheejin, MaskRay, rupprecht, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D80512
This commit is contained in:
parent
3be3719b08
commit
94e53ef1f1
@ -127,8 +127,13 @@ public:
|
||||
ArrayRef<uint8_t> Bytes, uint64_t Address,
|
||||
raw_ostream &CStream) const = 0;
|
||||
|
||||
/// May parse any prelude that precedes instructions after the start of a
|
||||
/// symbol. Needed for some targets, e.g. WebAssembly.
|
||||
/// Used to perform separate target specific disassembly for a particular
|
||||
/// symbol. May parse any prelude that precedes instructions after the
|
||||
/// start of a symbol, or the entire symbol.
|
||||
/// This is used for example by WebAssembly to decode preludes.
|
||||
///
|
||||
/// Base implementation returns None, so by default targets do not
|
||||
/// treat symbols separately.
|
||||
///
|
||||
/// \param Name - The name of the symbol.
|
||||
/// \param Size - The number of bytes consumed.
|
||||
@ -136,11 +141,27 @@ public:
|
||||
/// byte of the symbol.
|
||||
/// \param Bytes - A reference to the actual bytes at the symbol location.
|
||||
/// \param CStream - The stream to print comments and annotations on.
|
||||
/// \return - MCDisassembler::Success if the bytes are valid,
|
||||
/// MCDisassembler::Fail if the bytes were invalid.
|
||||
virtual DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size,
|
||||
ArrayRef<uint8_t> Bytes, uint64_t Address,
|
||||
/// \return - MCDisassembler::Success if bytes are decoded
|
||||
/// successfully. Size must hold the number of bytes that
|
||||
/// were decoded.
|
||||
/// - MCDisassembler::Fail if the bytes are invalid. Size
|
||||
/// must hold the number of bytes that were decoded before
|
||||
/// failing. The target must print nothing. This can be
|
||||
/// done by buffering the output if needed.
|
||||
/// - None if the target doesn't want to handle the symbol
|
||||
/// separately. Value of Size is ignored in this case.
|
||||
virtual Optional<DecodeStatus> onSymbolStart(StringRef Name, uint64_t &Size,
|
||||
ArrayRef<uint8_t> Bytes,
|
||||
uint64_t Address,
|
||||
raw_ostream &CStream) const;
|
||||
// TODO:
|
||||
// Implement similar hooks that can be used at other points during
|
||||
// disassembly. Something along the following lines:
|
||||
// - onBeforeInstructionDecode()
|
||||
// - onAfterInstructionDecode()
|
||||
// - onSymbolEnd()
|
||||
// It should help move much of the target specific code from llvm-objdump to
|
||||
// respective target disassemblers.
|
||||
|
||||
private:
|
||||
MCContext &Ctx;
|
||||
|
@ -16,12 +16,11 @@ using namespace llvm;
|
||||
|
||||
MCDisassembler::~MCDisassembler() = default;
|
||||
|
||||
MCDisassembler::DecodeStatus
|
||||
Optional<MCDisassembler::DecodeStatus>
|
||||
MCDisassembler::onSymbolStart(StringRef Name, uint64_t &Size,
|
||||
ArrayRef<uint8_t> Bytes, uint64_t Address,
|
||||
raw_ostream &CStream) const {
|
||||
Size = 0;
|
||||
return MCDisassembler::Success;
|
||||
return None;
|
||||
}
|
||||
|
||||
bool MCDisassembler::tryAddingSymbolicOperand(MCInst &Inst, int64_t Value,
|
||||
|
@ -46,8 +46,9 @@ class WebAssemblyDisassembler final : public MCDisassembler {
|
||||
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
|
||||
ArrayRef<uint8_t> Bytes, uint64_t Address,
|
||||
raw_ostream &CStream) const override;
|
||||
DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size,
|
||||
ArrayRef<uint8_t> Bytes, uint64_t Address,
|
||||
Optional<DecodeStatus> onSymbolStart(StringRef Name, uint64_t &Size,
|
||||
ArrayRef<uint8_t> Bytes,
|
||||
uint64_t Address,
|
||||
raw_ostream &CStream) const override;
|
||||
|
||||
public:
|
||||
@ -120,7 +121,7 @@ bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
|
||||
return true;
|
||||
}
|
||||
|
||||
MCDisassembler::DecodeStatus WebAssemblyDisassembler::onSymbolStart(
|
||||
Optional<MCDisassembler::DecodeStatus> WebAssemblyDisassembler::onSymbolStart(
|
||||
StringRef Name, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
|
||||
raw_ostream &CStream) const {
|
||||
Size = 0;
|
||||
@ -128,21 +129,21 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::onSymbolStart(
|
||||
// Start of a code section: we're parsing only the function count.
|
||||
int64_t FunctionCount;
|
||||
if (!nextLEB(FunctionCount, Bytes, Size, false))
|
||||
return MCDisassembler::Fail;
|
||||
return None;
|
||||
outs() << " # " << FunctionCount << " functions in section.";
|
||||
} else {
|
||||
// Parse the start of a single function.
|
||||
int64_t BodySize, LocalEntryCount;
|
||||
if (!nextLEB(BodySize, Bytes, Size, false) ||
|
||||
!nextLEB(LocalEntryCount, Bytes, Size, false))
|
||||
return MCDisassembler::Fail;
|
||||
return None;
|
||||
if (LocalEntryCount) {
|
||||
outs() << " .local ";
|
||||
for (int64_t I = 0; I < LocalEntryCount; I++) {
|
||||
int64_t Count, Type;
|
||||
if (!nextLEB(Count, Bytes, Size, false) ||
|
||||
!nextLEB(Type, Bytes, Size, false))
|
||||
return MCDisassembler::Fail;
|
||||
return None;
|
||||
for (int64_t J = 0; J < Count; J++) {
|
||||
if (I || J)
|
||||
outs() << ", ";
|
||||
|
@ -1429,10 +1429,37 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
|
||||
continue;
|
||||
}
|
||||
|
||||
// Some targets (like WebAssembly) have a special prelude at the start
|
||||
// of each symbol.
|
||||
DisAsm->onSymbolStart(SymbolName, Size, Bytes.slice(Start, End - Start),
|
||||
auto Status = DisAsm->onSymbolStart(SymbolName, Size,
|
||||
Bytes.slice(Start, End - Start),
|
||||
SectionAddr + Start, CommentStream);
|
||||
// To have round trippable disassembly, we fall back to decoding the
|
||||
// remaining bytes as instructions.
|
||||
//
|
||||
// If there is a failure, we disassemble the failed region as bytes before
|
||||
// falling back. The target is expected to print nothing in this case.
|
||||
//
|
||||
// If there is Success or SoftFail, i.e. no 'real' failure, we advance by
|
||||
// Size bytes before falling back.
|
||||
// So if the entire symbol is 'eaten' by the target:
|
||||
// Start += Size // Now Start = End and we will never decode as
|
||||
// // instructions
|
||||
//
|
||||
// Right now, most targets return None, i.e. they do not treat a symbol
|
||||
// separately. But WebAssembly decodes preludes for some symbols.
|
||||
//
|
||||
if (Status.hasValue()) {
|
||||
if (Status.getValue() == MCDisassembler::Fail) {
|
||||
outs() << "// Error in decoding " << SymbolName
|
||||
<< " : Decoding failed region as bytes.\n";
|
||||
for (uint64_t I = 0; I < Size; ++I) {
|
||||
outs() << "\t.byte\t " << format_hex(Bytes[I], 1, /*Upper=*/true)
|
||||
<< "\n";
|
||||
}
|
||||
}
|
||||
} else {
|
||||
Size = 0;
|
||||
}
|
||||
|
||||
Start += Size;
|
||||
|
||||
Index = Start;
|
||||
|
Loading…
x
Reference in New Issue
Block a user