From 023fdbe1b6dd546144d1caca8af45384bd5434a5 Mon Sep 17 00:00:00 2001 From: Sean Callanan Date: Thu, 17 Dec 2009 01:49:59 +0000 Subject: [PATCH] Test harness for the LLVM disassembler. When invoked with -disassemble, llvm-mc now accepts lines of the form 0x00 0x00 and passes the resulting bytes to the disassembler for the chosen (or default) target, printing the result. llvm-svn: 91579 --- tools/llvm-mc/HexDisassembler.cpp | 158 ++++++++++++++++++++++++++++++ tools/llvm-mc/HexDisassembler.h | 34 +++++++ tools/llvm-mc/llvm-mc.cpp | 37 ++++++- 3 files changed, 227 insertions(+), 2 deletions(-) create mode 100644 tools/llvm-mc/HexDisassembler.cpp create mode 100644 tools/llvm-mc/HexDisassembler.h diff --git a/tools/llvm-mc/HexDisassembler.cpp b/tools/llvm-mc/HexDisassembler.cpp new file mode 100644 index 00000000000..88464a5a5a4 --- /dev/null +++ b/tools/llvm-mc/HexDisassembler.cpp @@ -0,0 +1,158 @@ +//===- HexDisassembler.cpp - Disassembler for hex strings -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class implements the disassembler of strings of bytes written in +// hexadecimal, from standard input or from a file. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexDisassembler.h"
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <vector>
+
+using namespace llvm;
+
+/// VectorMemoryObject - Read-only MemoryObject view of a byte vector.  The
+/// vector is held by reference, so it must outlive this object.
+class VectorMemoryObject : public MemoryObject {
+private:
+  const std::vector<unsigned char> &Bytes;
+public:
+  VectorMemoryObject(const std::vector<unsigned char> &bytes) : Bytes(bytes) {}
+
+  uint64_t getBase() const { return 0; }
+
+  uint64_t getExtent() const { return Bytes.size(); }
+
+  /// readByte - Store the byte at addr in *byte and return 0, or return -1
+  /// when addr is out of range (valid addresses are 0 .. getExtent() - 1).
+  int readByte(uint64_t addr, uint8_t *byte) const {
+    if (addr >= getExtent())
+      return -1;
+
+    *byte = Bytes[addr];
+    return 0;
+  }
+};
+
+/// printInst - Disassemble the instruction encoded at the start of bytes.  On
+/// success print it with instPrinter and write a newline to outs(); otherwise
+/// report the failure and the disassembler's diagnostic log on errs().
+void printInst(const llvm::MCDisassembler &disassembler,
+               llvm::MCInstPrinter &instPrinter,
+               const std::vector<unsigned char> &bytes) {
+  // Wrap the vector in a MemoryObject.
+  VectorMemoryObject memoryObject(bytes);
+
+  // Disassemble it.
+  MCInst inst;
+  uint64_t size;
+
+  std::string verboseOStr;
+  llvm::raw_string_ostream verboseOS(verboseOStr);
+
+  if (disassembler.getInstruction(inst, size, memoryObject, 0, verboseOS)) {
+    instPrinter.printInst(&inst);
+    outs() << "\n";
+  }
+  else {
+    errs() << "error: invalid instruction" << "\n";
+    errs() << "Diagnostic log:" << "\n";
+    // str() flushes the stream first, so no buffered diagnostics are lost.
+    errs() << verboseOS.str() << "\n";
+  }
+}
+
+/// disassemble - Read buffer as whitespace-separated byte values, treating
+/// each line as one instruction, and print the disassembly of every line.
+/// Tokens that do not parse as an integer in the range 0-255 are skipped with
+/// a warning.  Returns 0 on success, or -1 if the target provides no assembly
+/// info, disassembler or instruction printer.
+int HexDisassembler::disassemble(const Target &target,
+                                 const std::string &tripleString,
+                                 MemoryBuffer &buffer) {
+  // Set up disassembler
+  llvm::OwningPtr<const llvm::MCAsmInfo> asmInfo
+    (target.createAsmInfo(tripleString));
+
+  if (!asmInfo) {
+    errs() << "error: no assembly info for target " << tripleString << "\n";
+    return -1;
+  }
+
+  llvm::OwningPtr<const llvm::MCDisassembler> disassembler
+    (target.createMCDisassembler());
+
+  if (!disassembler) {
+    errs() << "error: no disassembler for target " << tripleString << "\n";
+    return -1;
+  }
+
+  // Own the printer so that it is released on every return path.
+  llvm::OwningPtr<llvm::MCInstPrinter> instPrinter
+    (target.createMCInstPrinter(0, *asmInfo, outs()));
+
+  if (!instPrinter) {
+    errs() << "error: no instruction printer for target " << tripleString
+      << "\n";
+    return -1;
+  }
+
+  // Convert the input to a vector for disassembly.
+  std::vector<unsigned char> bytes;
+
+  StringRef str = buffer.getBuffer();
+
+  while (!str.empty()) {
+    // A newline ahead of the next token ends the current instruction.
+    if (str.find_first_of("\n") < str.find_first_not_of(" \t\n\r")) {
+      if (!bytes.empty())
+        printInst(*disassembler, *instPrinter, bytes);
+      bytes.clear();
+    }
+
+    // Skip leading space; stop if nothing but whitespace was left.
+    str = str.substr(str.find_first_not_of(" \t\n\r"));
+    if (str.empty())
+      break;
+
+    // Get the current token.  The last one may run to the end of the input.
+    size_t next = str.find_first_of(" \t\n\r");
+    if (next == StringRef::npos)
+      next = str.size();
+
+    StringRef value = str.slice(0, next);
+
+    // Convert to a byte and add to the byte vector.
+    unsigned byte;
+    if (value.getAsInteger(0, byte) || byte > 255) {
+      errs() << "warning: invalid input token '" << value << "' of length "
+             << next << "\n";
+    }
+    else {
+      bytes.push_back(static_cast<unsigned char>(byte));
+    }
+    str = str.substr(next);
+  }
+
+  if (!bytes.empty())
+    printInst(*disassembler, *instPrinter, bytes);
+
+  return 0;
+}
diff --git a/tools/llvm-mc/HexDisassembler.h b/tools/llvm-mc/HexDisassembler.h
new file mode 100644
index 00000000000..d197aea9bc8
--- /dev/null
+++ b/tools/llvm-mc/HexDisassembler.h
@@ -0,0 +1,34 @@
+//===- HexDisassembler.h - Disassembler for hex strings -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements the disassembler of strings of bytes written in
+// hexadecimal, from standard input or from a file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXDISASSEMBLER_H
+#define HEXDISASSEMBLER_H
+
+#include <string>
+
+namespace llvm {
+
+class Target;
+class MemoryBuffer;
+
+class HexDisassembler {
+public:
+  static int disassemble(const Target &target,
+                         const std::string &tripleString,
+                         MemoryBuffer &buffer);
+};
+
+} // namespace llvm
+
+#endif
diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp
index 76552b850ec..75cb2b83508 100644
--- a/tools/llvm-mc/llvm-mc.cpp
+++ b/tools/llvm-mc/llvm-mc.cpp
@@ -32,6 +32,7 @@
 #include "llvm/Target/TargetMachine.h" // FIXME.
 #include "llvm/Target/TargetSelect.h"
 #include "AsmParser.h"
+#include "HexDisassembler.h"
 using namespace llvm;
 
 static cl::opt<std::string>
@@ -76,7 +77,8 @@ TripleName("triple", cl::desc("Target triple to assemble for, "
 
 enum ActionType {
   AC_AsLex,
-  AC_Assemble
+  AC_Assemble,
+  AC_Disassemble
 };
 
 static cl::opt<ActionType>
@@ -86,6 +88,8 @@ Action(cl::desc("Action to perform:"),
                              "Lex tokens from a .s file"),
                   clEnumValN(AC_Assemble, "assemble",
                              "Assemble a .s file (default)"),
+                  clEnumValN(AC_Disassemble, "disassemble",
+                             "Disassemble strings of hex bytes"),
                   clEnumValEnd));
 
 static const Target *GetTarget(const char *ProgName) {
@@ -281,7 +285,33 @@ static int AssembleInput(const char *ProgName) {
     delete Out;
 
   return Res;
-}
+}
+
+/// DisassembleInput - Disassemble the hex bytes read from the input file for
+/// the selected target.  Returns 0 on success and nonzero on any failure.
+static int DisassembleInput(const char *ProgName) {
+  std::string Error;
+  const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
+  if (TheTarget == 0) {
+    errs() << ProgName << ": error: unable to get target for '" << TripleName
+           << "', see --version and --triple.\n";
+    return 1;
+  }
+  std::string ErrorMessage;
+  MemoryBuffer *Buffer = MemoryBuffer::getFileOrSTDIN(InputFilename,
+                                                      &ErrorMessage);
+  if (Buffer == 0) {
+    errs() << ProgName << ": ";
+    if (ErrorMessage.size())
+      errs() << ErrorMessage << "\n";
+    else
+      errs() << "input file didn't read correctly.\n";
+    return 1;
+  }
+  int Res = HexDisassembler::disassemble(*TheTarget, TripleName, *Buffer);
+  delete Buffer;  // disassemble() only borrows the buffer.
+  return Res;
+}
 
 
 int main(int argc, char **argv) {
@@ -296,6 +326,7 @@ int main(int argc, char **argv) {
   llvm::InitializeAllTargets();
   llvm::InitializeAllAsmPrinters();
   llvm::InitializeAllAsmParsers();
+  llvm::InitializeAllDisassemblers();
 
   cl::ParseCommandLineOptions(argc, argv, "llvm machine code playground\n");
 
@@ -305,6 +336,8 @@ int main(int argc, char **argv) {
     return AsLexInput(argv[0]);
   case AC_Assemble:
     return AssembleInput(argv[0]);
+  case AC_Disassemble:
+    return DisassembleInput(argv[0]);
   }
 
   return 0;