1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 05:01:59 +01:00

[X86][Disassembler] Simplify and optimize reader functions

llvm-objdump -d on clang is decreased from 8.2s to 7.8s.
This commit is contained in:
Fangrui Song 2020-01-10 23:43:44 -08:00
parent dccc4683f8
commit 5e7f810e7d
3 changed files with 102 additions and 181 deletions

View File

@ -169,29 +169,12 @@ X86GenericDisassembler::X86GenericDisassembler(
llvm_unreachable("Invalid CPU mode");
}
namespace {
struct Region {
ArrayRef<uint8_t> Bytes;
uint64_t Base;
Region(ArrayRef<uint8_t> Bytes, uint64_t Base) : Bytes(Bytes), Base(Base) {}
};
} // end anonymous namespace
/// A callback function that wraps the readByte method from Region.
///
/// @param Arg - The generic callback parameter. In this case, this should
/// be a pointer to a Region.
/// @param Byte - A pointer to the byte to be read.
/// @param Address - The address to be read.
static int regionReader(const void *Arg, uint8_t *Byte, uint64_t Address) {
auto *R = static_cast<const Region *>(Arg);
ArrayRef<uint8_t> Bytes = R->Bytes;
unsigned Index = Address - R->Base;
if (Bytes.size() <= Index)
return -1;
*Byte = Bytes[Index];
return 0;
}
/// logger - a callback function that wraps the operator<< method from
/// raw_ostream.
@ -222,10 +205,9 @@ MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction(
if (&VStream == &nulls())
LoggerFn = nullptr; // Disable logging completely if it's going to nulls().
Region R(Bytes, Address);
std::pair<ArrayRef<uint8_t>, uint64_t> R(Bytes, Address);
int Ret = decodeInstruction(&InternalInstr, regionReader, (const void *)&R,
LoggerFn, (void *)&VStream,
int Ret = decodeInstruction(&InternalInstr, &R, LoggerFn, (void *)&VStream,
(const void *)MII.get(), Address, fMode);
if (Ret) {

View File

@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "X86DisassemblerDecoder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include <cstdarg> /* for va_*() */
@ -190,74 +191,34 @@ static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
return &INSTRUCTIONS_SYM[uid];
}
/*
* consumeByte - Uses the reader function provided by the user to consume one
* byte from the instruction's memory and advance the cursor.
*
* @param insn - The instruction with the reader function to use. The cursor
* for this instruction is advanced.
* @param byte - A pointer to a pre-allocated memory buffer to be populated
* with the data read.
* @return - 0 if the read was successful; nonzero otherwise.
*/
static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
if (!ret)
++(insn->readerCursor);
return ret;
}
/*
* lookAtByte - Like consumeByte, but does not advance the cursor.
*
* @param insn - See consumeByte().
* @param byte - See consumeByte().
* @return - See consumeByte().
*/
static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
return insn->reader(insn->readerArg, byte, insn->readerCursor);
static bool peek(struct InternalInstruction *insn, uint8_t &byte) {
auto *r = static_cast<const std::pair<llvm::ArrayRef<uint8_t>, uint64_t> *>(
insn->readerArg);
uint64_t offset = insn->readerCursor - r->second;
if (offset >= r->first.size())
return true;
byte = r->first[offset];
return false;
}
static void unconsumeByte(struct InternalInstruction* insn) {
insn->readerCursor--;
}
#define CONSUME_FUNC(name, type) \
static int name(struct InternalInstruction* insn, type* ptr) { \
type combined = 0; \
unsigned offset; \
for (offset = 0; offset < sizeof(type); ++offset) { \
uint8_t byte; \
int ret = insn->reader(insn->readerArg, \
&byte, \
insn->readerCursor + offset); \
if (ret) \
return ret; \
combined = combined | ((uint64_t)byte << (offset * 8)); \
} \
*ptr = combined; \
insn->readerCursor += sizeof(type); \
return 0; \
}
/*
* consume* - Use the reader function provided by the user to consume data
* values of various sizes from the instruction's memory and advance the
* cursor appropriately. These readers perform endian conversion.
*
* @param insn - See consumeByte().
* @param ptr - A pointer to a pre-allocated memory of appropriate size to
* be populated with the data read.
* @return - See consumeByte().
*/
CONSUME_FUNC(consumeInt8, int8_t)
CONSUME_FUNC(consumeInt16, int16_t)
CONSUME_FUNC(consumeInt32, int32_t)
CONSUME_FUNC(consumeUInt16, uint16_t)
CONSUME_FUNC(consumeUInt32, uint32_t)
CONSUME_FUNC(consumeUInt64, uint64_t)
template <typename T>
static bool consume(InternalInstruction *insn, T &ptr) {
auto *r = static_cast<const std::pair<llvm::ArrayRef<uint8_t>, uint64_t> *>(
insn->readerArg);
uint64_t offset = insn->readerCursor - r->second;
if (offset + sizeof(T) > r->first.size())
return true;
T ret = 0;
for (unsigned i = 0; i < sizeof(T); ++i)
ret |= (uint64_t)r->first[offset + i] << (i * 8);
ptr = ret;
insn->readerCursor += sizeof(T);
return false;
}
/*
* dbgprintf - Uses the logging function provided by the user to log a single
@ -307,7 +268,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
while (isPrefix) {
/* If we fail reading prefixes, just stop here and let the opcode reader deal with it */
if (consumeByte(insn, &byte))
if (consume(insn, byte))
break;
/*
@ -317,7 +278,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK
break;
if ((byte == 0xf2 || byte == 0xf3) && !lookAtByte(insn, &nextByte)) {
if ((byte == 0xf2 || byte == 0xf3) && !peek(insn, nextByte)) {
/*
* If the byte is 0xf2 or 0xf3, and any of the following conditions are
* met:
@ -345,10 +306,10 @@ static int readPrefixes(struct InternalInstruction* insn) {
if (isREX(insn, nextByte)) {
uint8_t nnextByte;
// Go to REX prefix after the current one
if (consumeByte(insn, &nnextByte))
if (consume(insn, nnextByte))
return -1;
// We should be able to read next byte after REX prefix
if (lookAtByte(insn, &nnextByte))
if (peek(insn, nnextByte))
return -1;
unconsumeByte(insn);
}
@ -361,7 +322,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
case 0xf2: /* REPNE/REPNZ */
case 0xf3: /* REP or REPE/REPZ */ {
uint8_t nextByte;
if (lookAtByte(insn, &nextByte))
if (peek(insn, nextByte))
break;
// TODO:
// 1. There could be several 0x66
@ -376,39 +337,27 @@ static int readPrefixes(struct InternalInstruction* insn) {
break;
}
case 0x2e: /* CS segment override -OR- Branch not taken */
case 0x36: /* SS segment override -OR- Branch taken */
case 0x3e: /* DS segment override */
case 0x26: /* ES segment override */
case 0x64: /* FS segment override */
case 0x65: /* GS segment override */
switch (byte) {
case 0x2e:
insn->segmentOverride = SEG_OVERRIDE_CS;
break;
case 0x36:
insn->segmentOverride = SEG_OVERRIDE_SS;
break;
case 0x3e:
insn->segmentOverride = SEG_OVERRIDE_DS;
break;
case 0x26:
insn->segmentOverride = SEG_OVERRIDE_ES;
break;
case 0x64:
insn->segmentOverride = SEG_OVERRIDE_FS;
break;
case 0x65:
insn->segmentOverride = SEG_OVERRIDE_GS;
break;
default:
debug("Unhandled override");
return -1;
}
insn->segmentOverride = SEG_OVERRIDE_CS;
break;
case 0x66: /* Operand-size override */ {
case 0x36: /* SS segment override -OR- Branch taken */
insn->segmentOverride = SEG_OVERRIDE_SS;
break;
case 0x3e: /* DS segment override */
insn->segmentOverride = SEG_OVERRIDE_DS;
break;
case 0x26: /* ES segment override */
insn->segmentOverride = SEG_OVERRIDE_ES;
break;
case 0x64: /* FS segment override */
insn->segmentOverride = SEG_OVERRIDE_FS;
break;
case 0x65: /* GS segment override */
insn->segmentOverride = SEG_OVERRIDE_GS;
break;
case 0x66: /* Operand-size override */ {
uint8_t nextByte;
insn->hasOpSize = true;
if (lookAtByte(insn, &nextByte))
if (peek(insn, nextByte))
break;
// 0x66 can't overwrite existing mandatory prefix and should be ignored
if (!insn->mandatoryPrefix && (nextByte == 0x0f || isREX(insn, nextByte)))
@ -431,13 +380,12 @@ static int readPrefixes(struct InternalInstruction* insn) {
if (byte == 0x62) {
uint8_t byte1, byte2;
if (consumeByte(insn, &byte1)) {
if (consume(insn, byte1)) {
dbgprintf(insn, "Couldn't read second byte of EVEX prefix");
return -1;
}
if (lookAtByte(insn, &byte2)) {
if (peek(insn, byte2)) {
dbgprintf(insn, "Couldn't read third byte of EVEX prefix");
return -1;
}
@ -453,11 +401,11 @@ static int readPrefixes(struct InternalInstruction* insn) {
if (insn->vectorExtensionType == TYPE_EVEX) {
insn->vectorExtensionPrefix[0] = byte;
insn->vectorExtensionPrefix[1] = byte1;
if (consumeByte(insn, &insn->vectorExtensionPrefix[2])) {
if (consume(insn, insn->vectorExtensionPrefix[2])) {
dbgprintf(insn, "Couldn't read third byte of EVEX prefix");
return -1;
}
if (consumeByte(insn, &insn->vectorExtensionPrefix[3])) {
if (consume(insn, insn->vectorExtensionPrefix[3])) {
dbgprintf(insn, "Couldn't read fourth byte of EVEX prefix");
return -1;
}
@ -477,8 +425,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
}
} else if (byte == 0xc4) {
uint8_t byte1;
if (lookAtByte(insn, &byte1)) {
if (peek(insn, byte1)) {
dbgprintf(insn, "Couldn't read second byte of VEX");
return -1;
}
@ -490,8 +437,8 @@ static int readPrefixes(struct InternalInstruction* insn) {
if (insn->vectorExtensionType == TYPE_VEX_3B) {
insn->vectorExtensionPrefix[0] = byte;
consumeByte(insn, &insn->vectorExtensionPrefix[1]);
consumeByte(insn, &insn->vectorExtensionPrefix[2]);
consume(insn, insn->vectorExtensionPrefix[1]);
consume(insn, insn->vectorExtensionPrefix[2]);
/* We simulate the REX prefix for simplicity's sake */
@ -508,8 +455,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
}
} else if (byte == 0xc5) {
uint8_t byte1;
if (lookAtByte(insn, &byte1)) {
if (peek(insn, byte1)) {
dbgprintf(insn, "Couldn't read second byte of VEX");
return -1;
}
@ -521,7 +467,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
if (insn->vectorExtensionType == TYPE_VEX_2B) {
insn->vectorExtensionPrefix[0] = byte;
consumeByte(insn, &insn->vectorExtensionPrefix[1]);
consume(insn, insn->vectorExtensionPrefix[1]);
if (insn->mode == MODE_64BIT)
insn->rexPrefix = 0x40
@ -541,8 +487,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
}
} else if (byte == 0x8f) {
uint8_t byte1;
if (lookAtByte(insn, &byte1)) {
if (peek(insn, byte1)) {
dbgprintf(insn, "Couldn't read second byte of XOP");
return -1;
}
@ -554,8 +499,8 @@ static int readPrefixes(struct InternalInstruction* insn) {
if (insn->vectorExtensionType == TYPE_XOP) {
insn->vectorExtensionPrefix[0] = byte;
consumeByte(insn, &insn->vectorExtensionPrefix[1]);
consumeByte(insn, &insn->vectorExtensionPrefix[2]);
consume(insn, insn->vectorExtensionPrefix[1]);
consume(insn, insn->vectorExtensionPrefix[2]);
/* We simulate the REX prefix for simplicity's sake */
@ -579,7 +524,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
insn->vectorExtensionPrefix[2]);
}
} else if (isREX(insn, byte)) {
if (lookAtByte(insn, &nextByte))
if (peek(insn, nextByte))
return -1;
insn->rexPrefix = byte;
dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
@ -618,11 +563,8 @@ static int readModRM(struct InternalInstruction* insn);
/*
* readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
* extended or escape opcodes).
*
* @param insn - The instruction whose opcode is to be read.
* @return - 0 if the opcode could be read successfully; nonzero otherwise.
*/
static int readOpcode(struct InternalInstruction* insn) {
static bool readOpcode(struct InternalInstruction* insn) {
/* Determine the length of the primary opcode */
uint8_t current;
@ -636,74 +578,74 @@ static int readOpcode(struct InternalInstruction* insn) {
default:
dbgprintf(insn, "Unhandled mm field for instruction (0x%hhx)",
mmFromEVEX2of4(insn->vectorExtensionPrefix[1]));
return -1;
return true;
case VEX_LOB_0F:
insn->opcodeType = TWOBYTE;
return consumeByte(insn, &insn->opcode);
return consume(insn, insn->opcode);
case VEX_LOB_0F38:
insn->opcodeType = THREEBYTE_38;
return consumeByte(insn, &insn->opcode);
return consume(insn, insn->opcode);
case VEX_LOB_0F3A:
insn->opcodeType = THREEBYTE_3A;
return consumeByte(insn, &insn->opcode);
return consume(insn, insn->opcode);
}
} else if (insn->vectorExtensionType == TYPE_VEX_3B) {
switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {
default:
dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]));
return -1;
return true;
case VEX_LOB_0F:
insn->opcodeType = TWOBYTE;
return consumeByte(insn, &insn->opcode);
return consume(insn, insn->opcode);
case VEX_LOB_0F38:
insn->opcodeType = THREEBYTE_38;
return consumeByte(insn, &insn->opcode);
return consume(insn, insn->opcode);
case VEX_LOB_0F3A:
insn->opcodeType = THREEBYTE_3A;
return consumeByte(insn, &insn->opcode);
return consume(insn, insn->opcode);
}
} else if (insn->vectorExtensionType == TYPE_VEX_2B) {
insn->opcodeType = TWOBYTE;
return consumeByte(insn, &insn->opcode);
return consume(insn, insn->opcode);
} else if (insn->vectorExtensionType == TYPE_XOP) {
switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) {
default:
dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]));
return -1;
return true;
case XOP_MAP_SELECT_8:
insn->opcodeType = XOP8_MAP;
return consumeByte(insn, &insn->opcode);
return consume(insn, insn->opcode);
case XOP_MAP_SELECT_9:
insn->opcodeType = XOP9_MAP;
return consumeByte(insn, &insn->opcode);
return consume(insn, insn->opcode);
case XOP_MAP_SELECT_A:
insn->opcodeType = XOPA_MAP;
return consumeByte(insn, &insn->opcode);
return consume(insn, insn->opcode);
}
}
if (consumeByte(insn, &current))
return -1;
if (consume(insn, current))
return true;
if (current == 0x0f) {
dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
if (consumeByte(insn, &current))
return -1;
if (consume(insn, current))
return true;
if (current == 0x38) {
dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
if (consumeByte(insn, &current))
if (consume(insn, current))
return -1;
insn->opcodeType = THREEBYTE_38;
} else if (current == 0x3a) {
dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
if (consumeByte(insn, &current))
if (consume(insn, current))
return -1;
insn->opcodeType = THREEBYTE_3A;
@ -712,10 +654,10 @@ static int readOpcode(struct InternalInstruction* insn) {
// Consume operands before the opcode to comply with the 3DNow encoding
if (readModRM(insn))
return -1;
return true;
if (consumeByte(insn, &current))
return -1;
if (consume(insn, current))
return true;
insn->opcodeType = THREEDNOW_MAP;
} else {
@ -735,7 +677,7 @@ static int readOpcode(struct InternalInstruction* insn) {
insn->opcode = current;
return 0;
return false;
}
/*
@ -1164,7 +1106,7 @@ static int readSIB(struct InternalInstruction* insn) {
break;
}
if (consumeByte(insn, &insn->sib))
if (consume(insn, insn->sib))
return -1;
index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
@ -1233,17 +1175,17 @@ static int readDisplacement(struct InternalInstruction* insn) {
insn->consumedDisplacement = false;
break;
case EA_DISP_8:
if (consumeInt8(insn, &d8))
if (consume(insn, d8))
return -1;
insn->displacement = d8;
break;
case EA_DISP_16:
if (consumeInt16(insn, &d16))
if (consume(insn, d16))
return -1;
insn->displacement = d16;
break;
case EA_DISP_32:
if (consumeInt32(insn, &d32))
if (consume(insn, d32))
return -1;
insn->displacement = d32;
break;
@ -1268,7 +1210,7 @@ static int readModRM(struct InternalInstruction* insn) {
if (insn->consumedModRM)
return 0;
if (consumeByte(insn, &insn->modRM))
if (consume(insn, insn->modRM))
return -1;
insn->consumedModRM = true;
@ -1623,22 +1565,22 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
switch (size) {
case 1:
if (consumeByte(insn, &imm8))
if (consume(insn, imm8))
return -1;
insn->immediates[insn->numImmediatesConsumed] = imm8;
break;
case 2:
if (consumeUInt16(insn, &imm16))
if (consume(insn, imm16))
return -1;
insn->immediates[insn->numImmediatesConsumed] = imm16;
break;
case 4:
if (consumeUInt32(insn, &imm32))
if (consume(insn, imm32))
return -1;
insn->immediates[insn->numImmediatesConsumed] = imm32;
break;
case 8:
if (consumeUInt64(insn, &imm64))
if (consume(insn, imm64))
return -1;
insn->immediates[insn->numImmediatesConsumed] = imm64;
break;
@ -1884,13 +1826,14 @@ static int readOperands(struct InternalInstruction* insn) {
* @return - 0 if the instruction's memory could be read; nonzero if
* not.
*/
int llvm::X86Disassembler::decodeInstruction(
struct InternalInstruction *insn, byteReader_t reader,
const void *readerArg, dlog_t logger, void *loggerArg, const void *miiArg,
uint64_t startLoc, DisassemblerMode mode) {
int llvm::X86Disassembler::decodeInstruction(struct InternalInstruction *insn,
const void *readerArg,
dlog_t logger, void *loggerArg,
const void *miiArg,
uint64_t startLoc,
DisassemblerMode mode) {
memset(insn, 0, sizeof(struct InternalInstruction));
insn->reader = reader;
insn->readerArg = readerArg;
insn->dlog = logger;
insn->dlogArg = loggerArg;

View File

@ -446,12 +446,12 @@ enum SIBBase {
};
/// Possible displacement types for effective-address computations.
typedef enum {
enum EADisplacement {
EA_DISP_NONE,
EA_DISP_8,
EA_DISP_16,
EA_DISP_32
} EADisplacement;
};
/// All possible values of the reg field in the ModR/M byte.
enum Reg {
@ -529,8 +529,6 @@ struct InstructionSpecifier {
/// The x86 internal instruction, which is produced by the decoder.
struct InternalInstruction {
// Reader interface (C)
byteReader_t reader;
// Opaque value passed to the reader
const void* readerArg;
// The address of the next byte to read via the reader
@ -661,7 +659,6 @@ struct InternalInstruction {
/// a buffer provided by the consumer.
/// \param insn The buffer to store the instruction in. Allocated by the
/// consumer.
/// \param reader The byteReader_t for the bytes to be read.
/// \param readerArg An argument to pass to the reader for storing context
/// specific to the consumer. May be NULL.
/// \param logger The dlog_t to be used in printing status messages from the
@ -673,7 +670,6 @@ struct InternalInstruction {
/// \param mode The mode (16-bit, 32-bit, 64-bit) to decode in.
/// \return Nonzero if there was an error during decode, 0 otherwise.
int decodeInstruction(InternalInstruction *insn,
byteReader_t reader,
const void *readerArg,
dlog_t logger,
void *loggerArg,