mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
Teach llvm-objdump with the -macho parser how to use the data in code table
from the LC_DATA_IN_CODE load command. And when disassembling print the data in code formatted for the kind of data it is and not disassemble those bytes. I added the format specific functionality to the derived class MachOObjectFile since these tables only appear in Mach-O object files. This is my first attempt to modify the libObject stuff so if folks have better suggestions how to fit this in or suggestions on the implementation please let me know. rdar://11791371 llvm-svn: 183424
This commit is contained in:
parent
c7c3041d71
commit
644e3fc29e
@ -25,6 +25,31 @@
|
||||
namespace llvm {
|
||||
namespace object {
|
||||
|
||||
/// DiceRef - This is a value type class that represents a single
|
||||
/// data in code entry in the table in a Mach-O object file.
|
||||
class DiceRef {
|
||||
DataRefImpl DicePimpl;
|
||||
const ObjectFile *OwningObject;
|
||||
|
||||
public:
|
||||
DiceRef() : OwningObject(NULL) { }
|
||||
|
||||
DiceRef(DataRefImpl DiceP, const ObjectFile *Owner);
|
||||
|
||||
bool operator==(const DiceRef &Other) const;
|
||||
bool operator<(const DiceRef &Other) const;
|
||||
|
||||
error_code getNext(DiceRef &Result) const;
|
||||
|
||||
error_code getOffset(uint32_t &Result) const;
|
||||
error_code getLength(uint16_t &Result) const;
|
||||
error_code getKind(uint16_t &Result) const;
|
||||
|
||||
DataRefImpl getRawDataRefImpl() const;
|
||||
const ObjectFile *getObjectFile() const;
|
||||
};
|
||||
typedef content_iterator<DiceRef> dice_iterator;
|
||||
|
||||
class MachOObjectFile : public ObjectFile {
|
||||
public:
|
||||
struct LoadCommandInfo {
|
||||
@ -108,6 +133,9 @@ public:
|
||||
relocation_iterator getSectionRelBegin(unsigned Index) const;
|
||||
relocation_iterator getSectionRelEnd(unsigned Index) const;
|
||||
|
||||
dice_iterator begin_dices() const;
|
||||
dice_iterator end_dices() const;
|
||||
|
||||
// In a MachO file, sections have a segment name. This is used in the .o
|
||||
// files. They have a single segment, but this field specifies which segment
|
||||
// a section should be put in in the final object.
|
||||
@ -152,6 +180,7 @@ public:
|
||||
getLinkerOptionsLoadCommand(const LoadCommandInfo &L) const;
|
||||
|
||||
macho::RelocationEntry getRelocation(DataRefImpl Rel) const;
|
||||
macho::DataInCodeTableEntry getDice(DataRefImpl Rel) const;
|
||||
macho::Header getHeader() const;
|
||||
macho::Header64Ext getHeader64Ext() const;
|
||||
macho::IndirectSymbolTableEntry
|
||||
@ -161,6 +190,7 @@ public:
|
||||
unsigned Index) const;
|
||||
macho::SymtabLoadCommand getSymtabLoadCommand() const;
|
||||
macho::DysymtabLoadCommand getDysymtabLoadCommand() const;
|
||||
macho::LinkeditDataLoadCommand getDataInCodeLoadCommand() const;
|
||||
|
||||
StringRef getStringTableData() const;
|
||||
bool is64Bit() const;
|
||||
@ -175,8 +205,66 @@ private:
|
||||
SectionList Sections;
|
||||
const char *SymtabLoadCmd;
|
||||
const char *DysymtabLoadCmd;
|
||||
const char *DataInCodeLoadCmd;
|
||||
};
|
||||
|
||||
/// DiceRef
|
||||
/// Build a DiceRef from the raw entry reference \p DiceP and remember
/// \p Owner as the object file it belongs to.
inline DiceRef::DiceRef(DataRefImpl DiceP, const ObjectFile *Owner)
    : DicePimpl(DiceP),
      OwningObject(Owner) {
}
|
||||
|
||||
/// Equality is decided by the raw entry references alone; the owning object
/// is not part of the comparison.
inline bool DiceRef::operator==(const DiceRef &Other) const {
  const bool SameEntry = (DicePimpl == Other.DicePimpl);
  return SameEntry;
}
|
||||
|
||||
/// Ordering is decided by the raw entry references alone; the owning object
/// is not part of the comparison.
inline bool DiceRef::operator<(const DiceRef &Other) const {
  const bool Precedes = (DicePimpl < Other.DicePimpl);
  return Precedes;
}
|
||||
|
||||
/// Produce in \p Result a DiceRef that refers to the table entry directly
/// after this one, keeping the same owning object. No bounds check is done
/// here; always returns object_error::success.
inline error_code DiceRef::getNext(DiceRef &Result) const {
  typedef macho::DataInCodeTableEntry Entry;

  // The raw reference holds the address of the current entry, so stepping the
  // typed pointer by one yields the address of the following entry.
  const Entry *Current = reinterpret_cast<const Entry *>(DicePimpl.p);
  DataRefImpl Next = DicePimpl;
  Next.p = reinterpret_cast<uintptr_t>(Current + 1);

  Result = DiceRef(Next, OwningObject);
  return object_error::success;
}
|
||||
|
||||
// Since a Mach-O data in code reference, a DiceRef, can only be created when
|
||||
// the OwningObject ObjectFile is a MachOObjectFile a static_cast<> is used for
|
||||
// the methods that get the values of the fields of the reference.
|
||||
|
||||
/// Store the Offset field of the referenced entry in \p Result.
/// The owning object is treated as a MachOObjectFile without a runtime check.
/// Always returns object_error::success.
inline error_code DiceRef::getOffset(uint32_t &Result) const {
  Result = static_cast<const MachOObjectFile *>(OwningObject)
               ->getDice(DicePimpl)
               .Offset;
  return object_error::success;
}
|
||||
|
||||
/// Store the Length field of the referenced entry in \p Result.
/// The owning object is treated as a MachOObjectFile without a runtime check.
/// Always returns object_error::success.
inline error_code DiceRef::getLength(uint16_t &Result) const {
  Result = static_cast<const MachOObjectFile *>(OwningObject)
               ->getDice(DicePimpl)
               .Length;
  return object_error::success;
}
|
||||
|
||||
/// Store the Kind field of the referenced entry in \p Result.
/// The owning object is treated as a MachOObjectFile without a runtime check.
/// Always returns object_error::success.
inline error_code DiceRef::getKind(uint16_t &Result) const {
  Result = static_cast<const MachOObjectFile *>(OwningObject)
               ->getDice(DicePimpl)
               .Kind;
  return object_error::success;
}
|
||||
|
||||
/// Return a copy of the raw entry reference held by this DiceRef.
inline DataRefImpl DiceRef::getRawDataRefImpl() const {
  return this->DicePimpl;
}
|
||||
|
||||
/// Return the object file recorded as owner of this entry (NULL for a
/// default-constructed DiceRef).
inline const ObjectFile *DiceRef::getObjectFile() const {
  return this->OwningObject;
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -414,7 +414,7 @@ MachOObjectFile::MachOObjectFile(MemoryBuffer *Object,
|
||||
bool IsLittleEndian, bool Is64bits,
|
||||
error_code &ec)
|
||||
: ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object),
|
||||
SymtabLoadCmd(NULL), DysymtabLoadCmd(NULL) {
|
||||
SymtabLoadCmd(NULL), DysymtabLoadCmd(NULL), DataInCodeLoadCmd(NULL) {
|
||||
uint32_t LoadCommandCount = this->getHeader().NumLoadCommands;
|
||||
macho::LoadCommandType SegmentLoadType = is64Bit() ?
|
||||
macho::LCT_Segment64 : macho::LCT_Segment;
|
||||
@ -427,6 +427,9 @@ MachOObjectFile::MachOObjectFile(MemoryBuffer *Object,
|
||||
} else if (Load.C.Type == macho::LCT_Dysymtab) {
|
||||
assert(!DysymtabLoadCmd && "Multiple dynamic symbol tables");
|
||||
DysymtabLoadCmd = Load.Ptr;
|
||||
} else if (Load.C.Type == macho::LCT_DataInCode) {
|
||||
assert(!DataInCodeLoadCmd && "Multiple data in code tables");
|
||||
DataInCodeLoadCmd = Load.Ptr;
|
||||
} else if (Load.C.Type == SegmentLoadType) {
|
||||
uint32_t NumSections = getSegmentLoadCommandNumSections(this, Load);
|
||||
for (unsigned J = 0; J < NumSections; ++J) {
|
||||
@ -1328,6 +1331,27 @@ relocation_iterator MachOObjectFile::getSectionRelEnd(unsigned Index) const {
|
||||
return getSectionRelEnd(DRI);
|
||||
}
|
||||
|
||||
/// Return an iterator referring to the first entry of the data in code table.
/// When the file has no data in code load command, the iterator wraps a
/// DataRefImpl that is left as default constructed.
dice_iterator MachOObjectFile::begin_dices() const {
  DataRefImpl First;
  if (DataInCodeLoadCmd) {
    // The table starts DataOffset bytes into the file.
    const macho::LinkeditDataLoadCommand Cmd = getDataInCodeLoadCommand();
    First.p = reinterpret_cast<uintptr_t>(getPtr(this, Cmd.DataOffset));
  }
  return dice_iterator(DiceRef(First, this));
}
|
||||
|
||||
/// Return the past-the-end iterator for the data in code table.
///
/// When the file has no data in code load command, the iterator wraps a
/// DataRefImpl that is left as default constructed, the same way
/// begin_dices() builds its result in that case.
///
/// The end position is placed after the last *whole* table entry. Iteration
/// advances one sizeof(macho::DataInCodeTableEntry) at a time and stops only
/// on exact equality with this iterator, so a DataSize that is not a multiple
/// of the entry size would otherwise be stepped over and the walk would run
/// past the table.
dice_iterator MachOObjectFile::end_dices() const {
  DataRefImpl DRI;
  if (!DataInCodeLoadCmd)
    return dice_iterator(DiceRef(DRI, this));

  macho::LinkeditDataLoadCommand DicLC = getDataInCodeLoadCommand();
  // Ignore any trailing partial entry so the end is reachable from begin.
  const unsigned EntrySize = sizeof(macho::DataInCodeTableEntry);
  const unsigned NumEntries = DicLC.DataSize / EntrySize;
  unsigned Offset = DicLC.DataOffset + NumEntries * EntrySize;
  DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, Offset));
  return dice_iterator(DiceRef(DRI, this));
}
|
||||
|
||||
StringRef
|
||||
MachOObjectFile::getSectionFinalSegmentName(DataRefImpl Sec) const {
|
||||
ArrayRef<char> Raw = getSectionRawFinalSegmentName(Sec);
|
||||
@ -1492,6 +1516,12 @@ MachOObjectFile::getRelocation(DataRefImpl Rel) const {
|
||||
return getStruct<macho::RelocationEntry>(this, P);
|
||||
}
|
||||
|
||||
/// Read the data in code table entry whose address is stored in \p Rel.p,
/// using getStruct to produce the returned copy.
macho::DataInCodeTableEntry
MachOObjectFile::getDice(DataRefImpl Rel) const {
  return getStruct<macho::DataInCodeTableEntry>(
      this, reinterpret_cast<const char *>(Rel.p));
}
|
||||
|
||||
macho::Header MachOObjectFile::getHeader() const {
|
||||
return getStruct<macho::Header>(this, getPtr(this, 0));
|
||||
}
|
||||
@ -1524,6 +1554,20 @@ macho::DysymtabLoadCommand MachOObjectFile::getDysymtabLoadCommand() const {
|
||||
return getStruct<macho::DysymtabLoadCommand>(this, DysymtabLoadCmd);
|
||||
}
|
||||
|
||||
/// Return the data in code load command of this file. If the file has none,
/// a synthesized command is returned whose DataOffset and DataSize are zero.
macho::LinkeditDataLoadCommand
MachOObjectFile::getDataInCodeLoadCommand() const {
  if (!DataInCodeLoadCmd) {
    // No such load command was seen: hand back an empty stand-in.
    macho::LinkeditDataLoadCommand Empty;
    Empty.Type = macho::LCT_DataInCode;
    Empty.Size = macho::LinkeditLoadCommandSize;
    Empty.DataOffset = 0;
    Empty.DataSize = 0;
    return Empty;
  }

  return getStruct<macho::LinkeditDataLoadCommand>(this, DataInCodeLoadCmd);
}
|
||||
|
||||
StringRef MachOObjectFile::getStringTableData() const {
|
||||
macho::SymtabLoadCommand S = getSymtabLoadCommand();
|
||||
return getData().substr(S.StringTableOffset, S.StringTableSize);
|
||||
|
BIN
test/Object/Inputs/macho-data-in-code.macho-thumbv7
Normal file
BIN
test/Object/Inputs/macho-data-in-code.macho-thumbv7
Normal file
Binary file not shown.
7
test/Object/X86/macho-data-in-code.test
Normal file
7
test/Object/X86/macho-data-in-code.test
Normal file
@ -0,0 +1,7 @@
|
||||
RUN: llvm-objdump -triple thumbv7-apple-iOS -disassemble %p/../Inputs/macho-data-in-code.macho-thumbv7 -macho | FileCheck %s
|
||||
|
||||
CHECK: 12: 80 bd pop {r7, pc}
|
||||
|
||||
CHECK: 14: 38 00 00 00 .long 56 @ KIND_DATA
|
||||
CHECK: 16: 00 00 movs r0, r0
|
||||
|
@ -87,12 +87,73 @@ struct SymbolSorter {
|
||||
}
|
||||
};
|
||||
|
||||
// Types for the sorted data in code table that is built before disassembly
|
||||
// and the predicate function to sort them.
|
||||
typedef std::pair<uint64_t, DiceRef> DiceTableEntry;
|
||||
typedef std::vector<DiceTableEntry> DiceTable;
|
||||
typedef DiceTable::iterator dice_table_iterator;
|
||||
|
||||
static bool
|
||||
compareDiceTableEntries(const DiceTableEntry i,
|
||||
const DiceTableEntry j) {
|
||||
return i.first == j.first;
|
||||
}
|
||||
|
||||
static void DumpDataInCode(const char *bytes, uint64_t Size,
|
||||
unsigned short Kind) {
|
||||
uint64_t Value;
|
||||
|
||||
switch (Kind) {
|
||||
case macho::Data:
|
||||
switch (Size) {
|
||||
case 4:
|
||||
Value = bytes[3] << 24 |
|
||||
bytes[2] << 16 |
|
||||
bytes[1] << 8 |
|
||||
bytes[0];
|
||||
outs() << "\t.long " << Value;
|
||||
break;
|
||||
case 2:
|
||||
Value = bytes[1] << 8 |
|
||||
bytes[0];
|
||||
outs() << "\t.short " << Value;
|
||||
break;
|
||||
case 1:
|
||||
Value = bytes[0];
|
||||
outs() << "\t.byte " << Value;
|
||||
break;
|
||||
}
|
||||
outs() << "\t@ KIND_DATA\n";
|
||||
break;
|
||||
case macho::JumpTable8:
|
||||
Value = bytes[0];
|
||||
outs() << "\t.byte " << Value << "\t@ KIND_JUMP_TABLE8";
|
||||
break;
|
||||
case macho::JumpTable16:
|
||||
Value = bytes[1] << 8 |
|
||||
bytes[0];
|
||||
outs() << "\t.short " << Value << "\t@ KIND_JUMP_TABLE16";
|
||||
break;
|
||||
case macho::JumpTable32:
|
||||
Value = bytes[3] << 24 |
|
||||
bytes[2] << 16 |
|
||||
bytes[1] << 8 |
|
||||
bytes[0];
|
||||
outs() << "\t.long " << Value << "\t@ KIND_JUMP_TABLE32";
|
||||
break;
|
||||
default:
|
||||
outs() << "\t@ data in code kind = " << Kind << "\n";
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
getSectionsAndSymbols(const macho::Header Header,
|
||||
MachOObjectFile *MachOObj,
|
||||
std::vector<SectionRef> &Sections,
|
||||
std::vector<SymbolRef> &Symbols,
|
||||
SmallVectorImpl<uint64_t> &FoundFns) {
|
||||
SmallVectorImpl<uint64_t> &FoundFns,
|
||||
uint64_t &BaseSegmentAddress) {
|
||||
error_code ec;
|
||||
for (symbol_iterator SI = MachOObj->begin_symbols(),
|
||||
SE = MachOObj->end_symbols(); SI != SE; SI.increment(ec))
|
||||
@ -108,6 +169,7 @@ getSectionsAndSymbols(const macho::Header Header,
|
||||
|
||||
MachOObjectFile::LoadCommandInfo Command =
|
||||
MachOObj->getFirstLoadCommandInfo();
|
||||
bool BaseSegmentAddressSet = false;
|
||||
for (unsigned i = 0; ; ++i) {
|
||||
if (Command.C.Type == macho::LCT_FunctionStarts) {
|
||||
// We found a function starts segment, parse the addresses for later
|
||||
@ -117,6 +179,15 @@ getSectionsAndSymbols(const macho::Header Header,
|
||||
|
||||
MachOObj->ReadULEB128s(LLC.DataOffset, FoundFns);
|
||||
}
|
||||
else if (Command.C.Type == macho::LCT_Segment) {
|
||||
macho::SegmentLoadCommand SLC =
|
||||
MachOObj->getSegmentLoadCommand(Command);
|
||||
StringRef SegName = SLC.Name;
|
||||
if(!BaseSegmentAddressSet && SegName != "__PAGEZERO") {
|
||||
BaseSegmentAddressSet = true;
|
||||
BaseSegmentAddress = SLC.VMAddress;
|
||||
}
|
||||
}
|
||||
|
||||
if (i == Header.NumLoadCommands - 1)
|
||||
break;
|
||||
@ -184,14 +255,32 @@ static void DisassembleInputMachO2(StringRef Filename,
|
||||
std::vector<SectionRef> Sections;
|
||||
std::vector<SymbolRef> Symbols;
|
||||
SmallVector<uint64_t, 8> FoundFns;
|
||||
uint64_t BaseSegmentAddress;
|
||||
|
||||
getSectionsAndSymbols(Header, MachOOF, Sections, Symbols, FoundFns);
|
||||
getSectionsAndSymbols(Header, MachOOF, Sections, Symbols, FoundFns,
|
||||
BaseSegmentAddress);
|
||||
|
||||
// Make a copy of the unsorted symbol list. FIXME: duplication
|
||||
std::vector<SymbolRef> UnsortedSymbols(Symbols);
|
||||
// Sort the symbols by address, just in case they didn't come in that way.
|
||||
std::sort(Symbols.begin(), Symbols.end(), SymbolSorter());
|
||||
|
||||
// Build a data in code table that is sorted by the address of each entry.
|
||||
uint64_t BaseAddress = 0;
|
||||
if (Header.FileType == macho::HFT_Object)
|
||||
Sections[0].getAddress(BaseAddress);
|
||||
else
|
||||
BaseAddress = BaseSegmentAddress;
|
||||
DiceTable Dices;
|
||||
error_code ec;
|
||||
for (dice_iterator DI = MachOOF->begin_dices(), DE = MachOOF->end_dices();
|
||||
DI != DE; DI.increment(ec)){
|
||||
uint32_t Offset;
|
||||
DI->getOffset(Offset);
|
||||
Dices.push_back(std::make_pair(BaseAddress + Offset, *DI));
|
||||
}
|
||||
array_pod_sort(Dices.begin(), Dices.end());
|
||||
|
||||
#ifndef NDEBUG
|
||||
raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls();
|
||||
#else
|
||||
@ -309,12 +398,29 @@ static void DisassembleInputMachO2(StringRef Filename,
|
||||
for (uint64_t Index = Start; Index < End; Index += Size) {
|
||||
MCInst Inst;
|
||||
|
||||
uint64_t SectAddress = 0;
|
||||
Sections[SectIdx].getAddress(SectAddress);
|
||||
outs() << format("%8" PRIx64 ":\t", SectAddress + Index);
|
||||
|
||||
// Check the data in code table here to see if this is data not an
|
||||
// instruction to be disassembled.
|
||||
DiceTable Dice;
|
||||
Dice.push_back(std::make_pair(SectAddress + Index, DiceRef()));
|
||||
dice_table_iterator DTI = std::search(Dices.begin(), Dices.end(),
|
||||
Dice.begin(), Dice.end(),
|
||||
compareDiceTableEntries);
|
||||
if (DTI != Dices.end()){
|
||||
uint16_t Length;
|
||||
DTI->second.getLength(Length);
|
||||
DumpBytes(StringRef(Bytes.data() + Index, Length));
|
||||
uint16_t Kind;
|
||||
DTI->second.getKind(Kind);
|
||||
DumpDataInCode(Bytes.data() + Index, Length, Kind);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (DisAsm->getInstruction(Inst, Size, memoryObject, Index,
|
||||
DebugOut, nulls())) {
|
||||
uint64_t SectAddress = 0;
|
||||
Sections[SectIdx].getAddress(SectAddress);
|
||||
outs() << format("%8" PRIx64 ":\t", SectAddress + Index);
|
||||
|
||||
DumpBytes(StringRef(Bytes.data() + Index, Size));
|
||||
IP->printInst(&Inst, outs(), "");
|
||||
|
||||
|
@ -292,7 +292,7 @@ DumpDataInCodeDataCommand(const MachOObjectFile &Obj,
|
||||
<< " ('datasize', " << LLC.DataSize << ")\n"
|
||||
<< " ('_data_regions', [\n";
|
||||
|
||||
unsigned NumRegions = LLC.DataSize / 8;
|
||||
unsigned NumRegions = LLC.DataSize / sizeof(macho::DataInCodeTableEntry);
|
||||
for (unsigned i = 0; i < NumRegions; ++i) {
|
||||
macho::DataInCodeTableEntry DICE =
|
||||
Obj.getDataInCodeTableEntry(LLC.DataOffset, i);
|
||||
|
Loading…
Reference in New Issue
Block a user