//===------ macho2yaml.cpp - obj2yaml conversion tool -----------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "obj2yaml.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/Object/MachOUniversal.h" #include "llvm/ObjectYAML/DWARFYAML.h" #include "llvm/ObjectYAML/ObjectYAML.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" #include // for memcpy using namespace llvm; class MachODumper { template Expected processLoadCommandData( MachOYAML::LoadCommand &LC, const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, MachOYAML::Object &Y); const object::MachOObjectFile &Obj; std::unique_ptr DWARFCtx; void dumpHeader(std::unique_ptr &Y); Error dumpLoadCommands(std::unique_ptr &Y); void dumpLinkEdit(std::unique_ptr &Y); void dumpRebaseOpcodes(std::unique_ptr &Y); void dumpBindOpcodes(std::vector &BindOpcodes, ArrayRef OpcodeBuffer, bool Lazy = false); void dumpExportTrie(std::unique_ptr &Y); void dumpSymbols(std::unique_ptr &Y); template Expected constructSectionCommon(SectionType Sec, size_t SecIndex); template Expected constructSection(SectionType Sec, size_t SecIndex); template Expected extractSections(const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, std::vector &Sections, MachOYAML::Object &Y); public: MachODumper(const object::MachOObjectFile &O, std::unique_ptr DCtx) : Obj(O), DWARFCtx(std::move(DCtx)) {} Expected> dump(); }; #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ case MachO::LCName: \ memcpy((void *)&(LC.Data.LCStruct##_data), LoadCmd.Ptr, \ sizeof(MachO::LCStruct)); \ if (Obj.isLittleEndian() != sys::IsLittleEndianHost) \ MachO::swapStruct(LC.Data.LCStruct##_data); \ if (Expected ExpectedEndPtr = \ processLoadCommandData(LC, LoadCmd, *Y.get())) \ EndPtr = *ExpectedEndPtr; \ else \ return ExpectedEndPtr.takeError(); \ break; template Expected MachODumper::constructSectionCommon(SectionType Sec, size_t SecIndex) { MachOYAML::Section TempSec; memcpy(reinterpret_cast(&TempSec.sectname[0]), &Sec.sectname[0], 16); memcpy(reinterpret_cast(&TempSec.segname[0]), &Sec.segname[0], 16); TempSec.addr = Sec.addr; TempSec.size = Sec.size; TempSec.offset = Sec.offset; TempSec.align = Sec.align; TempSec.reloff = Sec.reloff; TempSec.nreloc = Sec.nreloc; TempSec.flags = Sec.flags; TempSec.reserved1 = Sec.reserved1; TempSec.reserved2 = Sec.reserved2; TempSec.reserved3 = 0; if (!MachO::isVirtualSection(Sec.flags & MachO::SECTION_TYPE)) TempSec.content = yaml::BinaryRef(Obj.getSectionContents(Sec.offset, Sec.size)); if (Expected SecRef = Obj.getSection(SecIndex)) { TempSec.relocations.reserve(TempSec.nreloc); for (const object::RelocationRef &Reloc : SecRef->relocations()) { const object::DataRefImpl Rel = Reloc.getRawDataRefImpl(); const MachO::any_relocation_info RE = Obj.getRelocation(Rel); MachOYAML::Relocation R; R.address = Obj.getAnyRelocationAddress(RE); R.is_pcrel = Obj.getAnyRelocationPCRel(RE); R.length = Obj.getAnyRelocationLength(RE); R.type = Obj.getAnyRelocationType(RE); R.is_scattered = Obj.isRelocationScattered(RE); R.symbolnum = (R.is_scattered ? 0 : Obj.getPlainRelocationSymbolNum(RE)); R.is_extern = (R.is_scattered ? false : Obj.getPlainRelocationExternal(RE)); R.value = (R.is_scattered ? Obj.getScatteredRelocationValue(RE) : 0); TempSec.relocations.push_back(R); } } else { return SecRef.takeError(); } return TempSec; } template <> Expected MachODumper::constructSection(MachO::section Sec, size_t SecIndex) { Expected TempSec = constructSectionCommon(Sec, SecIndex); if (TempSec) TempSec->reserved3 = 0; return TempSec; } template <> Expected MachODumper::constructSection(MachO::section_64 Sec, size_t SecIndex) { Expected TempSec = constructSectionCommon(Sec, SecIndex); if (TempSec) TempSec->reserved3 = Sec.reserved3; return TempSec; } static Error dumpDebugSection(StringRef SecName, DWARFContext &DCtx, DWARFYAML::Data &DWARF) { if (SecName == "__debug_abbrev") { dumpDebugAbbrev(DCtx, DWARF); return Error::success(); } if (SecName == "__debug_aranges") return dumpDebugARanges(DCtx, DWARF); if (SecName == "__debug_info") { dumpDebugInfo(DCtx, DWARF); return Error::success(); } if (SecName == "__debug_line") { dumpDebugLines(DCtx, DWARF); return Error::success(); } if (SecName.startswith("__debug_pub")) { // FIXME: We should extract pub-section dumpers from this function. dumpDebugPubSections(DCtx, DWARF); return Error::success(); } if (SecName == "__debug_ranges") return dumpDebugRanges(DCtx, DWARF); if (SecName == "__debug_str") return dumpDebugStrings(DCtx, DWARF); return createStringError(errc::not_supported, "dumping " + SecName + " section is not supported"); } template Expected MachODumper::extractSections( const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, std::vector &Sections, MachOYAML::Object &Y) { auto End = LoadCmd.Ptr + LoadCmd.C.cmdsize; const SectionType *Curr = reinterpret_cast(LoadCmd.Ptr + sizeof(SegmentType)); for (; reinterpret_cast(Curr) < End; Curr++) { SectionType Sec; memcpy((void *)&Sec, Curr, sizeof(SectionType)); if (Obj.isLittleEndian() != sys::IsLittleEndianHost) MachO::swapStruct(Sec); // For MachO section indices start from 1. if (Expected S = constructSection(Sec, Sections.size() + 1)) { StringRef SecName(S->sectname); if (SecName.startswith("__debug_")) { // If the DWARF section cannot be successfully parsed, emit raw content // instead of an entry in the DWARF section of the YAML. if (Error Err = dumpDebugSection(SecName, *DWARFCtx.get(), Y.DWARF)) consumeError(std::move(Err)); else S->content.reset(); } Sections.push_back(std::move(*S)); } else return S.takeError(); } return reinterpret_cast(Curr); } template Expected MachODumper::processLoadCommandData( MachOYAML::LoadCommand &LC, const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, MachOYAML::Object &Y) { return LoadCmd.Ptr + sizeof(StructType); } template <> Expected MachODumper::processLoadCommandData( MachOYAML::LoadCommand &LC, const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, MachOYAML::Object &Y) { return extractSections( LoadCmd, LC.Sections, Y); } template <> Expected MachODumper::processLoadCommandData( MachOYAML::LoadCommand &LC, const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, MachOYAML::Object &Y) { return extractSections( LoadCmd, LC.Sections, Y); } template const char * readString(MachOYAML::LoadCommand &LC, const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) { auto Start = LoadCmd.Ptr + sizeof(StructType); auto MaxSize = LoadCmd.C.cmdsize - sizeof(StructType); auto Size = strnlen(Start, MaxSize); LC.Content = StringRef(Start, Size).str(); return Start + Size; } template <> Expected MachODumper::processLoadCommandData( MachOYAML::LoadCommand &LC, const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, MachOYAML::Object &Y) { return readString(LC, LoadCmd); } template <> Expected MachODumper::processLoadCommandData( MachOYAML::LoadCommand &LC, const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, MachOYAML::Object &Y) { return readString(LC, LoadCmd); } template <> Expected MachODumper::processLoadCommandData( MachOYAML::LoadCommand &LC, const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, MachOYAML::Object &Y) { return readString(LC, LoadCmd); } template <> Expected MachODumper::processLoadCommandData( MachOYAML::LoadCommand &LC, const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, MachOYAML::Object &Y) { auto Start = LoadCmd.Ptr + sizeof(MachO::build_version_command); auto NTools = LC.Data.build_version_command_data.ntools; for (unsigned i = 0; i < NTools; ++i) { auto Curr = Start + i * sizeof(MachO::build_tool_version); MachO::build_tool_version BV; memcpy((void *)&BV, Curr, sizeof(MachO::build_tool_version)); if (Obj.isLittleEndian() != sys::IsLittleEndianHost) MachO::swapStruct(BV); LC.Tools.push_back(BV); } return Start + NTools * sizeof(MachO::build_tool_version); } Expected> MachODumper::dump() { auto Y = std::make_unique(); Y->IsLittleEndian = Obj.isLittleEndian(); dumpHeader(Y); if (Error Err = dumpLoadCommands(Y)) return std::move(Err); dumpLinkEdit(Y); return std::move(Y); } void MachODumper::dumpHeader(std::unique_ptr &Y) { Y->Header.magic = Obj.getHeader().magic; Y->Header.cputype = Obj.getHeader().cputype; Y->Header.cpusubtype = Obj.getHeader().cpusubtype; Y->Header.filetype = Obj.getHeader().filetype; Y->Header.ncmds = Obj.getHeader().ncmds; Y->Header.sizeofcmds = Obj.getHeader().sizeofcmds; Y->Header.flags = Obj.getHeader().flags; Y->Header.reserved = 0; } Error MachODumper::dumpLoadCommands(std::unique_ptr &Y) { for (auto LoadCmd : Obj.load_commands()) { MachOYAML::LoadCommand LC; const char *EndPtr = LoadCmd.Ptr; switch (LoadCmd.C.cmd) { default: memcpy((void *)&(LC.Data.load_command_data), LoadCmd.Ptr, sizeof(MachO::load_command)); if (Obj.isLittleEndian() != sys::IsLittleEndianHost) MachO::swapStruct(LC.Data.load_command_data); if (Expected ExpectedEndPtr = processLoadCommandData(LC, LoadCmd, *Y.get())) EndPtr = *ExpectedEndPtr; else return ExpectedEndPtr.takeError(); break; #include "llvm/BinaryFormat/MachO.def" } auto RemainingBytes = LoadCmd.C.cmdsize - (EndPtr - LoadCmd.Ptr); if (!std::all_of(EndPtr, &EndPtr[RemainingBytes], [](const char C) { return C == 0; })) { LC.PayloadBytes.insert(LC.PayloadBytes.end(), EndPtr, &EndPtr[RemainingBytes]); RemainingBytes = 0; } LC.ZeroPadBytes = RemainingBytes; Y->LoadCommands.push_back(std::move(LC)); } return Error::success(); } void MachODumper::dumpLinkEdit(std::unique_ptr &Y) { dumpRebaseOpcodes(Y); dumpBindOpcodes(Y->LinkEdit.BindOpcodes, Obj.getDyldInfoBindOpcodes()); dumpBindOpcodes(Y->LinkEdit.WeakBindOpcodes, Obj.getDyldInfoWeakBindOpcodes()); dumpBindOpcodes(Y->LinkEdit.LazyBindOpcodes, Obj.getDyldInfoLazyBindOpcodes(), true); dumpExportTrie(Y); dumpSymbols(Y); } void MachODumper::dumpRebaseOpcodes(std::unique_ptr &Y) { MachOYAML::LinkEditData &LEData = Y->LinkEdit; auto RebaseOpcodes = Obj.getDyldInfoRebaseOpcodes(); for (auto OpCode = RebaseOpcodes.begin(); OpCode != RebaseOpcodes.end(); ++OpCode) { MachOYAML::RebaseOpcode RebaseOp; RebaseOp.Opcode = static_cast(*OpCode & MachO::REBASE_OPCODE_MASK); RebaseOp.Imm = *OpCode & MachO::REBASE_IMMEDIATE_MASK; unsigned Count; uint64_t ULEB = 0; switch (RebaseOp.Opcode) { case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: ULEB = decodeULEB128(OpCode + 1, &Count); RebaseOp.ExtraData.push_back(ULEB); OpCode += Count; LLVM_FALLTHROUGH; // Intentionally no break here -- This opcode has two ULEB values case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: case MachO::REBASE_OPCODE_ADD_ADDR_ULEB: case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES: case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: ULEB = decodeULEB128(OpCode + 1, &Count); RebaseOp.ExtraData.push_back(ULEB); OpCode += Count; break; default: break; } LEData.RebaseOpcodes.push_back(RebaseOp); if (RebaseOp.Opcode == MachO::REBASE_OPCODE_DONE) break; } } StringRef ReadStringRef(const uint8_t *Start) { const uint8_t *Itr = Start; for (; *Itr; ++Itr) ; return StringRef(reinterpret_cast(Start), Itr - Start); } void MachODumper::dumpBindOpcodes( std::vector &BindOpcodes, ArrayRef OpcodeBuffer, bool Lazy) { for (auto OpCode = OpcodeBuffer.begin(); OpCode != OpcodeBuffer.end(); ++OpCode) { MachOYAML::BindOpcode BindOp; BindOp.Opcode = static_cast(*OpCode & MachO::BIND_OPCODE_MASK); BindOp.Imm = *OpCode & MachO::BIND_IMMEDIATE_MASK; unsigned Count; uint64_t ULEB = 0; int64_t SLEB = 0; switch (BindOp.Opcode) { case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: ULEB = decodeULEB128(OpCode + 1, &Count); BindOp.ULEBExtraData.push_back(ULEB); OpCode += Count; LLVM_FALLTHROUGH; // Intentionally no break here -- this opcode has two ULEB values case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: case MachO::BIND_OPCODE_ADD_ADDR_ULEB: case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: ULEB = decodeULEB128(OpCode + 1, &Count); BindOp.ULEBExtraData.push_back(ULEB); OpCode += Count; break; case MachO::BIND_OPCODE_SET_ADDEND_SLEB: SLEB = decodeSLEB128(OpCode + 1, &Count); BindOp.SLEBExtraData.push_back(SLEB); OpCode += Count; break; case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: BindOp.Symbol = ReadStringRef(OpCode + 1); OpCode += BindOp.Symbol.size() + 1; break; default: break; } BindOpcodes.push_back(BindOp); // Lazy bindings have DONE opcodes between operations, so we need to keep // processing after a DONE. if (!Lazy && BindOp.Opcode == MachO::BIND_OPCODE_DONE) break; } } /*! * /brief processes a node from the export trie, and its children. * * To my knowledge there is no documentation of the encoded format of this data * other than in the heads of the Apple linker engineers. To that end hopefully * this comment and the implementation below can serve to light the way for * anyone crazy enough to come down this path in the future. * * This function reads and preserves the trie structure of the export trie. To * my knowledge there is no code anywhere else that reads the data and preserves * the Trie. LD64 (sources available at opensource.apple.com) has a similar * implementation that parses the export trie into a vector. That code as well * as LLVM's libObject MachO implementation were the basis for this. * * The export trie is an encoded trie. The node serialization is a bit awkward. * The below pseudo-code is the best description I've come up with for it. * * struct SerializedNode { * ULEB128 TerminalSize; * struct TerminalData { <-- This is only present if TerminalSize > 0 * ULEB128 Flags; * ULEB128 Address; <-- Present if (! Flags & REEXPORT ) * ULEB128 Other; <-- Present if ( Flags & REEXPORT || * Flags & STUB_AND_RESOLVER ) * char[] ImportName; <-- Present if ( Flags & REEXPORT ) * } * uint8_t ChildrenCount; * Pair ChildNameOffsetPair[ChildrenCount]; * SerializedNode Children[ChildrenCount] * } * * Terminal nodes are nodes that represent actual exports. They can appear * anywhere in the tree other than at the root; they do not need to be leaf * nodes. When reading the data out of the trie this routine reads it in-order, * but it puts the child names and offsets directly into the child nodes. This * results in looping over the children twice during serialization and * de-serialization, but it makes the YAML representation more human readable. * * Below is an example of the graph from a "Hello World" executable: * * ------- * | '' | * ------- * | * ------- * | '_' | * ------- * | * |----------------------------------------| * | | * ------------------------ --------------------- * | '_mh_execute_header' | | 'main' | * | Flags: 0x00000000 | | Flags: 0x00000000 | * | Addr: 0x00000000 | | Addr: 0x00001160 | * ------------------------ --------------------- * * This graph represents the trie for the exports "__mh_execute_header" and * "_main". In the graph only the "_main" and "__mh_execute_header" nodes are * terminal. */ const uint8_t *processExportNode(const uint8_t *CurrPtr, const uint8_t *const End, MachOYAML::ExportEntry &Entry) { if (CurrPtr >= End) return CurrPtr; unsigned Count = 0; Entry.TerminalSize = decodeULEB128(CurrPtr, &Count); CurrPtr += Count; if (Entry.TerminalSize != 0) { Entry.Flags = decodeULEB128(CurrPtr, &Count); CurrPtr += Count; if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) { Entry.Address = 0; Entry.Other = decodeULEB128(CurrPtr, &Count); CurrPtr += Count; Entry.ImportName = std::string(reinterpret_cast(CurrPtr)); } else { Entry.Address = decodeULEB128(CurrPtr, &Count); CurrPtr += Count; if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) { Entry.Other = decodeULEB128(CurrPtr, &Count); CurrPtr += Count; } else Entry.Other = 0; } } uint8_t childrenCount = *CurrPtr++; if (childrenCount == 0) return CurrPtr; Entry.Children.insert(Entry.Children.begin(), (size_t)childrenCount, MachOYAML::ExportEntry()); for (auto &Child : Entry.Children) { Child.Name = std::string(reinterpret_cast(CurrPtr)); CurrPtr += Child.Name.length() + 1; Child.NodeOffset = decodeULEB128(CurrPtr, &Count); CurrPtr += Count; } for (auto &Child : Entry.Children) { CurrPtr = processExportNode(CurrPtr, End, Child); } return CurrPtr; } void MachODumper::dumpExportTrie(std::unique_ptr &Y) { MachOYAML::LinkEditData &LEData = Y->LinkEdit; auto ExportsTrie = Obj.getDyldInfoExportsTrie(); processExportNode(ExportsTrie.begin(), ExportsTrie.end(), LEData.ExportTrie); } template MachOYAML::NListEntry constructNameList(const nlist_t &nlist) { MachOYAML::NListEntry NL; NL.n_strx = nlist.n_strx; NL.n_type = nlist.n_type; NL.n_sect = nlist.n_sect; NL.n_desc = nlist.n_desc; NL.n_value = nlist.n_value; return NL; } void MachODumper::dumpSymbols(std::unique_ptr &Y) { MachOYAML::LinkEditData &LEData = Y->LinkEdit; for (auto Symbol : Obj.symbols()) { MachOYAML::NListEntry NLE = Obj.is64Bit() ? constructNameList( Obj.getSymbol64TableEntry(Symbol.getRawDataRefImpl())) : constructNameList( Obj.getSymbolTableEntry(Symbol.getRawDataRefImpl())); LEData.NameList.push_back(NLE); } StringRef RemainingTable = Obj.getStringTableData(); while (RemainingTable.size() > 0) { auto SymbolPair = RemainingTable.split('\0'); RemainingTable = SymbolPair.second; LEData.StringTable.push_back(SymbolPair.first); } } Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj) { std::unique_ptr DCtx = DWARFContext::create(Obj); MachODumper Dumper(Obj, std::move(DCtx)); Expected> YAML = Dumper.dump(); if (!YAML) return YAML.takeError(); yaml::YamlObjectFile YAMLFile; YAMLFile.MachO = std::move(YAML.get()); yaml::Output Yout(Out); Yout << YAMLFile; return Error::success(); } Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj) { yaml::YamlObjectFile YAMLFile; YAMLFile.FatMachO.reset(new MachOYAML::UniversalBinary()); MachOYAML::UniversalBinary &YAML = *YAMLFile.FatMachO; YAML.Header.magic = Obj.getMagic(); YAML.Header.nfat_arch = Obj.getNumberOfObjects(); for (auto Slice : Obj.objects()) { MachOYAML::FatArch arch; arch.cputype = Slice.getCPUType(); arch.cpusubtype = Slice.getCPUSubType(); arch.offset = Slice.getOffset(); arch.size = Slice.getSize(); arch.align = Slice.getAlign(); arch.reserved = Slice.getReserved(); YAML.FatArchs.push_back(arch); auto SliceObj = Slice.getAsObjectFile(); if (!SliceObj) return SliceObj.takeError(); std::unique_ptr DCtx = DWARFContext::create(*SliceObj.get()); MachODumper Dumper(*SliceObj.get(), std::move(DCtx)); Expected> YAMLObj = Dumper.dump(); if (!YAMLObj) return YAMLObj.takeError(); YAML.Slices.push_back(*YAMLObj.get()); } yaml::Output Yout(Out); Yout << YAML; return Error::success(); } Error macho2yaml(raw_ostream &Out, const object::Binary &Binary) { if (const auto *MachOObj = dyn_cast(&Binary)) return macho2yaml(Out, *MachOObj); if (const auto *MachOObj = dyn_cast(&Binary)) return macho2yaml(Out, *MachOObj); llvm_unreachable("unexpected Mach-O file format"); }