//===- Object.h -------------------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #ifndef LLVM_TOOLS_OBJCOPY_OBJECT_H #define LLVM_TOOLS_OBJCOPY_OBJECT_H #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/StringTableBuilder.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Support/FileOutputBuffer.h" #include "llvm/Support/JamCRC.h" #include #include #include #include #include #include namespace llvm { namespace objcopy { class Buffer; class SectionBase; class Section; class OwnedDataSection; class StringTableSection; class SymbolTableSection; class RelocationSection; class DynamicRelocationSection; class GnuDebugLinkSection; class GroupSection; class SectionIndexSection; class Segment; class Object; struct Symbol; class SectionTableRef { MutableArrayRef> Sections; public: using iterator = pointee_iterator *>; explicit SectionTableRef(MutableArrayRef> Secs) : Sections(Secs) {} SectionTableRef(const SectionTableRef &) = default; iterator begin() { return iterator(Sections.data()); } iterator end() { return iterator(Sections.data() + Sections.size()); } SectionBase *getSection(uint32_t Index, Twine ErrMsg); template T *getSectionOfType(uint32_t Index, Twine IndexErrMsg, Twine TypeErrMsg); }; enum ElfType { ELFT_ELF32LE, ELFT_ELF64LE, ELFT_ELF32BE, ELFT_ELF64BE }; class SectionVisitor { public: virtual ~SectionVisitor(); virtual void visit(const Section &Sec) = 0; virtual void visit(const OwnedDataSection &Sec) = 0; virtual void visit(const StringTableSection &Sec) = 0; virtual void visit(const SymbolTableSection &Sec) = 0; virtual void visit(const RelocationSection &Sec) = 0; virtual void visit(const DynamicRelocationSection &Sec) = 0; virtual void visit(const GnuDebugLinkSection &Sec) = 0; virtual void visit(const GroupSection &Sec) = 0; virtual void visit(const SectionIndexSection &Sec) = 0; }; class SectionWriter : public SectionVisitor { protected: Buffer &Out; public: virtual ~SectionWriter(){}; void visit(const Section &Sec) override; void visit(const OwnedDataSection &Sec) override; void visit(const StringTableSection &Sec) override; void visit(const DynamicRelocationSection &Sec) override; virtual void visit(const SymbolTableSection &Sec) override = 0; virtual void visit(const RelocationSection &Sec) override = 0; virtual void visit(const GnuDebugLinkSection &Sec) override = 0; virtual void visit(const GroupSection &Sec) override = 0; virtual void visit(const SectionIndexSection &Sec) override = 0; explicit SectionWriter(Buffer &Buf) : Out(Buf) {} }; template class ELFSectionWriter : public SectionWriter { private: using Elf_Word = typename ELFT::Word; using Elf_Rel = typename ELFT::Rel; using Elf_Rela = typename ELFT::Rela; public: virtual ~ELFSectionWriter() {} void visit(const SymbolTableSection &Sec) override; void visit(const RelocationSection &Sec) override; void visit(const GnuDebugLinkSection &Sec) override; void visit(const GroupSection &Sec) override; void visit(const SectionIndexSection &Sec) override; explicit ELFSectionWriter(Buffer &Buf) : SectionWriter(Buf) {} }; #define MAKE_SEC_WRITER_FRIEND \ friend class SectionWriter; \ template friend class ELFSectionWriter; class BinarySectionWriter : public SectionWriter { public: virtual ~BinarySectionWriter() {} void visit(const SymbolTableSection &Sec) override; void visit(const RelocationSection &Sec) override; void visit(const GnuDebugLinkSection &Sec) override; void visit(const GroupSection &Sec) override; void visit(const SectionIndexSection &Sec) override; explicit BinarySectionWriter(Buffer &Buf) : SectionWriter(Buf) {} }; // The class Buffer abstracts out the common interface of FileOutputBuffer and // WritableMemoryBuffer so that the hierarchy of Writers depends on this // abstract interface and doesn't depend on a particular implementation. // TODO: refactor the buffer classes in LLVM to enable us to use them here // directly. class Buffer { StringRef Name; public: virtual ~Buffer(); virtual void allocate(size_t Size) = 0; virtual uint8_t *getBufferStart() = 0; virtual Error commit() = 0; explicit Buffer(StringRef Name) : Name(Name) {} StringRef getName() const { return Name; } }; class FileBuffer : public Buffer { std::unique_ptr Buf; public: void allocate(size_t Size) override; uint8_t *getBufferStart() override; Error commit() override; explicit FileBuffer(StringRef FileName) : Buffer(FileName) {} }; class MemBuffer : public Buffer { std::unique_ptr Buf; public: void allocate(size_t Size) override; uint8_t *getBufferStart() override; Error commit() override; explicit MemBuffer(StringRef Name) : Buffer(Name) {} std::unique_ptr releaseMemoryBuffer(); }; class Writer { protected: Object &Obj; Buffer &Buf; public: virtual ~Writer(); virtual void finalize() = 0; virtual void write() = 0; Writer(Object &O, Buffer &B) : Obj(O), Buf(B) {} }; template class ELFWriter : public Writer { private: using Elf_Shdr = typename ELFT::Shdr; using Elf_Phdr = typename ELFT::Phdr; using Elf_Ehdr = typename ELFT::Ehdr; void writeEhdr(); void writePhdr(const Segment &Seg); void writeShdr(const SectionBase &Sec); void writePhdrs(); void writeShdrs(); void writeSectionData(); void assignOffsets(); std::unique_ptr> SecWriter; size_t totalSize() const; public: virtual ~ELFWriter() {} bool WriteSectionHeaders = true; void finalize() override; void write() override; ELFWriter(Object &Obj, Buffer &Buf, bool WSH) : Writer(Obj, Buf), WriteSectionHeaders(WSH) {} }; class BinaryWriter : public Writer { private: std::unique_ptr SecWriter; uint64_t TotalSize; public: ~BinaryWriter() {} void finalize() override; void write() override; BinaryWriter(Object &Obj, Buffer &Buf) : Writer(Obj, Buf) {} }; class SectionBase { public: StringRef Name; Segment *ParentSegment = nullptr; uint64_t HeaderOffset; uint64_t OriginalOffset = std::numeric_limits::max(); uint32_t Index; bool HasSymbol = false; uint64_t Addr = 0; uint64_t Align = 1; uint32_t EntrySize = 0; uint64_t Flags = 0; uint64_t Info = 0; uint64_t Link = ELF::SHN_UNDEF; uint64_t NameIndex = 0; uint64_t Offset = 0; uint64_t Size = 0; uint64_t Type = ELF::SHT_NULL; ArrayRef OriginalData; virtual ~SectionBase() = default; virtual void initialize(SectionTableRef SecTable); virtual void finalize(); virtual void removeSectionReferences(const SectionBase *Sec); virtual void removeSymbols(function_ref ToRemove); virtual void accept(SectionVisitor &Visitor) const = 0; virtual void markSymbols(); }; class Segment { private: struct SectionCompare { bool operator()(const SectionBase *Lhs, const SectionBase *Rhs) const { // Some sections might have the same address if one of them is empty. To // fix this we can use the lexicographic ordering on ->Addr and the // address of the actully stored section. if (Lhs->OriginalOffset == Rhs->OriginalOffset) return Lhs < Rhs; return Lhs->OriginalOffset < Rhs->OriginalOffset; } }; std::set Sections; ArrayRef Contents; public: uint64_t Align; uint64_t FileSize; uint32_t Flags; uint32_t Index; uint64_t MemSize; uint64_t Offset; uint64_t PAddr; uint64_t Type; uint64_t VAddr; uint64_t OriginalOffset; Segment *ParentSegment = nullptr; explicit Segment(ArrayRef Data) : Contents(Data) {} Segment() {} const SectionBase *firstSection() const { if (!Sections.empty()) return *Sections.begin(); return nullptr; } void removeSection(const SectionBase *Sec) { Sections.erase(Sec); } void addSection(const SectionBase *Sec) { Sections.insert(Sec); } }; class Section : public SectionBase { MAKE_SEC_WRITER_FRIEND ArrayRef Contents; SectionBase *LinkSection = nullptr; public: explicit Section(ArrayRef Data) : Contents(Data) {} void accept(SectionVisitor &Visitor) const override; void removeSectionReferences(const SectionBase *Sec) override; void initialize(SectionTableRef SecTable) override; void finalize() override; }; class OwnedDataSection : public SectionBase { MAKE_SEC_WRITER_FRIEND std::vector Data; public: OwnedDataSection(StringRef SecName, ArrayRef Data) : Data(std::begin(Data), std::end(Data)) { Name = SecName; Type = ELF::SHT_PROGBITS; Size = Data.size(); OriginalOffset = std::numeric_limits::max(); } void accept(SectionVisitor &Sec) const override; }; // There are two types of string tables that can exist, dynamic and not dynamic. // In the dynamic case the string table is allocated. Changing a dynamic string // table would mean altering virtual addresses and thus the memory image. So // dynamic string tables should not have an interface to modify them or // reconstruct them. This type lets us reconstruct a string table. To avoid // this class being used for dynamic string tables (which has happened) the // classof method checks that the particular instance is not allocated. This // then agrees with the makeSection method used to construct most sections. class StringTableSection : public SectionBase { MAKE_SEC_WRITER_FRIEND StringTableBuilder StrTabBuilder; public: StringTableSection() : StrTabBuilder(StringTableBuilder::ELF) { Type = ELF::SHT_STRTAB; } void addString(StringRef Name); uint32_t findIndex(StringRef Name) const; void finalize() override; void accept(SectionVisitor &Visitor) const override; static bool classof(const SectionBase *S) { if (S->Flags & ELF::SHF_ALLOC) return false; return S->Type == ELF::SHT_STRTAB; } }; // Symbols have a st_shndx field that normally stores an index but occasionally // stores a different special value. This enum keeps track of what the st_shndx // field means. Most of the values are just copies of the special SHN_* values. // SYMBOL_SIMPLE_INDEX means that the st_shndx is just an index of a section. enum SymbolShndxType { SYMBOL_SIMPLE_INDEX = 0, SYMBOL_ABS = ELF::SHN_ABS, SYMBOL_COMMON = ELF::SHN_COMMON, SYMBOL_HEXAGON_SCOMMON = ELF::SHN_HEXAGON_SCOMMON, SYMBOL_HEXAGON_SCOMMON_2 = ELF::SHN_HEXAGON_SCOMMON_2, SYMBOL_HEXAGON_SCOMMON_4 = ELF::SHN_HEXAGON_SCOMMON_4, SYMBOL_HEXAGON_SCOMMON_8 = ELF::SHN_HEXAGON_SCOMMON_8, SYMBOL_XINDEX = ELF::SHN_XINDEX, }; struct Symbol { uint8_t Binding; SectionBase *DefinedIn = nullptr; SymbolShndxType ShndxType; uint32_t Index; StringRef Name; uint32_t NameIndex; uint64_t Size; uint8_t Type; uint64_t Value; uint8_t Visibility; bool Referenced = false; uint16_t getShndx() const; }; class SectionIndexSection : public SectionBase { MAKE_SEC_WRITER_FRIEND private: std::vector Indexes; SymbolTableSection *Symbols = nullptr; public: virtual ~SectionIndexSection() {} void addIndex(uint32_t Index) { Indexes.push_back(Index); Size += 4; } void setSymTab(SymbolTableSection *SymTab) { Symbols = SymTab; } void initialize(SectionTableRef SecTable) override; void finalize() override; void accept(SectionVisitor &Visitor) const override; SectionIndexSection() { Name = ".symtab_shndx"; Align = 4; EntrySize = 4; Type = ELF::SHT_SYMTAB_SHNDX; } }; class SymbolTableSection : public SectionBase { MAKE_SEC_WRITER_FRIEND void setStrTab(StringTableSection *StrTab) { SymbolNames = StrTab; } void assignIndices(); protected: std::vector> Symbols; StringTableSection *SymbolNames = nullptr; SectionIndexSection *SectionIndexTable = nullptr; using SymPtr = std::unique_ptr; public: void addSymbol(StringRef Name, uint8_t Bind, uint8_t Type, SectionBase *DefinedIn, uint64_t Value, uint8_t Visibility, uint16_t Shndx, uint64_t Sz); void prepareForLayout(); // An 'empty' symbol table still contains a null symbol. bool empty() const { return Symbols.size() == 1; } void setShndxTable(SectionIndexSection *ShndxTable) { SectionIndexTable = ShndxTable; } const SectionIndexSection *getShndxTable() const { return SectionIndexTable; } const SectionBase *getStrTab() const { return SymbolNames; } const Symbol *getSymbolByIndex(uint32_t Index) const; Symbol *getSymbolByIndex(uint32_t Index); void updateSymbols(function_ref Callable); void removeSectionReferences(const SectionBase *Sec) override; void initialize(SectionTableRef SecTable) override; void finalize() override; void accept(SectionVisitor &Visitor) const override; void removeSymbols(function_ref ToRemove) override; static bool classof(const SectionBase *S) { return S->Type == ELF::SHT_SYMTAB; } }; struct Relocation { Symbol *RelocSymbol = nullptr; uint64_t Offset; uint64_t Addend; uint32_t Type; }; // All relocation sections denote relocations to apply to another section. // However, some relocation sections use a dynamic symbol table and others use // a regular symbol table. Because the types of the two symbol tables differ in // our system (because they should behave differently) we can't uniformly // represent all relocations with the same base class if we expose an interface // that mentions the symbol table type. So we split the two base types into two // different classes, one which handles the section the relocation is applied to // and another which handles the symbol table type. The symbol table type is // taken as a type parameter to the class (see RelocSectionWithSymtabBase). class RelocationSectionBase : public SectionBase { protected: SectionBase *SecToApplyRel = nullptr; public: const SectionBase *getSection() const { return SecToApplyRel; } void setSection(SectionBase *Sec) { SecToApplyRel = Sec; } static bool classof(const SectionBase *S) { return S->Type == ELF::SHT_REL || S->Type == ELF::SHT_RELA; } }; // Takes the symbol table type to use as a parameter so that we can deduplicate // that code between the two symbol table types. template class RelocSectionWithSymtabBase : public RelocationSectionBase { SymTabType *Symbols = nullptr; void setSymTab(SymTabType *SymTab) { Symbols = SymTab; } protected: RelocSectionWithSymtabBase() = default; public: void removeSectionReferences(const SectionBase *Sec) override; void initialize(SectionTableRef SecTable) override; void finalize() override; }; class RelocationSection : public RelocSectionWithSymtabBase { MAKE_SEC_WRITER_FRIEND std::vector Relocations; public: void addRelocation(Relocation Rel) { Relocations.push_back(Rel); } void accept(SectionVisitor &Visitor) const override; void removeSymbols(function_ref ToRemove) override; void markSymbols() override; static bool classof(const SectionBase *S) { if (S->Flags & ELF::SHF_ALLOC) return false; return S->Type == ELF::SHT_REL || S->Type == ELF::SHT_RELA; } }; // TODO: The way stripping and groups interact is complicated // and still needs to be worked on. class GroupSection : public SectionBase { MAKE_SEC_WRITER_FRIEND const SymbolTableSection *SymTab = nullptr; Symbol *Sym = nullptr; ELF::Elf32_Word FlagWord; SmallVector GroupMembers; public: // TODO: Contents is present in several classes of the hierarchy. // This needs to be refactored to avoid duplication. ArrayRef Contents; explicit GroupSection(ArrayRef Data) : Contents(Data) {} void setSymTab(const SymbolTableSection *SymTabSec) { SymTab = SymTabSec; } void setSymbol(Symbol *S) { Sym = S; } void setFlagWord(ELF::Elf32_Word W) { FlagWord = W; } void addMember(SectionBase *Sec) { GroupMembers.push_back(Sec); } void initialize(SectionTableRef SecTable) override{}; void accept(SectionVisitor &) const override; void finalize() override; void removeSymbols(function_ref ToRemove) override; void markSymbols() override; static bool classof(const SectionBase *S) { return S->Type == ELF::SHT_GROUP; } }; class DynamicSymbolTableSection : public Section { public: explicit DynamicSymbolTableSection(ArrayRef Data) : Section(Data) {} static bool classof(const SectionBase *S) { return S->Type == ELF::SHT_DYNSYM; } }; class DynamicSection : public Section { public: explicit DynamicSection(ArrayRef Data) : Section(Data) {} static bool classof(const SectionBase *S) { return S->Type == ELF::SHT_DYNAMIC; } }; class DynamicRelocationSection : public RelocSectionWithSymtabBase { MAKE_SEC_WRITER_FRIEND private: ArrayRef Contents; public: explicit DynamicRelocationSection(ArrayRef Data) : Contents(Data) {} void accept(SectionVisitor &) const override; static bool classof(const SectionBase *S) { if (!(S->Flags & ELF::SHF_ALLOC)) return false; return S->Type == ELF::SHT_REL || S->Type == ELF::SHT_RELA; } }; class GnuDebugLinkSection : public SectionBase { MAKE_SEC_WRITER_FRIEND private: StringRef FileName; uint32_t CRC32; void init(StringRef File, StringRef Data); public: // If we add this section from an external source we can use this ctor. explicit GnuDebugLinkSection(StringRef File); void accept(SectionVisitor &Visitor) const override; }; class Reader { public: virtual ~Reader(); virtual std::unique_ptr create() const = 0; }; using object::Binary; using object::ELFFile; using object::ELFObjectFile; using object::OwningBinary; template class ELFBuilder { private: using Elf_Addr = typename ELFT::Addr; using Elf_Shdr = typename ELFT::Shdr; using Elf_Ehdr = typename ELFT::Ehdr; using Elf_Word = typename ELFT::Word; const ELFFile &ElfFile; Object &Obj; void setParentSegment(Segment &Child); void readProgramHeaders(); void initGroupSection(GroupSection *GroupSec); void initSymbolTable(SymbolTableSection *SymTab); void readSectionHeaders(); SectionBase &makeSection(const Elf_Shdr &Shdr); public: ELFBuilder(const ELFObjectFile &ElfObj, Object &Obj) : ElfFile(*ElfObj.getELFFile()), Obj(Obj) {} void build(); }; class ELFReader : public Reader { Binary *Bin; public: ElfType getElfType() const; std::unique_ptr create() const override; explicit ELFReader(Binary *B) : Bin(B){}; }; class Object { private: using SecPtr = std::unique_ptr; using SegPtr = std::unique_ptr; std::vector Sections; std::vector Segments; public: template using Range = iterator_range< pointee_iterator>::iterator>>; template using ConstRange = iterator_range>::const_iterator>>; // It is often the case that the ELF header and the program header table are // not present in any segment. This could be a problem during file layout, // because other segments may get assigned an offset where either of the // two should reside, which will effectively corrupt the resulting binary. // Other than that we use these segments to track program header offsets // when they may not follow the ELF header. Segment ElfHdrSegment; Segment ProgramHdrSegment; uint8_t Ident[16]; uint64_t Entry; uint64_t SHOffset; uint32_t Type; uint32_t Machine; uint32_t Version; uint32_t Flags; StringTableSection *SectionNames = nullptr; SymbolTableSection *SymbolTable = nullptr; SectionIndexSection *SectionIndexTable = nullptr; void sortSections(); SectionTableRef sections() { return SectionTableRef(Sections); } ConstRange sections() const { return make_pointee_range(Sections); } Range segments() { return make_pointee_range(Segments); } ConstRange segments() const { return make_pointee_range(Segments); } void removeSections(std::function ToRemove); void removeSymbols(function_ref ToRemove); template T &addSection(Ts &&... Args) { auto Sec = llvm::make_unique(std::forward(Args)...); auto Ptr = Sec.get(); Sections.emplace_back(std::move(Sec)); return *Ptr; } Segment &addSegment(ArrayRef Data) { Segments.emplace_back(llvm::make_unique(Data)); return *Segments.back(); } }; } // end namespace objcopy } // end namespace llvm #endif // LLVM_TOOLS_OBJCOPY_OBJECT_H