From 168234fec64a2d25e2c4e472c79331638f54d3c2 Mon Sep 17 00:00:00 2001 From: Andrew Ng Date: Wed, 21 Oct 2020 16:11:50 +0100 Subject: [PATCH] [llvm-ar][Object] Fix detection of need for 64-bit archive symbol tables The code to detect the requirement for 64-bit offsets in the archive symbol table was not correctly accounting for the archive file signature and the size of all the contents of the symbol table itself, e.g. the symbol table's header and string table. Also was not considering the variation in symbol table formats. This could result in the creation of large archives with a corrupt symbol table. Change the testing environment variable SYM64_THRESHOLD to be an absolute value rather than a power of 2 in order to enable precise testing of this detection code. Differential Revision: https://reviews.llvm.org/D89891 --- lib/Object/ArchiveWriter.cpp | 88 +++++++++++++++++++++------------ test/Object/archive-symtab.test | 16 ++++-- 2 files changed, 69 insertions(+), 35 deletions(-) diff --git a/lib/Object/ArchiveWriter.cpp b/lib/Object/ArchiveWriter.cpp index 08f1b85f225..ce997464caa 100644 --- a/lib/Object/ArchiveWriter.cpp +++ b/lib/Object/ArchiveWriter.cpp @@ -286,6 +286,42 @@ static void printNBits(raw_ostream &Out, object::Archive::Kind Kind, print(Out, Kind, Val); } +static uint64_t computeSymbolTableSize(object::Archive::Kind Kind, + uint64_t NumSyms, uint64_t OffsetSize, + StringRef StringTable, + uint32_t *Padding = nullptr) { + assert((OffsetSize == 4 || OffsetSize == 8) && "Unsupported OffsetSize"); + uint64_t Size = OffsetSize; // Number of entries + if (isBSDLike(Kind)) + Size += NumSyms * OffsetSize * 2; // Table + else + Size += NumSyms * OffsetSize; // Table + if (isBSDLike(Kind)) + Size += OffsetSize; // byte count + Size += StringTable.size(); + // ld64 expects the members to be 8-byte aligned for 64-bit content and at + // least 4-byte aligned for 32-bit content. Opt for the larger encoding + // uniformly. + // We do this for all bsd formats because it simplifies aligning members. + uint32_t Pad = offsetToAlignment(Size, Align(isBSDLike(Kind) ? 8 : 2)); + Size += Pad; + if (Padding) + *Padding = Pad; + return Size; +} + +static void writeSymbolTableHeader(raw_ostream &Out, object::Archive::Kind Kind, + bool Deterministic, uint64_t Size) { + if (isBSDLike(Kind)) { + const char *Name = is64BitKind(Kind) ? "__.SYMDEF_64" : "__.SYMDEF"; + printBSDMemberHeader(Out, Out.tell(), Name, now(Deterministic), 0, 0, 0, + Size); + } else { + const char *Name = is64BitKind(Kind) ? "/SYM64" : ""; + printGNUSmallMemberHeader(Out, Name, now(Deterministic), 0, 0, 0, Size); + } +} + static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, bool Deterministic, ArrayRef Members, StringRef StringTable) { @@ -298,33 +334,10 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, for (const MemberData &M : Members) NumSyms += M.Symbols.size(); - unsigned Size = 0; - unsigned OffsetSize = is64BitKind(Kind) ? sizeof(uint64_t) : sizeof(uint32_t); - - Size += OffsetSize; // Number of entries - if (isBSDLike(Kind)) - Size += NumSyms * OffsetSize * 2; // Table - else - Size += NumSyms * OffsetSize; // Table - if (isBSDLike(Kind)) - Size += OffsetSize; // byte count - Size += StringTable.size(); - // ld64 expects the members to be 8-byte aligned for 64-bit content and at - // least 4-byte aligned for 32-bit content. Opt for the larger encoding - // uniformly. - // We do this for all bsd formats because it simplifies aligning members. - const Align Alignment(isBSDLike(Kind) ? 8 : 2); - unsigned Pad = offsetToAlignment(Size, Alignment); - Size += Pad; - - if (isBSDLike(Kind)) { - const char *Name = is64BitKind(Kind) ? "__.SYMDEF_64" : "__.SYMDEF"; - printBSDMemberHeader(Out, Out.tell(), Name, now(Deterministic), 0, 0, 0, - Size); - } else { - const char *Name = is64BitKind(Kind) ? "/SYM64" : ""; - printGNUSmallMemberHeader(Out, Name, now(Deterministic), 0, 0, 0, Size); - } + uint64_t OffsetSize = is64BitKind(Kind) ? 8 : 4; + uint32_t Pad; + uint64_t Size = computeSymbolTableSize(Kind, NumSyms, OffsetSize, StringTable, &Pad); + writeSymbolTableHeader(Out, Kind, Deterministic, Size); uint64_t Pos = Out.tell() + Size; @@ -579,17 +592,28 @@ static Error writeArchiveToStream(raw_ostream &Out, // We would like to detect if we need to switch to a 64-bit symbol table. if (WriteSymtab) { - uint64_t MaxOffset = 0; + uint64_t MaxOffset = 8; // For the file signature. uint64_t LastOffset = MaxOffset; + uint64_t NumSyms = 0; for (const auto &M : Data) { // Record the start of the member's offset LastOffset = MaxOffset; // Account for the size of each part associated with the member. MaxOffset += M.Header.size() + M.Data.size() + M.Padding.size(); - // We assume 32-bit symbols to see if 32-bit symbols are possible or not. - MaxOffset += M.Symbols.size() * 4; + NumSyms += M.Symbols.size(); } + // We assume 32-bit offsets to see if 32-bit symbols are possible or not. + uint64_t SymtabSize = computeSymbolTableSize(Kind, NumSyms, 4, SymNamesBuf); + auto computeSymbolTableHeaderSize = + [=] { + SmallString<0> TmpBuf; + raw_svector_ostream Tmp(TmpBuf); + writeSymbolTableHeader(Tmp, Kind, Deterministic, SymtabSize); + return TmpBuf.size(); + }; + LastOffset += computeSymbolTableHeaderSize() + SymtabSize; + // The SYM64 format is used when an archive's member offsets are larger than // 32-bits can hold. The need for this shift in format is detected by // writeArchive. To test this we need to generate a file with a member that @@ -597,15 +621,15 @@ static Error writeArchiveToStream(raw_ostream &Out, // speed the test up we use this environment variable to pretend like the // cutoff happens before 32-bits and instead happens at some much smaller // value. + uint64_t Sym64Threshold = 1ULL << 32; const char *Sym64Env = std::getenv("SYM64_THRESHOLD"); - int Sym64Threshold = 32; if (Sym64Env) StringRef(Sym64Env).getAsInteger(10, Sym64Threshold); // If LastOffset isn't going to fit in a 32-bit varible we need to switch // to 64-bit. Note that the file can be larger than 4GB as long as the last // member starts before the 4GB offset. - if (LastOffset >= (1ULL << Sym64Threshold)) { + if (LastOffset >= Sym64Threshold) { if (Kind == object::Archive::K_DARWIN) Kind = object::Archive::K_DARWIN64; else diff --git a/test/Object/archive-symtab.test b/test/Object/archive-symtab.test index d77244a0e7a..31cd849d19d 100644 --- a/test/Object/archive-symtab.test +++ b/test/Object/archive-symtab.test @@ -51,9 +51,14 @@ Symbols: # RUN: llvm-nm -M %t.a | FileCheck %s # RUN: rm -f %t.a -# RUN: env SYM64_THRESHOLD=1 llvm-ar rcsU %t.a %t.elf-x86-64 %t2.elf-x86-64 +# RUN: env SYM64_THRESHOLD=836 llvm-ar rcsU %t.a %t.elf-x86-64 %t2.elf-x86-64 # RUN: llvm-nm -M %t.a | FileCheck %s -# RUXX: grep SYM64 %t.a +# RUN: grep '/SYM64/' %t.a + +# RUN: rm -f %t.a +# RUN: env SYM64_THRESHOLD=837 llvm-ar rcsU %t.a %t.elf-x86-64 %t2.elf-x86-64 +# RUN: llvm-nm -M %t.a | FileCheck %s +# RUN: not grep '/SYM64/' %t.a # CHECK: Archive map # CHECK-NEXT: main in {{.*}}.elf-x86-64 @@ -136,10 +141,15 @@ Symbols: # RUN: llvm-nm -M %t.a | FileCheck --check-prefix=MACHO %s # RUN: rm -f %t.a -# RUN: env SYM64_THRESHOLD=1 llvm-ar --format=darwin rcsU %t.a %p/Inputs/trivial-object-test.macho-x86-64 %p/Inputs/trivial-object-test2.macho-x86-64 +# RUN: env SYM64_THRESHOLD=784 llvm-ar --format=darwin rcsU %t.a %p/Inputs/trivial-object-test.macho-x86-64 %p/Inputs/trivial-object-test2.macho-x86-64 # RUN: llvm-nm -M %t.a | FileCheck --check-prefix=MACHO %s # RUN: grep '__\.SYMDEF_64' %t.a +# RUN: rm -f %t.a +# RUN: env SYM64_THRESHOLD=785 llvm-ar --format=darwin rcsU %t.a %p/Inputs/trivial-object-test.macho-x86-64 %p/Inputs/trivial-object-test2.macho-x86-64 +# RUN: llvm-nm -M %t.a | FileCheck --check-prefix=MACHO %s +# RUN: not grep '__\.SYMDEF_64' %t.a + # MACHO: Archive map # MACHO-NEXT: _main in trivial-object-test.macho-x86-64 # MACHO-NEXT: _foo in trivial-object-test2.macho-x86-64