From dbb5894a2520b37ff1c91a5c399fba67846ccc6d Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Thu, 20 Jun 2019 00:29:40 +0000 Subject: [PATCH] [llvm-objdump] Switch between ARM/Thumb based on mapping symbols. The ARMDisassembler changes allow changing between ARM and Thumb mode based on the MCSubtargetInfo, rather than the Target, which simplifies the other changes a bit. I'm not really happy with adding more target-specific logic to tools/llvm-objdump/, but there isn't any easy way around it: the logic in question specifically applies to disassembling an object file, and that code simply isn't located in lib/Target, at least at the moment. Differential Revision: https://reviews.llvm.org/D60927 llvm-svn: 363903 --- include/llvm/Object/ELFObjectFile.h | 3 +- .../ARM/Disassembler/ARMDisassembler.cpp | 59 +++++----- test/CodeGen/ARM/inlineasm-switch-mode.ll | 19 +-- test/tools/llvm-objdump/ARM/v7r-subfeatures.s | 8 +- tools/llvm-objdump/llvm-objdump.cpp | 109 +++++++++++++----- 5 files changed, 120 insertions(+), 78 deletions(-) diff --git a/include/llvm/Object/ELFObjectFile.h b/include/llvm/Object/ELFObjectFile.h index 30618950af5..86c015efd70 100644 --- a/include/llvm/Object/ELFObjectFile.h +++ b/include/llvm/Object/ELFObjectFile.h @@ -54,7 +54,6 @@ class ELFObjectFileBase : public ObjectFile { protected: ELFObjectFileBase(unsigned int Type, MemoryBufferRef Source); - virtual uint16_t getEMachine() const = 0; virtual uint64_t getSymbolSize(DataRefImpl Symb) const = 0; virtual uint8_t getSymbolBinding(DataRefImpl Symb) const = 0; virtual uint8_t getSymbolOther(DataRefImpl Symb) const = 0; @@ -91,6 +90,8 @@ public: virtual uint16_t getEType() const = 0; + virtual uint16_t getEMachine() const = 0; + std::vector> getPltAddresses() const; }; diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 048b8c68db8..20670e16446 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ 
b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -135,27 +135,22 @@ public: ~ARMDisassembler() override = default; - DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, - ArrayRef Bytes, uint64_t Address, - raw_ostream &VStream, - raw_ostream &CStream) const override; -}; - -/// Thumb disassembler for all Thumb platforms. -class ThumbDisassembler : public MCDisassembler { -public: - ThumbDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : - MCDisassembler(STI, Ctx) { - } - - ~ThumbDisassembler() override = default; - DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef Bytes, uint64_t Address, raw_ostream &VStream, raw_ostream &CStream) const override; private: + DecodeStatus getARMInstruction(MCInst &Instr, uint64_t &Size, + ArrayRef Bytes, uint64_t Address, + raw_ostream &VStream, + raw_ostream &CStream) const; + + DecodeStatus getThumbInstruction(MCInst &Instr, uint64_t &Size, + ArrayRef Bytes, uint64_t Address, + raw_ostream &VStream, + raw_ostream &CStream) const; + mutable ITStatus ITBlock; mutable VPTStatus VPTBlock; @@ -519,12 +514,6 @@ static MCDisassembler *createARMDisassembler(const Target &T, return new ARMDisassembler(STI, Ctx); } -static MCDisassembler *createThumbDisassembler(const Target &T, - const MCSubtargetInfo &STI, - MCContext &Ctx) { - return new ThumbDisassembler(STI, Ctx); -} - // Post-decoding checks static DecodeStatus checkDecodedInstruction(MCInst &MI, uint64_t &Size, uint64_t Address, raw_ostream &OS, @@ -562,6 +551,16 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, ArrayRef Bytes, uint64_t Address, raw_ostream &OS, raw_ostream &CS) const { + if (STI.getFeatureBits()[ARM::ModeThumb]) + return getThumbInstruction(MI, Size, Bytes, Address, OS, CS); + return getARMInstruction(MI, Size, Bytes, Address, OS, CS); +} + +DecodeStatus ARMDisassembler::getARMInstruction(MCInst &MI, uint64_t &Size, + ArrayRef Bytes, + uint64_t Address, + raw_ostream &OS, + raw_ostream &CS) const { 
CommentStream = &CS; assert(!STI.getFeatureBits()[ARM::ModeThumb] && @@ -698,7 +697,7 @@ static bool isVectorPredicable(unsigned Opcode) { // to fix up the predicate operands using this context information as a // post-pass. MCDisassembler::DecodeStatus -ThumbDisassembler::AddThumbPredicate(MCInst &MI) const { +ARMDisassembler::AddThumbPredicate(MCInst &MI) const { MCDisassembler::DecodeStatus S = Success; const FeatureBitset &FeatureBits = getSubtargetInfo().getFeatureBits(); @@ -813,7 +812,7 @@ ThumbDisassembler::AddThumbPredicate(MCInst &MI) const { // mode, the auto-generated decoder will give them an (incorrect) // predicate operand. We need to rewrite these operands based on the IT // context as a post-pass. -void ThumbDisassembler::UpdateThumbVFPPredicate( +void ARMDisassembler::UpdateThumbVFPPredicate( DecodeStatus &S, MCInst &MI) const { unsigned CC; CC = ITBlock.getITCC(); @@ -844,11 +843,11 @@ void ThumbDisassembler::UpdateThumbVFPPredicate( } } -DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, - ArrayRef Bytes, - uint64_t Address, - raw_ostream &OS, - raw_ostream &CS) const { +DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size, + ArrayRef Bytes, + uint64_t Address, + raw_ostream &OS, + raw_ostream &CS) const { CommentStream = &CS; assert(STI.getFeatureBits()[ARM::ModeThumb] && @@ -1046,9 +1045,9 @@ extern "C" void LLVMInitializeARMDisassembler() { TargetRegistry::RegisterMCDisassembler(getTheARMBETarget(), createARMDisassembler); TargetRegistry::RegisterMCDisassembler(getTheThumbLETarget(), - createThumbDisassembler); + createARMDisassembler); TargetRegistry::RegisterMCDisassembler(getTheThumbBETarget(), - createThumbDisassembler); + createARMDisassembler); } static const uint16_t GPRDecoderTable[] = { diff --git a/test/CodeGen/ARM/inlineasm-switch-mode.ll b/test/CodeGen/ARM/inlineasm-switch-mode.ll index 6035612788d..c0d6492f109 100644 --- a/test/CodeGen/ARM/inlineasm-switch-mode.ll +++ 
b/test/CodeGen/ARM/inlineasm-switch-mode.ll @@ -1,7 +1,4 @@ -;RUN: llc -mtriple=thumbv7-linux-gnueabi < %s | llvm-mc -triple=thumbv7-linux-gnueabi -filetype=obj > %t -; Two pass decoding needed because llvm-objdump does not respect mapping symbols -;RUN: llvm-objdump -triple=armv7 -d %t | FileCheck %s --check-prefix=ARM -;RUN: llvm-objdump -triple=thumbv7 -d %t | FileCheck %s --check-prefix=THUMB +;RUN: llc -mtriple=thumbv7-linux-gnueabi < %s | llvm-mc -triple=thumbv7-linux-gnueabi -filetype=obj | llvm-objdump -d - | FileCheck %s define hidden i32 @bah(i8* %start) #0 align 2 { %1 = ptrtoint i8* %start to i32 @@ -10,13 +7,7 @@ define hidden i32 @bah(i8* %start) #0 align 2 { ret i32 %3 } -; ARM: $a -; ARM-NEXT: 04 70 2d e5 str r7, [sp, #-4]! -; ARM: $t -; ARM-NEXT: 48 1c - -; THUMB: $a{{.*}}: -; THUMB-NEXT: 04 70 -; THUMB-NEXT: 2d e5 -; THUMB: $t{{.*}}: -; THUMB-NEXT: 48 1c adds r0, r1, #1 +; CHECK: $a{{.*}}: +; CHECK-NEXT: 04 70 2d e5 str r7, [sp, #-4]! +; CHECK: $t{{.*}}: +; CHECK-NEXT: 48 1c adds r0, r1, #1 diff --git a/test/tools/llvm-objdump/ARM/v7r-subfeatures.s b/test/tools/llvm-objdump/ARM/v7r-subfeatures.s index bbe40a19387..78ec0ac61a7 100644 --- a/test/tools/llvm-objdump/ARM/v7r-subfeatures.s +++ b/test/tools/llvm-objdump/ARM/v7r-subfeatures.s @@ -1,5 +1,6 @@ -@ RUN: llvm-mc < %s -triple armv7r -mattr=+hwdiv-arm -filetype=obj | llvm-objdump -triple=thumb -d - | FileCheck %s -@ RUN: llvm-mc < %s -triple armv7r -mattr=+hwdiv-arm -filetype=obj | llvm-objdump -triple=arm -d - | FileCheck %s --check-prefix=CHECK-ARM +@ RUN: llvm-mc < %s -triple armv7r -mattr=+hwdiv-arm -filetype=obj | llvm-objdump -d - | FileCheck %s +@ v7r implies Thumb hwdiv, but ARM hwdiv is optional +@ FIXME: Does that imply we should actually refuse to disassemble it? 
.eabi_attribute Tag_CPU_arch, 10 // v7 .eabi_attribute Tag_CPU_arch_profile, 0x52 // 'R' profile @@ -9,8 +10,7 @@ div_arm: udiv r0, r1, r2 @CHECK-LABEL: div_arm -@CHECK-NOT: udiv r0, r1, r2 -@CHECK-ARM-NOT: udiv r0, r1, r2 +@CHECK: 11 f2 30 e7 .thumb div_thumb: diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp index 22167f15e14..4e841827892 100644 --- a/tools/llvm-objdump/llvm-objdump.cpp +++ b/tools/llvm-objdump/llvm-objdump.cpp @@ -603,13 +603,18 @@ void SourcePrinter::printSourceLine(raw_ostream &OS, OldLineInfo = LineInfo; } +static bool isAArch64Elf(const ObjectFile *Obj) { + const auto *Elf = dyn_cast(Obj); + return Elf && Elf->getEMachine() == ELF::EM_AARCH64; +} + static bool isArmElf(const ObjectFile *Obj) { - return (Obj->isELF() && - (Obj->getArch() == Triple::aarch64 || - Obj->getArch() == Triple::aarch64_be || - Obj->getArch() == Triple::arm || Obj->getArch() == Triple::armeb || - Obj->getArch() == Triple::thumb || - Obj->getArch() == Triple::thumbeb)); + const auto *Elf = dyn_cast(Obj); + return Elf && Elf->getEMachine() == ELF::EM_ARM; +} + +static bool hasMappingSymbols(const ObjectFile *Obj) { + return isArmElf(Obj) || isAArch64Elf(Obj); } static void printRelocation(const RelocationRef &Rel, uint64_t Address, @@ -954,10 +959,24 @@ static bool shouldAdjustVA(const SectionRef &Section) { return false; } + +typedef std::pair MappingSymbolPair; +static char getMappingSymbolKind(ArrayRef MappingSymbols, + uint64_t Address) { + auto Sym = bsearch(MappingSymbols, [Address](const MappingSymbolPair &Val) { + return Val.first > Address; + }); + // Return zero for any address before the first mapping symbol; this means + // we should use the default disassembly mode, depending on the target. 
+ if (Sym == MappingSymbols.begin()) + return '\x00'; + return (Sym - 1)->second; +} + static uint64_t dumpARMELFData(uint64_t SectionAddr, uint64_t Index, uint64_t End, const ObjectFile *Obj, ArrayRef Bytes, - const std::vector &TextMappingSymsAddr) { + ArrayRef MappingSymbols) { support::endianness Endian = Obj->isLittleEndian() ? support::little : support::big; while (Index < End) { @@ -981,8 +1000,7 @@ dumpARMELFData(uint64_t SectionAddr, uint64_t Index, uint64_t End, ++Index; } outs() << "\n"; - if (std::binary_search(TextMappingSymsAddr.begin(), - TextMappingSymsAddr.end(), Index)) + if (getMappingSymbolKind(MappingSymbols, Index) != 'd') break; } return Index; @@ -1023,10 +1041,19 @@ static void dumpELFData(uint64_t SectionAddr, uint64_t Index, uint64_t End, } static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, - MCContext &Ctx, MCDisassembler *DisAsm, + MCContext &Ctx, MCDisassembler *PrimaryDisAsm, + MCDisassembler *SecondaryDisAsm, const MCInstrAnalysis *MIA, MCInstPrinter *IP, - const MCSubtargetInfo *STI, PrettyPrinter &PIP, + const MCSubtargetInfo *PrimarySTI, + const MCSubtargetInfo *SecondarySTI, + PrettyPrinter &PIP, SourcePrinter &SP, bool InlineRelocs) { + const MCSubtargetInfo *STI = PrimarySTI; + MCDisassembler *DisAsm = PrimaryDisAsm; + bool PrimaryIsThumb = false; + if (isArmElf(Obj)) + PrimaryIsThumb = STI->checkFeatures("+thumb-mode"); + std::map> RelocMap; if (InlineRelocs) RelocMap = getRelocsMap(*Obj); @@ -1113,25 +1140,23 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, // Get the list of all the symbols in this section. 
SectionSymbolsTy &Symbols = AllSymbols[Section]; - std::vector DataMappingSymsAddr; - std::vector TextMappingSymsAddr; - if (isArmElf(Obj)) { + std::vector MappingSymbols; + if (hasMappingSymbols(Obj)) { for (const auto &Symb : Symbols) { uint64_t Address = std::get<0>(Symb); StringRef Name = std::get<1>(Symb); if (Name.startswith("$d")) - DataMappingSymsAddr.push_back(Address - SectionAddr); + MappingSymbols.emplace_back(Address - SectionAddr, 'd'); if (Name.startswith("$x")) - TextMappingSymsAddr.push_back(Address - SectionAddr); + MappingSymbols.emplace_back(Address - SectionAddr, 'x'); if (Name.startswith("$a")) - TextMappingSymsAddr.push_back(Address - SectionAddr); + MappingSymbols.emplace_back(Address - SectionAddr, 'a'); if (Name.startswith("$t")) - TextMappingSymsAddr.push_back(Address - SectionAddr); + MappingSymbols.emplace_back(Address - SectionAddr, 't'); } } - llvm::sort(DataMappingSymsAddr); - llvm::sort(TextMappingSymsAddr); + llvm::sort(MappingSymbols); if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) { // AMDGPU disassembler uses symbolizer for printing labels @@ -1269,19 +1294,18 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, Index = End; } - bool CheckARMELFData = isArmElf(Obj) && + bool CheckARMELFData = hasMappingSymbols(Obj) && std::get<2>(Symbols[SI]) != ELF::STT_OBJECT && !DisassembleAll; while (Index < End) { - // AArch64 ELF binaries can interleave data and text in the same - // section. We rely on the markers introduced to understand what we - // need to dump. If the data marker is within a function, it is + // ARM and AArch64 ELF binaries can interleave data and text in the + // same section. We rely on the markers introduced to understand what + // we need to dump. If the data marker is within a function, it is // denoted as a word/short etc. 
if (CheckARMELFData && - std::binary_search(DataMappingSymsAddr.begin(), - DataMappingSymsAddr.end(), Index)) { + getMappingSymbolKind(MappingSymbols, Index) == 'd') { Index = dumpARMELFData(SectionAddr, Index, End, Obj, Bytes, - TextMappingSymsAddr); + MappingSymbols); continue; } @@ -1302,6 +1326,16 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, } } + if (SecondarySTI) { + if (getMappingSymbolKind(MappingSymbols, Index) == 'a') { + STI = PrimaryIsThumb ? SecondarySTI : PrimarySTI; + DisAsm = PrimaryIsThumb ? SecondaryDisAsm : PrimaryDisAsm; + } else if (getMappingSymbolKind(MappingSymbols, Index) == 't') { + STI = PrimaryIsThumb ? PrimarySTI : SecondarySTI; + DisAsm = PrimaryIsThumb ? PrimaryDisAsm : SecondaryDisAsm; + } + } + // Disassemble a real instruction or a data when disassemble all is // provided MCInst Inst; @@ -1459,6 +1493,22 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) { report_error(Obj->getFileName(), "no disassembler for target " + TripleName); + // If we have an ARM object file, we need a second disassembler, because + // ARM CPUs have two different instruction sets: ARM mode, and Thumb mode. + // We use mapping symbols to switch between the two disassemblers, where + // appropriate.
+ std::unique_ptr SecondaryDisAsm; + std::unique_ptr SecondarySTI; + if (isArmElf(Obj) && !STI->checkFeatures("+mclass")) { + if (STI->checkFeatures("+thumb-mode")) + Features.AddFeature("-thumb-mode"); + else + Features.AddFeature("+thumb-mode"); + SecondarySTI.reset(TheTarget->createMCSubtargetInfo(TripleName, MCPU, + Features.getString())); + SecondaryDisAsm.reset(TheTarget->createMCDisassembler(*SecondarySTI, Ctx)); + } + std::unique_ptr MIA( TheTarget->createMCInstrAnalysis(MII.get())); @@ -1477,8 +1527,9 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) { if (!IP->applyTargetSpecificCLOption(Opt)) error("Unrecognized disassembler option: " + Opt); - disassembleObject(TheTarget, Obj, Ctx, DisAsm.get(), MIA.get(), IP.get(), - STI.get(), PIP, SP, InlineRelocs); + disassembleObject(TheTarget, Obj, Ctx, DisAsm.get(), SecondaryDisAsm.get(), + MIA.get(), IP.get(), STI.get(), SecondarySTI.get(), PIP, + SP, InlineRelocs); } void printRelocations(const ObjectFile *Obj) {