1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00
llvm-mirror/lib/MC/MCExpr.cpp

1023 lines
36 KiB
C++
Raw Normal View History

//===- MCExpr.cpp - Assembly Level Expression Implementation --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCExpr.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Config/llvm-config.h"
[RISCV] Add symbol diff relocation support for RISC-V For RISC-V it is desirable to have relaxation happen in the linker once addresses are known, and as such the size between two instructions/byte sequences in a section could change. For most assembler expressions, this is fine, as the absolute address results in the expression being converted to a fixup, and finally relocations. However, for expressions such as .quad .L2-.L1, the assembler folds this down to a constant once fragments are laid out, under the assumption that the difference can no longer change, although in the case of linker relaxation the differences can change at link time, so the constant is incorrect. One place where this commonly appears is in debug information, where the size of a function expression is in a form similar to the above. This patch extends the assembler to allow an AsmBackend to declare that it does not want the assembler to fold down this expression, and instead generate a pair of relocations that allow the linker to carry out the calculation. In this case, the expression is not folded, but when it comes to emitting a fixup, the generic FK_Data_* fixups are converted into a pair, one for the addition half, one for the subtraction, and this is passed to the relocation generating methods as usual. I have named these FK_Data_Add_* and FK_Data_Sub_* to indicate which half these are for. For RISC-V, which supports this via e.g. the R_RISCV_ADD64, R_RISCV_SUB64 pair of relocations, these are also set to always emit relocations relative to local symbols rather than section offsets. This is to deal with the fact that if relocations were calculated on e.g. .text+8 and .text+4, the result 12 would be stored rather than 4 as both addends are added in the linker. Differential Revision: https://reviews.llvm.org/D45181 Patch by Simon Cook. llvm-svn: 333079
2018-05-23 14:36:18 +02:00
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
using namespace llvm;
#define DEBUG_TYPE "mcexpr"
// File-local statistics. The counter lives in a nested `stats` namespace inside
// the anonymous namespace, so it is referred to as stats::MCExprEvaluate and is
// not visible outside this translation unit. It is registered under the
// DEBUG_TYPE defined just above ("mcexpr").
namespace {
namespace stats {
// NOTE(review): the increment site is not in this part of the file; presumably
// bumped once per relocatable-evaluation call — confirm.
STATISTIC(MCExprEvaluate, "Number of MCExpr evaluations");
} // end namespace stats
} // end anonymous namespace
[mips] Use MipsMCExpr instead of MCSymbolRefExpr for all relocations. Summary: This is much closer to the way MIPS relocation expressions work (%hi(foo + 2) rather than %hi(foo) + 2) and removes the need for the various bodges in MipsAsmParser::evaluateRelocExpr(). Removing those bodges ensures that the constant stored in MCValue is the full 32 or 64-bit (depending on ABI) offset from the symbol. This will be used to correct the %hi/%lo matching needed to sort the relocation table correctly. As part of this: * Gave MCExpr::print() the ability to omit parenthesis when emitting a symbol reference inside a MipsMCExpr operator like %hi(X). Without this we print things like %lo(($L1)). * %hi(%neg(%gprel(X))) is now three MipsMCExpr's instead of one. Most of the related special cases have been removed or moved to MipsMCExpr. We can remove the rest as we gain support for the less common relocations when they are not part of this specific combination. * Renamed MipsMCExpr::VariantKind and the enum prefix ('VK_') to avoid confusion with MCSymbolRefExpr::VariantKind and its prefix (also 'VK_'). * fixup_Mips_GOT_Local and fixup_Mips_GOT_Global were found to be identical and merged into fixup_Mips_GOT. * MO_GOT16 and MO_GOT turned out to be identical and have been merged into MO_GOT. * VK_Mips_GOT and VK_Mips_GOT16 turned out to be the same thing so they have been merged into MEK_GOT Reviewers: sdardis Subscribers: dsanders, sdardis, llvm-commits Differential Revision: http://reviews.llvm.org/D19716 llvm-svn: 268379
2016-05-03 15:35:44 +02:00
void MCExpr::print(raw_ostream &OS, const MCAsmInfo *MAI, bool InParens) const {
switch (getKind()) {
case MCExpr::Target:
return cast<MCTargetExpr>(this)->printImpl(OS, MAI);
case MCExpr::Constant: {
auto Value = cast<MCConstantExpr>(*this).getValue();
auto PrintInHex = cast<MCConstantExpr>(*this).useHexFormat();
auto SizeInBytes = cast<MCConstantExpr>(*this).getSizeInBytes();
if (Value < 0 && MAI && !MAI->supportsSignedData())
PrintInHex = true;
if (PrintInHex)
switch (SizeInBytes) {
default:
OS << "0x" << Twine::utohexstr(Value);
break;
case 1:
OS << format("0x%02" PRIx64, Value);
break;
case 2:
OS << format("0x%04" PRIx64, Value);
break;
case 4:
OS << format("0x%08" PRIx64, Value);
break;
case 8:
OS << format("0x%016" PRIx64, Value);
break;
}
else
OS << Value;
return;
}
case MCExpr::SymbolRef: {
const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(*this);
const MCSymbol &Sym = SRE.getSymbol();
// Parenthesize names that start with $ so that they don't look like
// absolute names.
[mips] Use MipsMCExpr instead of MCSymbolRefExpr for all relocations. Summary: This is much closer to the way MIPS relocation expressions work (%hi(foo + 2) rather than %hi(foo) + 2) and removes the need for the various bodges in MipsAsmParser::evaluateRelocExpr(). Removing those bodges ensures that the constant stored in MCValue is the full 32 or 64-bit (depending on ABI) offset from the symbol. This will be used to correct the %hi/%lo matching needed to sort the relocation table correctly. As part of this: * Gave MCExpr::print() the ability to omit parenthesis when emitting a symbol reference inside a MipsMCExpr operator like %hi(X). Without this we print things like %lo(($L1)). * %hi(%neg(%gprel(X))) is now three MipsMCExpr's instead of one. Most of the related special cases have been removed or moved to MipsMCExpr. We can remove the rest as we gain support for the less common relocations when they are not part of this specific combination. * Renamed MipsMCExpr::VariantKind and the enum prefix ('VK_') to avoid confusion with MCSymbolRefExpr::VariantKind and its prefix (also 'VK_'). * fixup_Mips_GOT_Local and fixup_Mips_GOT_Global were found to be identical and merged into fixup_Mips_GOT. * MO_GOT16 and MO_GOT turned out to be identical and have been merged into MO_GOT. * VK_Mips_GOT and VK_Mips_GOT16 turned out to be the same thing so they have been merged into MEK_GOT Reviewers: sdardis Subscribers: dsanders, sdardis, llvm-commits Differential Revision: http://reviews.llvm.org/D19716 llvm-svn: 268379
2016-05-03 15:35:44 +02:00
bool UseParens =
!InParens && !Sym.getName().empty() && Sym.getName()[0] == '$';
if (UseParens) {
OS << '(';
Sym.print(OS, MAI);
OS << ')';
} else
Sym.print(OS, MAI);
const MCSymbolRefExpr::VariantKind Kind = SRE.getKind();
if (Kind != MCSymbolRefExpr::VK_None) {
if (MAI && MAI->useParensForSymbolVariant()) // ARM
OS << '(' << MCSymbolRefExpr::getVariantKindName(Kind) << ')';
else
OS << '@' << MCSymbolRefExpr::getVariantKindName(Kind);
}
return;
}
case MCExpr::Unary: {
const MCUnaryExpr &UE = cast<MCUnaryExpr>(*this);
switch (UE.getOpcode()) {
case MCUnaryExpr::LNot: OS << '!'; break;
case MCUnaryExpr::Minus: OS << '-'; break;
case MCUnaryExpr::Not: OS << '~'; break;
case MCUnaryExpr::Plus: OS << '+'; break;
}
bool Binary = UE.getSubExpr()->getKind() == MCExpr::Binary;
if (Binary) OS << "(";
UE.getSubExpr()->print(OS, MAI);
if (Binary) OS << ")";
return;
}
case MCExpr::Binary: {
const MCBinaryExpr &BE = cast<MCBinaryExpr>(*this);
// Only print parens around the LHS if it is non-trivial.
if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS())) {
BE.getLHS()->print(OS, MAI);
} else {
OS << '(';
BE.getLHS()->print(OS, MAI);
OS << ')';
}
switch (BE.getOpcode()) {
case MCBinaryExpr::Add:
// Print "X-42" instead of "X+-42".
if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) {
if (RHSC->getValue() < 0) {
OS << RHSC->getValue();
return;
}
}
OS << '+';
break;
case MCBinaryExpr::AShr: OS << ">>"; break;
case MCBinaryExpr::And: OS << '&'; break;
case MCBinaryExpr::Div: OS << '/'; break;
case MCBinaryExpr::EQ: OS << "=="; break;
case MCBinaryExpr::GT: OS << '>'; break;
case MCBinaryExpr::GTE: OS << ">="; break;
case MCBinaryExpr::LAnd: OS << "&&"; break;
case MCBinaryExpr::LOr: OS << "||"; break;
case MCBinaryExpr::LShr: OS << ">>"; break;
case MCBinaryExpr::LT: OS << '<'; break;
case MCBinaryExpr::LTE: OS << "<="; break;
case MCBinaryExpr::Mod: OS << '%'; break;
case MCBinaryExpr::Mul: OS << '*'; break;
case MCBinaryExpr::NE: OS << "!="; break;
case MCBinaryExpr::Or: OS << '|'; break;
case MCBinaryExpr::OrNot: OS << '!'; break;
case MCBinaryExpr::Shl: OS << "<<"; break;
case MCBinaryExpr::Sub: OS << '-'; break;
case MCBinaryExpr::Xor: OS << '^'; break;
}
// Only print parens around the LHS if it is non-trivial.
if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
BE.getRHS()->print(OS, MAI);
} else {
OS << '(';
BE.getRHS()->print(OS, MAI);
OS << ')';
}
return;
}
}
llvm_unreachable("Invalid expression kind!");
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Debugging aid: stream this expression, followed by a newline, to dbgs().
/// Only compiled when assertions are enabled or LLVM_ENABLE_DUMP is defined.
LLVM_DUMP_METHOD void MCExpr::dump() const {
  raw_ostream &Out = dbgs();
  Out << *this;
  Out << '\n';
}
#endif
/* *** */
/// Build a binary expression node `LHS <Opc> RHS` tagged with \p Loc.
/// The node is allocated through the placement-new overload that takes \p Ctx.
const MCBinaryExpr *MCBinaryExpr::create(Opcode Opc, const MCExpr *LHS,
                                         const MCExpr *RHS, MCContext &Ctx,
                                         SMLoc Loc) {
  const MCBinaryExpr *Node = new (Ctx) MCBinaryExpr(Opc, LHS, RHS, Loc);
  return Node;
}
/// Build a unary expression node applying \p Opc to \p Expr, tagged with
/// \p Loc. The node is allocated through the placement-new overload that takes
/// \p Ctx.
const MCUnaryExpr *MCUnaryExpr::create(Opcode Opc, const MCExpr *Expr,
                                       MCContext &Ctx, SMLoc Loc) {
  const MCUnaryExpr *Node = new (Ctx) MCUnaryExpr(Opc, Expr, Loc);
  return Node;
}
/// Build a constant expression node holding \p Value.
///
/// \p PrintInHex and \p SizeInBytes are stored with the node; MCExpr::print()
/// reads them back to choose between decimal and (optionally fixed-width) hex
/// output. The node is allocated through the placement-new overload that takes
/// \p Ctx.
const MCConstantExpr *MCConstantExpr::create(int64_t Value, MCContext &Ctx,
                                             bool PrintInHex,
                                             unsigned SizeInBytes) {
  const MCConstantExpr *Node =
      new (Ctx) MCConstantExpr(Value, PrintInHex, SizeInBytes);
  return Node;
}
/* *** */
/// Construct a reference to \p Symbol carrying variant \p Kind at \p Loc.
///
/// \p Kind and MAI->hasSubsectionsViaSymbols() are packed into the base-class
/// subclass data via encodeSubclassData(). Note that \p MAI is dereferenced
/// unconditionally in the initializer list (before the body runs), so it must
/// be non-null. \p Symbol must be non-null as well; this is only checked by the
/// assert below.
MCSymbolRefExpr::MCSymbolRefExpr(const MCSymbol *Symbol, VariantKind Kind,
const MCAsmInfo *MAI, SMLoc Loc)
: MCExpr(MCExpr::SymbolRef, Loc,
encodeSubclassData(Kind, MAI->hasSubsectionsViaSymbols())),
Symbol(Symbol) {
assert(Symbol);
}
/// Build a symbol reference to \p Sym with variant \p Kind, tagged with
/// \p Loc. The MCAsmInfo handed to the constructor is taken from \p Ctx, and
/// the node is allocated through the placement-new overload that takes \p Ctx.
const MCSymbolRefExpr *MCSymbolRefExpr::create(const MCSymbol *Sym,
                                               VariantKind Kind,
                                               MCContext &Ctx, SMLoc Loc) {
  const MCAsmInfo *MAI = Ctx.getAsmInfo();
  return new (Ctx) MCSymbolRefExpr(Sym, Kind, MAI, Loc);
}
/// Convenience overload: look up (or create) the symbol called \p Name in
/// \p Ctx and build a reference to it with variant \p Kind. No source location
/// is passed on; the symbol-taking overload's default is used.
const MCSymbolRefExpr *MCSymbolRefExpr::create(StringRef Name, VariantKind Kind,
                                               MCContext &Ctx) {
  const MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
  return create(Sym, Kind, Ctx);
}
StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
switch (Kind) {
case VK_Invalid: return "<<invalid>>";
case VK_None: return "<<none>>";
case VK_DTPOFF: return "DTPOFF";
case VK_DTPREL: return "DTPREL";
case VK_GOT: return "GOT";
case VK_GOTOFF: return "GOTOFF";
case VK_GOTREL: return "GOTREL";
case VK_PCREL: return "PCREL";
case VK_GOTPCREL: return "GOTPCREL";
case VK_GOTTPOFF: return "GOTTPOFF";
case VK_INDNTPOFF: return "INDNTPOFF";
case VK_NTPOFF: return "NTPOFF";
case VK_GOTNTPOFF: return "GOTNTPOFF";
case VK_PLT: return "PLT";
case VK_TLSGD: return "TLSGD";
2010-10-28 17:02:40 +02:00
case VK_TLSLD: return "TLSLD";
2010-10-28 16:37:09 +02:00
case VK_TLSLDM: return "TLSLDM";
case VK_TPOFF: return "TPOFF";
case VK_TPREL: return "TPREL";
case VK_TLSCALL: return "tlscall";
case VK_TLSDESC: return "tlsdesc";
case VK_TLVP: return "TLVP";
case VK_TLVPPAGE: return "TLVPPAGE";
case VK_TLVPPAGEOFF: return "TLVPPAGEOFF";
case VK_PAGE: return "PAGE";
case VK_PAGEOFF: return "PAGEOFF";
case VK_GOTPAGE: return "GOTPAGE";
case VK_GOTPAGEOFF: return "GOTPAGEOFF";
case VK_SECREL: return "SECREL32";
case VK_SIZE: return "SIZE";
case VK_WEAKREF: return "WEAKREF";
case VK_X86_ABS8: return "ABS8";
case VK_X86_PLTOFF: return "PLTOFF";
case VK_ARM_NONE: return "none";
case VK_ARM_GOT_PREL: return "GOT_PREL";
case VK_ARM_TARGET1: return "target1";
case VK_ARM_TARGET2: return "target2";
case VK_ARM_PREL31: return "prel31";
case VK_ARM_SBREL: return "sbrel";
case VK_ARM_TLSLDO: return "tlsldo";
case VK_ARM_TLSDESCSEQ: return "tlsdescseq";
case VK_AVR_NONE: return "none";
case VK_AVR_LO8: return "lo8";
case VK_AVR_HI8: return "hi8";
case VK_AVR_HLO8: return "hlo8";
case VK_AVR_DIFF8: return "diff8";
case VK_AVR_DIFF16: return "diff16";
case VK_AVR_DIFF32: return "diff32";
case VK_AVR_PM: return "pm";
case VK_PPC_LO: return "l";
case VK_PPC_HI: return "h";
case VK_PPC_HA: return "ha";
case VK_PPC_HIGH: return "high";
case VK_PPC_HIGHA: return "higha";
case VK_PPC_HIGHER: return "higher";
case VK_PPC_HIGHERA: return "highera";
case VK_PPC_HIGHEST: return "highest";
case VK_PPC_HIGHESTA: return "highesta";
case VK_PPC_GOT_LO: return "got@l";
case VK_PPC_GOT_HI: return "got@h";
case VK_PPC_GOT_HA: return "got@ha";
case VK_PPC_TOCBASE: return "tocbase";
case VK_PPC_TOC: return "toc";
case VK_PPC_TOC_LO: return "toc@l";
case VK_PPC_TOC_HI: return "toc@h";
case VK_PPC_TOC_HA: return "toc@ha";
case VK_PPC_U: return "u";
case VK_PPC_L: return "l";
case VK_PPC_DTPMOD: return "dtpmod";
case VK_PPC_TPREL_LO: return "tprel@l";
case VK_PPC_TPREL_HI: return "tprel@h";
case VK_PPC_TPREL_HA: return "tprel@ha";
case VK_PPC_TPREL_HIGH: return "tprel@high";
case VK_PPC_TPREL_HIGHA: return "tprel@higha";
case VK_PPC_TPREL_HIGHER: return "tprel@higher";
case VK_PPC_TPREL_HIGHERA: return "tprel@highera";
case VK_PPC_TPREL_HIGHEST: return "tprel@highest";
case VK_PPC_TPREL_HIGHESTA: return "tprel@highesta";
case VK_PPC_DTPREL_LO: return "dtprel@l";
case VK_PPC_DTPREL_HI: return "dtprel@h";
case VK_PPC_DTPREL_HA: return "dtprel@ha";
case VK_PPC_DTPREL_HIGH: return "dtprel@high";
case VK_PPC_DTPREL_HIGHA: return "dtprel@higha";
case VK_PPC_DTPREL_HIGHER: return "dtprel@higher";
case VK_PPC_DTPREL_HIGHERA: return "dtprel@highera";
case VK_PPC_DTPREL_HIGHEST: return "dtprel@highest";
case VK_PPC_DTPREL_HIGHESTA: return "dtprel@highesta";
case VK_PPC_GOT_TPREL: return "got@tprel";
case VK_PPC_GOT_TPREL_LO: return "got@tprel@l";
case VK_PPC_GOT_TPREL_HI: return "got@tprel@h";
case VK_PPC_GOT_TPREL_HA: return "got@tprel@ha";
case VK_PPC_GOT_DTPREL: return "got@dtprel";
case VK_PPC_GOT_DTPREL_LO: return "got@dtprel@l";
case VK_PPC_GOT_DTPREL_HI: return "got@dtprel@h";
case VK_PPC_GOT_DTPREL_HA: return "got@dtprel@ha";
case VK_PPC_TLS: return "tls";
case VK_PPC_GOT_TLSGD: return "got@tlsgd";
case VK_PPC_GOT_TLSGD_LO: return "got@tlsgd@l";
case VK_PPC_GOT_TLSGD_HI: return "got@tlsgd@h";
case VK_PPC_GOT_TLSGD_HA: return "got@tlsgd@ha";
[PowerPC] Revert r185476 and fix up TLS variant kinds In the commit message to r185476 I wrote: >The PowerPC-specific modifiers VK_PPC_TLSGD and VK_PPC_TLSLD >correspond exactly to the generic modifiers VK_TLSGD and VK_TLSLD. >This causes some confusion with the asm parser, since VK_PPC_TLSGD >is output as @tlsgd, which is then read back in as VK_TLSGD. > >To avoid this confusion, this patch removes the PowerPC-specific >modifiers and uses the generic modifiers throughout. (The only >drawback is that the generic modifiers are printed in upper case >while the usual convention on PowerPC is to use lower-case modifiers. >But this is just a cosmetic issue.) This was unfortunately incorrect, there is is fact another, serious drawback to using the default VK_TLSLD/VK_TLSGD variant kinds: using these causes ELFObjectWriter::RelocNeedsGOT to return true, which in turn causes the ELFObjectWriter to emit an undefined reference to _GLOBAL_OFFSET_TABLE_. This is a problem on powerpc64, because it uses the TOC instead of the GOT, and the linker does not provide _GLOBAL_OFFSET_TABLE_, so the symbol remains undefined. This means shared libraries using TLS built with the integrated assembler are currently broken. While the whole RelocNeedsGOT / _GLOBAL_OFFSET_TABLE_ situation probably ought to be properly fixed at some point, for now I'm simply reverting the r185476 commit. Now this in turn exposes the breakage of handling @tlsgd/@tlsld in the asm parser that this check-in was originally intended to fix. To avoid this regression, I'm also adding a different fix for this problem: while common code now parses @tlsgd as VK_TLSGD, a special hack in the asm parser translates this code to the platform-specific VK_PPC_TLSGD that the back-end now expects. While this is not really pretty, it's self-contained and shouldn't hurt anything else for now. One the underlying problem is fixed, this hack can be reverted again. llvm-svn: 185945
2013-07-09 18:41:09 +02:00
case VK_PPC_TLSGD: return "tlsgd";
case VK_PPC_AIX_TLSGD:
return "gd";
case VK_PPC_AIX_TLSGDM:
return "m";
case VK_PPC_GOT_TLSLD: return "got@tlsld";
case VK_PPC_GOT_TLSLD_LO: return "got@tlsld@l";
case VK_PPC_GOT_TLSLD_HI: return "got@tlsld@h";
case VK_PPC_GOT_TLSLD_HA: return "got@tlsld@ha";
case VK_PPC_GOT_PCREL:
return "got@pcrel";
case VK_PPC_GOT_TLSGD_PCREL:
return "got@tlsgd@pcrel";
case VK_PPC_GOT_TLSLD_PCREL:
return "got@tlsld@pcrel";
case VK_PPC_GOT_TPREL_PCREL:
return "got@tprel@pcrel";
case VK_PPC_TLS_PCREL:
return "tls@pcrel";
[PowerPC] Revert r185476 and fix up TLS variant kinds In the commit message to r185476 I wrote: >The PowerPC-specific modifiers VK_PPC_TLSGD and VK_PPC_TLSLD >correspond exactly to the generic modifiers VK_TLSGD and VK_TLSLD. >This causes some confusion with the asm parser, since VK_PPC_TLSGD >is output as @tlsgd, which is then read back in as VK_TLSGD. > >To avoid this confusion, this patch removes the PowerPC-specific >modifiers and uses the generic modifiers throughout. (The only >drawback is that the generic modifiers are printed in upper case >while the usual convention on PowerPC is to use lower-case modifiers. >But this is just a cosmetic issue.) This was unfortunately incorrect, there is is fact another, serious drawback to using the default VK_TLSLD/VK_TLSGD variant kinds: using these causes ELFObjectWriter::RelocNeedsGOT to return true, which in turn causes the ELFObjectWriter to emit an undefined reference to _GLOBAL_OFFSET_TABLE_. This is a problem on powerpc64, because it uses the TOC instead of the GOT, and the linker does not provide _GLOBAL_OFFSET_TABLE_, so the symbol remains undefined. This means shared libraries using TLS built with the integrated assembler are currently broken. While the whole RelocNeedsGOT / _GLOBAL_OFFSET_TABLE_ situation probably ought to be properly fixed at some point, for now I'm simply reverting the r185476 commit. Now this in turn exposes the breakage of handling @tlsgd/@tlsld in the asm parser that this check-in was originally intended to fix. To avoid this regression, I'm also adding a different fix for this problem: while common code now parses @tlsgd as VK_TLSGD, a special hack in the asm parser translates this code to the platform-specific VK_PPC_TLSGD that the back-end now expects. While this is not really pretty, it's self-contained and shouldn't hurt anything else for now. One the underlying problem is fixed, this hack can be reverted again. llvm-svn: 185945
2013-07-09 18:41:09 +02:00
case VK_PPC_TLSLD: return "tlsld";
case VK_PPC_LOCAL: return "local";
case VK_PPC_NOTOC: return "notoc";
case VK_PPC_PCREL_OPT: return "<<invalid>>";
case VK_COFF_IMGREL32: return "IMGREL";
case VK_Hexagon_LO16: return "LO16";
case VK_Hexagon_HI16: return "HI16";
case VK_Hexagon_GPREL: return "GPREL";
case VK_Hexagon_GD_GOT: return "GDGOT";
case VK_Hexagon_LD_GOT: return "LDGOT";
case VK_Hexagon_GD_PLT: return "GDPLT";
case VK_Hexagon_LD_PLT: return "LDPLT";
case VK_Hexagon_IE: return "IE";
case VK_Hexagon_IE_GOT: return "IEGOT";
case VK_WASM_TYPEINDEX: return "TYPEINDEX";
case VK_WASM_MBREL: return "MBREL";
case VK_WASM_TLSREL: return "TLSREL";
case VK_WASM_TBREL: return "TBREL";
case VK_AMDGPU_GOTPCREL32_LO: return "gotpcrel32@lo";
case VK_AMDGPU_GOTPCREL32_HI: return "gotpcrel32@hi";
case VK_AMDGPU_REL32_LO: return "rel32@lo";
case VK_AMDGPU_REL32_HI: return "rel32@hi";
case VK_AMDGPU_REL64: return "rel64";
case VK_AMDGPU_ABS32_LO: return "abs32@lo";
case VK_AMDGPU_ABS32_HI: return "abs32@hi";
case VK_VE_HI32: return "hi";
case VK_VE_LO32: return "lo";
case VK_VE_PC_HI32: return "pc_hi";
case VK_VE_PC_LO32: return "pc_lo";
case VK_VE_GOT_HI32: return "got_hi";
case VK_VE_GOT_LO32: return "got_lo";
case VK_VE_GOTOFF_HI32: return "gotoff_hi";
case VK_VE_GOTOFF_LO32: return "gotoff_lo";
case VK_VE_PLT_HI32: return "plt_hi";
case VK_VE_PLT_LO32: return "plt_lo";
case VK_VE_TLS_GD_HI32: return "tls_gd_hi";
case VK_VE_TLS_GD_LO32: return "tls_gd_lo";
case VK_VE_TPOFF_HI32: return "tpoff_hi";
case VK_VE_TPOFF_LO32: return "tpoff_lo";
}
llvm_unreachable("Invalid variant kind");
}
/// Map a variant spelling to its VariantKind.
///
/// The lookup is case-insensitive: \p Name is lower-cased before matching.
/// Returns VK_Invalid when the spelling is not in the table below.
///
/// The table is not a full inverse of getVariantKindName(): where two kinds
/// share a spelling only the first listed entry can be returned. In particular
/// "l" always resolves to VK_PPC_LO; the former second entry mapping "l" to
/// VK_PPC_L could never match and has been dropped.
MCSymbolRefExpr::VariantKind
MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
  return StringSwitch<VariantKind>(Name.lower())
    .Case("dtprel", VK_DTPREL)
    .Case("dtpoff", VK_DTPOFF)
    .Case("got", VK_GOT)
    .Case("gotoff", VK_GOTOFF)
    .Case("gotrel", VK_GOTREL)
    .Case("pcrel", VK_PCREL)
    .Case("gotpcrel", VK_GOTPCREL)
    .Case("gottpoff", VK_GOTTPOFF)
    .Case("indntpoff", VK_INDNTPOFF)
    .Case("ntpoff", VK_NTPOFF)
    .Case("gotntpoff", VK_GOTNTPOFF)
    .Case("plt", VK_PLT)
    .Case("tlscall", VK_TLSCALL)
    .Case("tlsdesc", VK_TLSDESC)
    .Case("tlsgd", VK_TLSGD)
    .Case("tlsld", VK_TLSLD)
    .Case("tlsldm", VK_TLSLDM)
    .Case("tpoff", VK_TPOFF)
    .Case("tprel", VK_TPREL)
    .Case("tlvp", VK_TLVP)
    .Case("tlvppage", VK_TLVPPAGE)
    .Case("tlvppageoff", VK_TLVPPAGEOFF)
    .Case("page", VK_PAGE)
    .Case("pageoff", VK_PAGEOFF)
    .Case("gotpage", VK_GOTPAGE)
    .Case("gotpageoff", VK_GOTPAGEOFF)
    .Case("imgrel", VK_COFF_IMGREL32)
    .Case("secrel32", VK_SECREL)
    .Case("size", VK_SIZE)
    .Case("abs8", VK_X86_ABS8)
    .Case("pltoff", VK_X86_PLTOFF)
    .Case("l", VK_PPC_LO)
    .Case("h", VK_PPC_HI)
    .Case("ha", VK_PPC_HA)
    .Case("high", VK_PPC_HIGH)
    .Case("higha", VK_PPC_HIGHA)
    .Case("higher", VK_PPC_HIGHER)
    .Case("highera", VK_PPC_HIGHERA)
    .Case("highest", VK_PPC_HIGHEST)
    .Case("highesta", VK_PPC_HIGHESTA)
    .Case("got@l", VK_PPC_GOT_LO)
    .Case("got@h", VK_PPC_GOT_HI)
    .Case("got@ha", VK_PPC_GOT_HA)
    .Case("local", VK_PPC_LOCAL)
    .Case("tocbase", VK_PPC_TOCBASE)
    .Case("toc", VK_PPC_TOC)
    .Case("toc@l", VK_PPC_TOC_LO)
    .Case("toc@h", VK_PPC_TOC_HI)
    .Case("toc@ha", VK_PPC_TOC_HA)
    .Case("u", VK_PPC_U)
    // No entry for VK_PPC_L: its spelling "l" is already claimed by VK_PPC_LO
    // above, and the first matching case wins.
    .Case("tls", VK_PPC_TLS)
    .Case("dtpmod", VK_PPC_DTPMOD)
    .Case("tprel@l", VK_PPC_TPREL_LO)
    .Case("tprel@h", VK_PPC_TPREL_HI)
    .Case("tprel@ha", VK_PPC_TPREL_HA)
    .Case("tprel@high", VK_PPC_TPREL_HIGH)
    .Case("tprel@higha", VK_PPC_TPREL_HIGHA)
    .Case("tprel@higher", VK_PPC_TPREL_HIGHER)
    .Case("tprel@highera", VK_PPC_TPREL_HIGHERA)
    .Case("tprel@highest", VK_PPC_TPREL_HIGHEST)
    .Case("tprel@highesta", VK_PPC_TPREL_HIGHESTA)
    .Case("dtprel@l", VK_PPC_DTPREL_LO)
    .Case("dtprel@h", VK_PPC_DTPREL_HI)
    .Case("dtprel@ha", VK_PPC_DTPREL_HA)
    .Case("dtprel@high", VK_PPC_DTPREL_HIGH)
    .Case("dtprel@higha", VK_PPC_DTPREL_HIGHA)
    .Case("dtprel@higher", VK_PPC_DTPREL_HIGHER)
    .Case("dtprel@highera", VK_PPC_DTPREL_HIGHERA)
    .Case("dtprel@highest", VK_PPC_DTPREL_HIGHEST)
    .Case("dtprel@highesta", VK_PPC_DTPREL_HIGHESTA)
    .Case("got@tprel", VK_PPC_GOT_TPREL)
    .Case("got@tprel@l", VK_PPC_GOT_TPREL_LO)
    .Case("got@tprel@h", VK_PPC_GOT_TPREL_HI)
    .Case("got@tprel@ha", VK_PPC_GOT_TPREL_HA)
    .Case("got@dtprel", VK_PPC_GOT_DTPREL)
    .Case("got@dtprel@l", VK_PPC_GOT_DTPREL_LO)
    .Case("got@dtprel@h", VK_PPC_GOT_DTPREL_HI)
    .Case("got@dtprel@ha", VK_PPC_GOT_DTPREL_HA)
    .Case("got@tlsgd", VK_PPC_GOT_TLSGD)
    .Case("got@tlsgd@l", VK_PPC_GOT_TLSGD_LO)
    .Case("got@tlsgd@h", VK_PPC_GOT_TLSGD_HI)
    .Case("got@tlsgd@ha", VK_PPC_GOT_TLSGD_HA)
    .Case("got@tlsld", VK_PPC_GOT_TLSLD)
    .Case("got@tlsld@l", VK_PPC_GOT_TLSLD_LO)
    .Case("got@tlsld@h", VK_PPC_GOT_TLSLD_HI)
    .Case("got@tlsld@ha", VK_PPC_GOT_TLSLD_HA)
    .Case("got@pcrel", VK_PPC_GOT_PCREL)
    .Case("got@tlsgd@pcrel", VK_PPC_GOT_TLSGD_PCREL)
    .Case("got@tlsld@pcrel", VK_PPC_GOT_TLSLD_PCREL)
    .Case("got@tprel@pcrel", VK_PPC_GOT_TPREL_PCREL)
    .Case("tls@pcrel", VK_PPC_TLS_PCREL)
    .Case("notoc", VK_PPC_NOTOC)
    .Case("gdgot", VK_Hexagon_GD_GOT)
    .Case("gdplt", VK_Hexagon_GD_PLT)
    .Case("iegot", VK_Hexagon_IE_GOT)
    .Case("ie", VK_Hexagon_IE)
    .Case("ldgot", VK_Hexagon_LD_GOT)
    .Case("ldplt", VK_Hexagon_LD_PLT)
    .Case("none", VK_ARM_NONE)
    .Case("got_prel", VK_ARM_GOT_PREL)
    .Case("target1", VK_ARM_TARGET1)
    .Case("target2", VK_ARM_TARGET2)
    .Case("prel31", VK_ARM_PREL31)
    .Case("sbrel", VK_ARM_SBREL)
    .Case("tlsldo", VK_ARM_TLSLDO)
    .Case("lo8", VK_AVR_LO8)
    .Case("hi8", VK_AVR_HI8)
    .Case("hlo8", VK_AVR_HLO8)
    .Case("typeindex", VK_WASM_TYPEINDEX)
    .Case("tbrel", VK_WASM_TBREL)
    .Case("mbrel", VK_WASM_MBREL)
    .Case("tlsrel", VK_WASM_TLSREL)
    .Case("gotpcrel32@lo", VK_AMDGPU_GOTPCREL32_LO)
    .Case("gotpcrel32@hi", VK_AMDGPU_GOTPCREL32_HI)
    .Case("rel32@lo", VK_AMDGPU_REL32_LO)
    .Case("rel32@hi", VK_AMDGPU_REL32_HI)
    .Case("rel64", VK_AMDGPU_REL64)
    .Case("abs32@lo", VK_AMDGPU_ABS32_LO)
    .Case("abs32@hi", VK_AMDGPU_ABS32_HI)
    .Case("hi", VK_VE_HI32)
    .Case("lo", VK_VE_LO32)
    .Case("pc_hi", VK_VE_PC_HI32)
    .Case("pc_lo", VK_VE_PC_LO32)
    .Case("got_hi", VK_VE_GOT_HI32)
    .Case("got_lo", VK_VE_GOT_LO32)
    .Case("gotoff_hi", VK_VE_GOTOFF_HI32)
    .Case("gotoff_lo", VK_VE_GOTOFF_LO32)
    .Case("plt_hi", VK_VE_PLT_HI32)
    .Case("plt_lo", VK_VE_PLT_LO32)
    .Case("tls_gd_hi", VK_VE_TLS_GD_HI32)
    .Case("tls_gd_lo", VK_VE_TLS_GD_LO32)
    .Case("tpoff_hi", VK_VE_TPOFF_HI32)
    .Case("tpoff_lo", VK_VE_TPOFF_LO32)
    .Default(VK_Invalid);
}
/* *** */
// Intentionally empty out-of-line definition.
// NOTE(review): presumably the usual LLVM "anchor" idiom (an out-of-line
// virtual method that gives the class's vtable a home in this file) — confirm
// against the declaration in MCExpr.h.
void MCTargetExpr::anchor() {}
/* *** */
bool MCExpr::evaluateAsAbsolute(int64_t &Res) const {
return evaluateAsAbsolute(Res, nullptr, nullptr, nullptr, false);
}
bool MCExpr::evaluateAsAbsolute(int64_t &Res,
const MCAsmLayout &Layout) const {
return evaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, nullptr, false);
}
bool MCExpr::evaluateAsAbsolute(int64_t &Res,
const MCAsmLayout &Layout,
const SectionAddrMap &Addrs) const {
// Setting InSet causes us to absolutize differences across sections and that
// is what the MachO writer uses Addrs for.
return evaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, &Addrs, true);
}
bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAssembler &Asm) const {
return evaluateAsAbsolute(Res, &Asm, nullptr, nullptr, false);
}
bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm) const {
return evaluateAsAbsolute(Res, Asm, nullptr, nullptr, false);
}
bool MCExpr::evaluateKnownAbsolute(int64_t &Res,
const MCAsmLayout &Layout) const {
return evaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, nullptr,
true);
}
bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm,
const MCAsmLayout *Layout,
const SectionAddrMap *Addrs, bool InSet) const {
MCValue Value;
// Fast path constants.
if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(this)) {
Res = CE->getValue();
return true;
}
bool IsRelocatable =
evaluateAsRelocatableImpl(Value, Asm, Layout, nullptr, Addrs, InSet);
// Record the current value.
Res = Value.getConstant();
return IsRelocatable && Value.isAbsolute();
}
/// Helper method for \see EvaluateSymbolicAdd().
///
/// Tries to fold the symbol difference (A - B) into \p Addend.
///
/// On success the computed difference is added to \p Addend and both \p A and
/// \p B are reset to nullptr to signal that they were consumed. On failure all
/// three are left untouched.
///
/// \param Asm    Assembler; dereferenced unconditionally once both symbols are
///               known to be defined, so it must be non-null.
/// \param Layout Optional layout. When present, symbol offsets come from the
///               layout; when null, only a restricted data-fragment walk is
///               attempted.
/// \param Addrs  Optional section address map. Required (non-null) for symbols
///               in different sections to be folded.
/// \param InSet  Passed to the object writer's "fully resolved" query.
/// \param A, B   In/out symbol references; either may be null on entry, in
///               which case nothing is done.
/// \param Addend In/out constant accumulating the folded difference.
static void AttemptToFoldSymbolOffsetDifference(
const MCAssembler *Asm, const MCAsmLayout *Layout,
const SectionAddrMap *Addrs, bool InSet, const MCSymbolRefExpr *&A,
const MCSymbolRefExpr *&B, int64_t &Addend) {
// Nothing to fold unless both halves of the difference are present.
if (!A || !B)
return;
const MCSymbol &SA = A->getSymbol();
const MCSymbol &SB = B->getSymbol();
// Undefined symbols are never folded here.
if (SA.isUndefined() || SB.isUndefined())
return;
// Let the object writer veto folding of this particular pair.
if (!Asm->getWriter().isSymbolRefDifferenceFullyResolved(*Asm, A, B, InSet))
return;
// Shared epilogue for every successful fold. Captures by reference, so it
// mutates the caller's Addend, A and B.
auto FinalizeFolding = [&]() {
// Pointers to Thumb symbols need to have their low-bit set to allow
// for interworking.
if (Asm->isThumbFunc(&SA))
Addend |= 1;
// If symbol is labeled as micromips, we set low-bit to ensure
// correct offset in .gcc_except_table
if (Asm->getBackend().isMicroMips(&SA))
Addend |= 1;
// Clear the symbol expr pointers to indicate we have folded these
// operands.
A = B = nullptr;
};
const MCFragment *FA = SA.getFragment();
const MCFragment *FB = SB.getFragment();
// If both symbols are in the same fragment, return the difference of their
// offsets
if (FA == FB && !SA.isVariable() && !SA.isUnset() && !SB.isVariable() &&
!SB.isUnset()) {
Addend += SA.getOffset() - SB.getOffset();
return FinalizeFolding();
}
const MCSection &SecA = *FA->getParent();
const MCSection &SecB = *FB->getParent();
// A cross-section difference can only be folded with section addresses.
if ((&SecA != &SecB) && !Addrs)
return;
if (Layout) {
// One of the symbol involved is part of a fragment being laid out. Quit now
// to avoid a self loop.
if (!Layout->canGetFragmentOffset(FA) || !Layout->canGetFragmentOffset(FB))
return;
// Eagerly evaluate when layout is finalized.
Addend += Layout->getSymbolOffset(A->getSymbol()) -
Layout->getSymbolOffset(B->getSymbol());
// Symbol offsets are looked up per symbol; for different sections also add
// the difference of the two sections' addresses from Addrs.
if (Addrs && (&SecA != &SecB))
Addend += (Addrs->lookup(&SecA) - Addrs->lookup(&SecB));
FinalizeFolding();
} else {
// When layout is not finalized, our ability to resolve differences between
// symbols is limited to specific cases where the fragments between two
// symbols (including the fragments the symbols are defined in) are
// fixed-size fragments so the difference can be calculated. For example,
// this is important when the Subtarget is changed and a new MCDataFragment
// is created in the case of foo: instr; .arch_extension ext; instr .if . -
// foo.
if (SA.isVariable() || SA.isUnset() || SB.isVariable() || SB.isUnset() ||
FA->getKind() != MCFragment::FT_Data ||
FB->getKind() != MCFragment::FT_Data ||
FA->getSubsectionNumber() != FB->getSubsectionNumber())
return;
// Try to find a constant displacement from FA to FB, add the displacement
// between the offset in FA of SA and the offset in FB of SB.
int64_t Displacement = SA.getOffset() - SB.getOffset();
// Walk forward from FB; the walk only succeeds if FA is reached and every
// fragment passed on the way is a data fragment. If the end of the section is
// hit first, the function falls out of the loop without folding.
for (auto FI = FB->getIterator(), FE = SecA.end(); FI != FE; ++FI) {
if (&*FI == FA) {
Addend += Displacement;
return FinalizeFolding();
}
if (FI->getKind() != MCFragment::FT_Data)
return;
Displacement += cast<MCDataFragment>(FI)->getContents().size();
}
}
}
/// Evaluate the result of an add between (conceptually) two MCValues.
///
/// This routine conceptually attempts to construct an MCValue:
/// Result = (Result_A - Result_B + Result_Cst)
/// from two MCValue's LHS and RHS where
/// Result = LHS + RHS
/// and
/// Result = (LHS_A - LHS_B + LHS_Cst) + (RHS_A - RHS_B + RHS_Cst).
///
/// This routine attempts to aggressively fold the operands such that the result
/// is representable in an MCValue, but may not always succeed.
///
/// \returns True on success, false if the result is not representable in an
/// MCValue.
/// NOTE: It is really important to have both the Asm and Layout arguments.
/// They might look redundant, but this function can be used before layout
/// is done (see the object streamer for example) and having the Asm argument
2010-12-18 05:01:45 +01:00
/// lets us avoid relaxations early.
static bool
EvaluateSymbolicAdd(const MCAssembler *Asm, const MCAsmLayout *Layout,
const SectionAddrMap *Addrs, bool InSet, const MCValue &LHS,
const MCSymbolRefExpr *RHS_A, const MCSymbolRefExpr *RHS_B,
int64_t RHS_Cst, MCValue &Res) {
// FIXME: This routine (and other evaluation parts) are *incredibly* sloppy
// about dealing with modifiers. This will ultimately bite us, one day.
const MCSymbolRefExpr *LHS_A = LHS.getSymA();
const MCSymbolRefExpr *LHS_B = LHS.getSymB();
int64_t LHS_Cst = LHS.getConstant();
// Fold the result constant immediately.
int64_t Result_Cst = LHS_Cst + RHS_Cst;
assert((!Layout || Asm) &&
"Must have an assembler object if layout is given!");
RISCV: adjust handling of relocation emission for RISCV This re-architects the RISCV relocation handling to bring the implementation closer in line with the implementation in binutils. We would previously aggressively resolve the relocation. With this restructuring, we always will emit a paired relocation for any symbolic difference of the type of S±T[±C] where S and T are labels and C is a constant. GAS has a special target hook controlled by `RELOC_EXPANSION_POSSIBLE` which indicates that a fixup may be expanded into multiple relocations. This is used by the RISCV backend to always emit a paired relocation - either ADD[WIDTH] + SUB[WIDTH] for text relocations or SET[WIDTH] + SUB[WIDTH] for a debug info relocation. Irrespective of whether linker relaxation support is enabled, symbolic difference is always emitted as a paired relocation. This change also sinks the target specific behaviour down into the target specific area rather than exposing it to the shared relocation handling. In the process, we also sink the "special" handling for debug information down into the RISCV target. Although this improves the path for the other targets, this is not necessarily entirely ideal either. The changes in the debug info emission could be done through another type of hook as this functionality would be required by any other target which wishes to do linker relaxation. However, as there are no other targets in LLVM which currently do this, this is a reasonable thing to do until such time as the code needs to be shared. Improve the handling of the relocation (and add a reduced test case from the Linux kernel) to ensure that we handle complex expressions for symbolic difference. This ensures that we correct relocate symbols with the adddends normalized and associated with the addition portion of the paired relocation. 
This change also addresses some review comments from Alex Bradbury about the relocations meant for use in the DWARF CFA being named incorrectly (using ADD6 instead of SET6) in the original change which introduced the relocation type. This resolves the issues with the symbolic difference emission sufficiently to enable building the Linux kernel with clang+IAS+lld (without linker relaxation). Resolves PR50153, PR50156! Fixes: ClangBuiltLinux/linux#1023, ClangBuiltLinux/linux#1143 Reviewed By: nickdesaulniers, maskray Differential Revision: https://reviews.llvm.org/D103539
2021-05-26 17:41:11 +02:00
// If we have a layout, we can fold resolved differences.
if (Asm) {
// First, fold out any differences which are fully resolved. By
// reassociating terms in
// Result = (LHS_A - LHS_B + LHS_Cst) + (RHS_A - RHS_B + RHS_Cst).
// we have the four possible differences:
// (LHS_A - LHS_B),
// (LHS_A - RHS_B),
// (RHS_A - LHS_B),
// (RHS_A - RHS_B).
// Since we are attempting to be as aggressive as possible about folding, we
// attempt to evaluate each possible alternative.
AttemptToFoldSymbolOffsetDifference(Asm, Layout, Addrs, InSet, LHS_A, LHS_B,
Result_Cst);
AttemptToFoldSymbolOffsetDifference(Asm, Layout, Addrs, InSet, LHS_A, RHS_B,
Result_Cst);
AttemptToFoldSymbolOffsetDifference(Asm, Layout, Addrs, InSet, RHS_A, LHS_B,
Result_Cst);
AttemptToFoldSymbolOffsetDifference(Asm, Layout, Addrs, InSet, RHS_A, RHS_B,
Result_Cst);
}
// We can't represent the addition or subtraction of two symbols.
if ((LHS_A && RHS_A) || (LHS_B && RHS_B))
return false;
// At this point, we have at most one additive symbol and one subtractive
// symbol -- find them.
const MCSymbolRefExpr *A = LHS_A ? LHS_A : RHS_A;
const MCSymbolRefExpr *B = LHS_B ? LHS_B : RHS_B;
Res = MCValue::get(A, B, Result_Cst);
return true;
}
/// Try to evaluate this expression to a relocatable value.
///
/// \param Res    Receives the evaluated value on success.
/// \param Layout Optional layout; when present, its assembler is handed to the
///               underlying evaluator as well.
/// \param Fixup  Forwarded unchanged to the underlying evaluator.
/// \returns whatever evaluateAsRelocatableImpl() reports for this expression.
bool MCExpr::evaluateAsRelocatable(MCValue &Res, const MCAsmLayout *Layout,
                                   const MCFixup *Fixup) const {
  // The assembler is only reachable through the layout; without a layout the
  // evaluation runs with neither.
  MCAssembler *Assembler = nullptr;
  if (Layout)
    Assembler = &Layout->getAssembler();

  return evaluateAsRelocatableImpl(Res, Assembler, Layout, Fixup,
                                   /*Addrs=*/nullptr, /*InSet=*/false);
}
/// Evaluate this expression to a value using a mandatory layout.
///
/// Runs the shared evaluator with the layout's assembler, no fixup, no
/// section address map, and the InSet flag enabled.
///
/// \param Res    Receives the evaluated value on success.
/// \param Layout Layout supplying the assembler used for evaluation.
/// \returns whatever evaluateAsRelocatableImpl() reports for this expression.
bool MCExpr::evaluateAsValue(MCValue &Res, const MCAsmLayout &Layout) const {
  MCAssembler *const LayoutAsm = &Layout.getAssembler();
  const MCAsmLayout *const LayoutPtr = &Layout;
  return evaluateAsRelocatableImpl(Res, LayoutAsm, LayoutPtr,
                                   /*Fixup=*/nullptr, /*Addrs=*/nullptr,
                                   /*InSet=*/true);
}
static bool canExpand(const MCSymbol &Sym, bool InSet) {
const MCExpr *Expr = Sym.getVariableValue();
const auto *Inner = dyn_cast<MCSymbolRefExpr>(Expr);
if (Inner) {
if (Inner->getKind() == MCSymbolRefExpr::VK_WEAKREF)
return false;
}
if (InSet)
return true;
return !Sym.isInSection();
}
/// Recursive worker behind evaluateAsRelocatable() and evaluateAsValue().
///
/// Attempts to reduce this expression to an MCValue of the form
/// (SymA - SymB + Constant) and stores it in \p Res.
///
/// \param Res    Receives the evaluated value on success.
/// \param Asm    Assembler forwarded to recursive evaluation and to symbolic
///               folding; its context is used to re-create a symbol reference
///               when a variant kind has to be re-attached to the result.
/// \param Layout Optional layout. A variable referenced with a variant kind
///               other than VK_None is only expanded when this is non-null.
/// \param Fixup  Forwarded to target expressions and to recursive calls.
/// \param Addrs  Forwarded to recursive calls and to EvaluateSymbolicAdd().
/// \param InSet  Forwarded to canExpand(), recursive calls and
///               EvaluateSymbolicAdd().
/// \returns true if \p Res was populated; false if the expression cannot be
///          represented (unsupported operation on symbolic operands, division
///          or modulo by zero, or a failing sub-expression).
bool MCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
                                       const MCAsmLayout *Layout,
                                       const MCFixup *Fixup,
                                       const SectionAddrMap *Addrs,
                                       bool InSet) const {
  ++stats::MCExprEvaluate;

  switch (getKind()) {
  case Target:
    // Target-specific expressions evaluate themselves.
    return cast<MCTargetExpr>(this)->evaluateAsRelocatableImpl(Res, Layout,
                                                               Fixup);

  case Constant:
    Res = MCValue::get(cast<MCConstantExpr>(this)->getValue());
    return true;

  case SymbolRef: {
    const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(this);
    const MCSymbol &Sym = SRE->getSymbol();
    const auto Kind = SRE->getKind();

    // Evaluate recursively if this is a variable. References carrying a
    // variant kind are only looked through once a layout is available.
    if (Sym.isVariable() && (Kind == MCSymbolRefExpr::VK_None || Layout) &&
        canExpand(Sym, InSet)) {
      bool IsMachO = SRE->hasSubsectionsViaSymbols();
      if (Sym.getVariableValue()->evaluateAsRelocatableImpl(
              Res, Asm, Layout, Fixup, Addrs, InSet || IsMachO)) {
        if (Kind != MCSymbolRefExpr::VK_None) {
          // An absolute value cannot carry the variant kind; keep the
          // original reference instead.
          if (Res.isAbsolute()) {
            Res = MCValue::get(SRE, nullptr, 0);
            return true;
          }
          // If the reference has a variant kind, we can only handle expressions
          // which evaluate exactly to a single unadorned symbol. Attach the
          // original VariantKind to SymA of the result.
          if (Res.getRefKind() != MCSymbolRefExpr::VK_None || !Res.getSymA() ||
              Res.getSymB() || Res.getConstant())
            return false;
          Res =
              MCValue::get(MCSymbolRefExpr::create(&Res.getSymA()->getSymbol(),
                                                   Kind, Asm->getContext()),
                           Res.getSymB(), Res.getConstant(), Res.getRefKind());
        }
        if (!IsMachO)
          return true;

        const MCSymbolRefExpr *A = Res.getSymA();
        const MCSymbolRefExpr *B = Res.getSymB();
        // FIXME: This is small hack. Given
        // a = b + 4
        // .long a
        // the OS X assembler will completely drop the 4. We should probably
        // include it in the relocation or produce an error if that is not
        // possible.
        // Allow constant expressions.
        if (!A && !B)
          return true;
        // Allows aliases with zero offset.
        if (Res.getConstant() == 0 && (!A || !B))
          return true;
      }
    }

    // Not expandable (or expansion rejected above): refer to the symbol
    // itself.
    Res = MCValue::get(SRE, nullptr, 0);
    return true;
  }

  case Unary: {
    const MCUnaryExpr *AUE = cast<MCUnaryExpr>(this);
    MCValue Value;

    if (!AUE->getSubExpr()->evaluateAsRelocatableImpl(Value, Asm, Layout, Fixup,
                                                      Addrs, InSet))
      return false;

    switch (AUE->getOpcode()) {
    case MCUnaryExpr::LNot:
      if (!Value.isAbsolute())
        return false;
      Res = MCValue::get(!Value.getConstant());
      break;
    case MCUnaryExpr::Minus:
      // -(a - b + const) ==> (b - a - const)
      // A lone additive symbol has no representable negation.
      if (Value.getSymA() && !Value.getSymB())
        return false;

      // The cast avoids undefined behavior if the constant is INT64_MIN.
      Res = MCValue::get(Value.getSymB(), Value.getSymA(),
                         -(uint64_t)Value.getConstant());
      break;
    case MCUnaryExpr::Not:
      if (!Value.isAbsolute())
        return false;
      Res = MCValue::get(~Value.getConstant());
      break;
    case MCUnaryExpr::Plus:
      Res = Value;
      break;
    }

    return true;
  }

  case Binary: {
    const MCBinaryExpr *ABE = cast<MCBinaryExpr>(this);
    MCValue LHSValue, RHSValue;

    if (!ABE->getLHS()->evaluateAsRelocatableImpl(LHSValue, Asm, Layout, Fixup,
                                                  Addrs, InSet) ||
        !ABE->getRHS()->evaluateAsRelocatableImpl(RHSValue, Asm, Layout, Fixup,
                                                  Addrs, InSet)) {
      // Check if both are Target Expressions, see if we can compare them.
      // dyn_cast (not cast) is required for the RHS as well: it may be any
      // kind of expression, and only a genuine target expression may be
      // handed to isEqualTo().
      if (const MCTargetExpr *L = dyn_cast<MCTargetExpr>(ABE->getLHS()))
        if (const MCTargetExpr *R = dyn_cast<MCTargetExpr>(ABE->getRHS())) {
          switch (ABE->getOpcode()) {
          case MCBinaryExpr::EQ:
            Res = MCValue::get(L->isEqualTo(R) ? -1 : 0);
            return true;
          case MCBinaryExpr::NE:
            // Compare LHS against RHS; NE is the exact inverse of EQ.
            Res = MCValue::get(L->isEqualTo(R) ? 0 : -1);
            return true;
          default:
            break;
          }
        }
      return false;
    }

    // We only support a few operations on non-constant expressions, handle
    // those first.
    if (!LHSValue.isAbsolute() || !RHSValue.isAbsolute()) {
      switch (ABE->getOpcode()) {
      default:
        return false;
      case MCBinaryExpr::Sub:
        // Negate RHS and add.
        // The cast avoids undefined behavior if the constant is INT64_MIN.
        return EvaluateSymbolicAdd(Asm, Layout, Addrs, InSet, LHSValue,
                                   RHSValue.getSymB(), RHSValue.getSymA(),
                                   -(uint64_t)RHSValue.getConstant(), Res);

      case MCBinaryExpr::Add:
        return EvaluateSymbolicAdd(Asm, Layout, Addrs, InSet, LHSValue,
                                   RHSValue.getSymA(), RHSValue.getSymB(),
                                   RHSValue.getConstant(), Res);
      }
    }

    // FIXME: We need target hooks for the evaluation. It may be limited in
    // width, and gas defines the result of comparisons differently from
    // Apple as.
    int64_t LHS = LHSValue.getConstant(), RHS = RHSValue.getConstant();
    int64_t Result = 0;
    auto Op = ABE->getOpcode();
    switch (Op) {
    case MCBinaryExpr::AShr: Result = LHS >> RHS; break;
    case MCBinaryExpr::Add:  Result = LHS + RHS; break;
    case MCBinaryExpr::And:  Result = LHS & RHS; break;
    case MCBinaryExpr::Div:
    case MCBinaryExpr::Mod:
      // Handle division by zero. gas just emits a warning and keeps going,
      // we try to be stricter.
      // FIXME: Currently the caller of this function has no way to understand
      // we're bailing out because of 'division by zero'. Therefore, it will
      // emit a 'expected relocatable expression' error. It would be nice to
      // change this code to emit a better diagnostic.
      if (RHS == 0)
        return false;
      if (ABE->getOpcode() == MCBinaryExpr::Div)
        Result = LHS / RHS;
      else
        Result = LHS % RHS;
      break;
    case MCBinaryExpr::EQ:   Result = LHS == RHS; break;
    case MCBinaryExpr::GT:   Result = LHS > RHS; break;
    case MCBinaryExpr::GTE:  Result = LHS >= RHS; break;
    case MCBinaryExpr::LAnd: Result = LHS && RHS; break;
    case MCBinaryExpr::LOr:  Result = LHS || RHS; break;
    case MCBinaryExpr::LShr: Result = uint64_t(LHS) >> uint64_t(RHS); break;
    case MCBinaryExpr::LT:   Result = LHS < RHS; break;
    case MCBinaryExpr::LTE:  Result = LHS <= RHS; break;
    case MCBinaryExpr::Mul:  Result = LHS * RHS; break;
    case MCBinaryExpr::NE:   Result = LHS != RHS; break;
    case MCBinaryExpr::Or:   Result = LHS | RHS; break;
    case MCBinaryExpr::OrNot: Result = LHS | ~RHS; break;
    case MCBinaryExpr::Shl:  Result = uint64_t(LHS) << uint64_t(RHS); break;
    case MCBinaryExpr::Sub:  Result = LHS - RHS; break;
    case MCBinaryExpr::Xor:  Result = LHS ^ RHS; break;
    }

    switch (Op) {
    default:
      Res = MCValue::get(Result);
      break;
    case MCBinaryExpr::EQ:
    case MCBinaryExpr::GT:
    case MCBinaryExpr::GTE:
    case MCBinaryExpr::LT:
    case MCBinaryExpr::LTE:
    case MCBinaryExpr::NE:
      // A comparison operator returns a -1 if true and 0 if false.
      Res = MCValue::get(Result ? -1 : 0);
      break;
    }

    return true;
  }
  }

  llvm_unreachable("Invalid assembly expression kind!");
}
/// Find the fragment this expression is associated with.
///
/// Constants map to MCSymbol::AbsolutePseudoFragment, symbol references to
/// the fragment of the referenced symbol, unary expressions to the fragment
/// of their operand, and target expressions answer for themselves. Binary
/// expressions combine the fragments of both operands as described inline.
MCFragment *MCExpr::findAssociatedFragment() const {
  switch (getKind()) {
  case Target:
    // We never look through target specific expressions.
    return cast<MCTargetExpr>(this)->findAssociatedFragment();

  case Constant:
    return MCSymbol::AbsolutePseudoFragment;

  case SymbolRef:
    return cast<MCSymbolRefExpr>(this)->getSymbol().getFragment();

  case Unary: {
    const MCExpr *Operand = cast<MCUnaryExpr>(this)->getSubExpr();
    return Operand->findAssociatedFragment();
  }

  case Binary: {
    const MCBinaryExpr *Bin = cast<MCBinaryExpr>(this);
    MCFragment *LeftFrag = Bin->getLHS()->findAssociatedFragment();
    MCFragment *RightFrag = Bin->getRHS()->findAssociatedFragment();

    // An absolute operand contributes nothing: the other side decides.
    if (LeftFrag == MCSymbol::AbsolutePseudoFragment)
      return RightFrag;
    if (RightFrag == MCSymbol::AbsolutePseudoFragment)
      return LeftFrag;

    // Subtracting two non-absolute operands is treated as absolute. Not
    // always correct, but probably the best we can do without more context.
    if (Bin->getOpcode() == MCBinaryExpr::Sub)
      return MCSymbol::AbsolutePseudoFragment;

    // Otherwise, return the first non-null fragment.
    if (LeftFrag)
      return LeftFrag;
    return RightFrag;
  }
  }

  llvm_unreachable("Invalid assembly expression kind!");
}