1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00

[PowerPC][PCRelative] Thread Local Storage Support for General Dynamic

This patch is the initial support for the General Dynamic Thread Local
Local Storage model to produce code sequence and relocations correct
to the ABI for the model when using PC relative memory operations.

Patch by: NeHuang

Reviewed By: stefanp

Differential Revision: https://reviews.llvm.org/D82315
This commit is contained in:
Kamau Bridgeman 2020-06-22 11:43:03 -05:00
parent 640a9a840f
commit 7be92ab238
19 changed files with 205 additions and 20 deletions

View File

@ -100,6 +100,7 @@
#undef R_PPC64_PCREL_OPT
#undef R_PPC64_PCREL34
#undef R_PPC64_GOT_PCREL34
#undef R_PPC64_GOT_TLSGD_PCREL34
#undef R_PPC64_IRELATIVE
#undef R_PPC64_REL16
#undef R_PPC64_REL16_LO
@ -198,6 +199,7 @@ ELF_RELOC(R_PPC64_REL24_NOTOC, 116)
ELF_RELOC(R_PPC64_PCREL_OPT, 123)
ELF_RELOC(R_PPC64_PCREL34, 132)
ELF_RELOC(R_PPC64_GOT_PCREL34, 133)
ELF_RELOC(R_PPC64_GOT_TLSGD_PCREL34, 148)
ELF_RELOC(R_PPC64_IRELATIVE, 248)
ELF_RELOC(R_PPC64_REL16, 249)
ELF_RELOC(R_PPC64_REL16_LO, 250)

View File

@ -299,6 +299,7 @@ public:
VK_PPC_GOT_TLSLD_HI, // symbol@got@tlsld@h
VK_PPC_GOT_TLSLD_HA, // symbol@got@tlsld@ha
VK_PPC_GOT_PCREL, // symbol@got@pcrel
VK_PPC_GOT_TLSGD_PCREL, // symbol@got@tlsgd@pcrel
VK_PPC_TLSLD, // symbol@tlsld
VK_PPC_LOCAL, // symbol@local
VK_PPC_NOTOC, // symbol@notoc

View File

@ -322,6 +322,8 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_PPC_GOT_TLSLD_HA: return "got@tlsld@ha";
case VK_PPC_GOT_PCREL:
return "got@pcrel";
case VK_PPC_GOT_TLSGD_PCREL:
return "got@tlsgd@pcrel";
case VK_PPC_TLSLD: return "tlsld";
case VK_PPC_LOCAL: return "local";
case VK_PPC_NOTOC: return "notoc";
@ -454,6 +456,7 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
.Case("got@tlsld@h", VK_PPC_GOT_TLSLD_HI)
.Case("got@tlsld@ha", VK_PPC_GOT_TLSLD_HA)
.Case("got@pcrel", VK_PPC_GOT_PCREL)
.Case("got@tlsgd@pcrel", VK_PPC_GOT_TLSGD_PCREL)
.Case("notoc", VK_PPC_NOTOC)
.Case("gdgot", VK_Hexagon_GD_GOT)
.Case("gdplt", VK_Hexagon_GD_PLT)

View File

@ -138,6 +138,9 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
case MCSymbolRefExpr::VK_PPC_GOT_PCREL:
Type = ELF::R_PPC64_GOT_PCREL34;
break;
case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_PCREL:
Type = ELF::R_PPC64_GOT_TLSGD_PCREL34;
break;
}
break;
case FK_Data_4:

View File

@ -531,10 +531,17 @@ void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo,
RefExp = cast<MCSymbolRefExpr>(Op.getExpr());
O << RefExp->getSymbol().getName();
// The variant kind VK_PPC_NOTOC needs to be handled as a special case
// because we do not want the assembly to print out the @notoc at the
// end like __tls_get_addr(x@tlsgd)@notoc. Instead we want it to look
// like __tls_get_addr@notoc(x@tlsgd).
if (RefExp->getKind() == MCSymbolRefExpr::VK_PPC_NOTOC)
O << '@' << MCSymbolRefExpr::getVariantKindName(RefExp->getKind());
O << '(';
printOperand(MI, OpNo+1, O);
O << ')';
if (RefExp->getKind() != MCSymbolRefExpr::VK_None)
if (RefExp->getKind() != MCSymbolRefExpr::VK_None &&
RefExp->getKind() != MCSymbolRefExpr::VK_PPC_NOTOC)
O << '@' << MCSymbolRefExpr::getVariantKindName(RefExp->getKind());
if (ConstExp != nullptr)
O << '+' << ConstExp->getValue();

View File

@ -44,11 +44,13 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
if (MO.isReg() || MO.isImm())
return getMachineOpValue(MI, MO, Fixups, STI);
// Add a fixup for the branch target.
Fixups.push_back(MCFixup::create(0, MO.getExpr(),
((MI.getOpcode() == PPC::BL8_NOTOC)
((MI.getOpcode() == PPC::BL8_NOTOC ||
MI.getOpcode() == PPC::BL8_NOTOC_TLS)
? (MCFixupKind)PPC::fixup_ppc_br24_notoc
: (MCFixupKind)PPC::fixup_ppc_br24)));
return 0;
@ -229,8 +231,10 @@ PPCMCCodeEmitter::getMemRI34PCRelEncoding(const MCInst &MI, unsigned OpNo,
(void)SRE;
// Currently these are the only valid PCRelative Relocations.
assert((SRE->getKind() == MCSymbolRefExpr::VK_PCREL ||
SRE->getKind() == MCSymbolRefExpr::VK_PPC_GOT_PCREL) &&
"VariantKind must be VK_PCREL or VK_PPC_GOT_PCREL");
SRE->getKind() == MCSymbolRefExpr::VK_PPC_GOT_PCREL ||
SRE->getKind() == MCSymbolRefExpr::VK_PPC_GOT_TLSGD_PCREL) &&
"VariantKind must be VK_PCREL or VK_PPC_GOT_PCREL or "
"VK_PPC_GOT_TLSGD_PCREL");
// Generate the fixup for the relocation.
Fixups.push_back(
MCFixup::create(0, Expr,

View File

@ -109,6 +109,15 @@ namespace llvm {
// PC Relative linker optimization.
MO_PCREL_OPT_FLAG = 16,
/// MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to
/// TLS General Dynamic model.
MO_TLSGD_FLAG = 32,
/// MO_GOT_TLSGD_PCREL_FLAG - A combintaion of flags, if these bits are set
/// they should produce the relocation @got@tlsgd@pcrel.
/// Fix up is VK_PPC_GOT_TLSGD_PCREL
MO_GOT_TLSGD_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG | MO_TLSGD_FLAG,
/// The next are not flags but distinct values.
MO_ACCESS_MASK = 0xf00,

View File

@ -488,6 +488,13 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
StringRef Name = "__tls_get_addr";
MCSymbol *TlsGetAddr = OutContext.getOrCreateSymbol(Name);
MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None;
unsigned Opcode = PPC::BL8_NOP_TLS;
assert(MI->getNumOperands() >= 3 && "Expecting at least 3 operands from MI");
if (MI->getOperand(2).getTargetFlags() == PPCII::MO_GOT_TLSGD_PCREL_FLAG) {
Kind = MCSymbolRefExpr::VK_PPC_NOTOC;
Opcode = PPC::BL8_NOTOC_TLS;
}
const Module *M = MF->getFunction().getParent();
assert(MI->getOperand(0).isReg() &&
@ -515,10 +522,9 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymVar = MCSymbolRefExpr::create(MOSymbol, VK, OutContext);
EmitToStreamer(*OutStreamer,
MCInstBuilder(Subtarget->isPPC64() ?
PPC::BL8_NOP_TLS : PPC::BL_TLS)
.addExpr(TlsRef)
.addExpr(SymVar));
MCInstBuilder(Subtarget->isPPC64() ? Opcode : PPC::BL_TLS)
.addExpr(TlsRef)
.addExpr(SymVar));
}
/// Map a machine operand for a TOC pseudo-machine instruction to its

View File

@ -1472,6 +1472,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR:
return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
case PPCISD::STRICT_FCTIDZ:
@ -2999,6 +3001,12 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
}
if (Model == TLSModel::GeneralDynamic) {
if (Subtarget.isUsingPCRelativeCalls()) {
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
PPCII::MO_GOT_TLSGD_PCREL_FLAG);
return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
}
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
SDValue GOTPtr;
if (is64bit) {

View File

@ -436,6 +436,11 @@ namespace llvm {
/// PLD.
MAT_PCREL_ADDR,
/// TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for
/// TLS global address when using dynamic access models. This can be done
/// through an add like PADDI.
TLS_DYNAMIC_MAT_PCREL_ADDR,
// Constrained conversion from floating point to int
STRICT_FCTIDZ = ISD::FIRST_TARGET_STRICTFP_OPCODE,
STRICT_FCTIWZ,

View File

@ -148,6 +148,9 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
def BL8_NOTOC : IForm<18, 0, 1, (outs),
(ins calltarget:$func),
"bl $func", IIC_BrB, []>;
def BL8_NOTOC_TLS : IForm<18, 0, 1, (outs),
(ins tlscall:$func),
"bl $func", IIC_BrB, []>;
}
}
let Uses = [CTR8, RM] in {

View File

@ -2272,7 +2272,9 @@ PPCInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
{MO_PIC_FLAG, "ppc-pic"},
{MO_PCREL_FLAG, "ppc-pcrel"},
{MO_GOT_FLAG, "ppc-got"},
{MO_PCREL_OPT_FLAG, "ppc-opt-pcrel"}};
{MO_PCREL_OPT_FLAG, "ppc-opt-pcrel"},
{MO_TLSGD_FLAG, "ppc-tlsgd"},
{MO_GOT_TLSGD_PCREL_FLAG, "ppc-got-tlsgd-pcrel"}};
return makeArrayRef(TargetFlags);
}

View File

@ -339,6 +339,8 @@ def PPCprobedalloca : SDNode<"PPCISD::PROBED_ALLOCA", SDTDynOp, [SDNPHasChain]>;
// PC Relative Specific Nodes
def PPCmatpcreladdr : SDNode<"PPCISD::MAT_PCREL_ADDR", SDTIntUnaryOp, []>;
def PPCtlsdynamatpcreladdr : SDNode<"PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR",
SDTIntUnaryOp, []>;
//===----------------------------------------------------------------------===//
// PowerPC specific transformation functions and pattern fragments.

View File

@ -811,6 +811,9 @@ let Predicates = [PCRelativeMemops], AddedComplexity = 500 in {
// If the PPCmatpcreladdr node is not caught by any other pattern it should be
// caught here and turned into a paddi instruction to materialize the address.
def : Pat<(PPCmatpcreladdr pcreladdr:$addr), (PADDI8pc 0, $addr)>;
// PPCtlsdynamatpcreladdr node is used for TLS dynamic models to materialize
// tls global address with paddi instruction.
def : Pat<(PPCtlsdynamatpcreladdr pcreladdr:$addr), (PADDI8pc 0, $addr)>;
}
let Predicates = [PrefixInstrs] in {

View File

@ -84,6 +84,8 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
RefKind = MCSymbolRefExpr::VK_PCREL;
else if (MO.getTargetFlags() == (PPCII::MO_PCREL_FLAG | PPCII::MO_GOT_FLAG))
RefKind = MCSymbolRefExpr::VK_PPC_GOT_PCREL;
else if (MO.getTargetFlags() == PPCII::MO_GOT_TLSGD_PCREL_FLAG)
RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSGD_PCREL;
const MachineInstr *MI = MO.getParent();
const MachineFunction *MF = MI->getMF();

View File

@ -50,16 +50,17 @@ protected:
bool Changed = false;
bool NeedFence = true;
bool Is64Bit = MBB.getParent()->getSubtarget<PPCSubtarget>().isPPC64();
bool IsTLSGDPCREL = false;
for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
I != IE;) {
MachineInstr &MI = *I;
IsTLSGDPCREL = isTLSGDPCREL(MI);
if (MI.getOpcode() != PPC::ADDItlsgdLADDR &&
MI.getOpcode() != PPC::ADDItlsldLADDR &&
MI.getOpcode() != PPC::ADDItlsgdLADDR32 &&
MI.getOpcode() != PPC::ADDItlsldLADDR32) {
MI.getOpcode() != PPC::ADDItlsldLADDR32 && !IsTLSGDPCREL) {
// Although we create ADJCALLSTACKDOWN and ADJCALLSTACKUP
// as scheduling fences, we skip creating fences if we already
// have existing ADJCALLSTACKDOWN/UP to avoid nesting,
@ -76,12 +77,16 @@ protected:
LLVM_DEBUG(dbgs() << "TLS Dynamic Call Fixup:\n " << MI);
Register OutReg = MI.getOperand(0).getReg();
Register InReg = MI.getOperand(1).getReg();
DebugLoc DL = MI.getDebugLoc();
Register InReg = PPC::NoRegister;
Register GPR3 = Is64Bit ? PPC::X3 : PPC::R3;
unsigned Opc1, Opc2;
const Register OrigRegs[] = {OutReg, InReg, GPR3};
SmallVector<Register, 3> OrigRegs = {OutReg, GPR3};
if (!IsTLSGDPCREL) {
InReg = MI.getOperand(1).getReg();
OrigRegs.push_back(InReg);
}
DebugLoc DL = MI.getDebugLoc();
unsigned Opc1, Opc2;
switch (MI.getOpcode()) {
default:
llvm_unreachable("Opcode inconsistency error");
@ -101,6 +106,10 @@ protected:
Opc1 = PPC::ADDItlsldL32;
Opc2 = PPC::GETtlsldADDR32;
break;
case PPC::PADDI8pc:
assert(IsTLSGDPCREL && "Expecting General Dynamic PCRel");
Opc1 = PPC::PADDI8pc;
Opc2 = PPC::GETtlsADDR;
}
// We create ADJCALLSTACKUP and ADJCALLSTACKDOWN around _tls_get_addr
@ -113,9 +122,15 @@ protected:
BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKDOWN)).addImm(0)
.addImm(0);
// Expand into two ops built prior to the existing instruction.
MachineInstr *Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3)
.addReg(InReg);
MachineInstr *Addi;
if (IsTLSGDPCREL) {
Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3).addImm(0);
} else {
// Expand into two ops built prior to the existing instruction.
assert(InReg != PPC::NoRegister && "Operand must be a register");
Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3).addReg(InReg);
}
Addi->addOperand(MI.getOperand(2));
// The ADDItls* instruction is the first instruction in the
@ -125,7 +140,10 @@ protected:
MachineInstr *Call = (BuildMI(MBB, I, DL, TII->get(Opc2), GPR3)
.addReg(GPR3));
Call->addOperand(MI.getOperand(3));
if (IsTLSGDPCREL)
Call->addOperand(MI.getOperand(2));
else
Call->addOperand(MI.getOperand(3));
if (NeedFence)
BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKUP)).addImm(0).addImm(0);
@ -150,6 +168,11 @@ protected:
}
public:
bool isTLSGDPCREL(const MachineInstr &MI) {
return (MI.getOpcode() == PPC::PADDI8pc) &&
(MI.getOperand(2).getTargetFlags() ==
PPCII::MO_GOT_TLSGD_PCREL_FLAG);
}
bool runOnMachineFunction(MachineFunction &MF) override {
TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo();
LIS = &getAnalysis<LiveIntervals>();

View File

@ -0,0 +1,51 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: --relocation-model=pic -mcpu=pwr10 -ppc-asm-full-reg-names \
; RUN: -enable-ppc-pcrel-tls < %s | FileCheck %s --check-prefix=CHECK-S
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: --relocation-model=pic -mcpu=pwr10 -ppc-asm-full-reg-names \
; RUN: -enable-ppc-pcrel-tls --filetype=obj < %s | \
; RUN: llvm-objdump --mcpu=pwr10 -dr - | FileCheck %s --check-prefix=CHECK-O
; These test cases are to ensure that when using pc relative memory operations
; ABI correct code and relocations are produced for General Dynamic TLS Model.
@x = external thread_local global i32, align 4
define nonnull i32* @GeneralDynamicAddressLoad() {
; CHECK-S-LABEL: GeneralDynamicAddressLoad:
; CHECK-S: paddi r3, 0, x@got@tlsgd@pcrel, 1
; CHECK-S-NEXT: bl __tls_get_addr@notoc(x@tlsgd)
; CHECK-S-NEXT: addi r1, r1, 32
; CHECK-S-NEXT: ld r0, 16(r1)
; CHECK-S-NEXT: mtlr r0
; CHECK-S-NEXT: blr
; CHECK-O-LABEL: <GeneralDynamicAddressLoad>:
; CHECK-O: c: 00 00 10 06 00 00 60 38 paddi 3, 0, 0, 1
; CHECK-O-NEXT: 000000000000000c: R_PPC64_GOT_TLSGD_PCREL34 x
; CHECK-O-NEXT: 14: 01 00 00 48 bl 0x14
; CHECK-O-NEXT: 0000000000000014: R_PPC64_TLSGD x
; CHECK-O-NEXT: 0000000000000014: R_PPC64_REL24_NOTOC __tls_get_addr
entry:
ret i32* @x
}
define i32 @GeneralDynamicValueLoad() {
; CHECK-S-LABEL: GeneralDynamicValueLoad:
; CHECK-S: paddi r3, 0, x@got@tlsgd@pcrel, 1
; CHECK-S-NEXT: bl __tls_get_addr@notoc(x@tlsgd)
; CHECK-S-NEXT: lwz r3, 0(r3)
; CHECK-S-NEXT: addi r1, r1, 32
; CHECK-S-NEXT: ld r0, 16(r1)
; CHECK-S-NEXT: mtlr r0
; CHECK-S-NEXT: blr
; CHECK-O-LABEL: <GeneralDynamicValueLoad>:
; CHECK-O: 4c: 00 00 10 06 00 00 60 38 paddi 3, 0, 0, 1
; CHECK-O-NEXT: 000000000000004c: R_PPC64_GOT_TLSGD_PCREL34 x
; CHECK-O-NEXT: 54: 01 00 00 48 bl 0x54
; CHECK-O-NEXT: 0000000000000054: R_PPC64_TLSGD x
; CHECK-O-NEXT: 0000000000000054: R_PPC64_REL24_NOTOC __tls_get_addr
; CHECK-O-NEXT: 58: 00 00 63 80 lwz 3, 0(3)
entry:
%0 = load i32, i32* @x, align 4
ret i32 %0
}

View File

@ -0,0 +1,25 @@
# RUN: llvm-mc -triple=powerpc64le-unknown-unknown -filetype=obj %s 2>&1 | \
# RUN: FileCheck %s -check-prefix=MC
# RUN: llvm-mc -triple=powerpc64le-unknown-unknown -filetype=obj %s | \
# RUN: llvm-readobj -r - | FileCheck %s -check-prefix=READOBJ
# This test checks that on Power PC we can correctly convert @got@tlsgd@pcrel
# x@tlsgd and __tls_get_addr@notoc into R_PPC64_GOT_TLSGD_PCREL34, R_PPC64_TLSGD
# and R_PPC64_REL24_NOTOC for general dynamic relocations with address loaded
# MC-NOT: error: invalid variant
# READOBJ: 0xC R_PPC64_GOT_TLSGD_PCREL34 x 0x0
# READOBJ-NEXT: 0x14 R_PPC64_TLSGD x 0x0
# READOBJ-NEXT: 0x14 R_PPC64_REL24_NOTOC __tls_get_addr 0x0
GeneralDynamicAddrLoad: # @GeneralDynamicAddrLoad
mflr 0
std 0, 16(1)
stdu 1, -32(1)
paddi 3, 0, x@got@tlsgd@pcrel, 1
bl __tls_get_addr@notoc(x@tlsgd)
addi 1, 1, 32
ld 0, 16(1)
mtlr 0
blr

View File

@ -0,0 +1,26 @@
# RUN: llvm-mc -triple=powerpc64le-unknown-unknown -filetype=obj %s 2>&1 | \
# RUN: FileCheck %s -check-prefix=MC
# RUN: llvm-mc -triple=powerpc64le-unknown-unknown -filetype=obj %s | \
# RUN: llvm-readobj -r - | FileCheck %s -check-prefix=READOBJ
# This test checks that on Power PC we can correctly convert @got@tlsgd@pcrel
# x@tlsgd and __tls_get_addr@notoc into R_PPC64_GOT_TLSGD_PCREL34, R_PPC64_TLSGD
# and R_PPC64_REL24_NOTOC for general dynamic relocations with value loaded
# MC-NOT: error: invalid variant
# READOBJ: 0xC R_PPC64_GOT_TLSGD_PCREL34 x 0x0
# READOBJ-NEXT: 0x14 R_PPC64_TLSGD x 0x0
# READOBJ-NEXT: 0x14 R_PPC64_REL24_NOTOC __tls_get_addr 0x0
GeneralDynamicValueLoad: # @GeneralDynamicValueLoad
mflr 0
std 0, 16(1)
stdu 1, -32(1)
paddi 3, 0, x@got@tlsgd@pcrel, 1
bl __tls_get_addr@notoc(x@tlsgd)
lwz 3, 0(3)
addi 1, 1, 32
ld 0, 16(1)
mtlr 0
blr