1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

[PowerPC][PCRelative] Add new pseudo instructions for PCRel TLS to fix R2 clobber issue

New pseudo instructions GETtlsADDRPCREL and GETtlsldADDRPCREL are added to properly
set the REGMASK for the __tls_get_addr function when using PC-relative addressing.

Differential Revision: https://reviews.llvm.org/D91420
Reviewed by: bsaleil
This commit is contained in:
Victor Huang 2020-11-24 10:48:11 -06:00
parent 0c51fd207f
commit 8636dcc8dc
4 changed files with 56 additions and 19 deletions

View File

@ -1076,6 +1076,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
case PPC::GETtlsADDR: case PPC::GETtlsADDR:
// Transform: %x3 = GETtlsADDR %x3, @sym // Transform: %x3 = GETtlsADDR %x3, @sym
// Into: BL8_NOP_TLS __tls_get_addr(sym at tlsgd) // Into: BL8_NOP_TLS __tls_get_addr(sym at tlsgd)
case PPC::GETtlsADDRPCREL:
case PPC::GETtlsADDR32: { case PPC::GETtlsADDR32: {
// Transform: %r3 = GETtlsADDR32 %r3, @sym // Transform: %r3 = GETtlsADDR32 %r3, @sym
// Into: BL_TLS __tls_get_addr(sym at tlsgd)@PLT // Into: BL_TLS __tls_get_addr(sym at tlsgd)@PLT
@ -1121,6 +1122,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
case PPC::GETtlsldADDR: case PPC::GETtlsldADDR:
// Transform: %x3 = GETtlsldADDR %x3, @sym // Transform: %x3 = GETtlsldADDR %x3, @sym
// Into: BL8_NOP_TLS __tls_get_addr(sym at tlsld) // Into: BL8_NOP_TLS __tls_get_addr(sym at tlsld)
case PPC::GETtlsldADDRPCREL:
case PPC::GETtlsldADDR32: { case PPC::GETtlsldADDR32: {
// Transform: %r3 = GETtlsldADDR32 %r3, @sym // Transform: %r3 = GETtlsldADDR32 %r3, @sym
// Into: BL_TLS __tls_get_addr(sym at tlsld)@PLT // Into: BL_TLS __tls_get_addr(sym at tlsld)@PLT

View File

@ -1265,17 +1265,36 @@ def ADDItlsgdL : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm6
[(set i64:$rD, [(set i64:$rD,
(PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>, (PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64; isPPC64;
// LR8 is a true define, while the rest of the Defs are clobbers. X3 is
class GETtlsADDRPseudo <string asmstr> : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
asmstr,
[(set i64:$rD,
(PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>,
isPPC64;
class GETtlsldADDRPseudo <string asmstr> : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
asmstr,
[(set i64:$rD,
(PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>,
isPPC64;
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1 in {
// LR8 is a true define, while the rest of the Defs are clobbers. X3 is
// explicitly defined when this op is created, so not mentioned here. // explicitly defined when this op is created, so not mentioned here.
// This is lowered to BL8_NOP_TLS by the assembly printer, so the size must be // This is lowered to BL8_NOP_TLS by the assembly printer, so the size must be
// correct because the branch select pass is relying on it. // correct because the branch select pass is relying on it.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Size = 8, let Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7], Size = 8 in
Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in def GETtlsADDR : GETtlsADDRPseudo <"#GETtlsADDR">;
def GETtlsADDR : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym), let Defs = [X0,X2,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7], Size = 8 in
"#GETtlsADDR", def GETtlsADDRPCREL : GETtlsADDRPseudo <"#GETtlsADDRPCREL">;
[(set i64:$rD,
(PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>, // LR8 is a true define, while the rest of the Defs are clobbers. X3 is
isPPC64; // explicitly defined when this op is created, so not mentioned here.
let Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
def GETtlsldADDR : GETtlsldADDRPseudo <"#GETtlsldADDR">;
let Defs = [X0,X2,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
def GETtlsldADDRPCREL : GETtlsldADDRPseudo <"#GETtlsldADDRPCREL">;
}
// Combined op for ADDItlsgdL and GETtlsADDR, late expanded. X3 and LR8 // Combined op for ADDItlsgdL and GETtlsADDR, late expanded. X3 and LR8
// are true defines while the rest of the Defs are clobbers. // are true defines while the rest of the Defs are clobbers.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
@ -1299,15 +1318,6 @@ def ADDItlsldL : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm6
[(set i64:$rD, [(set i64:$rD,
(PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>, (PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64; isPPC64;
// LR8 is a true define, while the rest of the Defs are clobbers. X3 is
// explicitly defined when this op is created, so not mentioned here.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
def GETtlsldADDR : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
"#GETtlsldADDR",
[(set i64:$rD,
(PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>,
isPPC64;
// Combined op for ADDItlsldL and GETtlsADDR, late expanded. X3 and LR8 // Combined op for ADDItlsldL and GETtlsADDR, late expanded. X3 and LR8
// are true defines, while the rest of the Defs are clobbers. // are true defines, while the rest of the Defs are clobbers.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,

View File

@ -111,8 +111,8 @@ protected:
Opc1 = PPC::PADDI8pc; Opc1 = PPC::PADDI8pc;
Opc2 = MI.getOperand(2).getTargetFlags() == Opc2 = MI.getOperand(2).getTargetFlags() ==
PPCII::MO_GOT_TLSGD_PCREL_FLAG PPCII::MO_GOT_TLSGD_PCREL_FLAG
? PPC::GETtlsADDR ? PPC::GETtlsADDRPCREL
: PPC::GETtlsldADDR; : PPC::GETtlsldADDRPCREL;
} }
// We create ADJCALLSTACKUP and ADJCALLSTACKDOWN around _tls_get_addr // We create ADJCALLSTACKUP and ADJCALLSTACKDOWN around _tls_get_addr

View File

@ -0,0 +1,25 @@
; Codegen test for a store to the thread-local global @x, compiled for
; powerpc64le with -mcpu=pwr10 and the PIC relocation model.
; The expected assembly keeps the incoming argument (r3) in r30 across the
; __tls_get_addr call, because r3 is reused for the call's argument and result.
; RUN: llc -verify-machineinstrs -mtriple="powerpc64le-unknown-linux-gnu" \
; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 -relocation-model=pic < %s | FileCheck %s
; Self-referential struct type; @x is a thread-local pointer to it.
%0 = type { i32 (...)**, %0* }
@x = external dso_local thread_local unnamed_addr global %0*, align 8
; Stores %arg into @x (through a bitcast of @x to i8**).
define void @test(i8* %arg) {
; CHECK-LABEL: test:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK: std r30, -16(r1)
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -48(r1)
; CHECK-NEXT: mr r30, r3
; CHECK-NEXT: paddi r3, 0, x@got@tlsld@pcrel, 1
; CHECK-NEXT: bl __tls_get_addr@notoc(x@tlsld)
; CHECK-NEXT: paddi r3, r3, x@DTPREL, 0
; CHECK-NEXT: std r30, 0(r3)
; CHECK-NEXT: addi r1, r1, 48
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: ld r30, -16(r1)
; CHECK-NEXT: mtlr r0
entry:
  store i8* %arg, i8** bitcast (%0** @x to i8**), align 8
  ret void
}