mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
[PowerPC] Optimize TLS initial-exec sequence to use X-Form loads/stores
This patch adds new load/store instructions for integer scalar types which can be used for X-Form when fed by add with an @tls relocation. Differential Revision: https://reviews.llvm.org/D43315 llvm-svn: 327635
This commit is contained in:
parent
9dafbecd2e
commit
840541d258
@ -101,6 +101,11 @@ static cl::opt<bool> EnableBranchHint(
|
||||
cl::desc("Enable static hinting of branches on ppc"),
|
||||
cl::Hidden);
|
||||
|
||||
// Hidden command-line flag "ppc-tls-opt" (defaults to true) controlling the
// TLS load/store peephole.
static cl::opt<bool>
    EnableTLSOpt("ppc-tls-opt", cl::init(true),
                 cl::desc("Enable tls optimization peephole"), cl::Hidden);
|
||||
|
||||
enum ICmpInGPRType { ICGPR_All, ICGPR_None, ICGPR_I32, ICGPR_I64,
|
||||
ICGPR_NonExtIn, ICGPR_Zext, ICGPR_Sext, ICGPR_ZextI32,
|
||||
ICGPR_SextI32, ICGPR_ZextI64, ICGPR_SextI64 };
|
||||
@ -199,6 +204,14 @@ namespace {
|
||||
bool tryBitPermutation(SDNode *N);
|
||||
bool tryIntCompareInGPR(SDNode *N);
|
||||
|
||||
// tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
|
||||
// an X-Form load instruction with the offset being a relocation coming from
|
||||
// the PPCISD::ADD_TLS.
|
||||
bool tryTLSXFormLoad(LoadSDNode *N);
|
||||
// tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
|
||||
// an X-Form store instruction with the offset being a relocation coming from
|
||||
// the PPCISD::ADD_TLS.
|
||||
bool tryTLSXFormStore(StoreSDNode *N);
|
||||
/// SelectCC - Select a comparison of the specified values with the
|
||||
/// specified condition code, returning the CR# of the expression.
|
||||
SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
|
||||
@ -582,6 +595,90 @@ bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
|
||||
return false;
|
||||
}
|
||||
|
||||
/// tryTLSXFormStore - Attempt to select a store whose address is a
/// PPCISD::ADD_TLS node as a single X-form TLS store machine node.
///
/// The two operands of the ADD_TLS become the address operands of the new
/// node, so the separate add is no longer needed by this store.
///
/// \param ST the store node being selected.
/// \returns true if \p ST was replaced by an X-form TLS store; false if the
///          store was left untouched (address is not an ADD_TLS, the store
///          has a defined offset operand, or the memory type is not one of
///          i8/i16/i32/i64).
bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
  // Only stores addressed directly through ADD_TLS, with no separate offset
  // operand, are candidates.
  const SDValue TLSAdd = ST->getBasePtr();
  if (TLSAdd.getOpcode() != PPCISD::ADD_TLS || !ST->getOffset().isUndef())
    return false;

  // The *_32 opcodes take the stored value in a 32-bit register; the plain
  // ones take it in a 64-bit register.
  const bool StoresI32Value = ST->getValue().getValueType() == MVT::i32;

  unsigned XFormOpc;
  switch (ST->getMemoryVT().getSimpleVT().SimpleTy) {
  case MVT::i8:
    XFormOpc = StoresI32Value ? PPC::STBXTLS_32 : PPC::STBXTLS;
    break;
  case MVT::i16:
    XFormOpc = StoresI32Value ? PPC::STHXTLS_32 : PPC::STHXTLS;
    break;
  case MVT::i32:
    XFormOpc = StoresI32Value ? PPC::STWXTLS_32 : PPC::STWXTLS;
    break;
  case MVT::i64:
    XFormOpc = PPC::STDXTLS;
    break;
  default:
    // Any other memory type is left to the regular selection path.
    return false;
  }

  // Operand order: stored value, the two ADD_TLS operands, then the chain.
  SDLoc Loc(ST);
  SDValue Operands[] = {ST->getValue(), TLSAdd.getOperand(0),
                        TLSAdd.getOperand(1), ST->getChain()};
  SDNode *XFormStore =
      CurDAG->getMachineNode(XFormOpc, Loc, ST->getVTList(), Operands);
  transferMemOperands(ST, XFormStore);
  ReplaceNode(ST, XFormStore);
  return true;
}
|
||||
|
||||
/// tryTLSXFormLoad - Attempt to select a load whose address is a
/// PPCISD::ADD_TLS node as a single X-form TLS load machine node.
///
/// The two operands of the ADD_TLS become the address operands of the new
/// node, so the separate add is no longer needed by this load.
///
/// \param LD the load node being selected.
/// \returns true if \p LD was replaced by an X-form TLS load; false if the
///          load was left untouched (address is not an ADD_TLS, the load has
///          a defined offset operand, the load is sign-extending, or the
///          memory type is not one of i8/i16/i32/i64).
bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
  SDValue Base = LD->getBasePtr();
  if (Base.getOpcode() != PPCISD::ADD_TLS)
    return false;
  SDValue Offset = LD->getOffset();
  if (!Offset.isUndef())
    return false;

  // Every opcode chosen below is a zero-extending form (lbzx/lhzx/lwzx) or a
  // full-width ldx. Selecting one of them for a sign-extending load would
  // silently drop the sign extension, so leave such loads to the default
  // selection instead.
  if (LD->getExtensionType() == ISD::SEXTLOAD)
    return false;

  SDLoc dl(LD);
  EVT MemVT = LD->getMemoryVT();
  EVT RegVT = LD->getValueType(0);

  // The *_32 opcodes produce the result in a 32-bit register; the plain ones
  // produce it in a 64-bit register.
  unsigned Opcode;
  switch (MemVT.getSimpleVT().SimpleTy) {
  default:
    // Any other memory type is left to the regular selection path.
    return false;
  case MVT::i8:
    Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
    break;
  case MVT::i16:
    Opcode = (RegVT == MVT::i32) ? PPC::LHZXTLS_32 : PPC::LHZXTLS;
    break;
  case MVT::i32:
    Opcode = (RegVT == MVT::i32) ? PPC::LWZXTLS_32 : PPC::LWZXTLS;
    break;
  case MVT::i64:
    Opcode = PPC::LDXTLS;
    break;
  }

  // Operand order: the two ADD_TLS operands, then the chain.
  SDValue Chain = LD->getChain();
  SDVTList VTs = LD->getVTList();
  SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain};
  SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
  transferMemOperands(LD, MN);
  ReplaceNode(LD, MN);
  return true;
}
|
||||
|
||||
/// Turn an or of two masked values into the rotate left word immediate then
|
||||
/// mask insert (rlwimi) instruction.
|
||||
bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
|
||||
@ -3949,14 +4046,28 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
|
||||
}
|
||||
}
|
||||
|
||||
case ISD::STORE: {
|
||||
// Change TLS initial-exec D-form stores to X-form stores.
|
||||
StoreSDNode *ST = cast<StoreSDNode>(N);
|
||||
if (EnableTLSOpt && PPCSubTarget->isELFv2ABI() &&
|
||||
ST->getAddressingMode() != ISD::PRE_INC)
|
||||
if (tryTLSXFormStore(ST))
|
||||
return;
|
||||
break;
|
||||
}
|
||||
case ISD::LOAD: {
|
||||
// Handle preincrement loads.
|
||||
LoadSDNode *LD = cast<LoadSDNode>(N);
|
||||
EVT LoadedVT = LD->getMemoryVT();
|
||||
|
||||
// Normal loads are handled by code generated from the .td file.
|
||||
if (LD->getAddressingMode() != ISD::PRE_INC)
|
||||
if (LD->getAddressingMode() != ISD::PRE_INC) {
|
||||
// Change TLS initial-exec D-form loads to X-form loads.
|
||||
if (EnableTLSOpt && PPCSubTarget->isELFv2ABI())
|
||||
if (tryTLSXFormLoad(LD))
|
||||
return;
|
||||
break;
|
||||
}
|
||||
|
||||
SDValue Offset = LD->getOffset();
|
||||
if (Offset.getOpcode() == ISD::TargetConstant ||
|
||||
|
@ -499,7 +499,49 @@ defm ADD8 : XOForm_1r<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
|
||||
def ADD8TLS : XOForm_1<31, 266, 0, (outs g8rc:$rT), (ins g8rc_nox0:$rA, tlsreg:$rB),
|
||||
"add $rT, $rA, $rB", IIC_IntSimple,
|
||||
[(set i64:$rT, (add i64:$rA, tglobaltlsaddr:$rB))]>;
|
||||
|
||||
// X-form loads whose second address operand is a TLS register operand
// (tlsreg). They carry no selection patterns ([]).
let mayLoad = 1 in {
  // Variants producing the result in a g8rc register.
  def LBZXTLS : XForm_1<31, 87, (outs g8rc:$rD),
                        (ins ptr_rc_nor0:$rA, tlsreg:$rB),
                        "lbzx $rD, $rA, $rB", IIC_LdStLoad, []>;
  def LHZXTLS : XForm_1<31, 279, (outs g8rc:$rD),
                        (ins ptr_rc_nor0:$rA, tlsreg:$rB),
                        "lhzx $rD, $rA, $rB", IIC_LdStLoad, []>;
  def LWZXTLS : XForm_1<31, 23, (outs g8rc:$rD),
                        (ins ptr_rc_nor0:$rA, tlsreg:$rB),
                        "lwzx $rD, $rA, $rB", IIC_LdStLoad, []>;
  def LDXTLS : XForm_1<31, 21, (outs g8rc:$rD),
                       (ins ptr_rc_nor0:$rA, tlsreg:$rB),
                       "ldx $rD, $rA, $rB", IIC_LdStLD, []>, isPPC64;

  // Variants producing the result in a gprc register.
  def LBZXTLS_32 : XForm_1<31, 87, (outs gprc:$rD),
                           (ins ptr_rc_nor0:$rA, tlsreg:$rB),
                           "lbzx $rD, $rA, $rB", IIC_LdStLoad, []>;
  def LHZXTLS_32 : XForm_1<31, 279, (outs gprc:$rD),
                           (ins ptr_rc_nor0:$rA, tlsreg:$rB),
                           "lhzx $rD, $rA, $rB", IIC_LdStLoad, []>;
  def LWZXTLS_32 : XForm_1<31, 23, (outs gprc:$rD),
                           (ins ptr_rc_nor0:$rA, tlsreg:$rB),
                           "lwzx $rD, $rA, $rB", IIC_LdStLoad, []>;
}
|
||||
|
||||
// X-form stores whose second address operand is a TLS register operand
// (tlsreg). They carry no selection patterns ([]).
let mayStore = 1 in {
  // Variants taking the stored value in a g8rc register.
  def STBXTLS : XForm_8<31, 215, (outs),
                        (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
                        "stbx $rS, $rA, $rB", IIC_LdStStore, []>,
                PPC970_DGroup_Cracked;
  def STHXTLS : XForm_8<31, 407, (outs),
                        (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
                        "sthx $rS, $rA, $rB", IIC_LdStStore, []>,
                PPC970_DGroup_Cracked;
  def STWXTLS : XForm_8<31, 151, (outs),
                        (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
                        "stwx $rS, $rA, $rB", IIC_LdStStore, []>,
                PPC970_DGroup_Cracked;
  def STDXTLS : XForm_8<31, 149, (outs),
                        (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
                        "stdx $rS, $rA, $rB", IIC_LdStSTD, []>, isPPC64,
                PPC970_DGroup_Cracked;

  // Variants taking the stored value in a gprc register.
  def STBXTLS_32 : XForm_8<31, 215, (outs),
                           (ins gprc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
                           "stbx $rS, $rA, $rB", IIC_LdStStore, []>,
                   PPC970_DGroup_Cracked;
  def STHXTLS_32 : XForm_8<31, 407, (outs),
                           (ins gprc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
                           "sthx $rS, $rA, $rB", IIC_LdStStore, []>,
                   PPC970_DGroup_Cracked;
  def STWXTLS_32 : XForm_8<31, 151, (outs),
                           (ins gprc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
                           "stwx $rS, $rA, $rB", IIC_LdStStore, []>,
                   PPC970_DGroup_Cracked;
}
|
||||
|
||||
let isCommutable = 1 in
|
||||
defm ADDC8 : XOForm_1rc<31, 10, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
|
||||
"addc", "$rT, $rA, $rB", IIC_IntGeneral,
|
||||
|
169
test/CodeGen/PowerPC/tls-pie-xform.ll
Normal file
169
test/CodeGen/PowerPC/tls-pie-xform.ll
Normal file
@ -0,0 +1,169 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-gnu-linux < %s | FileCheck %s -check-prefix=CHECK
|
||||
|
||||
@var_char = external thread_local local_unnamed_addr global i8, align 1
|
||||
@var_short = external thread_local local_unnamed_addr global i16, align 2
|
||||
@var_int = external thread_local local_unnamed_addr global i32, align 4
|
||||
@var_long_long = external thread_local local_unnamed_addr global i64, align 8
|
||||
|
||||
define dso_local zeroext i8 @test_char_one() {
|
||||
; CHECK-LABEL: test_char_one:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK: addis 3, 2, var_char@got@tprel@ha
|
||||
; CHECK-NEXT: ld 3, var_char@got@tprel@l(3)
|
||||
; CHECK-NEXT: lbzx 3, 3, var_char@tls
|
||||
entry:
|
||||
%0 = load i8, i8* @var_char, align 1, !tbaa !4
|
||||
ret i8 %0
|
||||
}
|
||||
|
||||
define dso_local void @test_char_two(i32 signext %a) {
|
||||
; CHECK-LABEL: test_char_two:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK: addis 4, 2, var_char@got@tprel@ha
|
||||
; CHECK-NEXT: ld 4, var_char@got@tprel@l(4)
|
||||
; CHECK-NEXT: stbx 3, 4, var_char@tls
|
||||
entry:
|
||||
%conv = trunc i32 %a to i8
|
||||
store i8 %conv, i8* @var_char, align 1, !tbaa !4
|
||||
ret void
|
||||
}
|
||||
|
||||
define dso_local zeroext i8 @test_char_three(i8 zeroext %a) {
|
||||
; CHECK-LABEL: test_char_three:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK: addis 4, 2, var_char@got@tprel@ha
|
||||
; CHECK-NEXT: ld 4, var_char@got@tprel@l(4)
|
||||
; CHECK-NEXT: lbzx 5, 4, var_char@tls
|
||||
; CHECK: stbx {{[0-9]+}}, 4, var_char@tls
|
||||
entry:
|
||||
%0 = load i8, i8* @var_char, align 1, !tbaa !4
|
||||
%add = add i8 %0, %a
|
||||
store i8 %add, i8* @var_char, align 1, !tbaa !4
|
||||
ret i8 %add
|
||||
}
|
||||
|
||||
define dso_local signext i16 @test_short_one() {
|
||||
; CHECK-LABEL: test_short_one:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK: addis 3, 2, var_short@got@tprel@ha
|
||||
; CHECK-NEXT: ld 3, var_short@got@tprel@l(3)
|
||||
; CHECK-NEXT: lhzx 3, 3, var_short@tls
|
||||
entry:
|
||||
%0 = load i16, i16* @var_short, align 2, !tbaa !7
|
||||
ret i16 %0
|
||||
}
|
||||
|
||||
define dso_local void @test_short_two(i32 signext %a) {
|
||||
; CHECK-LABEL: test_short_two:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK: addis 4, 2, var_short@got@tprel@ha
|
||||
; CHECK-NEXT: ld 4, var_short@got@tprel@l(4)
|
||||
; CHECK-NEXT: sthx 3, 4, var_short@tls
|
||||
entry:
|
||||
%conv = trunc i32 %a to i16
|
||||
store i16 %conv, i16* @var_short, align 2, !tbaa !7
|
||||
ret void
|
||||
}
|
||||
|
||||
define dso_local signext i16 @test_short_three(i16 signext %a) {
|
||||
; CHECK-LABEL: test_short_three:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK: addis 4, 2, var_short@got@tprel@ha
|
||||
; CHECK-NEXT: ld 4, var_short@got@tprel@l(4)
|
||||
; CHECK-NEXT: lhzx 5, 4, var_short@tls
|
||||
; CHECK: sthx {{[0-9]+}}, 4, var_short@tls
|
||||
entry:
|
||||
%0 = load i16, i16* @var_short, align 2, !tbaa !7
|
||||
%add = add i16 %0, %a
|
||||
store i16 %add, i16* @var_short, align 2, !tbaa !7
|
||||
ret i16 %add
|
||||
}
|
||||
|
||||
define dso_local signext i32 @test_int_one() {
|
||||
; CHECK-LABEL: test_int_one:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK: addis 3, 2, var_int@got@tprel@ha
|
||||
; CHECK-NEXT: ld 3, var_int@got@tprel@l(3)
|
||||
; CHECK-NEXT: lwzx 3, 3, var_int@tls
|
||||
entry:
|
||||
%0 = load i32, i32* @var_int, align 4, !tbaa !9
|
||||
ret i32 %0
|
||||
}
|
||||
|
||||
define dso_local void @test_int_two(i32 signext %a) {
|
||||
; CHECK-LABEL: test_int_two:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK: addis 4, 2, var_int@got@tprel@ha
|
||||
; CHECK-NEXT: ld 4, var_int@got@tprel@l(4)
|
||||
; CHECK-NEXT: stwx 3, 4, var_int@tls
|
||||
entry:
|
||||
store i32 %a, i32* @var_int, align 4, !tbaa !9
|
||||
ret void
|
||||
}
|
||||
|
||||
define dso_local signext i32 @test_int_three(i32 signext %a) {
|
||||
; CHECK-LABEL: test_int_three:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK: addis 4, 2, var_int@got@tprel@ha
|
||||
; CHECK-NEXT: ld 4, var_int@got@tprel@l(4)
|
||||
; CHECK-NEXT: lwzx 5, 4, var_int@tls
|
||||
; CHECK: stwx {{[0-9]+}}, 4, var_int@tls
|
||||
entry:
|
||||
%0 = load i32, i32* @var_int, align 4, !tbaa !9
|
||||
%add = add nsw i32 %0, %a
|
||||
store i32 %add, i32* @var_int, align 4, !tbaa !9
|
||||
ret i32 %add
|
||||
}
|
||||
|
||||
define dso_local i64 @test_longlong_one() {
|
||||
; CHECK-LABEL: test_longlong_one:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK: addis 3, 2, var_long_long@got@tprel@ha
|
||||
; CHECK-NEXT: ld 3, var_long_long@got@tprel@l(3)
|
||||
; CHECK-NEXT: ldx 3, 3, var_long_long@tls
|
||||
entry:
|
||||
%0 = load i64, i64* @var_long_long, align 8, !tbaa !11
|
||||
ret i64 %0
|
||||
}
|
||||
|
||||
define dso_local void @test_longlong_two(i32 signext %a) {
|
||||
; CHECK-LABEL: test_longlong_two:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK: addis 4, 2, var_long_long@got@tprel@ha
|
||||
; CHECK-NEXT: ld 4, var_long_long@got@tprel@l(4)
|
||||
; CHECK-NEXT: stdx 3, 4, var_long_long@tls
|
||||
entry:
|
||||
%conv = sext i32 %a to i64
|
||||
store i64 %conv, i64* @var_long_long, align 8, !tbaa !11
|
||||
ret void
|
||||
}
|
||||
|
||||
define dso_local i64 @test_longlong_three(i64 %a) {
|
||||
; CHECK-LABEL: test_longlong_three:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK: addis 4, 2, var_long_long@got@tprel@ha
|
||||
; CHECK-NEXT: ld 4, var_long_long@got@tprel@l(4)
|
||||
; CHECK-NEXT: ldx 5, 4, var_long_long@tls
|
||||
; CHECK: stdx {{[0-9]+}}, 4, var_long_long@tls
|
||||
entry:
|
||||
%0 = load i64, i64* @var_long_long, align 8, !tbaa !11
|
||||
%add = add nsw i64 %0, %a
|
||||
store i64 %add, i64* @var_long_long, align 8, !tbaa !11
|
||||
ret i64 %add
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0, !1, !2}
|
||||
|
||||
!0 = !{i32 1, !"wchar_size", i32 4}
|
||||
!1 = !{i32 7, !"PIC Level", i32 1}
|
||||
!2 = !{i32 7, !"PIE Level", i32 1}
|
||||
!4 = !{!5, !5, i64 0}
|
||||
!5 = !{!"omnipotent char", !6, i64 0}
|
||||
!6 = !{!"Simple C/C++ TBAA"}
|
||||
!7 = !{!8, !8, i64 0}
|
||||
!8 = !{!"short", !5, i64 0}
|
||||
!9 = !{!10, !10, i64 0}
|
||||
!10 = !{!"int", !5, i64 0}
|
||||
!11 = !{!12, !12, i64 0}
|
||||
!12 = !{!"long long", !5, i64 0}
|
Loading…
Reference in New Issue
Block a user