1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 19:23:23 +01:00

[PowerPC] Optimize TLS initial-exec sequence to use X-Form loads/stores

This patch adds new load/store instructions for integer scalar types
which can be used for X-Form when fed by add with an @tls relocation.

Differential Revision: https://reviews.llvm.org/D43315

llvm-svn: 327635
This commit is contained in:
Zaara Syeda 2018-03-15 15:34:41 +00:00
parent 9dafbecd2e
commit 840541d258
3 changed files with 324 additions and 2 deletions

View File

@ -101,6 +101,11 @@ static cl::opt<bool> EnableBranchHint(
cl::desc("Enable static hinting of branches on ppc"), cl::desc("Enable static hinting of branches on ppc"),
cl::Hidden); cl::Hidden);
// Hidden command-line option "ppc-tls-opt" (defaults to true). It gates the
// peephole that turns TLS loads/stores fed by PPCISD::ADD_TLS into X-Form
// instructions.
static cl::opt<bool> EnableTLSOpt(
"ppc-tls-opt", cl::init(true),
cl::desc("Enable tls optimization peephole"),
cl::Hidden);
enum ICmpInGPRType { ICGPR_All, ICGPR_None, ICGPR_I32, ICGPR_I64, enum ICmpInGPRType { ICGPR_All, ICGPR_None, ICGPR_I32, ICGPR_I64,
ICGPR_NonExtIn, ICGPR_Zext, ICGPR_Sext, ICGPR_ZextI32, ICGPR_NonExtIn, ICGPR_Zext, ICGPR_Sext, ICGPR_ZextI32,
ICGPR_SextI32, ICGPR_ZextI64, ICGPR_SextI64 }; ICGPR_SextI32, ICGPR_ZextI64, ICGPR_SextI64 };
@ -199,6 +204,14 @@ namespace {
bool tryBitPermutation(SDNode *N); bool tryBitPermutation(SDNode *N);
bool tryIntCompareInGPR(SDNode *N); bool tryIntCompareInGPR(SDNode *N);
/// tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
/// an X-Form load instruction with the offset being a relocation coming from
/// the PPCISD::ADD_TLS. Returns true if the load was replaced by a machine
/// node, false if it was left untouched.
bool tryTLSXFormLoad(LoadSDNode *N);
/// tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
/// an X-Form store instruction with the offset being a relocation coming from
/// the PPCISD::ADD_TLS. Returns true if the store was replaced by a machine
/// node, false if it was left untouched.
bool tryTLSXFormStore(StoreSDNode *N);
/// SelectCC - Select a comparison of the specified values with the /// SelectCC - Select a comparison of the specified values with the
/// specified condition code, returning the CR# of the expression. /// specified condition code, returning the CR# of the expression.
SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
@ -582,6 +595,90 @@ bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
return false; return false;
} }
/// Try to select a store whose address is produced by a PPCISD::ADD_TLS node
/// as a single X-Form TLS store, using the two ADD_TLS operands directly as
/// the register and TLS-symbol operands of the store.
///
/// \param ST the store node being selected.
/// \returns true if \p ST was replaced by a machine node; false if the store
///          does not match (address is not an ADD_TLS, the store has an
///          offset operand, or the memory type is not i8/i16/i32/i64).
bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
  // Only stores addressed through ADD_TLS with no separate offset qualify.
  const SDValue TLSAdd = ST->getBasePtr();
  if (TLSAdd.getOpcode() != PPCISD::ADD_TLS || !ST->getOffset().isUndef())
    return false;

  // The register class of the stored value picks between the 64-bit opcode
  // and its _32 twin.
  const SDValue StoredVal = ST->getValue();
  const bool Is32BitSrc = StoredVal.getValueType() == MVT::i32;

  unsigned XFormOpc;
  switch (ST->getMemoryVT().getSimpleVT().SimpleTy) {
  case MVT::i8:
    XFormOpc = Is32BitSrc ? PPC::STBXTLS_32 : PPC::STBXTLS;
    break;
  case MVT::i16:
    XFormOpc = Is32BitSrc ? PPC::STHXTLS_32 : PPC::STHXTLS;
    break;
  case MVT::i32:
    XFormOpc = Is32BitSrc ? PPC::STWXTLS_32 : PPC::STWXTLS;
    break;
  case MVT::i64:
    XFormOpc = PPC::STDXTLS;
    break;
  default:
    // Any other memory type is left to the regular selection patterns.
    return false;
  }

  // Operand order: value to store, ADD_TLS operand 0, ADD_TLS operand 1,
  // then the chain.
  const SDLoc Loc(ST);
  SDValue StoreOps[] = {StoredVal, TLSAdd.getOperand(0), TLSAdd.getOperand(1),
                        ST->getChain()};
  SDNode *XFormStore =
      CurDAG->getMachineNode(XFormOpc, Loc, ST->getVTList(), StoreOps);
  transferMemOperands(ST, XFormStore);
  ReplaceNode(ST, XFormStore);
  return true;
}
/// Try to select a load whose address is produced by a PPCISD::ADD_TLS node
/// as a single X-Form TLS load, using the two ADD_TLS operands directly as
/// the register and TLS-symbol operands of the load.
///
/// \param LD the load node being selected.
/// \returns true if \p LD was replaced by a machine node; false if the load
///          does not match (address is not an ADD_TLS, the load has an offset
///          operand, it is a sign-extending load, or the memory type is not
///          i8/i16/i32/i64).
bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
  SDValue Base = LD->getBasePtr();
  if (Base.getOpcode() != PPCISD::ADD_TLS)
    return false;
  SDValue Offset = LD->getOffset();
  if (!Offset.isUndef())
    return false;

  // The narrow opcodes chosen below (lbzx/lhzx/lwzx) all zero-extend the
  // loaded value. Using one of them for a sign-extending load would silently
  // drop the sign extension, so leave such loads to the regular selection
  // patterns, which pick a sign-extending instruction.
  if (LD->getExtensionType() == ISD::SEXTLOAD)
    return false;

  SDLoc dl(LD);
  EVT MemVT = LD->getMemoryVT();
  EVT RegVT = LD->getValueType(0);

  // The memory type selects the access width; the result type selects between
  // the 64-bit opcode and its _32 twin.
  unsigned Opcode;
  switch (MemVT.getSimpleVT().SimpleTy) {
  default:
    return false;
  case MVT::i8:
    Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
    break;
  case MVT::i16:
    Opcode = (RegVT == MVT::i32) ? PPC::LHZXTLS_32 : PPC::LHZXTLS;
    break;
  case MVT::i32:
    Opcode = (RegVT == MVT::i32) ? PPC::LWZXTLS_32 : PPC::LWZXTLS;
    break;
  case MVT::i64:
    Opcode = PPC::LDXTLS;
    break;
  }

  // Operand order: ADD_TLS operand 0, ADD_TLS operand 1, then the chain.
  SDValue Chain = LD->getChain();
  SDVTList VTs = LD->getVTList();
  SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain};
  SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
  transferMemOperands(LD, MN);
  ReplaceNode(LD, MN);
  return true;
}
/// Turn an or of two masked values into the rotate left word immediate then /// Turn an or of two masked values into the rotate left word immediate then
/// mask insert (rlwimi) instruction. /// mask insert (rlwimi) instruction.
bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) { bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
@ -3949,14 +4046,28 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
} }
} }
case ISD::STORE: {
// Change TLS initial-exec D-form stores to X-form stores.
StoreSDNode *ST = cast<StoreSDNode>(N);
if (EnableTLSOpt && PPCSubTarget->isELFv2ABI() &&
ST->getAddressingMode() != ISD::PRE_INC)
if (tryTLSXFormStore(ST))
return;
break;
}
case ISD::LOAD: { case ISD::LOAD: {
// Handle preincrement loads. // Handle preincrement loads.
LoadSDNode *LD = cast<LoadSDNode>(N); LoadSDNode *LD = cast<LoadSDNode>(N);
EVT LoadedVT = LD->getMemoryVT(); EVT LoadedVT = LD->getMemoryVT();
// Normal loads are handled by code generated from the .td file. // Normal loads are handled by code generated from the .td file.
if (LD->getAddressingMode() != ISD::PRE_INC) if (LD->getAddressingMode() != ISD::PRE_INC) {
// Change TLS initial-exec D-form loads to X-form loads.
if (EnableTLSOpt && PPCSubTarget->isELFv2ABI())
if (tryTLSXFormLoad(LD))
return;
break; break;
}
SDValue Offset = LD->getOffset(); SDValue Offset = LD->getOffset();
if (Offset.getOpcode() == ISD::TargetConstant || if (Offset.getOpcode() == ISD::TargetConstant ||

View File

@ -499,7 +499,49 @@ defm ADD8 : XOForm_1r<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
def ADD8TLS : XOForm_1<31, 266, 0, (outs g8rc:$rT), (ins g8rc_nox0:$rA, tlsreg:$rB), def ADD8TLS : XOForm_1<31, 266, 0, (outs g8rc:$rT), (ins g8rc_nox0:$rA, tlsreg:$rB),
"add $rT, $rA, $rB", IIC_IntSimple, "add $rT, $rA, $rB", IIC_IntSimple,
[(set i64:$rT, (add i64:$rA, tglobaltlsaddr:$rB))]>; [(set i64:$rT, (add i64:$rA, tglobaltlsaddr:$rB))]>;
// X-Form loads for the TLS peephole: $rB is a tlsreg operand instead of a
// general-purpose register. The pattern lists are empty, so these are never
// matched automatically; the instruction selector creates them explicitly.
// Only the zero-extending mnemonics (lbzx/lhzx/lwzx) and ldx are defined here.
// The *_32 variants produce a gprc result instead of g8rc.
let mayLoad = 1 in {
def LBZXTLS : XForm_1<31, 87, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
"lbzx $rD, $rA, $rB", IIC_LdStLoad, []>;
def LHZXTLS : XForm_1<31, 279, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
"lhzx $rD, $rA, $rB", IIC_LdStLoad, []>;
def LWZXTLS : XForm_1<31, 23, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
"lwzx $rD, $rA, $rB", IIC_LdStLoad, []>;
def LDXTLS : XForm_1<31, 21, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
"ldx $rD, $rA, $rB", IIC_LdStLD, []>, isPPC64;
// Same encodings as above, but with a 32-bit (gprc) destination register.
def LBZXTLS_32 : XForm_1<31, 87, (outs gprc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
"lbzx $rD, $rA, $rB", IIC_LdStLoad, []>;
def LHZXTLS_32 : XForm_1<31, 279, (outs gprc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
"lhzx $rD, $rA, $rB", IIC_LdStLoad, []>;
def LWZXTLS_32 : XForm_1<31, 23, (outs gprc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
"lwzx $rD, $rA, $rB", IIC_LdStLoad, []>;
}
// X-Form stores for the TLS peephole: $rB is a tlsreg operand instead of a
// general-purpose register. The pattern lists are empty, so these are never
// matched automatically; the instruction selector creates them explicitly.
// The *_32 variants take the stored value from a gprc register instead of g8rc.
let mayStore = 1 in {
def STBXTLS : XForm_8<31, 215, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
"stbx $rS, $rA, $rB", IIC_LdStStore, []>,
PPC970_DGroup_Cracked;
def STHXTLS : XForm_8<31, 407, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
"sthx $rS, $rA, $rB", IIC_LdStStore, []>,
PPC970_DGroup_Cracked;
def STWXTLS : XForm_8<31, 151, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
"stwx $rS, $rA, $rB", IIC_LdStStore, []>,
PPC970_DGroup_Cracked;
def STDXTLS : XForm_8<31, 149, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
"stdx $rS, $rA, $rB", IIC_LdStSTD, []>, isPPC64,
PPC970_DGroup_Cracked;
// Same encodings as above, but with a 32-bit (gprc) source register.
def STBXTLS_32 : XForm_8<31, 215, (outs), (ins gprc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
"stbx $rS, $rA, $rB", IIC_LdStStore, []>,
PPC970_DGroup_Cracked;
def STHXTLS_32 : XForm_8<31, 407, (outs), (ins gprc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
"sthx $rS, $rA, $rB", IIC_LdStStore, []>,
PPC970_DGroup_Cracked;
def STWXTLS_32 : XForm_8<31, 151, (outs), (ins gprc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
"stwx $rS, $rA, $rB", IIC_LdStStore, []>,
PPC970_DGroup_Cracked;
}
let isCommutable = 1 in let isCommutable = 1 in
defm ADDC8 : XOForm_1rc<31, 10, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), defm ADDC8 : XOForm_1rc<31, 10, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
"addc", "$rT, $rA, $rB", IIC_IntGeneral, "addc", "$rT, $rA, $rB", IIC_IntGeneral,

View File

@ -0,0 +1,169 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-gnu-linux < %s | FileCheck %s -check-prefix=CHECK
; External thread-local globals, one per integer width under test
; (i8, i16, i32, i64). The expected code for each access is an
; addis/ld pair using @got@tprel, followed by an X-Form load or store whose
; last operand is the variable's @tls symbol.
@var_char = external thread_local local_unnamed_addr global i8, align 1
@var_short = external thread_local local_unnamed_addr global i16, align 2
@var_int = external thread_local local_unnamed_addr global i32, align 4
@var_long_long = external thread_local local_unnamed_addr global i64, align 8
; i8 load from a TLS variable, returned zero-extended: expects lbzx with the
; @tls operand.
define dso_local zeroext i8 @test_char_one() {
; CHECK-LABEL: test_char_one:
; CHECK: # %bb.0: # %entry
; CHECK: addis 3, 2, var_char@got@tprel@ha
; CHECK-NEXT: ld 3, var_char@got@tprel@l(3)
; CHECK-NEXT: lbzx 3, 3, var_char@tls
entry:
%0 = load i8, i8* @var_char, align 1, !tbaa !4
ret i8 %0
}
; Truncating i32 -> i8 store to a TLS variable: expects stbx with the @tls
; operand.
define dso_local void @test_char_two(i32 signext %a) {
; CHECK-LABEL: test_char_two:
; CHECK: # %bb.0: # %entry
; CHECK: addis 4, 2, var_char@got@tprel@ha
; CHECK-NEXT: ld 4, var_char@got@tprel@l(4)
; CHECK-NEXT: stbx 3, 4, var_char@tls
entry:
%conv = trunc i32 %a to i8
store i8 %conv, i8* @var_char, align 1, !tbaa !4
ret void
}
; Load-add-store on the i8 TLS variable: the load and the store are both
; expected in X-Form and to share the base register computed once.
define dso_local zeroext i8 @test_char_three(i8 zeroext %a) {
; CHECK-LABEL: test_char_three:
; CHECK: # %bb.0: # %entry
; CHECK: addis 4, 2, var_char@got@tprel@ha
; CHECK-NEXT: ld 4, var_char@got@tprel@l(4)
; CHECK-NEXT: lbzx 5, 4, var_char@tls
; CHECK: stbx {{[0-9]+}}, 4, var_char@tls
entry:
%0 = load i8, i8* @var_char, align 1, !tbaa !4
%add = add i8 %0, %a
store i8 %add, i8* @var_char, align 1, !tbaa !4
ret i8 %add
}
; i16 load from a TLS variable: expects lhzx with the @tls operand.
; NOTE(review): the result is returned as signext, but lhzx zero-extends the
; halfword and nothing after the load is verified here. Confirm that the
; sign extension is still performed in the generated code.
define dso_local signext i16 @test_short_one() {
; CHECK-LABEL: test_short_one:
; CHECK: # %bb.0: # %entry
; CHECK: addis 3, 2, var_short@got@tprel@ha
; CHECK-NEXT: ld 3, var_short@got@tprel@l(3)
; CHECK-NEXT: lhzx 3, 3, var_short@tls
entry:
%0 = load i16, i16* @var_short, align 2, !tbaa !7
ret i16 %0
}
; Truncating i32 -> i16 store to a TLS variable: expects sthx with the @tls
; operand.
define dso_local void @test_short_two(i32 signext %a) {
; CHECK-LABEL: test_short_two:
; CHECK: # %bb.0: # %entry
; CHECK: addis 4, 2, var_short@got@tprel@ha
; CHECK-NEXT: ld 4, var_short@got@tprel@l(4)
; CHECK-NEXT: sthx 3, 4, var_short@tls
entry:
%conv = trunc i32 %a to i16
store i16 %conv, i16* @var_short, align 2, !tbaa !7
ret void
}
; Load-add-store on the i16 TLS variable: the load and the store are both
; expected in X-Form and to share the base register computed once.
define dso_local signext i16 @test_short_three(i16 signext %a) {
; CHECK-LABEL: test_short_three:
; CHECK: # %bb.0: # %entry
; CHECK: addis 4, 2, var_short@got@tprel@ha
; CHECK-NEXT: ld 4, var_short@got@tprel@l(4)
; CHECK-NEXT: lhzx 5, 4, var_short@tls
; CHECK: sthx {{[0-9]+}}, 4, var_short@tls
entry:
%0 = load i16, i16* @var_short, align 2, !tbaa !7
%add = add i16 %0, %a
store i16 %add, i16* @var_short, align 2, !tbaa !7
ret i16 %add
}
; i32 load from a TLS variable: expects lwzx with the @tls operand.
; NOTE(review): the result is returned as signext, but lwzx zero-extends the
; word and nothing after the load is verified here. Confirm that the sign
; extension is still performed in the generated code.
define dso_local signext i32 @test_int_one() {
; CHECK-LABEL: test_int_one:
; CHECK: # %bb.0: # %entry
; CHECK: addis 3, 2, var_int@got@tprel@ha
; CHECK-NEXT: ld 3, var_int@got@tprel@l(3)
; CHECK-NEXT: lwzx 3, 3, var_int@tls
entry:
%0 = load i32, i32* @var_int, align 4, !tbaa !9
ret i32 %0
}
; i32 store to a TLS variable: expects stwx with the @tls operand.
define dso_local void @test_int_two(i32 signext %a) {
; CHECK-LABEL: test_int_two:
; CHECK: # %bb.0: # %entry
; CHECK: addis 4, 2, var_int@got@tprel@ha
; CHECK-NEXT: ld 4, var_int@got@tprel@l(4)
; CHECK-NEXT: stwx 3, 4, var_int@tls
entry:
store i32 %a, i32* @var_int, align 4, !tbaa !9
ret void
}
; Load-add-store on the i32 TLS variable: the load and the store are both
; expected in X-Form and to share the base register computed once.
define dso_local signext i32 @test_int_three(i32 signext %a) {
; CHECK-LABEL: test_int_three:
; CHECK: # %bb.0: # %entry
; CHECK: addis 4, 2, var_int@got@tprel@ha
; CHECK-NEXT: ld 4, var_int@got@tprel@l(4)
; CHECK-NEXT: lwzx 5, 4, var_int@tls
; CHECK: stwx {{[0-9]+}}, 4, var_int@tls
entry:
%0 = load i32, i32* @var_int, align 4, !tbaa !9
%add = add nsw i32 %0, %a
store i32 %add, i32* @var_int, align 4, !tbaa !9
ret i32 %add
}
; i64 load from a TLS variable: expects ldx with the @tls operand.
define dso_local i64 @test_longlong_one() {
; CHECK-LABEL: test_longlong_one:
; CHECK: # %bb.0: # %entry
; CHECK: addis 3, 2, var_long_long@got@tprel@ha
; CHECK-NEXT: ld 3, var_long_long@got@tprel@l(3)
; CHECK-NEXT: ldx 3, 3, var_long_long@tls
entry:
%0 = load i64, i64* @var_long_long, align 8, !tbaa !11
ret i64 %0
}
; Store of a sign-extended i32 argument to the i64 TLS variable: expects stdx
; with the @tls operand.
define dso_local void @test_longlong_two(i32 signext %a) {
; CHECK-LABEL: test_longlong_two:
; CHECK: # %bb.0: # %entry
; CHECK: addis 4, 2, var_long_long@got@tprel@ha
; CHECK-NEXT: ld 4, var_long_long@got@tprel@l(4)
; CHECK-NEXT: stdx 3, 4, var_long_long@tls
entry:
%conv = sext i32 %a to i64
store i64 %conv, i64* @var_long_long, align 8, !tbaa !11
ret void
}
; Load-add-store on the i64 TLS variable: the load and the store are both
; expected in X-Form and to share the base register computed once.
define dso_local i64 @test_longlong_three(i64 %a) {
; CHECK-LABEL: test_longlong_three:
; CHECK: # %bb.0: # %entry
; CHECK: addis 4, 2, var_long_long@got@tprel@ha
; CHECK-NEXT: ld 4, var_long_long@got@tprel@l(4)
; CHECK-NEXT: ldx 5, 4, var_long_long@tls
; CHECK: stdx {{[0-9]+}}, 4, var_long_long@tls
entry:
%0 = load i64, i64* @var_long_long, align 8, !tbaa !11
%add = add nsw i64 %0, %a
store i64 %add, i64* @var_long_long, align 8, !tbaa !11
ret i64 %add
}
; Module flags (wchar_size, PIC Level, PIE Level), followed by the TBAA nodes
; referenced by the !tbaa attachments on the loads and stores above.
!llvm.module.flags = !{!0, !1, !2}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"PIC Level", i32 1}
!2 = !{i32 7, !"PIE Level", i32 1}
!4 = !{!5, !5, i64 0}
!5 = !{!"omnipotent char", !6, i64 0}
!6 = !{!"Simple C/C++ TBAA"}
!7 = !{!8, !8, i64 0}
!8 = !{!"short", !5, i64 0}
!9 = !{!10, !10, i64 0}
!10 = !{!"int", !5, i64 0}
!11 = !{!12, !12, i64 0}
!12 = !{!"long long", !5, i64 0}