1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 05:01:59 +01:00

[CXX_FAST_TLS] fix issues with O0 on ARM, AArch64 and X86.

Since at O0, explicit copies via SplitCSR may not be removed even if
they are unnecessary, we choose not to use SplitCSR at O0.

llvm-svn: 263855
This commit is contained in:
Manman Ren 2016-03-18 23:38:49 +00:00
parent a213e383a5
commit 73603a411f
6 changed files with 221 additions and 2 deletions

View File

@ -473,7 +473,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// We split CSR if the target supports it for the given function
// and the function has only return exits.
if (TLI->supportSplitCSR(MF)) {
if (OptLevel != CodeGenOpt::None && TLI->supportSplitCSR(MF)) {
FuncInfo->SplitCSR = true;
// Collect all the return blocks.

View File

@ -2469,6 +2469,7 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
case CallingConv::C:
case CallingConv::Fast:
case CallingConv::PreserveMost:
case CallingConv::CXX_FAST_TLS:
if (!Subtarget->isTargetDarwin())
return CC_AArch64_AAPCS;
return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS;

View File

@ -1847,6 +1847,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
}
// Fallthrough
case CallingConv::C:
case CallingConv::CXX_FAST_TLS:
// Use target triple & subtarget features to do actual dispatch.
if (Subtarget->isAAPCS_ABI()) {
if (Subtarget->hasVFP2() &&

View File

@ -3,6 +3,8 @@
; Shrink wrapping currently does not kick in because we have a TLS CALL
; in the entry block and it will clobber the link register.
; RUN: llc < %s -mtriple=aarch64-apple-ios -O0 | FileCheck --check-prefix=CHECK-O0 %s
%struct.S = type { i8 }
@sg = internal thread_local global %struct.S zeroinitializer, align 1
@ -76,6 +78,52 @@ __tls_init.exit:
; CHECK-NOT: ldp d29, d28
; CHECK-NOT: ldp d31, d30
; CHECK-O0-LABEL: _ZTW2sg
; CHECK-O0: stp d31, d30
; CHECK-O0: stp d29, d28
; CHECK-O0: stp d27, d26
; CHECK-O0: stp d25, d24
; CHECK-O0: stp d23, d22
; CHECK-O0: stp d21, d20
; CHECK-O0: stp d19, d18
; CHECK-O0: stp d17, d16
; CHECK-O0: stp d7, d6
; CHECK-O0: stp d5, d4
; CHECK-O0: stp d3, d2
; CHECK-O0: stp d1, d0
; CHECK-O0: stp x14, x13
; CHECK-O0: stp x12, x11
; CHECK-O0: stp x10, x9
; CHECK-O0: stp x8, x7
; CHECK-O0: stp x6, x5
; CHECK-O0: stp x4, x3
; CHECK-O0: stp x2, x1
; CHECK-O0: blr
; CHECK-O0: tbnz w{{.*}}, #0, [[BB_end:.?LBB0_[0-9]+]]
; CHECK-O0: blr
; CHECK-O0: tlv_atexit
; CHECK-O0: [[BB_end]]:
; CHECK-O0: blr
; CHECK-O0: ldp x2, x1
; CHECK-O0: ldp x4, x3
; CHECK-O0: ldp x6, x5
; CHECK-O0: ldp x8, x7
; CHECK-O0: ldp x10, x9
; CHECK-O0: ldp x12, x11
; CHECK-O0: ldp x14, x13
; CHECK-O0: ldp d1, d0
; CHECK-O0: ldp d3, d2
; CHECK-O0: ldp d5, d4
; CHECK-O0: ldp d7, d6
; CHECK-O0: ldp d17, d16
; CHECK-O0: ldp d19, d18
; CHECK-O0: ldp d21, d20
; CHECK-O0: ldp d23, d22
; CHECK-O0: ldp d25, d24
; CHECK-O0: ldp d27, d26
; CHECK-O0: ldp d29, d28
; CHECK-O0: ldp d31, d30
; CHECK-LABEL: _ZTW4sum1
; CHECK-NOT: stp d31, d30
; CHECK-NOT: stp d29, d28
@ -98,6 +146,64 @@ __tls_init.exit:
; CHECK-NOT: stp x4, x3
; CHECK-NOT: stp x2, x1
; CHECK: blr
; NOTE(review): the two negative checks below look for vstr/vldr, which are
; ARM (VFP) mnemonics, while the -O0 llc invocation for this file uses an
; aarch64 triple. They presumably can never match AArch64 output and so add
; no coverage; d-register stp/ldp/str/ldr patterns are probably what was
; intended here -- confirm against the ARM copy of this test.
; CHECK-O0-LABEL: _ZTW4sum1
; CHECK-O0-NOT: vstr
; CHECK-O0-NOT: vldr
; Leaf function using the cxx_fast_tlscc calling convention: it makes no
; calls and simply returns the address of @sum1 (declared nonnull, nounwind).
; Presumably the thread-local wrapper for `sum1` (Itanium `_ZTW` mangling).
define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind {
ret i32* @sum1
}
; Make sure at O0, we don't generate spilling/reloading of the CSRs.
; CHECK-O0-LABEL: tls_test2
; CHECK-O0-NOT: stp d31, d30
; CHECK-O0-NOT: stp d29, d28
; CHECK-O0-NOT: stp d27, d26
; CHECK-O0-NOT: stp d25, d24
; CHECK-O0-NOT: stp d23, d22
; CHECK-O0-NOT: stp d21, d20
; CHECK-O0-NOT: stp d19, d18
; CHECK-O0-NOT: stp d17, d16
; CHECK-O0-NOT: stp d7, d6
; CHECK-O0-NOT: stp d5, d4
; CHECK-O0-NOT: stp d3, d2
; CHECK-O0-NOT: stp d1, d0
; CHECK-O0-NOT: stp x20, x19
; CHECK-O0-NOT: stp x14, x13
; CHECK-O0-NOT: stp x12, x11
; CHECK-O0-NOT: stp x10, x9
; CHECK-O0-NOT: stp x8, x7
; CHECK-O0-NOT: stp x6, x5
; CHECK-O0-NOT: stp x4, x3
; CHECK-O0-NOT: stp x2, x1
; CHECK-O0: bl {{.*}}tls_helper
; CHECK-O0-NOT: ldp x2, x1
; CHECK-O0-NOT: ldp x4, x3
; CHECK-O0-NOT: ldp x6, x5
; CHECK-O0-NOT: ldp x8, x7
; CHECK-O0-NOT: ldp x10, x9
; CHECK-O0-NOT: ldp x12, x11
; CHECK-O0-NOT: ldp x14, x13
; CHECK-O0-NOT: ldp x20, x19
; CHECK-O0-NOT: ldp d1, d0
; CHECK-O0-NOT: ldp d3, d2
; CHECK-O0-NOT: ldp d5, d4
; CHECK-O0-NOT: ldp d7, d6
; CHECK-O0-NOT: ldp d17, d16
; CHECK-O0-NOT: ldp d19, d18
; CHECK-O0-NOT: ldp d21, d20
; CHECK-O0-NOT: ldp d23, d22
; CHECK-O0-NOT: ldp d25, d24
; CHECK-O0-NOT: ldp d27, d26
; CHECK-O0-NOT: ldp d29, d28
; CHECK-O0-NOT: ldp d31, d30
; CHECK-O0: ret
%class.C = type { i32 }
@tC = internal thread_local global %class.C zeroinitializer, align 4
declare cxx_fast_tlscc void @tls_helper()
; cxx_fast_tlscc function that calls another cxx_fast_tlscc function
; (@tls_helper, declared only) and then returns the address of the
; thread_local global @tC. Attribute group #1 is just `nounwind`.
; The O0 assertions above this function expect no stp/ldp spill or reload of
; the listed register pairs around the call to tls_helper.
define cxx_fast_tlscc %class.C* @tls_test2() #1 {
call cxx_fast_tlscc void @tls_helper()
ret %class.C* @tC
}
attributes #0 = { nounwind "no-frame-pointer-elim"="true" }
attributes #1 = { nounwind }

View File

@ -3,6 +3,9 @@
; RUN: llc < %s -mtriple=armv7-apple-ios8.0 | FileCheck %s
; RUN: llc < %s -mtriple=armv7-apple-ios8.0 -enable-shrink-wrap=true | FileCheck --check-prefix=CHECK %s
; RUN: llc < %s -mtriple=armv7k-apple-watchos2.0 -O0 | FileCheck --check-prefix=CHECK-O0 --check-prefix=WATCH-O0 %s
; RUN: llc < %s -mtriple=armv7-apple-ios8.0 -O0 | FileCheck --check-prefix=CHECK-O0 --check-prefix=IOS-O0 %s
%struct.S = type { i8 }
@sg = internal thread_local global %struct.S zeroinitializer, align 1
@ -10,6 +13,9 @@
@__tls_guard = internal thread_local unnamed_addr global i1 false
@sum1 = internal thread_local global i32 0, align 4
%class.C = type { i32 }
@tC = internal thread_local global %class.C zeroinitializer, align 4
declare %struct.S* @_ZN1SC1Ev(%struct.S* returned)
declare %struct.S* @_ZN1SD1Ev(%struct.S* returned)
declare i32 @_tlv_atexit(void (i8*)*, i8*, i8*)
@ -36,7 +42,7 @@ __tls_init.exit:
; CHECK-NOT: vpush {d0, d1, d2, d3, d4, d5, d6, d7}
; CHECK: blx
; CHECK: bne [[BB_end:.?LBB0_[0-9]+]]
; CHECK; blx
; CHECK: blx
; CHECK: tlv_atexit
; CHECK: [[BB_end]]:
; CHECK: blx
@ -46,12 +52,54 @@ __tls_init.exit:
; CHECK-NOT: pop {r1, r2, r3, r4, r7, pc}
; CHECK: pop {lr}
; CHECK-O0-LABEL: _ZTW2sg
; WATCH-O0: push {r1, r2, r3, r6, r7, lr}
; IOS-O0: push {r1, r2, r3, r7, lr}
; CHECK-O0: push {r9, r12}
; CHECK-O0: vpush {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
; CHECK-O0: vpush {d0, d1, d2, d3, d4, d5, d6, d7}
; CHECK-O0: blx
; CHECK-O0: bne [[BB_end:.?LBB0_[0-9]+]]
; CHECK-O0: blx
; CHECK-O0: tlv_atexit
; CHECK-O0: [[BB_end]]:
; CHECK-O0: blx
; CHECK-O0: vpop {d0, d1, d2, d3, d4, d5, d6, d7}
; CHECK-O0: vpop {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
; CHECK-O0: pop {r9, r12}
; WATCH-O0: pop {r1, r2, r3, r6, r7, pc}
; IOS-O0: pop {r1, r2, r3, r7, pc}
; CHECK-LABEL: _ZTW4sum1
; CHECK-NOT: push {r1, r2, r3, r4, r7, lr}
; CHECK-NOT: push {r9, r12}
; CHECK-NOT: vpush {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
; CHECK-NOT: vpush {d0, d1, d2, d3, d4, d5, d6, d7}
; CHECK: blx
; CHECK-O0-LABEL: _ZTW4sum1
; CHECK-O0-NOT: vpush
; CHECK-O0-NOT: vstr
; CHECK-O0-NOT: vpop
; CHECK-O0-NOT: vldr
; CHECK-O0: pop
; Leaf function using the cxx_fast_tlscc calling convention: it makes no
; calls and simply returns the address of the thread_local i32 @sum1
; (declared nonnull, nounwind). Presumably the thread-local wrapper for
; `sum1` (Itanium `_ZTW` mangling).
define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind {
ret i32* @sum1
}
; Make sure at O0, we don't generate spilling/reloading of the CSRs.
; CHECK-O0-LABEL: tls_test2
; CHECK-O0: push
; CHECK-O0-NOT: vpush
; CHECK-O0-NOT: vstr
; CHECK-O0: tls_helper
; CHECK-O0-NOT: vpop
; CHECK-O0-NOT: vldr
; CHECK-O0: pop
declare cxx_fast_tlscc void @tls_helper()
; cxx_fast_tlscc function that calls another cxx_fast_tlscc function
; (@tls_helper, declared only) and then returns the address of the
; thread_local global @tC. Attribute group #1 is just `nounwind`.
; The O0 assertions above expect a plain push/pop around the call but no
; vpush/vstr/vpop/vldr, i.e. no save/restore of the VFP d-registers.
define cxx_fast_tlscc %class.C* @tls_test2() #1 {
call cxx_fast_tlscc void @tls_helper()
ret %class.C* @tC
}
attributes #0 = { nounwind "no-frame-pointer-elim"="true" }
attributes #1 = { nounwind }

View File

@ -39,6 +39,27 @@ declare i32 @_tlv_atexit(void (i8*)*, i8*, i8*)
; CHECK-NOT: popq %r9
; CHECK-NOT: popq %r10
; CHECK-NOT: popq %r11
; CHECK-O0-LABEL: _ZTW2sg
; CHECK-O0: pushq %r11
; CHECK-O0: pushq %r10
; CHECK-O0: pushq %r9
; CHECK-O0: pushq %r8
; CHECK-O0: pushq %rsi
; CHECK-O0: pushq %rdx
; CHECK-O0: pushq %rcx
; CHECK-O0: callq
; CHECK-O0: jne
; CHECK-O0: callq
; CHECK-O0: tlv_atexit
; CHECK-O0: callq
; CHECK-O0: popq %rcx
; CHECK-O0: popq %rdx
; CHECK-O0: popq %rsi
; CHECK-O0: popq %r8
; CHECK-O0: popq %r9
; CHECK-O0: popq %r10
; CHECK-O0: popq %r11
define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() nounwind {
%.b.i = load i1, i1* @__tls_guard, align 1
br i1 %.b.i, label %__tls_init.exit, label %init.i
@ -63,6 +84,24 @@ __tls_init.exit:
; CHECK-NOT: pushq %rcx
; CHECK-NOT: pushq %rbx
; CHECK: callq
; CHECK-O0-LABEL: _ZTW4sum1
; CHECK-O0-NOT: pushq %r11
; CHECK-O0-NOT: pushq %r10
; CHECK-O0-NOT: pushq %r9
; CHECK-O0-NOT: pushq %r8
; CHECK-O0-NOT: pushq %rsi
; CHECK-O0-NOT: pushq %rdx
; CHECK-O0-NOT: pushq %rcx
; CHECK-O0-NOT: pushq %rbx
; CHECK-O0-NOT: movq %r11
; CHECK-O0-NOT: movq %r10
; CHECK-O0-NOT: movq %r9
; CHECK-O0-NOT: movq %r8
; CHECK-O0-NOT: movq %rsi
; CHECK-O0-NOT: movq %rdx
; CHECK-O0-NOT: movq %rcx
; CHECK-O0-NOT: movq %rbx
; CHECK-O0: callq
; Leaf function using the cxx_fast_tlscc calling convention: it makes no
; calls in the IR and simply returns the address of @sum1 (declared nonnull,
; nounwind). Presumably the thread-local wrapper for `sum1` (Itanium `_ZTW`
; mangling). The O0 assertions above expect no pushq/movq of the listed
; registers before the callq that appears in the generated code.
define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind {
ret i32* @sum1
}
@ -76,4 +115,28 @@ define cxx_fast_tlscc i32* @_ZTW4sum2() #0 {
ret i32* @sum1
}
; Make sure at O0, we don't generate spilling/reloading of the CSRs.
; CHECK-O0-LABEL: tls_test2
; CHECK-O0-NOT: pushq %r11
; CHECK-O0-NOT: pushq %r10
; CHECK-O0-NOT: pushq %r9
; CHECK-O0-NOT: pushq %r8
; CHECK-O0-NOT: pushq %rsi
; CHECK-O0-NOT: pushq %rdx
; CHECK-O0: callq {{.*}}tls_helper
; CHECK-O0-NOT: popq %rdx
; CHECK-O0-NOT: popq %rsi
; CHECK-O0-NOT: popq %r8
; CHECK-O0-NOT: popq %r9
; CHECK-O0-NOT: popq %r10
; CHECK-O0-NOT: popq %r11
; CHECK-O0: ret
%class.C = type { i32 }
@tC = internal thread_local global %class.C zeroinitializer, align 4
declare cxx_fast_tlscc void @tls_helper()
; cxx_fast_tlscc function that calls another cxx_fast_tlscc function
; (@tls_helper, declared only) and then returns the address of the
; thread_local global @tC. Attribute group #1 is just `nounwind`.
; The O0 assertions above expect no pushq/popq of the listed registers
; around the call to tls_helper.
define cxx_fast_tlscc %class.C* @tls_test2() #1 {
call cxx_fast_tlscc void @tls_helper()
ret %class.C* @tC
}
attributes #0 = { nounwind "no-frame-pointer-elim"="true" }
attributes #1 = { nounwind }