mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-23 13:02:52 +02:00
eb996ef33b
Summary: This change extends MachineCopyPropagation to do COPY source forwarding and adds an additional run of the pass to the default pass pipeline just after register allocation. This version of this patch uses the newly added MachineOperand::isRenamable bit to avoid forwarding registers in such a way as to violate constraints that aren't captured in the Machine IR (e.g. ABI or ISA constraints). This change is a continuation of the work started in D30751. Reviewers: qcolombet, javed.absar, MatzeB, jonpa, tstellar Subscribers: tpr, mgorny, mcrosier, nhaehnle, nemanjai, jyknight, hfinkel, arsenm, inouehrs, eraman, sdardis, guyblank, fedor.sergeev, aheejin, dschuff, jfb, myatsina, llvm-commits Differential Revision: https://reviews.llvm.org/D41835 llvm-svn: 323991
368 lines
13 KiB
LLVM
368 lines
13 KiB
LLVM
; RUN: llc -mtriple=x86_64-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64
|
|
; RUN: llc -mtriple=x86_64-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64FAST
|
|
|
|
; RUN: llc -mtriple=i686-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86
|
|
; RUN: llc -mtriple=i686-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86FAST
|
|
|
|
; External function taking one i32; it is the direct-call target that
; @icall_reg interleaves with its indirect calls.
declare void @bar(i32)
|
|
|
|
; Test a simple indirect call and tail call.
|
|
; Indirect call through a function pointer that arrives as an argument.
;
; The body alternates a direct call to @bar with an indirect call through
; %fp, twice over. Every call carries the `tail` marker, but only the final
; indirect call is immediately followed by `ret void`.
;
;   %fp - function pointer invoked indirectly (void (i32)*)
;   %x  - value forwarded unchanged to every call
;
; Attribute group #0 enables the "+retpoline" target feature for this function.
define void @icall_reg(void (i32)* %fp, i32 %x) #0 {
entry:
  tail call void @bar(i32 %x)
  tail call void %fp(i32 %x)
  tail call void @bar(i32 %x)
  tail call void %fp(i32 %x)
  ret void
}
|
|
|
|
; X64-LABEL: icall_reg:
|
|
; X64-DAG: movq %rdi, %[[fp:[^ ]*]]
|
|
; X64-DAG: movl %esi, %[[x:[^ ]*]]
|
|
; X64: movl %esi, %edi
|
|
; X64: callq bar
|
|
; X64-DAG: movl %[[x]], %edi
|
|
; X64-DAG: movq %[[fp]], %r11
|
|
; X64: callq __llvm_retpoline_r11
|
|
; X64: movl %[[x]], %edi
|
|
; X64: callq bar
|
|
; X64-DAG: movl %[[x]], %edi
|
|
; X64-DAG: movq %[[fp]], %r11
|
|
; X64: jmp __llvm_retpoline_r11 # TAILCALL
|
|
|
|
; X64FAST-LABEL: icall_reg:
|
|
; X64FAST: callq bar
|
|
; X64FAST: callq __llvm_retpoline_r11
|
|
; X64FAST: callq bar
|
|
; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL
|
|
|
|
; X86-LABEL: icall_reg:
|
|
; X86-DAG: movl 12(%esp), %[[fp:[^ ]*]]
|
|
; X86-DAG: movl 16(%esp), %[[x:[^ ]*]]
|
|
; X86: pushl %[[x]]
|
|
; X86: calll bar
|
|
; X86: movl %[[fp]], %eax
|
|
; X86: pushl %[[x]]
|
|
; X86: calll __llvm_retpoline_eax
|
|
; X86: pushl %[[x]]
|
|
; X86: calll bar
|
|
; X86: movl %[[fp]], %eax
|
|
; X86: pushl %[[x]]
|
|
; X86: calll __llvm_retpoline_eax
|
|
; X86-NOT: # TAILCALL
|
|
|
|
; X86FAST-LABEL: icall_reg:
|
|
; X86FAST: calll bar
|
|
; X86FAST: calll __llvm_retpoline_eax
|
|
; X86FAST: calll bar
|
|
; X86FAST: calll __llvm_retpoline_eax
|
|
|
|
|
|
; External global that holds a pointer to a void(i32) function; it is loaded
; by @icall_global_fp before each of its indirect calls.
@global_fp = external global void (i32)*
|
|
|
|
; Test an indirect call through a global variable.
|
|
; Indirect call through a function pointer loaded from the global @global_fp.
;
; The pointer is reloaded from the global before each call. The first use is
; a plain `call`; the second is a `tail call` directly followed by `ret void`.
;
;   %x   - value forwarded to both calls
;   %fpp - not referenced in the body; kept so the signature stays unchanged
;
; Attribute group #0 enables the "+retpoline" target feature for this function.
define void @icall_global_fp(i32 %x, void (i32)** %fpp) #0 {
  %fp1 = load void (i32)*, void (i32)** @global_fp
  call void %fp1(i32 %x)
  %fp2 = load void (i32)*, void (i32)** @global_fp
  tail call void %fp2(i32 %x)
  ret void
}
|
|
|
|
; X64-LABEL: icall_global_fp:
|
|
; X64-DAG: movl %edi, %[[x:[^ ]*]]
|
|
; X64-DAG: movq global_fp(%rip), %r11
|
|
; X64: callq __llvm_retpoline_r11
|
|
; X64-DAG: movl %[[x]], %edi
|
|
; X64-DAG: movq global_fp(%rip), %r11
|
|
; X64: jmp __llvm_retpoline_r11 # TAILCALL
|
|
|
|
; X64FAST-LABEL: icall_global_fp:
|
|
; X64FAST: movq global_fp(%rip), %r11
|
|
; X64FAST: callq __llvm_retpoline_r11
|
|
; X64FAST: movq global_fp(%rip), %r11
|
|
; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL
|
|
|
|
; X86-LABEL: icall_global_fp:
|
|
; X86: movl global_fp, %eax
|
|
; X86: pushl 4(%esp)
|
|
; X86: calll __llvm_retpoline_eax
|
|
; X86: addl $4, %esp
|
|
; X86: movl global_fp, %eax
|
|
; X86: jmp __llvm_retpoline_eax # TAILCALL
|
|
|
|
; X86FAST-LABEL: icall_global_fp:
|
|
; X86FAST: calll __llvm_retpoline_eax
|
|
; X86FAST: jmp __llvm_retpoline_eax # TAILCALL
|
|
|
|
|
|
; Object type used by @vcall: its only field is a pointer to function
; pointers of type void(%struct.Foo*), which @vcall indexes like a table.
%struct.Foo = type { void (%struct.Foo*)** }
|
|
|
|
; Test an indirect call through a vtable.
|
|
; Indirect call through a function pointer fetched from a per-object table.
;
; Steps: take the address of field 0 of %obj, load the table pointer from it,
; step to element 1 of that table, and load the function pointer stored there.
; The loaded pointer is then called twice with %obj as the sole argument; both
; calls carry the `tail` marker, and the second one directly precedes `ret`.
;
;   %obj - object whose first field points at the function-pointer table
;
; Attribute group #0 enables the "+retpoline" target feature for this function.
define void @vcall(%struct.Foo* %obj) #0 {
  %vptr_field = getelementptr %struct.Foo, %struct.Foo* %obj, i32 0, i32 0
  %vptr = load void (%struct.Foo*)**, void (%struct.Foo*)*** %vptr_field
  %vslot = getelementptr void(%struct.Foo*)*, void(%struct.Foo*)** %vptr, i32 1
  %fp = load void(%struct.Foo*)*, void(%struct.Foo*)** %vslot
  tail call void %fp(%struct.Foo* %obj)
  tail call void %fp(%struct.Foo* %obj)
  ret void
}
|
|
|
|
; X64-LABEL: vcall:
|
|
; X64: movq %rdi, %[[obj:[^ ]*]]
|
|
; X64: movq (%rdi), %[[vptr:[^ ]*]]
|
|
; X64: movq 8(%[[vptr]]), %[[fp:[^ ]*]]
|
|
; X64: movq %[[fp]], %r11
|
|
; X64: callq __llvm_retpoline_r11
|
|
; X64-DAG: movq %[[obj]], %rdi
|
|
; X64-DAG: movq %[[fp]], %r11
|
|
; X64: jmp __llvm_retpoline_r11 # TAILCALL
|
|
|
|
; X64FAST-LABEL: vcall:
|
|
; X64FAST: callq __llvm_retpoline_r11
|
|
; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL
|
|
|
|
; X86-LABEL: vcall:
|
|
; X86: movl 8(%esp), %[[obj:[^ ]*]]
|
|
; X86: movl (%[[obj]]), %[[vptr:[^ ]*]]
|
|
; X86: movl 4(%[[vptr]]), %[[fp:[^ ]*]]
|
|
; X86: movl %[[fp]], %eax
|
|
; X86: pushl %[[obj]]
|
|
; X86: calll __llvm_retpoline_eax
|
|
; X86: addl $4, %esp
|
|
; X86: movl %[[fp]], %eax
|
|
; X86: jmp __llvm_retpoline_eax # TAILCALL
|
|
|
|
; X86FAST-LABEL: vcall:
|
|
; X86FAST: calll __llvm_retpoline_eax
|
|
; X86FAST: jmp __llvm_retpoline_eax # TAILCALL
|
|
|
|
|
|
; External callee with no parameters; the target of the direct tail call in
; @direct_tail.
declare void @direct_callee()
|
|
|
|
; Direct (non-indirect) tail call under the retpoline feature.
;
; The body is a single `tail call` to the named function @direct_callee
; followed by `ret void`; no function pointer is involved.
;
; Attribute group #0 enables the "+retpoline" target feature for this function.
define void @direct_tail() #0 {
  tail call void @direct_callee()
  ret void
}
|
|
|
|
; X64-LABEL: direct_tail:
|
|
; X64: jmp direct_callee # TAILCALL
|
|
; X64FAST-LABEL: direct_tail:
|
|
; X64FAST: jmp direct_callee # TAILCALL
|
|
; X86-LABEL: direct_tail:
|
|
; X86: jmp direct_callee # TAILCALL
|
|
; X86FAST-LABEL: direct_tail:
|
|
; X86FAST: jmp direct_callee # TAILCALL
|
|
|
|
|
|
; External callee declared with attribute group #1 (nonlazybind); it is called
; twice from @nonlazybind_caller.
declare void @nonlazybind_callee() #1
|
|
|
|
; Calls to a callee that is declared `nonlazybind`.
;
; @nonlazybind_callee is invoked twice by name: first with a plain `call`,
; then with a `tail call` that directly precedes `ret void`.
;
; Attribute group #0 enables the "+retpoline" target feature for this function;
; the callee's declaration carries attribute group #1.
define void @nonlazybind_caller() #0 {
  call void @nonlazybind_callee()
  tail call void @nonlazybind_callee()
  ret void
}
|
|
|
|
; X64-LABEL: nonlazybind_caller:
|
|
; X64: movq nonlazybind_callee@GOTPCREL(%rip), %[[REG:.*]]
|
|
; X64: movq %[[REG]], %r11
|
|
; X64: callq __llvm_retpoline_r11
|
|
; X64: movq %[[REG]], %r11
|
|
; X64: jmp __llvm_retpoline_r11 # TAILCALL
|
|
; X64FAST-LABEL: nonlazybind_caller:
|
|
; X64FAST: movq nonlazybind_callee@GOTPCREL(%rip), %r11
|
|
; X64FAST: callq __llvm_retpoline_r11
|
|
; X64FAST: movq nonlazybind_callee@GOTPCREL(%rip), %r11
|
|
; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL
|
|
; X86-LABEL: nonlazybind_caller:
|
|
; X86: calll nonlazybind_callee@PLT
|
|
; X86: jmp nonlazybind_callee@PLT # TAILCALL
|
|
; X86FAST-LABEL: nonlazybind_caller:
|
|
; X86FAST: calll nonlazybind_callee@PLT
|
|
; X86FAST: jmp nonlazybind_callee@PLT # TAILCALL
|
|
|
|
|
|
; Constant table of ten block addresses, one per block %bb0..%bb9 of
; @indirectbr_rewrite, in index order. @indirectbr_rewrite indexes this table
; with a value loaded from memory to pick the destination of each indirectbr.
@indirectbr_rewrite.targets = constant [10 x i8*] [i8* blockaddress(@indirectbr_rewrite, %bb0),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb1),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb2),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb3),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb4),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb5),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb6),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb7),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb8),
                                                   i8* blockaddress(@indirectbr_rewrite, %bb9)]
|
|
|
|
; Check that when retpolines are enabled a function with indirectbr gets
|
|
; rewritten to use switch, and that in turn doesn't get lowered as a jump
|
|
; table.
|
|
; Function built around two `indirectbr` dispatches.
;
; Both dispatches load an index from %p, use it to select an entry of
; @indirectbr_rewrite.targets, load the block address stored there, and branch
; to it. The dispatch in `entry` lists only %bb1 and %bb3 as destinations; the
; dispatch in `latch` lists all ten blocks. Each block %bbN performs a volatile
; store of the constant N to %sink and then branches to `latch`. No block
; returns, so control keeps cycling through `latch`.
;
;   %p    - readonly pointer to the i64 index read before each dispatch
;   %sink - destination of the volatile stores
;
; Block names must stay as they are: the targets table refers to them through
; blockaddress constants.
define void @indirectbr_rewrite(i64* readonly %p, i64* %sink) #0 {
; X64-LABEL: indirectbr_rewrite:
; X64-NOT:     jmpq
; X86-LABEL: indirectbr_rewrite:
; X86-NOT:     jmpl
entry:
  %i0 = load i64, i64* %p
  %target.i0 = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_rewrite.targets, i64 0, i64 %i0
  %target0 = load i8*, i8** %target.i0
  indirectbr i8* %target0, [label %bb1, label %bb3]

bb0:
  store volatile i64 0, i64* %sink
  br label %latch

bb1:
  store volatile i64 1, i64* %sink
  br label %latch

bb2:
  store volatile i64 2, i64* %sink
  br label %latch

bb3:
  store volatile i64 3, i64* %sink
  br label %latch

bb4:
  store volatile i64 4, i64* %sink
  br label %latch

bb5:
  store volatile i64 5, i64* %sink
  br label %latch

bb6:
  store volatile i64 6, i64* %sink
  br label %latch

bb7:
  store volatile i64 7, i64* %sink
  br label %latch

bb8:
  store volatile i64 8, i64* %sink
  br label %latch

bb9:
  store volatile i64 9, i64* %sink
  br label %latch

latch:
  %i.next = load i64, i64* %p
  %target.i.next = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_rewrite.targets, i64 0, i64 %i.next
  %target.next = load i8*, i8** %target.i.next
  ; Potentially hit a full 10 successors here so that even if we rewrite as
  ; a switch it will try to be lowered with a jump table.
  indirectbr i8* %target.next, [label %bb0,
                                label %bb1,
                                label %bb2,
                                label %bb3,
                                label %bb4,
                                label %bb5,
                                label %bb6,
                                label %bb7,
                                label %bb8,
                                label %bb9]
}
|
|
|
|
; Lastly check that the necessary thunks were emitted.
|
|
;
|
|
; X64-LABEL: .section .text.__llvm_retpoline_r11,{{.*}},__llvm_retpoline_r11,comdat
|
|
; X64-NEXT: .hidden __llvm_retpoline_r11
|
|
; X64-NEXT: .weak __llvm_retpoline_r11
|
|
; X64: __llvm_retpoline_r11:
|
|
; X64-NEXT: # {{.*}} # %entry
|
|
; X64-NEXT: callq [[CALL_TARGET:.*]]
|
|
; X64-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
|
|
; X64-NEXT: # %entry
|
|
; X64-NEXT: # =>This Inner Loop Header: Depth=1
|
|
; X64-NEXT: pause
|
|
; X64-NEXT: lfence
|
|
; X64-NEXT: jmp [[CAPTURE_SPEC]]
|
|
; X64-NEXT: .p2align 4, 0x90
|
|
; X64-NEXT: [[CALL_TARGET]]: # Block address taken
|
|
; X64-NEXT: # %entry
|
|
; X64-NEXT: movq %r11, (%rsp)
|
|
; X64-NEXT: retq
|
|
;
|
|
; X86-LABEL: .section .text.__llvm_retpoline_eax,{{.*}},__llvm_retpoline_eax,comdat
|
|
; X86-NEXT: .hidden __llvm_retpoline_eax
|
|
; X86-NEXT: .weak __llvm_retpoline_eax
|
|
; X86: __llvm_retpoline_eax:
|
|
; X86-NEXT: # {{.*}} # %entry
|
|
; X86-NEXT: calll [[CALL_TARGET:.*]]
|
|
; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
|
|
; X86-NEXT: # %entry
|
|
; X86-NEXT: # =>This Inner Loop Header: Depth=1
|
|
; X86-NEXT: pause
|
|
; X86-NEXT: lfence
|
|
; X86-NEXT: jmp [[CAPTURE_SPEC]]
|
|
; X86-NEXT: .p2align 4, 0x90
|
|
; X86-NEXT: [[CALL_TARGET]]: # Block address taken
|
|
; X86-NEXT: # %entry
|
|
; X86-NEXT: movl %eax, (%esp)
|
|
; X86-NEXT: retl
|
|
;
|
|
; X86-LABEL: .section .text.__llvm_retpoline_ecx,{{.*}},__llvm_retpoline_ecx,comdat
|
|
; X86-NEXT: .hidden __llvm_retpoline_ecx
|
|
; X86-NEXT: .weak __llvm_retpoline_ecx
|
|
; X86: __llvm_retpoline_ecx:
|
|
; X86-NEXT: # {{.*}} # %entry
|
|
; X86-NEXT: calll [[CALL_TARGET:.*]]
|
|
; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
|
|
; X86-NEXT: # %entry
|
|
; X86-NEXT: # =>This Inner Loop Header: Depth=1
|
|
; X86-NEXT: pause
|
|
; X86-NEXT: lfence
|
|
; X86-NEXT: jmp [[CAPTURE_SPEC]]
|
|
; X86-NEXT: .p2align 4, 0x90
|
|
; X86-NEXT: [[CALL_TARGET]]: # Block address taken
|
|
; X86-NEXT: # %entry
|
|
; X86-NEXT: movl %ecx, (%esp)
|
|
; X86-NEXT: retl
|
|
;
|
|
; X86-LABEL: .section .text.__llvm_retpoline_edx,{{.*}},__llvm_retpoline_edx,comdat
|
|
; X86-NEXT: .hidden __llvm_retpoline_edx
|
|
; X86-NEXT: .weak __llvm_retpoline_edx
|
|
; X86: __llvm_retpoline_edx:
|
|
; X86-NEXT: # {{.*}} # %entry
|
|
; X86-NEXT: calll [[CALL_TARGET:.*]]
|
|
; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
|
|
; X86-NEXT: # %entry
|
|
; X86-NEXT: # =>This Inner Loop Header: Depth=1
|
|
; X86-NEXT: pause
|
|
; X86-NEXT: lfence
|
|
; X86-NEXT: jmp [[CAPTURE_SPEC]]
|
|
; X86-NEXT: .p2align 4, 0x90
|
|
; X86-NEXT: [[CALL_TARGET]]: # Block address taken
|
|
; X86-NEXT: # %entry
|
|
; X86-NEXT: movl %edx, (%esp)
|
|
; X86-NEXT: retl
|
|
;
|
|
; X86-LABEL: .section .text.__llvm_retpoline_push,{{.*}},__llvm_retpoline_push,comdat
|
|
; X86-NEXT: .hidden __llvm_retpoline_push
|
|
; X86-NEXT: .weak __llvm_retpoline_push
|
|
; X86: __llvm_retpoline_push:
|
|
; X86-NEXT: # {{.*}} # %entry
|
|
; X86-NEXT: calll [[CALL_TARGET:.*]]
|
|
; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
|
|
; X86-NEXT: # %entry
|
|
; X86-NEXT: # =>This Inner Loop Header: Depth=1
|
|
; X86-NEXT: pause
|
|
; X86-NEXT: lfence
|
|
; X86-NEXT: jmp [[CAPTURE_SPEC]]
|
|
; X86-NEXT: .p2align 4, 0x90
|
|
; X86-NEXT: [[CALL_TARGET]]: # Block address taken
|
|
; X86-NEXT: # %entry
|
|
; X86-NEXT: addl $4, %esp
|
|
; X86-NEXT: pushl 4(%esp)
|
|
; X86-NEXT: pushl 4(%esp)
|
|
; X86-NEXT: popl 8(%esp)
|
|
; X86-NEXT: popl (%esp)
|
|
; X86-NEXT: retl
|
|
|
|
|
|
; Attribute group #0: turns on the "+retpoline" target feature. Each function
; defined in this test references this group.
attributes #0 = { "target-features"="+retpoline" }
|
|
; Attribute group #1: the nonlazybind function attribute, referenced by the
; declaration of @nonlazybind_callee.
attributes #1 = { nonlazybind }
|