mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
ad84da6d6d
See https://reviews.llvm.org/D74651 for the preallocated IR constructs and LangRef changes. In X86TargetLowering::LowerCall(), if a call is preallocated, record each argument's offset from the stack pointer and the total stack adjustment. Associate the call Value with an integer index. Store the info in X86MachineFunctionInfo with the integer index as the key. This adds two new target independent ISDOpcodes and two new target dependent Opcodes corresponding to @llvm.call.preallocated.{setup,arg}. The setup ISelDAG node takes in a chain and outputs a chain and a SrcValue of the preallocated call Value. It is lowered to a target dependent node with the SrcValue replaced with the integer index key by looking in X86MachineFunctionInfo. In X86TargetLowering::EmitInstrWithCustomInserter() this is lowered to an %esp adjustment, the exact amount determined by looking in X86MachineFunctionInfo with the integer index key. The arg ISelDAG node takes in a chain, a SrcValue of the preallocated call Value, and the arg index int constant. It produces a chain and the pointer to the arg. It is lowered to a target dependent node with the SrcValue replaced with the integer index key by looking in X86MachineFunctionInfo. In X86TargetLowering::EmitInstrWithCustomInserter() this is lowered to a lea of the stack pointer plus an offset determined by looking in X86MachineFunctionInfo with the integer index key. Force any function containing a preallocated call to use the frame pointer. Does not yet handle a setup without a call, or a conditional call. Does not yet handle musttail. That requires a LangRef change first. Tried to look at all references to inalloca and see if they apply to preallocated. I've made preallocated versions of tests testing inalloca whenever possible and when they make sense (e.g. not alloca related, inalloca edge cases). 
Aside from the tests added here, I checked that this codegen produces correct code for something like ``` struct A { A(); A(A&&); ~A(); }; void bar() { foo(foo(foo(foo(foo(A(), 4), 5), 6), 7), 8); } ``` by replacing the inalloca version of the .ll file with the appropriate preallocated code. Running the executable produces the same results as using the current inalloca implementation. Reverted due to unexpectedly passing tests, added REQUIRES: asserts for reland. Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D77689
304 lines
7.0 KiB
LLVM
304 lines
7.0 KiB
LLVM
; RUN: llc -mtriple=i686-windows < %s | FileCheck %s
|
|
|
|
declare void @addrof_i1(i1*)
|
|
declare void @addrof_i32(i32*)
|
|
declare void @addrof_i64(i64*)
|
|
declare void @addrof_i128(i128*)
|
|
declare void @addrof_i32_x3(i32*, i32*, i32*)
|
|
|
|
; Baseline case: the i32 argument %x is stored into a fresh alloca and the
; alloca's address is handed to @addrof_i32. The expectations that follow this
; function look for that address being formed as 4(%esp).
define void @simple(i32 %x) {
|
|
entry:
|
|
%x.addr = alloca i32
|
|
store i32 %x, i32* %x.addr
|
|
call void @addrof_i32(i32* %x.addr)
|
|
ret void
|
|
}
|
|
|
|
; CHECK-LABEL: _simple:
|
|
; CHECK: leal 4(%esp), %[[reg:[^ ]*]]
|
|
; CHECK: pushl %[[reg]]
|
|
; CHECK: calll _addrof_i32
|
|
; CHECK: retl
|
|
|
|
|
|
; We need to load %x before calling addrof_i32 now because it could mutate %x in
|
|
; place.
|
|
|
|
; Like the baseline case, %x is stored to an alloca whose address is passed to
; @addrof_i32, but here %x itself is also the return value, so its original
; value is still needed after the call.
define i32 @use_arg(i32 %x) {
|
|
entry:
|
|
%x.addr = alloca i32
|
|
store i32 %x, i32* %x.addr
|
|
call void @addrof_i32(i32* %x.addr)
|
|
ret i32 %x ; returns the SSA value %x, not a reload from %x.addr
|
|
}
|
|
|
|
; CHECK-LABEL: _use_arg:
|
|
; CHECK: pushl %[[csr:[^ ]*]]
|
|
; CHECK-DAG: movl 8(%esp), %[[csr]]
|
|
; CHECK-DAG: leal 8(%esp), %[[reg:[^ ]*]]
|
|
; CHECK: pushl %[[reg]]
|
|
; CHECK: calll _addrof_i32
|
|
; CHECK: movl %[[csr]], %eax
|
|
; CHECK: popl %[[csr]]
|
|
; CHECK: retl
|
|
|
|
; We won't copy elide for types needing legalization such as i64 or i1.
|
|
|
|
; An i64 argument is stored to an alloca with an explicit 4-byte alignment, the
; alloca's address is passed to @addrof_i64, and %x is returned afterwards.
; The expectations that follow read the value back as two 32-bit halves.
define i64 @split_i64(i64 %x) {
|
|
entry:
|
|
%x.addr = alloca i64, align 4
|
|
store i64 %x, i64* %x.addr, align 4
|
|
call void @addrof_i64(i64* %x.addr)
|
|
ret i64 %x
|
|
}
|
|
|
|
; CHECK-LABEL: _split_i64:
|
|
; CHECK: pushl %[[csr2:[^ ]*]]
|
|
; CHECK: pushl %[[csr1:[^ ]*]]
|
|
; CHECK-DAG: movl 12(%esp), %[[csr1]]
|
|
; CHECK-DAG: movl 16(%esp), %[[csr2]]
|
|
; CHECK-DAG: leal 12(%esp), %[[reg:[^ ]*]]
|
|
; CHECK: pushl %[[reg]]
|
|
; CHECK: calll _addrof_i64
|
|
; CHECK: addl $4, %esp
|
|
; CHECK-DAG: movl %[[csr1]], %eax
|
|
; CHECK-DAG: movl %[[csr2]], %edx
|
|
; CHECK: popl %[[csr1]]
|
|
; CHECK: popl %[[csr2]]
|
|
; CHECK: retl
|
|
|
|
; An i1 argument is stored to an alloca, the alloca's address is passed to
; @addrof_i1, and %x is returned afterwards. Note that this function has no
; explicit entry label; the first block is unnamed.
define i1 @i1_arg(i1 %x) {
|
|
%x.addr = alloca i1
|
|
store i1 %x, i1* %x.addr
|
|
call void @addrof_i1(i1* %x.addr)
|
|
ret i1 %x
|
|
}
|
|
|
|
; CHECK-LABEL: _i1_arg:
|
|
; CHECK: pushl %ebx
|
|
; CHECK: movb 8(%esp), %bl
|
|
; CHECK: leal 8(%esp), %eax
|
|
; CHECK: pushl %eax
|
|
; CHECK: calll _addrof_i1
|
|
; CHECK: addl $4, %esp
|
|
; CHECK: movl %ebx, %eax
|
|
; CHECK: popl %ebx
|
|
; CHECK: retl
|
|
|
|
; We can't copy elide when an i64 is split between registers and memory in a
|
|
; fastcc function.
|
|
|
|
; fastcc variant of the i64 case. The leading pointer parameter %p is never
; used in the body; the expectations that follow read one half of %x from %edx
; and the other from the stack, so %p presumably exists only to shift where %x
; is passed (NOTE(review): confirm against the fastcc register assignment).
define fastcc i64 @fastcc_split_i64(i64* %p, i64 %x) {
|
|
entry:
|
|
%x.addr = alloca i64, align 4
|
|
store i64 %x, i64* %x.addr, align 4
|
|
call void @addrof_i64(i64* %x.addr)
|
|
ret i64 %x
|
|
}
|
|
|
|
; CHECK-LABEL: _fastcc_split_i64:
|
|
; CHECK-DAG: movl %edx, %[[r1:[^ ]*]]
|
|
; CHECK-DAG: movl 20(%esp), %[[r2:[^ ]*]]
|
|
; CHECK-DAG: movl %[[r2]], 4(%esp)
|
|
; CHECK-DAG: movl %edx, (%esp)
|
|
; CHECK: movl %esp, %[[reg:[^ ]*]]
|
|
; CHECK: pushl %[[reg]]
|
|
; CHECK: calll _addrof_i64
|
|
; CHECK: retl
|
|
|
|
|
|
; We can't copy elide when it would reduce the user requested alignment.
|
|
|
|
; The i32 argument is stored to an alloca that explicitly requests 128-byte
; alignment, and the alloca's address is passed to @addrof_i32. The
; expectations that follow look for the stack pointer being masked with -128
; and for an explicit copy of the argument into the realigned slot.
define void @high_alignment(i32 %x) {
|
|
entry:
|
|
%x.p = alloca i32, align 128
|
|
store i32 %x, i32* %x.p
|
|
call void @addrof_i32(i32* %x.p)
|
|
ret void
|
|
}
|
|
|
|
; CHECK-LABEL: _high_alignment:
|
|
; CHECK: andl $-128, %esp
|
|
; CHECK: movl 8(%ebp), %[[reg:[^ ]*]]
|
|
; CHECK: movl %[[reg]], (%esp)
|
|
; CHECK: movl %esp, %[[reg:[^ ]*]]
|
|
; CHECK: pushl %[[reg]]
|
|
; CHECK: calll _addrof_i32
|
|
; CHECK: retl
|
|
|
|
|
|
; We can't copy elide when it would reduce the ABI required alignment.
|
|
; FIXME: We should lower the ABI alignment of i64 on Windows, since MSVC
|
|
; doesn't guarantee it.
|
|
|
|
; The i64 argument is stored to an alloca that carries no explicit alignment,
; and the alloca's address is passed to @addrof_i64. The expectations that
; follow look for the stack pointer being masked with -8, i.e. the slot ends up
; 8-byte aligned (presumably the target's default alignment for i64).
define void @abi_alignment(i64 %x) {
|
|
entry:
|
|
%x.p = alloca i64
|
|
store i64 %x, i64* %x.p
|
|
call void @addrof_i64(i64* %x.p)
|
|
ret void
|
|
}
|
|
|
|
; CHECK-LABEL: _abi_alignment:
|
|
; CHECK: andl $-8, %esp
|
|
; CHECK: movl 8(%ebp), %[[reg:[^ ]*]]
|
|
; CHECK: movl %[[reg]], (%esp)
|
|
; CHECK: movl %esp, %[[reg:[^ ]*]]
|
|
; CHECK: pushl %[[reg]]
|
|
; CHECK: calll _addrof_i64
|
|
; CHECK: retl
|
|
|
|
|
|
; The code we generate for this is unimportant. This is mostly a crash test.
|
|
|
|
; An i128 argument is stored to an alloca whose address is passed to
; @addrof_i128; afterwards %x is also stored through the first pointer
; parameter. That parameter is merely named %sret: it carries no sret
; attribute in this signature.
define void @split_i128(i128* %sret, i128 %x) {
|
|
entry:
|
|
%x.addr = alloca i128
|
|
store i128 %x, i128* %x.addr
|
|
call void @addrof_i128(i128* %x.addr)
|
|
store i128 %x, i128* %sret ; uses the SSA value %x after the call
|
|
ret void
|
|
}
|
|
|
|
; CHECK-LABEL: _split_i128:
|
|
; CHECK: pushl %ebp
|
|
; CHECK: calll _addrof_i128
|
|
; CHECK: retl
|
|
|
|
|
|
; Check that we load all of x, y, and z before the call.
|
|
|
|
; Three i32 arguments are each stored to their own alloca and all three
; addresses are passed to @addrof_i32_x3 in a single call. The original SSA
; values are then summed (x + y + z) and returned, so all three values are
; needed after the call.
define i32 @three_args(i32 %x, i32 %y, i32 %z) {
|
|
entry:
|
|
; The allocas and stores are written in z, y, x order; the call passes x, y, z.
%z.addr = alloca i32, align 4
|
|
%y.addr = alloca i32, align 4
|
|
%x.addr = alloca i32, align 4
|
|
store i32 %z, i32* %z.addr, align 4
|
|
store i32 %y, i32* %y.addr, align 4
|
|
store i32 %x, i32* %x.addr, align 4
|
|
call void @addrof_i32_x3(i32* %x.addr, i32* %y.addr, i32* %z.addr)
|
|
%s1 = add i32 %x, %y
|
|
%sum = add i32 %s1, %z
|
|
ret i32 %sum
|
|
}
|
|
|
|
; CHECK-LABEL: _three_args:
|
|
; CHECK: pushl %[[csr:[^ ]*]]
|
|
; CHECK-DAG: movl {{[0-9]+}}(%esp), %[[csr]]
|
|
; CHECK-DAG: addl {{[0-9]+}}(%esp), %[[csr]]
|
|
; CHECK-DAG: addl {{[0-9]+}}(%esp), %[[csr]]
|
|
; CHECK-DAG: leal 8(%esp), %[[x:[^ ]*]]
|
|
; CHECK-DAG: leal 12(%esp), %[[y:[^ ]*]]
|
|
; CHECK-DAG: leal 16(%esp), %[[z:[^ ]*]]
|
|
; CHECK: pushl %[[z]]
|
|
; CHECK: pushl %[[y]]
|
|
; CHECK: pushl %[[x]]
|
|
; CHECK: calll _addrof_i32_x3
|
|
; CHECK: movl %[[csr]], %eax
|
|
; CHECK: popl %[[csr]]
|
|
; CHECK: retl
|
|
|
|
|
|
; Two different arguments are stored, one after the other, into the same
; alloca before its address is passed to @addrof_i32. The expectations that
; follow look for the value at 8(%esp) being copied into 4(%esp) and for the
; address 4(%esp) being passed to the callee.
define void @two_args_same_alloca(i32 %x, i32 %y) {
|
|
entry:
|
|
%x.addr = alloca i32
|
|
store i32 %x, i32* %x.addr
|
|
store i32 %y, i32* %x.addr ; overwrites the previous store of %x
|
|
call void @addrof_i32(i32* %x.addr)
|
|
ret void
|
|
}
|
|
|
|
; CHECK-LABEL: _two_args_same_alloca:
|
|
; CHECK: movl 8(%esp), {{.*}}
|
|
; CHECK: movl {{.*}}, 4(%esp)
|
|
; CHECK: leal 4(%esp), %[[reg:[^ ]*]]
|
|
; CHECK: pushl %[[reg]]
|
|
; CHECK: calll _addrof_i32
|
|
; CHECK: retl
|
|
|
|
|
|
; The parameter is an i32* marked byval. The pointer value %x itself is stored
; into an i32** alloca, and the call passes %x directly rather than the
; alloca's address.
define void @avoid_byval(i32* byval %x) {
|
|
entry:
|
|
%x.p.p = alloca i32*
|
|
store i32* %x, i32** %x.p.p
|
|
call void @addrof_i32(i32* %x)
|
|
ret void
|
|
}
|
|
|
|
; CHECK-LABEL: _avoid_byval:
|
|
; CHECK: leal {{[0-9]+}}(%esp), %[[reg:[^ ]*]]
|
|
; CHECK: pushl %[[reg]]
|
|
; CHECK: calll _addrof_i32
|
|
; CHECK: retl
|
|
|
|
|
|
; Same shape as the byval case, but the pointer parameter is marked inalloca.
; The pointer value %x is stored into an i32** alloca, and the call passes %x
; directly rather than the alloca's address.
define void @avoid_inalloca(i32* inalloca %x) {
|
|
entry:
|
|
%x.p.p = alloca i32*
|
|
store i32* %x, i32** %x.p.p
|
|
call void @addrof_i32(i32* %x)
|
|
ret void
|
|
}
|
|
|
|
; CHECK-LABEL: _avoid_inalloca:
|
|
; CHECK: leal {{[0-9]+}}(%esp), %[[reg:[^ ]*]]
|
|
; CHECK: pushl %[[reg]]
|
|
; CHECK: calll _addrof_i32
|
|
; CHECK: retl
|
|
|
|
; Same shape as the byval and inalloca cases, but the pointer parameter is
; marked preallocated(i32). The pointer value %x is stored into an i32**
; alloca, and the call passes %x directly rather than the alloca's address.
define void @avoid_preallocated(i32* preallocated(i32) %x) {
|
|
entry:
|
|
%x.p.p = alloca i32*
|
|
store i32* %x, i32** %x.p.p
|
|
call void @addrof_i32(i32* %x)
|
|
ret void
|
|
}
|
|
|
|
; CHECK-LABEL: _avoid_preallocated:
|
|
; CHECK: leal {{[0-9]+}}(%esp), %[[reg:[^ ]*]]
|
|
; CHECK: pushl %[[reg]]
|
|
; CHECK: calll _addrof_i32
|
|
; CHECK: retl
|
|
|
|
; Don't elide the copy when the alloca is escaped with a store.
|
|
; The address of alloca %x1 is written to a second alloca (%x2), reloaded as
; %x3, and used to store 0 before the argument %x is stored into %x1. The
; address of %x1 is then passed to @addrof_i32. This function has no explicit
; entry label.
define void @escape_with_store(i32 %x) {
|
|
%x1 = alloca i32
|
|
%x2 = alloca i32*
|
|
store i32* %x1, i32** %x2 ; the address of %x1 escapes into memory here
|
|
%x3 = load i32*, i32** %x2
|
|
store i32 0, i32* %x3 ; writes through the reloaded pointer to %x1
|
|
store i32 %x, i32* %x1
|
|
call void @addrof_i32(i32* %x1)
|
|
ret void
|
|
}
|
|
|
|
; CHECK-LABEL: _escape_with_store:
|
|
; CHECK: movl {{.*}}(%esp), %[[reg:[^ ]*]]
|
|
; CHECK: movl %[[reg]], [[offs:[0-9]*]](%esp)
|
|
; CHECK: calll _addrof_i32
|
|
|
|
|
|
; This test case exposed issues with the use of TokenFactor.
|
|
|
|
; Combines an sret pointer parameter with the store-to-alloca pattern: %v is
; stored to an alloca whose address is passed to @addrof_i32, and after the
; call the SSA value %v is stored through %sret. This function has no explicit
; entry label.
define void @sret_and_elide(i32* sret %sret, i32 %v) {
|
|
%v.p = alloca i32
|
|
store i32 %v, i32* %v.p
|
|
call void @addrof_i32(i32* %v.p)
|
|
store i32 %v, i32* %sret ; uses the SSA value %v, not a reload from %v.p
|
|
ret void
|
|
}
|
|
|
|
; CHECK-LABEL: _sret_and_elide:
|
|
; CHECK: pushl
|
|
; CHECK: pushl
|
|
; CHECK: movl 12(%esp), %[[sret:[^ ]*]]
|
|
; CHECK: movl 16(%esp), %[[v:[^ ]*]]
|
|
; CHECK: leal 16(%esp), %[[reg:[^ ]*]]
|
|
; CHECK: pushl %[[reg]]
|
|
; CHECK: calll _addrof_i32
|
|
; CHECK: movl %[[v]], (%[[sret]])
|
|
; CHECK: movl %[[sret]], %eax
|
|
; CHECK: popl
|
|
; CHECK: popl
|
|
; CHECK: retl
|