D29668 made it possible to avoid a useless copy of the argument value into an alloca when the caller places it in memory (as often happens on x86) by directly forwarding a pointer to it. This optimization is illegal if the type contains padding bytes: if a truncating store into the alloca is replaced, the upper bits are filled with garbage and the resulting code misbehaves at runtime.

Reviewed By: rnk

Differential Revision: https://reviews.llvm.org/D102153
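To illustrate the padding problem the commit message describes, here is a minimal sketch in LLVM IR (the names @pad_example and @use_ptr are hypothetical, not part of the test file below): a store of an i3 defines only the low three bits of the slot, so forwarding the caller's full argument slot instead of performing the copy would expose garbage in the padding bits.

;   declare void @use_ptr(i3*)
;
;   define i3 @pad_example(i3 %x) {
;     %x.addr = alloca i3
;     store i3 %x, i3* %x.addr        ; truncating store: only bits 0-2 are defined
;     call void @use_ptr(i3* %x.addr) ; eliding the store and forwarding the caller's
;                                     ; argument slot instead would hand @use_ptr
;                                     ; garbage in the padding bits
;     ret i3 %x
;   }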
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i686-windows < %s | FileCheck %s

declare void @addrof_i1(i1*)
declare void @addrof_i32(i32*)
declare void @addrof_i64(i64*)
declare void @addrof_i128(i128*)
declare void @addrof_i32_x3(i32*, i32*, i32*)

define void @simple(i32 %x) {
; CHECK-LABEL: simple:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i32
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    retl
entry:
  %x.addr = alloca i32
  store i32 %x, i32* %x.addr
  call void @addrof_i32(i32* %x.addr)
  ret void
}

; We need to load %x before calling addrof_i32 now because it could mutate %x in
; place.

define i32 @use_arg(i32 %x) {
; CHECK-LABEL: use_arg:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i32
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    retl
entry:
  %x.addr = alloca i32
  store i32 %x, i32* %x.addr
  call void @addrof_i32(i32* %x.addr)
  ret i32 %x
}

; We won't copy elide for types needing legalization such as i64 or i1.

define i64 @split_i64(i64 %x) {
; CHECK-LABEL: split_i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushl %edi
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i64
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    movl %edi, %edx
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    popl %edi
; CHECK-NEXT:    retl
entry:
  %x.addr = alloca i64, align 4
  store i64 %x, i64* %x.addr, align 4
  call void @addrof_i64(i64* %x.addr)
  ret i64 %x
}

define i1 @i1_arg(i1 %x) {
; CHECK-LABEL: i1_arg:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushl %ebx
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    movb {{[0-9]+}}(%esp), %bl
; CHECK-NEXT:    movl %ebx, %eax
; CHECK-NEXT:    andb $1, %al
; CHECK-NEXT:    movb %al, {{[0-9]+}}(%esp)
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i1
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    movl %ebx, %eax
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    popl %ebx
; CHECK-NEXT:    retl
  %x.addr = alloca i1
  store i1 %x, i1* %x.addr
  call void @addrof_i1(i1* %x.addr)
  ret i1 %x
}

; We can't copy elide when an i64 is split between registers and memory in a
; fastcc function.

define fastcc i64 @fastcc_split_i64(i64* %p, i64 %x) {
; CHECK-LABEL: fastcc_split_i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushl %edi
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    subl $8, %esp
; CHECK-NEXT:    movl %edx, %esi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; CHECK-NEXT:    movl %edx, (%esp)
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i64
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    movl %edi, %edx
; CHECK-NEXT:    addl $8, %esp
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    popl %edi
; CHECK-NEXT:    retl
entry:
  %x.addr = alloca i64, align 4
  store i64 %x, i64* %x.addr, align 4
  call void @addrof_i64(i64* %x.addr)
  ret i64 %x
}

; We can't copy elide when it would reduce the user-requested alignment.

define void @high_alignment(i32 %x) {
; CHECK-LABEL: high_alignment:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushl %ebp
; CHECK-NEXT:    movl %esp, %ebp
; CHECK-NEXT:    andl $-128, %esp
; CHECK-NEXT:    subl $128, %esp
; CHECK-NEXT:    movl 8(%ebp), %eax
; CHECK-NEXT:    movl %eax, (%esp)
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i32
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    movl %ebp, %esp
; CHECK-NEXT:    popl %ebp
; CHECK-NEXT:    retl
entry:
  %x.p = alloca i32, align 128
  store i32 %x, i32* %x.p
  call void @addrof_i32(i32* %x.p)
  ret void
}

; We can't copy elide when it would reduce the ABI-required alignment.
; FIXME: We should lower the ABI alignment of i64 on Windows, since MSVC
; doesn't guarantee it.

define void @abi_alignment(i64 %x) {
; CHECK-LABEL: abi_alignment:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushl %ebp
; CHECK-NEXT:    movl %esp, %ebp
; CHECK-NEXT:    andl $-8, %esp
; CHECK-NEXT:    subl $8, %esp
; CHECK-NEXT:    movl 8(%ebp), %eax
; CHECK-NEXT:    movl 12(%ebp), %ecx
; CHECK-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; CHECK-NEXT:    movl %eax, (%esp)
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i64
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    movl %ebp, %esp
; CHECK-NEXT:    popl %ebp
; CHECK-NEXT:    retl
entry:
  %x.p = alloca i64
  store i64 %x, i64* %x.p
  call void @addrof_i64(i64* %x.p)
  ret void
}

; The code we generate for this is unimportant. This is mostly a crash test.

define void @split_i128(i128* %sret, i128 %x) {
; CHECK-LABEL: split_i128:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushl %ebp
; CHECK-NEXT:    movl %esp, %ebp
; CHECK-NEXT:    pushl %ebx
; CHECK-NEXT:    pushl %edi
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    andl $-8, %esp
; CHECK-NEXT:    subl $32, %esp
; CHECK-NEXT:    movl 12(%ebp), %eax
; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    movl 16(%ebp), %ebx
; CHECK-NEXT:    movl 20(%ebp), %esi
; CHECK-NEXT:    movl 24(%ebp), %edi
; CHECK-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; CHECK-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; CHECK-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
; CHECK-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i128
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    movl 8(%ebp), %eax
; CHECK-NEXT:    movl %edi, 12(%eax)
; CHECK-NEXT:    movl %esi, 8(%eax)
; CHECK-NEXT:    movl %ebx, 4(%eax)
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT:    movl %ecx, (%eax)
; CHECK-NEXT:    leal -12(%ebp), %esp
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    popl %edi
; CHECK-NEXT:    popl %ebx
; CHECK-NEXT:    popl %ebp
; CHECK-NEXT:    retl
entry:
  %x.addr = alloca i128
  store i128 %x, i128* %x.addr
  call void @addrof_i128(i128* %x.addr)
  store i128 %x, i128* %sret
  ret void
}

; Check that we load all of x, y, and z before the call.

define i32 @three_args(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: three_args:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT:    addl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT:    addl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %edx
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    pushl %ecx
; CHECK-NEXT:    pushl %edx
; CHECK-NEXT:    calll _addrof_i32_x3
; CHECK-NEXT:    addl $12, %esp
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    retl
entry:
  %z.addr = alloca i32, align 4
  %y.addr = alloca i32, align 4
  %x.addr = alloca i32, align 4
  store i32 %z, i32* %z.addr, align 4
  store i32 %y, i32* %y.addr, align 4
  store i32 %x, i32* %x.addr, align 4
  call void @addrof_i32_x3(i32* %x.addr, i32* %y.addr, i32* %z.addr)
  %s1 = add i32 %x, %y
  %sum = add i32 %s1, %z
  ret i32 %sum
}

define void @two_args_same_alloca(i32 %x, i32 %y) {
; CHECK-LABEL: two_args_same_alloca:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i32
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    retl
entry:
  %x.addr = alloca i32
  store i32 %x, i32* %x.addr
  store i32 %y, i32* %x.addr
  call void @addrof_i32(i32* %x.addr)
  ret void
}

define void @avoid_byval(i32* byval(i32) %x) {
; CHECK-LABEL: avoid_byval:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl %eax, (%esp)
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i32
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    popl %eax
; CHECK-NEXT:    retl
entry:
  %x.p.p = alloca i32*
  store i32* %x, i32** %x.p.p
  call void @addrof_i32(i32* %x)
  ret void
}

define void @avoid_inalloca(i32* inalloca(i32) %x) {
; CHECK-LABEL: avoid_inalloca:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl %eax, (%esp)
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i32
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    popl %eax
; CHECK-NEXT:    retl
entry:
  %x.p.p = alloca i32*
  store i32* %x, i32** %x.p.p
  call void @addrof_i32(i32* %x)
  ret void
}

define void @avoid_preallocated(i32* preallocated(i32) %x) {
; CHECK-LABEL: avoid_preallocated:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl %eax, (%esp)
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i32
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    popl %eax
; CHECK-NEXT:    retl
entry:
  %x.p.p = alloca i32*
  store i32* %x, i32** %x.p.p
  call void @addrof_i32(i32* %x)
  ret void
}

; Don't elide the copy when the alloca is escaped with a store.
define void @escape_with_store(i32 %x) {
; CHECK-LABEL: escape_with_store:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subl $8, %esp
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl %esp, %ecx
; CHECK-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; CHECK-NEXT:    movl %eax, (%esp)
; CHECK-NEXT:    pushl %ecx
; CHECK-NEXT:    calll _addrof_i32
; CHECK-NEXT:    addl $12, %esp
; CHECK-NEXT:    retl
  %x1 = alloca i32
  %x2 = alloca i32*
  store i32* %x1, i32** %x2
  %x3 = load i32*, i32** %x2
  store i32 0, i32* %x3
  store i32 %x, i32* %x1
  call void @addrof_i32(i32* %x1)
  ret void
}

; This test case exposed issues with the use of TokenFactor.

define void @sret_and_elide(i32* sret(i32) %sret, i32 %v) {
; CHECK-LABEL: sret_and_elide:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushl %edi
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i32
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    movl %edi, (%esi)
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    popl %edi
; CHECK-NEXT:    retl
  %v.p = alloca i32
  store i32 %v, i32* %v.p
  call void @addrof_i32(i32* %v.p)
  store i32 %v, i32* %sret
  ret void
}

define void @avoid_partially_initialized_alloca(i32 %x) {
; CHECK-LABEL: avoid_partially_initialized_alloca:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushl %ebp
; CHECK-NEXT:    movl %esp, %ebp
; CHECK-NEXT:    andl $-8, %esp
; CHECK-NEXT:    subl $8, %esp
; CHECK-NEXT:    movl 8(%ebp), %eax
; CHECK-NEXT:    movl %eax, (%esp)
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i32
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    movl %ebp, %esp
; CHECK-NEXT:    popl %ebp
; CHECK-NEXT:    retl
  %a = alloca i64
  %p = bitcast i64* %a to i32*
  store i32 %x, i32* %p
  call void @addrof_i32(i32* %p)
  ret void
}

; Ensure no copy elision happens, as the two i3 values fed into icmp may have
; garbage in the upper bits; a truncation is needed.

define i1 @use_i3(i3 %a1, i3 %a2) {
; CHECK-LABEL: use_i3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    movb {{[0-9]+}}(%esp), %al
; CHECK-NEXT:    andb $7, %al
; CHECK-NEXT:    movb {{[0-9]+}}(%esp), %cl
; CHECK-NEXT:    andb $7, %cl
; CHECK-NEXT:    movb %cl, {{[0-9]+}}(%esp)
; CHECK-NEXT:    cmpb %cl, %al
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    popl %ecx
; CHECK-NEXT:    retl
  %tmp = alloca i3
  store i3 %a2, i3* %tmp
  %val = load i3, i3* %tmp
  %res = icmp eq i3 %a1, %val
  ret i1 %res
}