1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-23 04:52:54 +02:00
llvm-mirror/test/CodeGen/X86/cmpxchg-clobber-flags.ll
Chandler Carruth 29abad3787 [x86] Switch EFLAGS copy lowering to use reg-reg form of testing for
a zero register.

Previously I tried this and saw LLVM unable to transform this to fold
with memory operands such as spill slot rematerialization. However, it
clearly works as shown in this patch. We turn these into `cmpb $0,
<mem>` when useful for folding a memory operand without issue. This form
has no disadvantage compared to `testb $-1, <mem>`. So overall, this is
likely no worse and may be slightly smaller in some cases due to the
`testb %reg, %reg` form.

Differential Revision: https://reviews.llvm.org/D45475

llvm-svn: 330269
2018-04-18 15:52:50 +00:00

280 lines
10 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=32-ALL,32-GOOD-RA
; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=32-ALL,32-FAST-RA
; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA
; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=64-ALL,64-FAST-RA
; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA-SAHF
; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=64-ALL,64-FAST-RA-SAHF
; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mcpu=corei7 %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA-SAHF
declare i32 @foo()
declare i32 @bar(i64)
; In the following case when using fast scheduling we get a long chain of
; EFLAGS save/restore due to a sequence of:
; cmpxchg8b (implicit-def eflags)
; eax = copy eflags
; adjcallstackdown32
; ...
; use of eax
; During PEI the adjcallstackdown32 is replaced with the subl which
; clobbers eflags, effectively interfering in the liveness interval. However,
; we then promote these copies into independent conditions in GPRs that avoids
; repeated saving and restoring logic and can be trivially managed by the
; register allocator.
define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) nounwind {
; 32-GOOD-RA-LABEL: test_intervening_call:
; 32-GOOD-RA: # %bb.0: # %entry
; 32-GOOD-RA-NEXT: pushl %ebx
; 32-GOOD-RA-NEXT: pushl %esi
; 32-GOOD-RA-NEXT: pushl %eax
; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %edx
; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ebx
; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
; 32-GOOD-RA-NEXT: lock cmpxchg8b (%esi)
; 32-GOOD-RA-NEXT: setne %bl
; 32-GOOD-RA-NEXT: subl $8, %esp
; 32-GOOD-RA-NEXT: pushl %edx
; 32-GOOD-RA-NEXT: pushl %eax
; 32-GOOD-RA-NEXT: calll bar
; 32-GOOD-RA-NEXT: addl $16, %esp
; 32-GOOD-RA-NEXT: testb %bl, %bl
; 32-GOOD-RA-NEXT: jne .LBB0_3
; 32-GOOD-RA-NEXT: # %bb.1: # %t
; 32-GOOD-RA-NEXT: movl $42, %eax
; 32-GOOD-RA-NEXT: jmp .LBB0_2
; 32-GOOD-RA-NEXT: .LBB0_3: # %f
; 32-GOOD-RA-NEXT: xorl %eax, %eax
; 32-GOOD-RA-NEXT: .LBB0_2: # %t
; 32-GOOD-RA-NEXT: xorl %edx, %edx
; 32-GOOD-RA-NEXT: addl $4, %esp
; 32-GOOD-RA-NEXT: popl %esi
; 32-GOOD-RA-NEXT: popl %ebx
; 32-GOOD-RA-NEXT: retl
;
; 32-FAST-RA-LABEL: test_intervening_call:
; 32-FAST-RA: # %bb.0: # %entry
; 32-FAST-RA-NEXT: pushl %ebx
; 32-FAST-RA-NEXT: pushl %esi
; 32-FAST-RA-NEXT: pushl %eax
; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ebx
; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %edx
; 32-FAST-RA-NEXT: lock cmpxchg8b (%esi)
; 32-FAST-RA-NEXT: setne %bl
; 32-FAST-RA-NEXT: subl $8, %esp
; 32-FAST-RA-NEXT: pushl %edx
; 32-FAST-RA-NEXT: pushl %eax
; 32-FAST-RA-NEXT: calll bar
; 32-FAST-RA-NEXT: addl $16, %esp
; 32-FAST-RA-NEXT: testb %bl, %bl
; 32-FAST-RA-NEXT: jne .LBB0_3
; 32-FAST-RA-NEXT: # %bb.1: # %t
; 32-FAST-RA-NEXT: movl $42, %eax
; 32-FAST-RA-NEXT: jmp .LBB0_2
; 32-FAST-RA-NEXT: .LBB0_3: # %f
; 32-FAST-RA-NEXT: xorl %eax, %eax
; 32-FAST-RA-NEXT: .LBB0_2: # %t
; 32-FAST-RA-NEXT: xorl %edx, %edx
; 32-FAST-RA-NEXT: addl $4, %esp
; 32-FAST-RA-NEXT: popl %esi
; 32-FAST-RA-NEXT: popl %ebx
; 32-FAST-RA-NEXT: retl
;
; 64-ALL-LABEL: test_intervening_call:
; 64-ALL: # %bb.0: # %entry
; 64-ALL-NEXT: pushq %rbx
; 64-ALL-NEXT: movq %rsi, %rax
; 64-ALL-NEXT: lock cmpxchgq %rdx, (%rdi)
; 64-ALL-NEXT: setne %bl
; 64-ALL-NEXT: movq %rax, %rdi
; 64-ALL-NEXT: callq bar
; 64-ALL-NEXT: testb %bl, %bl
; 64-ALL-NEXT: jne .LBB0_2
; 64-ALL-NEXT: # %bb.1: # %t
; 64-ALL-NEXT: movl $42, %eax
; 64-ALL-NEXT: popq %rbx
; 64-ALL-NEXT: retq
; 64-ALL-NEXT: .LBB0_2: # %f
; 64-ALL-NEXT: xorl %eax, %eax
; 64-ALL-NEXT: popq %rbx
; 64-ALL-NEXT: retq
entry:
%cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst
%v = extractvalue { i64, i1 } %cx, 0
%p = extractvalue { i64, i1 } %cx, 1
call i32 @bar(i64 %v)
br i1 %p, label %t, label %f
t:
ret i64 42
f:
ret i64 0
}
; Interesting in producing a clobber without any function calls.
define i32 @test_control_flow(i32* %p, i32 %i, i32 %j) nounwind {
; 32-ALL-LABEL: test_control_flow:
; 32-ALL: # %bb.0: # %entry
; 32-ALL-NEXT: movl {{[0-9]+}}(%esp), %eax
; 32-ALL-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; 32-ALL-NEXT: jle .LBB1_6
; 32-ALL-NEXT: # %bb.1: # %loop_start
; 32-ALL-NEXT: movl {{[0-9]+}}(%esp), %ecx
; 32-ALL-NEXT: .p2align 4, 0x90
; 32-ALL-NEXT: .LBB1_2: # %while.condthread-pre-split.i
; 32-ALL-NEXT: # =>This Loop Header: Depth=1
; 32-ALL-NEXT: # Child Loop BB1_3 Depth 2
; 32-ALL-NEXT: movl (%ecx), %edx
; 32-ALL-NEXT: .p2align 4, 0x90
; 32-ALL-NEXT: .LBB1_3: # %while.cond.i
; 32-ALL-NEXT: # Parent Loop BB1_2 Depth=1
; 32-ALL-NEXT: # => This Inner Loop Header: Depth=2
; 32-ALL-NEXT: movl %edx, %eax
; 32-ALL-NEXT: xorl %edx, %edx
; 32-ALL-NEXT: testl %eax, %eax
; 32-ALL-NEXT: je .LBB1_3
; 32-ALL-NEXT: # %bb.4: # %while.body.i
; 32-ALL-NEXT: # in Loop: Header=BB1_2 Depth=1
; 32-ALL-NEXT: lock cmpxchgl %eax, (%ecx)
; 32-ALL-NEXT: jne .LBB1_2
; 32-ALL-NEXT: # %bb.5:
; 32-ALL-NEXT: xorl %eax, %eax
; 32-ALL-NEXT: .LBB1_6: # %cond.end
; 32-ALL-NEXT: retl
;
; 64-ALL-LABEL: test_control_flow:
; 64-ALL: # %bb.0: # %entry
; 64-ALL-NEXT: cmpl %edx, %esi
; 64-ALL-NEXT: jle .LBB1_5
; 64-ALL-NEXT: .p2align 4, 0x90
; 64-ALL-NEXT: .LBB1_1: # %while.condthread-pre-split.i
; 64-ALL-NEXT: # =>This Loop Header: Depth=1
; 64-ALL-NEXT: # Child Loop BB1_2 Depth 2
; 64-ALL-NEXT: movl (%rdi), %ecx
; 64-ALL-NEXT: .p2align 4, 0x90
; 64-ALL-NEXT: .LBB1_2: # %while.cond.i
; 64-ALL-NEXT: # Parent Loop BB1_1 Depth=1
; 64-ALL-NEXT: # => This Inner Loop Header: Depth=2
; 64-ALL-NEXT: movl %ecx, %eax
; 64-ALL-NEXT: xorl %ecx, %ecx
; 64-ALL-NEXT: testl %eax, %eax
; 64-ALL-NEXT: je .LBB1_2
; 64-ALL-NEXT: # %bb.3: # %while.body.i
; 64-ALL-NEXT: # in Loop: Header=BB1_1 Depth=1
; 64-ALL-NEXT: lock cmpxchgl %eax, (%rdi)
; 64-ALL-NEXT: jne .LBB1_1
; 64-ALL-NEXT: # %bb.4:
; 64-ALL-NEXT: xorl %esi, %esi
; 64-ALL-NEXT: .LBB1_5: # %cond.end
; 64-ALL-NEXT: movl %esi, %eax
; 64-ALL-NEXT: retq
entry:
%cmp = icmp sgt i32 %i, %j
br i1 %cmp, label %loop_start, label %cond.end
loop_start:
br label %while.condthread-pre-split.i
while.condthread-pre-split.i:
%.pr.i = load i32, i32* %p, align 4
br label %while.cond.i
while.cond.i:
%0 = phi i32 [ %.pr.i, %while.condthread-pre-split.i ], [ 0, %while.cond.i ]
%tobool.i = icmp eq i32 %0, 0
br i1 %tobool.i, label %while.cond.i, label %while.body.i
while.body.i:
%.lcssa = phi i32 [ %0, %while.cond.i ]
%1 = cmpxchg i32* %p, i32 %.lcssa, i32 %.lcssa seq_cst seq_cst
%2 = extractvalue { i32, i1 } %1, 1
br i1 %2, label %cond.end.loopexit, label %while.condthread-pre-split.i
cond.end.loopexit:
br label %cond.end
cond.end:
%cond = phi i32 [ %i, %entry ], [ 0, %cond.end.loopexit ]
ret i32 %cond
}
; This one is an interesting case because CMOV doesn't have a chain
; operand. Naive attempts to limit cmpxchg EFLAGS use are likely to fail here.
define i32 @test_feed_cmov(i32* %addr, i32 %desired, i32 %new) nounwind {
; 32-GOOD-RA-LABEL: test_feed_cmov:
; 32-GOOD-RA: # %bb.0: # %entry
; 32-GOOD-RA-NEXT: pushl %ebx
; 32-GOOD-RA-NEXT: pushl %esi
; 32-GOOD-RA-NEXT: pushl %eax
; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
; 32-GOOD-RA-NEXT: lock cmpxchgl %esi, (%ecx)
; 32-GOOD-RA-NEXT: sete %bl
; 32-GOOD-RA-NEXT: calll foo
; 32-GOOD-RA-NEXT: testb %bl, %bl
; 32-GOOD-RA-NEXT: jne .LBB2_2
; 32-GOOD-RA-NEXT: # %bb.1: # %entry
; 32-GOOD-RA-NEXT: movl %eax, %esi
; 32-GOOD-RA-NEXT: .LBB2_2: # %entry
; 32-GOOD-RA-NEXT: movl %esi, %eax
; 32-GOOD-RA-NEXT: addl $4, %esp
; 32-GOOD-RA-NEXT: popl %esi
; 32-GOOD-RA-NEXT: popl %ebx
; 32-GOOD-RA-NEXT: retl
;
; 32-FAST-RA-LABEL: test_feed_cmov:
; 32-FAST-RA: # %bb.0: # %entry
; 32-FAST-RA-NEXT: pushl %ebx
; 32-FAST-RA-NEXT: pushl %esi
; 32-FAST-RA-NEXT: pushl %eax
; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
; 32-FAST-RA-NEXT: lock cmpxchgl %esi, (%ecx)
; 32-FAST-RA-NEXT: sete %bl
; 32-FAST-RA-NEXT: calll foo
; 32-FAST-RA-NEXT: testb %bl, %bl
; 32-FAST-RA-NEXT: jne .LBB2_2
; 32-FAST-RA-NEXT: # %bb.1: # %entry
; 32-FAST-RA-NEXT: movl %eax, %esi
; 32-FAST-RA-NEXT: .LBB2_2: # %entry
; 32-FAST-RA-NEXT: movl %esi, %eax
; 32-FAST-RA-NEXT: addl $4, %esp
; 32-FAST-RA-NEXT: popl %esi
; 32-FAST-RA-NEXT: popl %ebx
; 32-FAST-RA-NEXT: retl
;
; 64-ALL-LABEL: test_feed_cmov:
; 64-ALL: # %bb.0: # %entry
; 64-ALL-NEXT: pushq %rbp
; 64-ALL-NEXT: pushq %rbx
; 64-ALL-NEXT: pushq %rax
; 64-ALL-NEXT: movl %edx, %ebx
; 64-ALL-NEXT: movl %esi, %eax
; 64-ALL-NEXT: lock cmpxchgl %edx, (%rdi)
; 64-ALL-NEXT: sete %bpl
; 64-ALL-NEXT: callq foo
; 64-ALL-NEXT: testb %bpl, %bpl
; 64-ALL-NEXT: cmovnel %ebx, %eax
; 64-ALL-NEXT: addq $8, %rsp
; 64-ALL-NEXT: popq %rbx
; 64-ALL-NEXT: popq %rbp
; 64-ALL-NEXT: retq
entry:
%res = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
%success = extractvalue { i32, i1 } %res, 1
%rhs = call i32 @foo()
%ret = select i1 %success, i32 %new, i32 %rhs
ret i32 %ret
}