
[X86] X86CallFrameOptimization - generalize slow push code path

Replace the explicit isAtom() || isSLM() test with the more general slowTwoMemOps() check, which names the actual hazard and also covers targets such as Goldmont and KNL, so that the PUSHrmm push-from-memory form is avoided wherever two-memory-operand instructions are slow.

This is tricky to exercise in anything but quite complex code, but the atomic-idempotent.ll tests turn out to be the most straightforward way to cover it.
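The difference is easiest to see in the 32-bit i128 libcall sequences in the test diff below: a generic target folds the argument load straight into the push, while a slowTwoMemOps target (SLM, Goldmont, KNL, Atom in the new RUN lines) loads into a register first. A minimal sketch distilled from the or128 checks (register choices are illustrative):

  # Generic target: the pass folds the load into the push (PUSHrmm),
  # a single instruction that both reads and writes memory.
  pushl 12(%ebp)

  # slowTwoMemOps target: keep the load separate and push the register,
  # so no one push instruction touches memory twice.
  movl 12(%ebp), %eax
  pushl %eax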

Differential Revision: https://reviews.llvm.org/D76239
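The CHECK lines in the test diff below are autogenerated, so after a codegen change like this they are refreshed rather than hand-edited. A typical invocation of the script named in the test's NOTE header (paths are illustrative and assume an in-tree build under build/):

  python utils/update_llc_test_checks.py --llc-binary=build/bin/llc \
      test/CodeGen/X86/atomic-idempotent.ll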
Simon Pilgrim 2020-03-29 11:01:42 +01:00
parent 527a4d99f8
commit ae5e10d787
2 changed files with 434 additions and 115 deletions

lib/Target/X86/X86CallFrameOptimization.cpp

@@ -549,7 +549,7 @@ void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
   // If PUSHrmm is not slow on this target, try to fold the source of the
   // push into the instruction.
-  bool SlowPUSHrmm = STI->isAtom() || STI->isSLM();
+  bool SlowPUSHrmm = STI->slowTwoMemOps();
   // Check that this is legal to fold. Right now, we're extremely
   // conservative about that.

test/CodeGen/X86/atomic-idempotent.ll

@@ -1,6 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=X64
-; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86,X86-GENERIC,X86-SSE2
+; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=slm -mattr=-sse2 | FileCheck %s --check-prefixes=CHECK,X86,X86-GENERIC,X86-SLM
+; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=goldmont -mattr=-sse2 | FileCheck %s --check-prefixes=CHECK,X86,X86-GENERIC,X86-SLM
+; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=knl -mattr=-sse2 | FileCheck %s --check-prefixes=CHECK,X86,X86-GENERIC,X86-SLM
+; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=atom -mattr=-sse2 | FileCheck %s --check-prefixes=CHECK,X86,X86-ATOM
 ; On x86, an atomic rmw operation that does not modify the value in memory
 ; (such as atomic add 0) can be replaced by an mfence followed by a mov.
@@ -14,12 +18,30 @@ define i8 @add8(i8* %p) {
 ; X64-NEXT: movb (%rdi), %al
 ; X64-NEXT: retq
 ;
-; X86-LABEL: add8:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mfence
-; X86-NEXT: movb (%eax), %al
-; X86-NEXT: retl
+; X86-SSE2-LABEL: add8:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: mfence
+; X86-SSE2-NEXT: movb (%eax), %al
+; X86-SSE2-NEXT: retl
+;
+; X86-SLM-LABEL: add8:
+; X86-SLM: # %bb.0:
+; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLM-NEXT: xorl %eax, %eax
+; X86-SLM-NEXT: lock xaddb %al, (%ecx)
+; X86-SLM-NEXT: # kill: def $al killed $al killed $eax
+; X86-SLM-NEXT: retl
+;
+; X86-ATOM-LABEL: add8:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-ATOM-NEXT: xorl %eax, %eax
+; X86-ATOM-NEXT: lock xaddb %al, (%ecx)
+; X86-ATOM-NEXT: # kill: def $al killed $al killed $eax
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
   %1 = atomicrmw add i8* %p, i8 0 monotonic
   ret i8 %1
 }
@@ -31,12 +53,36 @@ define i16 @or16(i16* %p) {
 ; X64-NEXT: movzwl (%rdi), %eax
 ; X64-NEXT: retq
 ;
-; X86-LABEL: or16:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mfence
-; X86-NEXT: movzwl (%eax), %eax
-; X86-NEXT: retl
+; X86-SSE2-LABEL: or16:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: mfence
+; X86-SSE2-NEXT: movzwl (%eax), %eax
+; X86-SSE2-NEXT: retl
+;
+; X86-SLM-LABEL: or16:
+; X86-SLM: # %bb.0:
+; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLM-NEXT: movzwl (%ecx), %eax
+; X86-SLM-NEXT: .p2align 4, 0x90
+; X86-SLM-NEXT: .LBB1_1: # %atomicrmw.start
+; X86-SLM-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-SLM-NEXT: lock cmpxchgw %ax, (%ecx)
+; X86-SLM-NEXT: jne .LBB1_1
+; X86-SLM-NEXT: # %bb.2: # %atomicrmw.end
+; X86-SLM-NEXT: retl
+;
+; X86-ATOM-LABEL: or16:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-ATOM-NEXT: movzwl (%ecx), %eax
+; X86-ATOM-NEXT: .p2align 4, 0x90
+; X86-ATOM-NEXT: .LBB1_1: # %atomicrmw.start
+; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-ATOM-NEXT: lock cmpxchgw %ax, (%ecx)
+; X86-ATOM-NEXT: jne .LBB1_1
+; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end
+; X86-ATOM-NEXT: retl
   %1 = atomicrmw or i16* %p, i16 0 acquire
   ret i16 %1
 }
@@ -48,12 +94,36 @@ define i32 @xor32(i32* %p) {
 ; X64-NEXT: movl (%rdi), %eax
 ; X64-NEXT: retq
 ;
-; X86-LABEL: xor32:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mfence
-; X86-NEXT: movl (%eax), %eax
-; X86-NEXT: retl
+; X86-SSE2-LABEL: xor32:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: mfence
+; X86-SSE2-NEXT: movl (%eax), %eax
+; X86-SSE2-NEXT: retl
+;
+; X86-SLM-LABEL: xor32:
+; X86-SLM: # %bb.0:
+; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLM-NEXT: movl (%ecx), %eax
+; X86-SLM-NEXT: .p2align 4, 0x90
+; X86-SLM-NEXT: .LBB2_1: # %atomicrmw.start
+; X86-SLM-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-SLM-NEXT: lock cmpxchgl %eax, (%ecx)
+; X86-SLM-NEXT: jne .LBB2_1
+; X86-SLM-NEXT: # %bb.2: # %atomicrmw.end
+; X86-SLM-NEXT: retl
+;
+; X86-ATOM-LABEL: xor32:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-ATOM-NEXT: movl (%ecx), %eax
+; X86-ATOM-NEXT: .p2align 4, 0x90
+; X86-ATOM-NEXT: .LBB2_1: # %atomicrmw.start
+; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-ATOM-NEXT: lock cmpxchgl %eax, (%ecx)
+; X86-ATOM-NEXT: jne .LBB2_1
+; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end
+; X86-ATOM-NEXT: retl
   %1 = atomicrmw xor i32* %p, i32 0 release
   ret i32 %1
 }
@@ -105,44 +175,124 @@ define i128 @or128(i128* %p) {
 ; X64-NEXT: .cfi_def_cfa_offset 8
 ; X64-NEXT: retq
 ;
-; X86-LABEL: or128:
-; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
-; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: .cfi_offset %ebp, -8
-; X86-NEXT: movl %esp, %ebp
-; X86-NEXT: .cfi_def_cfa_register %ebp
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: .cfi_offset %esi, -16
-; X86-NEXT: .cfi_offset %edi, -12
-; X86-NEXT: movl 8(%ebp), %esi
-; X86-NEXT: movl %esp, %eax
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl 12(%ebp)
-; X86-NEXT: pushl %eax
-; X86-NEXT: calll __sync_fetch_and_or_16
-; X86-NEXT: addl $20, %esp
-; X86-NEXT: movl (%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, 8(%esi)
-; X86-NEXT: movl %edx, 12(%esi)
-; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: movl %ecx, 4(%esi)
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: leal -8(%ebp), %esp
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebp
-; X86-NEXT: .cfi_def_cfa %esp, 4
-; X86-NEXT: retl $4
+; X86-SSE2-LABEL: or128:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: pushl %ebp
+; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
+; X86-SSE2-NEXT: .cfi_offset %ebp, -8
+; X86-SSE2-NEXT: movl %esp, %ebp
+; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
+; X86-SSE2-NEXT: pushl %edi
+; X86-SSE2-NEXT: pushl %esi
+; X86-SSE2-NEXT: andl $-8, %esp
+; X86-SSE2-NEXT: subl $16, %esp
+; X86-SSE2-NEXT: .cfi_offset %esi, -16
+; X86-SSE2-NEXT: .cfi_offset %edi, -12
+; X86-SSE2-NEXT: movl 8(%ebp), %esi
+; X86-SSE2-NEXT: movl %esp, %eax
+; X86-SSE2-NEXT: pushl $0
+; X86-SSE2-NEXT: pushl $0
+; X86-SSE2-NEXT: pushl $0
+; X86-SSE2-NEXT: pushl $0
+; X86-SSE2-NEXT: pushl 12(%ebp)
+; X86-SSE2-NEXT: pushl %eax
+; X86-SSE2-NEXT: calll __sync_fetch_and_or_16
+; X86-SSE2-NEXT: addl $20, %esp
+; X86-SSE2-NEXT: movl (%esp), %eax
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-SSE2-NEXT: movl %edi, 8(%esi)
+; X86-SSE2-NEXT: movl %edx, 12(%esi)
+; X86-SSE2-NEXT: movl %eax, (%esi)
+; X86-SSE2-NEXT: movl %ecx, 4(%esi)
+; X86-SSE2-NEXT: movl %esi, %eax
+; X86-SSE2-NEXT: leal -8(%ebp), %esp
+; X86-SSE2-NEXT: popl %esi
+; X86-SSE2-NEXT: popl %edi
+; X86-SSE2-NEXT: popl %ebp
+; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4
+; X86-SSE2-NEXT: retl $4
+;
+; X86-SLM-LABEL: or128:
+; X86-SLM: # %bb.0:
+; X86-SLM-NEXT: pushl %ebp
+; X86-SLM-NEXT: .cfi_def_cfa_offset 8
+; X86-SLM-NEXT: .cfi_offset %ebp, -8
+; X86-SLM-NEXT: movl %esp, %ebp
+; X86-SLM-NEXT: .cfi_def_cfa_register %ebp
+; X86-SLM-NEXT: pushl %edi
+; X86-SLM-NEXT: pushl %esi
+; X86-SLM-NEXT: andl $-8, %esp
+; X86-SLM-NEXT: subl $16, %esp
+; X86-SLM-NEXT: .cfi_offset %esi, -16
+; X86-SLM-NEXT: .cfi_offset %edi, -12
+; X86-SLM-NEXT: movl 8(%ebp), %esi
+; X86-SLM-NEXT: movl 12(%ebp), %eax
+; X86-SLM-NEXT: movl %esp, %ecx
+; X86-SLM-NEXT: pushl $0
+; X86-SLM-NEXT: pushl $0
+; X86-SLM-NEXT: pushl $0
+; X86-SLM-NEXT: pushl $0
+; X86-SLM-NEXT: pushl %eax
+; X86-SLM-NEXT: pushl %ecx
+; X86-SLM-NEXT: calll __sync_fetch_and_or_16
+; X86-SLM-NEXT: addl $20, %esp
+; X86-SLM-NEXT: movl (%esp), %eax
+; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-SLM-NEXT: movl %edi, 8(%esi)
+; X86-SLM-NEXT: movl %edx, 12(%esi)
+; X86-SLM-NEXT: movl %eax, (%esi)
+; X86-SLM-NEXT: movl %ecx, 4(%esi)
+; X86-SLM-NEXT: movl %esi, %eax
+; X86-SLM-NEXT: leal -8(%ebp), %esp
+; X86-SLM-NEXT: popl %esi
+; X86-SLM-NEXT: popl %edi
+; X86-SLM-NEXT: popl %ebp
+; X86-SLM-NEXT: .cfi_def_cfa %esp, 4
+; X86-SLM-NEXT: retl $4
+;
+; X86-ATOM-LABEL: or128:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: pushl %ebp
+; X86-ATOM-NEXT: .cfi_def_cfa_offset 8
+; X86-ATOM-NEXT: .cfi_offset %ebp, -8
+; X86-ATOM-NEXT: leal (%esp), %ebp
+; X86-ATOM-NEXT: .cfi_def_cfa_register %ebp
+; X86-ATOM-NEXT: pushl %edi
+; X86-ATOM-NEXT: pushl %esi
+; X86-ATOM-NEXT: andl $-8, %esp
+; X86-ATOM-NEXT: leal -{{[0-9]+}}(%esp), %esp
+; X86-ATOM-NEXT: .cfi_offset %esi, -16
+; X86-ATOM-NEXT: .cfi_offset %edi, -12
+; X86-ATOM-NEXT: movl 8(%ebp), %esi
+; X86-ATOM-NEXT: movl 12(%ebp), %eax
+; X86-ATOM-NEXT: movl %esp, %ecx
+; X86-ATOM-NEXT: pushl $0
+; X86-ATOM-NEXT: pushl $0
+; X86-ATOM-NEXT: pushl $0
+; X86-ATOM-NEXT: pushl $0
+; X86-ATOM-NEXT: pushl %eax
+; X86-ATOM-NEXT: pushl %ecx
+; X86-ATOM-NEXT: calll __sync_fetch_and_or_16
+; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %esp
+; X86-ATOM-NEXT: movl (%esp), %ecx
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-ATOM-NEXT: movl %eax, 8(%esi)
+; X86-ATOM-NEXT: movl %edi, 12(%esi)
+; X86-ATOM-NEXT: movl %ecx, (%esi)
+; X86-ATOM-NEXT: movl %esi, %eax
+; X86-ATOM-NEXT: movl %edx, 4(%esi)
+; X86-ATOM-NEXT: leal -8(%ebp), %esp
+; X86-ATOM-NEXT: popl %esi
+; X86-ATOM-NEXT: popl %edi
+; X86-ATOM-NEXT: popl %ebp
+; X86-ATOM-NEXT: .cfi_def_cfa %esp, 4
+; X86-ATOM-NEXT: retl $4
   %1 = atomicrmw or i128* %p, i128 0 monotonic
   ret i128 %1
 }
@@ -155,49 +305,137 @@ define i32 @and32 (i32* %p) {
 ; X64-NEXT: movl (%rdi), %eax
 ; X64-NEXT: retq
 ;
-; X86-LABEL: and32:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mfence
-; X86-NEXT: movl (%eax), %eax
-; X86-NEXT: retl
+; X86-SSE2-LABEL: and32:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: mfence
+; X86-SSE2-NEXT: movl (%eax), %eax
+; X86-SSE2-NEXT: retl
+;
+; X86-SLM-LABEL: and32:
+; X86-SLM: # %bb.0:
+; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLM-NEXT: movl (%ecx), %eax
+; X86-SLM-NEXT: .p2align 4, 0x90
+; X86-SLM-NEXT: .LBB5_1: # %atomicrmw.start
+; X86-SLM-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-SLM-NEXT: lock cmpxchgl %eax, (%ecx)
+; X86-SLM-NEXT: jne .LBB5_1
+; X86-SLM-NEXT: # %bb.2: # %atomicrmw.end
+; X86-SLM-NEXT: retl
+;
+; X86-ATOM-LABEL: and32:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-ATOM-NEXT: movl (%ecx), %eax
+; X86-ATOM-NEXT: .p2align 4, 0x90
+; X86-ATOM-NEXT: .LBB5_1: # %atomicrmw.start
+; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-ATOM-NEXT: lock cmpxchgl %eax, (%ecx)
+; X86-ATOM-NEXT: jne .LBB5_1
+; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end
+; X86-ATOM-NEXT: retl
   %1 = atomicrmw and i32* %p, i32 -1 acq_rel
   ret i32 %1
 }
 define void @or32_nouse_monotonic(i32* %p) {
-; CHECK-LABEL: or32_nouse_monotonic:
-; CHECK: # %bb.0:
-; CHECK-NEXT: #MEMBARRIER
-; CHECK-NEXT: ret{{[l|q]}}
+; X64-LABEL: or32_nouse_monotonic:
+; X64: # %bb.0:
+; X64-NEXT: #MEMBARRIER
+; X64-NEXT: retq
+;
+; X86-GENERIC-LABEL: or32_nouse_monotonic:
+; X86-GENERIC: # %bb.0:
+; X86-GENERIC-NEXT: #MEMBARRIER
+; X86-GENERIC-NEXT: retl
+;
+; X86-ATOM-LABEL: or32_nouse_monotonic:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: #MEMBARRIER
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
   atomicrmw or i32* %p, i32 0 monotonic
   ret void
 }
 define void @or32_nouse_acquire(i32* %p) {
-; CHECK-LABEL: or32_nouse_acquire:
-; CHECK: # %bb.0:
-; CHECK-NEXT: #MEMBARRIER
-; CHECK-NEXT: ret{{[l|q]}}
+; X64-LABEL: or32_nouse_acquire:
+; X64: # %bb.0:
+; X64-NEXT: #MEMBARRIER
+; X64-NEXT: retq
+;
+; X86-GENERIC-LABEL: or32_nouse_acquire:
+; X86-GENERIC: # %bb.0:
+; X86-GENERIC-NEXT: #MEMBARRIER
+; X86-GENERIC-NEXT: retl
+;
+; X86-ATOM-LABEL: or32_nouse_acquire:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: #MEMBARRIER
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
   atomicrmw or i32* %p, i32 0 acquire
   ret void
 }
 define void @or32_nouse_release(i32* %p) {
-; CHECK-LABEL: or32_nouse_release:
-; CHECK: # %bb.0:
-; CHECK-NEXT: #MEMBARRIER
-; CHECK-NEXT: ret{{[l|q]}}
+; X64-LABEL: or32_nouse_release:
+; X64: # %bb.0:
+; X64-NEXT: #MEMBARRIER
+; X64-NEXT: retq
+;
+; X86-GENERIC-LABEL: or32_nouse_release:
+; X86-GENERIC: # %bb.0:
+; X86-GENERIC-NEXT: #MEMBARRIER
+; X86-GENERIC-NEXT: retl
+;
+; X86-ATOM-LABEL: or32_nouse_release:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: #MEMBARRIER
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
   atomicrmw or i32* %p, i32 0 release
   ret void
 }
 define void @or32_nouse_acq_rel(i32* %p) {
-; CHECK-LABEL: or32_nouse_acq_rel:
-; CHECK: # %bb.0:
-; CHECK-NEXT: #MEMBARRIER
-; CHECK-NEXT: ret{{[l|q]}}
+; X64-LABEL: or32_nouse_acq_rel:
+; X64: # %bb.0:
+; X64-NEXT: #MEMBARRIER
+; X64-NEXT: retq
+;
+; X86-GENERIC-LABEL: or32_nouse_acq_rel:
+; X86-GENERIC: # %bb.0:
+; X86-GENERIC-NEXT: #MEMBARRIER
+; X86-GENERIC-NEXT: retl
+;
+; X86-ATOM-LABEL: or32_nouse_acq_rel:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: #MEMBARRIER
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
   atomicrmw or i32* %p, i32 0 acq_rel
   ret void
 }
@@ -208,10 +446,21 @@ define void @or32_nouse_seq_cst(i32* %p) {
 ; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
 ; X64-NEXT: retq
 ;
-; X86-LABEL: or32_nouse_seq_cst:
-; X86: # %bb.0:
-; X86-NEXT: lock orl $0, (%esp)
-; X86-NEXT: retl
+; X86-GENERIC-LABEL: or32_nouse_seq_cst:
+; X86-GENERIC: # %bb.0:
+; X86-GENERIC-NEXT: lock orl $0, (%esp)
+; X86-GENERIC-NEXT: retl
+;
+; X86-ATOM-LABEL: or32_nouse_seq_cst:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: lock orl $0, (%esp)
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
   atomicrmw or i32* %p, i32 0 seq_cst
   ret void
 }
@@ -264,28 +513,76 @@ define void @or128_nouse_seq_cst(i128* %p) {
 ; X64-NEXT: .cfi_def_cfa_offset 8
 ; X64-NEXT: retq
 ;
-; X86-LABEL: or128_nouse_seq_cst:
-; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
-; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: .cfi_offset %ebp, -8
-; X86-NEXT: movl %esp, %ebp
-; X86-NEXT: .cfi_def_cfa_register %ebp
-; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: movl %esp, %eax
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl 8(%ebp)
-; X86-NEXT: pushl %eax
-; X86-NEXT: calll __sync_fetch_and_or_16
-; X86-NEXT: addl $20, %esp
-; X86-NEXT: movl %ebp, %esp
-; X86-NEXT: popl %ebp
-; X86-NEXT: .cfi_def_cfa %esp, 4
-; X86-NEXT: retl
+; X86-SSE2-LABEL: or128_nouse_seq_cst:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: pushl %ebp
+; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
+; X86-SSE2-NEXT: .cfi_offset %ebp, -8
+; X86-SSE2-NEXT: movl %esp, %ebp
+; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
+; X86-SSE2-NEXT: andl $-8, %esp
+; X86-SSE2-NEXT: subl $16, %esp
+; X86-SSE2-NEXT: movl %esp, %eax
+; X86-SSE2-NEXT: pushl $0
+; X86-SSE2-NEXT: pushl $0
+; X86-SSE2-NEXT: pushl $0
+; X86-SSE2-NEXT: pushl $0
+; X86-SSE2-NEXT: pushl 8(%ebp)
+; X86-SSE2-NEXT: pushl %eax
+; X86-SSE2-NEXT: calll __sync_fetch_and_or_16
+; X86-SSE2-NEXT: addl $20, %esp
+; X86-SSE2-NEXT: movl %ebp, %esp
+; X86-SSE2-NEXT: popl %ebp
+; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4
+; X86-SSE2-NEXT: retl
+;
+; X86-SLM-LABEL: or128_nouse_seq_cst:
+; X86-SLM: # %bb.0:
+; X86-SLM-NEXT: pushl %ebp
+; X86-SLM-NEXT: .cfi_def_cfa_offset 8
+; X86-SLM-NEXT: .cfi_offset %ebp, -8
+; X86-SLM-NEXT: movl %esp, %ebp
+; X86-SLM-NEXT: .cfi_def_cfa_register %ebp
+; X86-SLM-NEXT: andl $-8, %esp
+; X86-SLM-NEXT: subl $16, %esp
+; X86-SLM-NEXT: movl 8(%ebp), %eax
+; X86-SLM-NEXT: movl %esp, %ecx
+; X86-SLM-NEXT: pushl $0
+; X86-SLM-NEXT: pushl $0
+; X86-SLM-NEXT: pushl $0
+; X86-SLM-NEXT: pushl $0
+; X86-SLM-NEXT: pushl %eax
+; X86-SLM-NEXT: pushl %ecx
+; X86-SLM-NEXT: calll __sync_fetch_and_or_16
+; X86-SLM-NEXT: addl $20, %esp
+; X86-SLM-NEXT: movl %ebp, %esp
+; X86-SLM-NEXT: popl %ebp
+; X86-SLM-NEXT: .cfi_def_cfa %esp, 4
+; X86-SLM-NEXT: retl
+;
+; X86-ATOM-LABEL: or128_nouse_seq_cst:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: pushl %ebp
+; X86-ATOM-NEXT: .cfi_def_cfa_offset 8
+; X86-ATOM-NEXT: .cfi_offset %ebp, -8
+; X86-ATOM-NEXT: leal (%esp), %ebp
+; X86-ATOM-NEXT: .cfi_def_cfa_register %ebp
+; X86-ATOM-NEXT: andl $-8, %esp
+; X86-ATOM-NEXT: leal -{{[0-9]+}}(%esp), %esp
+; X86-ATOM-NEXT: movl 8(%ebp), %eax
+; X86-ATOM-NEXT: movl %esp, %ecx
+; X86-ATOM-NEXT: pushl $0
+; X86-ATOM-NEXT: pushl $0
+; X86-ATOM-NEXT: pushl $0
+; X86-ATOM-NEXT: pushl $0
+; X86-ATOM-NEXT: pushl %eax
+; X86-ATOM-NEXT: pushl %ecx
+; X86-ATOM-NEXT: calll __sync_fetch_and_or_16
+; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %esp
+; X86-ATOM-NEXT: movl %ebp, %esp
+; X86-ATOM-NEXT: popl %ebp
+; X86-ATOM-NEXT: .cfi_def_cfa %esp, 4
+; X86-ATOM-NEXT: retl
   atomicrmw or i128* %p, i128 0 seq_cst
   ret void
 }
@@ -297,10 +594,21 @@ define void @or16_nouse_seq_cst(i16* %p) {
 ; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
 ; X64-NEXT: retq
 ;
-; X86-LABEL: or16_nouse_seq_cst:
-; X86: # %bb.0:
-; X86-NEXT: lock orl $0, (%esp)
-; X86-NEXT: retl
+; X86-GENERIC-LABEL: or16_nouse_seq_cst:
+; X86-GENERIC: # %bb.0:
+; X86-GENERIC-NEXT: lock orl $0, (%esp)
+; X86-GENERIC-NEXT: retl
+;
+; X86-ATOM-LABEL: or16_nouse_seq_cst:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: lock orl $0, (%esp)
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
   atomicrmw or i16* %p, i16 0 seq_cst
   ret void
 }
@@ -311,10 +619,21 @@ define void @or8_nouse_seq_cst(i8* %p) {
 ; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
 ; X64-NEXT: retq
 ;
-; X86-LABEL: or8_nouse_seq_cst:
-; X86: # %bb.0:
-; X86-NEXT: lock orl $0, (%esp)
-; X86-NEXT: retl
+; X86-GENERIC-LABEL: or8_nouse_seq_cst:
+; X86-GENERIC: # %bb.0:
+; X86-GENERIC-NEXT: lock orl $0, (%esp)
+; X86-GENERIC-NEXT: retl
+;
+; X86-ATOM-LABEL: or8_nouse_seq_cst:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: lock orl $0, (%esp)
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
   atomicrmw or i8* %p, i8 0 seq_cst
   ret void
 }