Mirror of https://github.com/RPCS3/llvm-mirror.git (synced 2025-01-31 20:51:52 +01:00)
[X86] X86CallFrameOptimization - generalize slow push code path
Replace the explicit isAtom() || isSLM() test with the more general (and more specific) slowTwoMemOps() check to avoid the use of the PUSHrmm push-from-memory case.

This is actually very tricky to test in anything but quite complex code, but the atomic-idempotent.ll tests seem to be the most straightforward to use.

Differential Revision: https://reviews.llvm.org/D76239
parent 527a4d99f8
commit ae5e10d787
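As a rough illustration of what the new check buys, here is a toy sketch of the predicate change (this is illustrative only, not LLVM's real X86Subtarget interface; the assumption that Goldmont and KNL also report slow two-memory-operand operations is taken from the new RUN lines in the test below):

// Toy model of the predicate change in X86CallFrameOptimization; illustrative
// only, not LLVM's actual subtarget API.
#include <iostream>

struct ToySubtarget {
  bool IsAtom = false;        // Bonnell-class Atom
  bool IsSLM = false;         // Silvermont
  bool SlowTwoMemOps = false; // "two memory operand instructions are slow"
};

// Old predicate: only Atom and Silvermont avoided folding a load into PUSHrmm.
static bool slowPushOld(const ToySubtarget &ST) { return ST.IsAtom || ST.IsSLM; }

// New predicate: any subtarget reporting the slow-two-mem-ops trait takes the
// slow path; per the RUN lines below this is assumed to also cover Goldmont and KNL.
static bool slowPushNew(const ToySubtarget &ST) { return ST.SlowTwoMemOps; }

int main() {
  ToySubtarget Goldmont;
  Goldmont.SlowTwoMemOps = true; // assumption based on the -mcpu=goldmont RUN line
  std::cout << "old: " << slowPushOld(Goldmont)
            << "  new: " << slowPushNew(Goldmont) << '\n'; // prints "old: 0  new: 1"
  return 0;
}

In other words, the call frame optimizer now keys off a subtarget trait rather than a hard-coded CPU list, so any CPU that shares the limitation picks up the slow-push path automatically.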
@@ -549,7 +549,7 @@ void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,

// If PUSHrmm is not slow on this target, try to fold the source of the
// push into the instruction.
bool SlowPUSHrmm = STI->isAtom() || STI->isSLM();
bool SlowPUSHrmm = STI->slowTwoMemOps();

// Check that this is legal to fold. Right now, we're extremely
// conservative about that.
@@ -1,6 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=X64
; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,X64
; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86,X86-GENERIC,X86-SSE2
; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=slm -mattr=-sse2 | FileCheck %s --check-prefixes=CHECK,X86,X86-GENERIC,X86-SLM
; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=goldmont -mattr=-sse2 | FileCheck %s --check-prefixes=CHECK,X86,X86-GENERIC,X86-SLM
; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=knl -mattr=-sse2 | FileCheck %s --check-prefixes=CHECK,X86,X86-GENERIC,X86-SLM
; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=atom -mattr=-sse2 | FileCheck %s --check-prefixes=CHECK,X86,X86-ATOM

; On x86, an atomic rmw operation that does not modify the value in memory
; (such as atomic add 0) can be replaced by an mfence followed by a mov.
@@ -14,12 +18,30 @@ define i8 @add8(i8* %p) {
; X64-NEXT: movb (%rdi), %al
; X64-NEXT: retq
;
; X86-LABEL: add8:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mfence
; X86-NEXT: movb (%eax), %al
; X86-NEXT: retl
; X86-SSE2-LABEL: add8:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: mfence
; X86-SSE2-NEXT: movb (%eax), %al
; X86-SSE2-NEXT: retl
;
; X86-SLM-LABEL: add8:
; X86-SLM: # %bb.0:
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SLM-NEXT: xorl %eax, %eax
; X86-SLM-NEXT: lock xaddb %al, (%ecx)
; X86-SLM-NEXT: # kill: def $al killed $al killed $eax
; X86-SLM-NEXT: retl
;
; X86-ATOM-LABEL: add8:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-ATOM-NEXT: xorl %eax, %eax
; X86-ATOM-NEXT: lock xaddb %al, (%ecx)
; X86-ATOM-NEXT: # kill: def $al killed $al killed $eax
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: retl
%1 = atomicrmw add i8* %p, i8 0 monotonic
ret i8 %1
}
@@ -31,12 +53,36 @@ define i16 @or16(i16* %p) {
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: retq
;
; X86-LABEL: or16:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mfence
; X86-NEXT: movzwl (%eax), %eax
; X86-NEXT: retl
; X86-SSE2-LABEL: or16:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: mfence
; X86-SSE2-NEXT: movzwl (%eax), %eax
; X86-SSE2-NEXT: retl
;
; X86-SLM-LABEL: or16:
; X86-SLM: # %bb.0:
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SLM-NEXT: movzwl (%ecx), %eax
; X86-SLM-NEXT: .p2align 4, 0x90
; X86-SLM-NEXT: .LBB1_1: # %atomicrmw.start
; X86-SLM-NEXT: # =>This Inner Loop Header: Depth=1
; X86-SLM-NEXT: lock cmpxchgw %ax, (%ecx)
; X86-SLM-NEXT: jne .LBB1_1
; X86-SLM-NEXT: # %bb.2: # %atomicrmw.end
; X86-SLM-NEXT: retl
;
; X86-ATOM-LABEL: or16:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-ATOM-NEXT: movzwl (%ecx), %eax
; X86-ATOM-NEXT: .p2align 4, 0x90
; X86-ATOM-NEXT: .LBB1_1: # %atomicrmw.start
; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1
; X86-ATOM-NEXT: lock cmpxchgw %ax, (%ecx)
; X86-ATOM-NEXT: jne .LBB1_1
; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end
; X86-ATOM-NEXT: retl
%1 = atomicrmw or i16* %p, i16 0 acquire
ret i16 %1
}
@@ -48,12 +94,36 @@ define i32 @xor32(i32* %p) {
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: retq
;
; X86-LABEL: xor32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mfence
; X86-NEXT: movl (%eax), %eax
; X86-NEXT: retl
; X86-SSE2-LABEL: xor32:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: mfence
; X86-SSE2-NEXT: movl (%eax), %eax
; X86-SSE2-NEXT: retl
;
; X86-SLM-LABEL: xor32:
; X86-SLM: # %bb.0:
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SLM-NEXT: movl (%ecx), %eax
; X86-SLM-NEXT: .p2align 4, 0x90
; X86-SLM-NEXT: .LBB2_1: # %atomicrmw.start
; X86-SLM-NEXT: # =>This Inner Loop Header: Depth=1
; X86-SLM-NEXT: lock cmpxchgl %eax, (%ecx)
; X86-SLM-NEXT: jne .LBB2_1
; X86-SLM-NEXT: # %bb.2: # %atomicrmw.end
; X86-SLM-NEXT: retl
;
; X86-ATOM-LABEL: xor32:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-ATOM-NEXT: movl (%ecx), %eax
; X86-ATOM-NEXT: .p2align 4, 0x90
; X86-ATOM-NEXT: .LBB2_1: # %atomicrmw.start
; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1
; X86-ATOM-NEXT: lock cmpxchgl %eax, (%ecx)
; X86-ATOM-NEXT: jne .LBB2_1
; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end
; X86-ATOM-NEXT: retl
%1 = atomicrmw xor i32* %p, i32 0 release
ret i32 %1
}
@@ -105,44 +175,124 @@ define i128 @or128(i128* %p) {
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
; X86-LABEL: or128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %ebp, -8
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: .cfi_def_cfa_register %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $16, %esp
; X86-NEXT: .cfi_offset %esi, -16
; X86-NEXT: .cfi_offset %edi, -12
; X86-NEXT: movl 8(%ebp), %esi
; X86-NEXT: movl %esp, %eax
; X86-NEXT: pushl $0
; X86-NEXT: pushl $0
; X86-NEXT: pushl $0
; X86-NEXT: pushl $0
; X86-NEXT: pushl 12(%ebp)
; X86-NEXT: pushl %eax
; X86-NEXT: calll __sync_fetch_and_or_16
; X86-NEXT: addl $20, %esp
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, 8(%esi)
; X86-NEXT: movl %edx, 12(%esi)
; X86-NEXT: movl %eax, (%esi)
; X86-NEXT: movl %ecx, 4(%esi)
; X86-NEXT: movl %esi, %eax
; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebp
; X86-NEXT: .cfi_def_cfa %esp, 4
; X86-NEXT: retl $4
; X86-SSE2-LABEL: or128:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
; X86-SSE2-NEXT: .cfi_offset %ebp, -8
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
; X86-SSE2-NEXT: pushl %edi
; X86-SSE2-NEXT: pushl %esi
; X86-SSE2-NEXT: andl $-8, %esp
; X86-SSE2-NEXT: subl $16, %esp
; X86-SSE2-NEXT: .cfi_offset %esi, -16
; X86-SSE2-NEXT: .cfi_offset %edi, -12
; X86-SSE2-NEXT: movl 8(%ebp), %esi
; X86-SSE2-NEXT: movl %esp, %eax
; X86-SSE2-NEXT: pushl $0
; X86-SSE2-NEXT: pushl $0
; X86-SSE2-NEXT: pushl $0
; X86-SSE2-NEXT: pushl $0
; X86-SSE2-NEXT: pushl 12(%ebp)
; X86-SSE2-NEXT: pushl %eax
; X86-SSE2-NEXT: calll __sync_fetch_and_or_16
; X86-SSE2-NEXT: addl $20, %esp
; X86-SSE2-NEXT: movl (%esp), %eax
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-SSE2-NEXT: movl %edi, 8(%esi)
; X86-SSE2-NEXT: movl %edx, 12(%esi)
; X86-SSE2-NEXT: movl %eax, (%esi)
; X86-SSE2-NEXT: movl %ecx, 4(%esi)
; X86-SSE2-NEXT: movl %esi, %eax
; X86-SSE2-NEXT: leal -8(%ebp), %esp
; X86-SSE2-NEXT: popl %esi
; X86-SSE2-NEXT: popl %edi
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4
; X86-SSE2-NEXT: retl $4
;
; X86-SLM-LABEL: or128:
; X86-SLM: # %bb.0:
; X86-SLM-NEXT: pushl %ebp
; X86-SLM-NEXT: .cfi_def_cfa_offset 8
; X86-SLM-NEXT: .cfi_offset %ebp, -8
; X86-SLM-NEXT: movl %esp, %ebp
; X86-SLM-NEXT: .cfi_def_cfa_register %ebp
; X86-SLM-NEXT: pushl %edi
; X86-SLM-NEXT: pushl %esi
; X86-SLM-NEXT: andl $-8, %esp
; X86-SLM-NEXT: subl $16, %esp
; X86-SLM-NEXT: .cfi_offset %esi, -16
; X86-SLM-NEXT: .cfi_offset %edi, -12
; X86-SLM-NEXT: movl 8(%ebp), %esi
; X86-SLM-NEXT: movl 12(%ebp), %eax
; X86-SLM-NEXT: movl %esp, %ecx
; X86-SLM-NEXT: pushl $0
; X86-SLM-NEXT: pushl $0
; X86-SLM-NEXT: pushl $0
; X86-SLM-NEXT: pushl $0
; X86-SLM-NEXT: pushl %eax
; X86-SLM-NEXT: pushl %ecx
; X86-SLM-NEXT: calll __sync_fetch_and_or_16
; X86-SLM-NEXT: addl $20, %esp
; X86-SLM-NEXT: movl (%esp), %eax
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-SLM-NEXT: movl %edi, 8(%esi)
; X86-SLM-NEXT: movl %edx, 12(%esi)
; X86-SLM-NEXT: movl %eax, (%esi)
; X86-SLM-NEXT: movl %ecx, 4(%esi)
; X86-SLM-NEXT: movl %esi, %eax
; X86-SLM-NEXT: leal -8(%ebp), %esp
; X86-SLM-NEXT: popl %esi
; X86-SLM-NEXT: popl %edi
; X86-SLM-NEXT: popl %ebp
; X86-SLM-NEXT: .cfi_def_cfa %esp, 4
; X86-SLM-NEXT: retl $4
;
; X86-ATOM-LABEL: or128:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: pushl %ebp
; X86-ATOM-NEXT: .cfi_def_cfa_offset 8
; X86-ATOM-NEXT: .cfi_offset %ebp, -8
; X86-ATOM-NEXT: leal (%esp), %ebp
; X86-ATOM-NEXT: .cfi_def_cfa_register %ebp
; X86-ATOM-NEXT: pushl %edi
; X86-ATOM-NEXT: pushl %esi
; X86-ATOM-NEXT: andl $-8, %esp
; X86-ATOM-NEXT: leal -{{[0-9]+}}(%esp), %esp
; X86-ATOM-NEXT: .cfi_offset %esi, -16
; X86-ATOM-NEXT: .cfi_offset %edi, -12
; X86-ATOM-NEXT: movl 8(%ebp), %esi
; X86-ATOM-NEXT: movl 12(%ebp), %eax
; X86-ATOM-NEXT: movl %esp, %ecx
; X86-ATOM-NEXT: pushl $0
; X86-ATOM-NEXT: pushl $0
; X86-ATOM-NEXT: pushl $0
; X86-ATOM-NEXT: pushl $0
; X86-ATOM-NEXT: pushl %eax
; X86-ATOM-NEXT: pushl %ecx
; X86-ATOM-NEXT: calll __sync_fetch_and_or_16
; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %esp
; X86-ATOM-NEXT: movl (%esp), %ecx
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-ATOM-NEXT: movl %eax, 8(%esi)
; X86-ATOM-NEXT: movl %edi, 12(%esi)
; X86-ATOM-NEXT: movl %ecx, (%esi)
; X86-ATOM-NEXT: movl %esi, %eax
; X86-ATOM-NEXT: movl %edx, 4(%esi)
; X86-ATOM-NEXT: leal -8(%ebp), %esp
; X86-ATOM-NEXT: popl %esi
; X86-ATOM-NEXT: popl %edi
; X86-ATOM-NEXT: popl %ebp
; X86-ATOM-NEXT: .cfi_def_cfa %esp, 4
; X86-ATOM-NEXT: retl $4
%1 = atomicrmw or i128* %p, i128 0 monotonic
ret i128 %1
}
@@ -155,49 +305,137 @@ define i32 @and32 (i32* %p) {
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: retq
;
; X86-LABEL: and32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mfence
; X86-NEXT: movl (%eax), %eax
; X86-NEXT: retl
; X86-SSE2-LABEL: and32:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: mfence
; X86-SSE2-NEXT: movl (%eax), %eax
; X86-SSE2-NEXT: retl
;
; X86-SLM-LABEL: and32:
; X86-SLM: # %bb.0:
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SLM-NEXT: movl (%ecx), %eax
; X86-SLM-NEXT: .p2align 4, 0x90
; X86-SLM-NEXT: .LBB5_1: # %atomicrmw.start
; X86-SLM-NEXT: # =>This Inner Loop Header: Depth=1
; X86-SLM-NEXT: lock cmpxchgl %eax, (%ecx)
; X86-SLM-NEXT: jne .LBB5_1
; X86-SLM-NEXT: # %bb.2: # %atomicrmw.end
; X86-SLM-NEXT: retl
;
; X86-ATOM-LABEL: and32:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-ATOM-NEXT: movl (%ecx), %eax
; X86-ATOM-NEXT: .p2align 4, 0x90
; X86-ATOM-NEXT: .LBB5_1: # %atomicrmw.start
; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1
; X86-ATOM-NEXT: lock cmpxchgl %eax, (%ecx)
; X86-ATOM-NEXT: jne .LBB5_1
; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end
; X86-ATOM-NEXT: retl
%1 = atomicrmw and i32* %p, i32 -1 acq_rel
ret i32 %1
}

define void @or32_nouse_monotonic(i32* %p) {
; CHECK-LABEL: or32_nouse_monotonic:
; CHECK: # %bb.0:
; CHECK-NEXT: #MEMBARRIER
; CHECK-NEXT: ret{{[l|q]}}
; X64-LABEL: or32_nouse_monotonic:
; X64: # %bb.0:
; X64-NEXT: #MEMBARRIER
; X64-NEXT: retq
;
; X86-GENERIC-LABEL: or32_nouse_monotonic:
; X86-GENERIC: # %bb.0:
; X86-GENERIC-NEXT: #MEMBARRIER
; X86-GENERIC-NEXT: retl
;
; X86-ATOM-LABEL: or32_nouse_monotonic:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: #MEMBARRIER
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: retl
atomicrmw or i32* %p, i32 0 monotonic
ret void
}


define void @or32_nouse_acquire(i32* %p) {
; CHECK-LABEL: or32_nouse_acquire:
; CHECK: # %bb.0:
; CHECK-NEXT: #MEMBARRIER
; CHECK-NEXT: ret{{[l|q]}}
; X64-LABEL: or32_nouse_acquire:
; X64: # %bb.0:
; X64-NEXT: #MEMBARRIER
; X64-NEXT: retq
;
; X86-GENERIC-LABEL: or32_nouse_acquire:
; X86-GENERIC: # %bb.0:
; X86-GENERIC-NEXT: #MEMBARRIER
; X86-GENERIC-NEXT: retl
;
; X86-ATOM-LABEL: or32_nouse_acquire:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: #MEMBARRIER
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: retl
atomicrmw or i32* %p, i32 0 acquire
ret void
}

define void @or32_nouse_release(i32* %p) {
; CHECK-LABEL: or32_nouse_release:
; CHECK: # %bb.0:
; CHECK-NEXT: #MEMBARRIER
; CHECK-NEXT: ret{{[l|q]}}
; X64-LABEL: or32_nouse_release:
; X64: # %bb.0:
; X64-NEXT: #MEMBARRIER
; X64-NEXT: retq
;
; X86-GENERIC-LABEL: or32_nouse_release:
; X86-GENERIC: # %bb.0:
; X86-GENERIC-NEXT: #MEMBARRIER
; X86-GENERIC-NEXT: retl
;
; X86-ATOM-LABEL: or32_nouse_release:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: #MEMBARRIER
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: retl
atomicrmw or i32* %p, i32 0 release
ret void
}

define void @or32_nouse_acq_rel(i32* %p) {
; CHECK-LABEL: or32_nouse_acq_rel:
; CHECK: # %bb.0:
; CHECK-NEXT: #MEMBARRIER
; CHECK-NEXT: ret{{[l|q]}}
; X64-LABEL: or32_nouse_acq_rel:
; X64: # %bb.0:
; X64-NEXT: #MEMBARRIER
; X64-NEXT: retq
;
; X86-GENERIC-LABEL: or32_nouse_acq_rel:
; X86-GENERIC: # %bb.0:
; X86-GENERIC-NEXT: #MEMBARRIER
; X86-GENERIC-NEXT: retl
;
; X86-ATOM-LABEL: or32_nouse_acq_rel:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: #MEMBARRIER
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: retl
atomicrmw or i32* %p, i32 0 acq_rel
ret void
}
@@ -208,10 +446,21 @@ define void @or32_nouse_seq_cst(i32* %p) {
; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT: retq
;
; X86-LABEL: or32_nouse_seq_cst:
; X86: # %bb.0:
; X86-NEXT: lock orl $0, (%esp)
; X86-NEXT: retl
; X86-GENERIC-LABEL: or32_nouse_seq_cst:
; X86-GENERIC: # %bb.0:
; X86-GENERIC-NEXT: lock orl $0, (%esp)
; X86-GENERIC-NEXT: retl
;
; X86-ATOM-LABEL: or32_nouse_seq_cst:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: lock orl $0, (%esp)
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: retl
atomicrmw or i32* %p, i32 0 seq_cst
ret void
}
@@ -264,28 +513,76 @@ define void @or128_nouse_seq_cst(i128* %p) {
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
; X86-LABEL: or128_nouse_seq_cst:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %ebp, -8
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: .cfi_def_cfa_register %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $16, %esp
; X86-NEXT: movl %esp, %eax
; X86-NEXT: pushl $0
; X86-NEXT: pushl $0
; X86-NEXT: pushl $0
; X86-NEXT: pushl $0
; X86-NEXT: pushl 8(%ebp)
; X86-NEXT: pushl %eax
; X86-NEXT: calll __sync_fetch_and_or_16
; X86-NEXT: addl $20, %esp
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: .cfi_def_cfa %esp, 4
; X86-NEXT: retl
; X86-SSE2-LABEL: or128_nouse_seq_cst:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
; X86-SSE2-NEXT: .cfi_offset %ebp, -8
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
; X86-SSE2-NEXT: andl $-8, %esp
; X86-SSE2-NEXT: subl $16, %esp
; X86-SSE2-NEXT: movl %esp, %eax
; X86-SSE2-NEXT: pushl $0
; X86-SSE2-NEXT: pushl $0
; X86-SSE2-NEXT: pushl $0
; X86-SSE2-NEXT: pushl $0
; X86-SSE2-NEXT: pushl 8(%ebp)
; X86-SSE2-NEXT: pushl %eax
; X86-SSE2-NEXT: calll __sync_fetch_and_or_16
; X86-SSE2-NEXT: addl $20, %esp
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4
; X86-SSE2-NEXT: retl
;
; X86-SLM-LABEL: or128_nouse_seq_cst:
; X86-SLM: # %bb.0:
; X86-SLM-NEXT: pushl %ebp
; X86-SLM-NEXT: .cfi_def_cfa_offset 8
; X86-SLM-NEXT: .cfi_offset %ebp, -8
; X86-SLM-NEXT: movl %esp, %ebp
; X86-SLM-NEXT: .cfi_def_cfa_register %ebp
; X86-SLM-NEXT: andl $-8, %esp
; X86-SLM-NEXT: subl $16, %esp
; X86-SLM-NEXT: movl 8(%ebp), %eax
; X86-SLM-NEXT: movl %esp, %ecx
; X86-SLM-NEXT: pushl $0
; X86-SLM-NEXT: pushl $0
; X86-SLM-NEXT: pushl $0
; X86-SLM-NEXT: pushl $0
; X86-SLM-NEXT: pushl %eax
; X86-SLM-NEXT: pushl %ecx
; X86-SLM-NEXT: calll __sync_fetch_and_or_16
; X86-SLM-NEXT: addl $20, %esp
; X86-SLM-NEXT: movl %ebp, %esp
; X86-SLM-NEXT: popl %ebp
; X86-SLM-NEXT: .cfi_def_cfa %esp, 4
; X86-SLM-NEXT: retl
;
; X86-ATOM-LABEL: or128_nouse_seq_cst:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: pushl %ebp
; X86-ATOM-NEXT: .cfi_def_cfa_offset 8
; X86-ATOM-NEXT: .cfi_offset %ebp, -8
; X86-ATOM-NEXT: leal (%esp), %ebp
; X86-ATOM-NEXT: .cfi_def_cfa_register %ebp
; X86-ATOM-NEXT: andl $-8, %esp
; X86-ATOM-NEXT: leal -{{[0-9]+}}(%esp), %esp
; X86-ATOM-NEXT: movl 8(%ebp), %eax
; X86-ATOM-NEXT: movl %esp, %ecx
; X86-ATOM-NEXT: pushl $0
; X86-ATOM-NEXT: pushl $0
; X86-ATOM-NEXT: pushl $0
; X86-ATOM-NEXT: pushl $0
; X86-ATOM-NEXT: pushl %eax
; X86-ATOM-NEXT: pushl %ecx
; X86-ATOM-NEXT: calll __sync_fetch_and_or_16
; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %esp
; X86-ATOM-NEXT: movl %ebp, %esp
; X86-ATOM-NEXT: popl %ebp
; X86-ATOM-NEXT: .cfi_def_cfa %esp, 4
; X86-ATOM-NEXT: retl
atomicrmw or i128* %p, i128 0 seq_cst
ret void
}
@@ -297,10 +594,21 @@ define void @or16_nouse_seq_cst(i16* %p) {
; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT: retq
;
; X86-LABEL: or16_nouse_seq_cst:
; X86: # %bb.0:
; X86-NEXT: lock orl $0, (%esp)
; X86-NEXT: retl
; X86-GENERIC-LABEL: or16_nouse_seq_cst:
; X86-GENERIC: # %bb.0:
; X86-GENERIC-NEXT: lock orl $0, (%esp)
; X86-GENERIC-NEXT: retl
;
; X86-ATOM-LABEL: or16_nouse_seq_cst:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: lock orl $0, (%esp)
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: retl
atomicrmw or i16* %p, i16 0 seq_cst
ret void
}
@@ -311,10 +619,21 @@ define void @or8_nouse_seq_cst(i8* %p) {
; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT: retq
;
; X86-LABEL: or8_nouse_seq_cst:
; X86: # %bb.0:
; X86-NEXT: lock orl $0, (%esp)
; X86-NEXT: retl
; X86-GENERIC-LABEL: or8_nouse_seq_cst:
; X86-GENERIC: # %bb.0:
; X86-GENERIC-NEXT: lock orl $0, (%esp)
; X86-GENERIC-NEXT: retl
;
; X86-ATOM-LABEL: or8_nouse_seq_cst:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: lock orl $0, (%esp)
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: retl
atomicrmw or i8* %p, i8 0 seq_cst
ret void
}