; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix X64 --check-prefix FAST_INC ; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix X32 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=slow-incdec -verify-machineinstrs | FileCheck %s --check-prefix X64 --check-prefix SLOW_INC ; This file checks that atomic (non-seq_cst) stores of immediate values are ; done in one mov instruction and not two. More precisely, it makes sure that the ; immediate is not first copied uselessly into a register. ; Similarly, it checks that a binary operation of an immediate with an atomic ; variable that is stored back into that variable is done as a single instruction. ; For example: x.store(42 + x.load(memory_order_acquire), memory_order_release) ; should be just an add instruction, instead of loading x into a register, doing ; an add and storing the result back. ; The binary operations currently supported are add, and, or, xor. ; sub with an immediate is not supported because it is translated into an addition ; of the negated immediate. ; ; We also check the same patterns: ; - For inc/dec. ; - For register instead of immediate operands. ; - For floating point operations. ; seq_cst stores are left as (lock) xchgl, but we try to check every other ; attribute at least once. ; Please note that these operations do not require the lock prefix: only ; sequentially consistent stores require this kind of protection on X86. ; And even for seq_cst operations, LLVM uses the xchg instruction, which has ; an implicit lock prefix, so making it explicit is not required. ; A source-level C++ sketch of these patterns is given in a comment after @add_8r below. define void @store_atomic_imm_8(i8* %p) { ; X64-LABEL: store_atomic_imm_8: ; X64: # %bb.0: ; X64-NEXT: movb $42, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: store_atomic_imm_8: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movb $42, (%eax) ; X32-NEXT: retl store atomic i8 42, i8* %p release, align 1 ret void } define void @store_atomic_imm_16(i16* %p) { ; X64-LABEL: store_atomic_imm_16: ; X64: # %bb.0: ; X64-NEXT: movw $42, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: store_atomic_imm_16: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movw $42, (%eax) ; X32-NEXT: retl store atomic i16 42, i16* %p monotonic, align 2 ret void } define void @store_atomic_imm_32(i32* %p) { ; X64-LABEL: store_atomic_imm_32: ; X64: # %bb.0: ; X64-NEXT: movl $42, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: store_atomic_imm_32: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl $42, (%eax) ; X32-NEXT: retl ; On 32-bit, there is an extra movl in each of these functions to load the ; pointer argument from the stack.
store atomic i32 42, i32* %p release, align 4 ret void } define void @store_atomic_imm_64(i64* %p) { ; X64-LABEL: store_atomic_imm_64: ; X64: # %bb.0: ; X64-NEXT: movq $42, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: store_atomic_imm_64: ; X32: # %bb.0: ; X32-NEXT: pushl %ebx ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: pushl %esi ; X32-NEXT: .cfi_def_cfa_offset 12 ; X32-NEXT: .cfi_offset %esi, -12 ; X32-NEXT: .cfi_offset %ebx, -8 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl (%esi), %eax ; X32-NEXT: movl 4(%esi), %edx ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: movl $42, %ebx ; X32-NEXT: .p2align 4, 0x90 ; X32-NEXT: .LBB3_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB3_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end ; X32-NEXT: popl %esi ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx ; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; These are implemented with a CAS loop on 32-bit architectures, and thus ; cannot be optimized in the same way as the others. store atomic i64 42, i64* %p release, align 8 ret void } ; If an immediate is too big to fit in 32 bits, it cannot be stored in one mov, ; even on X64; one must use movabsq, which can only target a register. define void @store_atomic_imm_64_big(i64* %p) { ; X64-LABEL: store_atomic_imm_64_big: ; X64: # %bb.0: ; X64-NEXT: movabsq $100000000000, %rax # imm = 0x174876E800 ; X64-NEXT: movq %rax, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: store_atomic_imm_64_big: ; X32: # %bb.0: ; X32-NEXT: pushl %ebx ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: pushl %esi ; X32-NEXT: .cfi_def_cfa_offset 12 ; X32-NEXT: .cfi_offset %esi, -12 ; X32-NEXT: .cfi_offset %ebx, -8 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl (%esi), %eax ; X32-NEXT: movl 4(%esi), %edx ; X32-NEXT: movl $23, %ecx ; X32-NEXT: movl $1215752192, %ebx # imm = 0x4876E800 ; X32-NEXT: .p2align 4, 0x90 ; X32-NEXT: .LBB4_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB4_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end ; X32-NEXT: popl %esi ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx ; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl store atomic i64 100000000000, i64* %p monotonic, align 8 ret void } ; It would be incorrect to replace a lock xchgl with a movl define void @store_atomic_imm_32_seq_cst(i32* %p) { ; X64-LABEL: store_atomic_imm_32_seq_cst: ; X64: # %bb.0: ; X64-NEXT: movl $42, %eax ; X64-NEXT: xchgl %eax, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: store_atomic_imm_32_seq_cst: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl $42, %ecx ; X32-NEXT: xchgl %ecx, (%eax) ; X32-NEXT: retl store atomic i32 42, i32* %p seq_cst, align 4 ret void } ; ----- ADD ----- define void @add_8i(i8* %p) { ; X64-LABEL: add_8i: ; X64: # %bb.0: ; X64-NEXT: addb $2, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: add_8i: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: addb $2, (%eax) ; X32-NEXT: retl %1 = load atomic i8, i8* %p seq_cst, align 1 %2 = add i8 %1, 2 store atomic i8 %2, i8* %p release, align 1 ret void } define void @add_8r(i8* %p, i8 %v) { ; X64-LABEL: add_8r: ; X64: # %bb.0: ; X64-NEXT: addb %sil, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: add_8r: ; X32: # %bb.0: ; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: addb %al, (%ecx) ; X32-NEXT: retl %1 = load atomic i8, i8* %p seq_cst, align 1 %2 = add i8 %1, %v store atomic i8 %2, i8* %p release, align 1 ret void }
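; The source-level C++ pattern these tests correspond to can be sketched as
; follows. This is an illustration only, not part of the test: the function
; names are hypothetical, and the exact IR depends on the frontend.
;
;   #include <atomic>
;
;   // Non-seq_cst store of an immediate: expected to lower to a single
;   // movb/movw/movl/movq of the immediate to memory.
;   void store_imm(std::atomic<int> &x) {
;     x.store(42, std::memory_order_release);
;   }
;
;   // Load, binary operation with an immediate, store back to the same
;   // location: expected to lower to a single memory-operand instruction,
;   // e.g. addl $2, (mem).
;   void add_imm(std::atomic<int> &x) {
;     x.store(x.load(std::memory_order_acquire) + 2, std::memory_order_release);
;   }
;
;   // seq_cst store: stays an xchg (implicitly locked), not a plain mov.
;   void store_imm_seq_cst(std::atomic<int> &x) {
;     x.store(42, std::memory_order_seq_cst);
;   }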
define void @add_16i(i16* %p) { ; X64-LABEL: add_16i: ; X64: # %bb.0: ; X64-NEXT: addw $2, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: add_16i: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: addw $2, (%eax) ; X32-NEXT: retl %1 = load atomic i16, i16* %p acquire, align 2 %2 = add i16 %1, 2 store atomic i16 %2, i16* %p release, align 2 ret void } define void @add_16r(i16* %p, i16 %v) { ; X64-LABEL: add_16r: ; X64: # %bb.0: ; X64-NEXT: addw %si, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: add_16r: ; X32: # %bb.0: ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: addw %ax, (%ecx) ; X32-NEXT: retl %1 = load atomic i16, i16* %p acquire, align 2 %2 = add i16 %1, %v store atomic i16 %2, i16* %p release, align 2 ret void } define void @add_32i(i32* %p) { ; X64-LABEL: add_32i: ; X64: # %bb.0: ; X64-NEXT: addl $2, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: add_32i: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: addl $2, (%eax) ; X32-NEXT: retl %1 = load atomic i32, i32* %p acquire, align 4 %2 = add i32 %1, 2 store atomic i32 %2, i32* %p monotonic, align 4 ret void } define void @add_32r(i32* %p, i32 %v) { ; X64-LABEL: add_32r: ; X64: # %bb.0: ; X64-NEXT: addl %esi, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: add_32r: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: addl %eax, (%ecx) ; X32-NEXT: retl %1 = load atomic i32, i32* %p acquire, align 4 %2 = add i32 %1, %v store atomic i32 %2, i32* %p monotonic, align 4 ret void } ; The following is a corner case where the load is added to itself. The pattern ; matching should not fold this. We only test with 32-bit add, but the same ; applies to other sizes and operations. define void @add_32r_self(i32* %p) { ; X64-LABEL: add_32r_self: ; X64: # %bb.0: ; X64-NEXT: movl (%rdi), %eax ; X64-NEXT: addl %eax, %eax ; X64-NEXT: movl %eax, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: add_32r_self: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl (%eax), %ecx ; X32-NEXT: addl %ecx, %ecx ; X32-NEXT: movl %ecx, (%eax) ; X32-NEXT: retl %1 = load atomic i32, i32* %p acquire, align 4 %2 = add i32 %1, %1 store atomic i32 %2, i32* %p monotonic, align 4 ret void } ; The following is a corner case where the load's result is returned. The ; optimizer isn't allowed to duplicate the load because it's atomic. define i32 @add_32r_ret_load(i32* %p, i32 %v) { ; X64-LABEL: add_32r_ret_load: ; X64: # %bb.0: ; X64-NEXT: movl (%rdi), %eax ; X64-NEXT: addl %eax, %esi ; X64-NEXT: movl %esi, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: add_32r_ret_load: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl (%ecx), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NEXT: addl %eax, %edx ; X32-NEXT: movl %edx, (%ecx) ; X32-NEXT: retl ; More code here, we just don't want it to load from P. 
%1 = load atomic i32, i32* %p acquire, align 4 %2 = add i32 %1, %v store atomic i32 %2, i32* %p monotonic, align 4 ret i32 %1 } define void @add_64i(i64* %p) { ; X64-LABEL: add_64i: ; X64: # %bb.0: ; X64-NEXT: addq $2, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: add_64i: ; X32: # %bb.0: ; X32-NEXT: pushl %ebx ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: pushl %esi ; X32-NEXT: .cfi_def_cfa_offset 12 ; X32-NEXT: .cfi_offset %esi, -12 ; X32-NEXT: .cfi_offset %ebx, -8 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: xorl %eax, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl $2, %ebx ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: movl (%esi), %eax ; X32-NEXT: movl 4(%esi), %edx ; X32-NEXT: .p2align 4, 0x90 ; X32-NEXT: .LBB14_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB14_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end ; X32-NEXT: popl %esi ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx ; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'addq'. %1 = load atomic i64, i64* %p acquire, align 8 %2 = add i64 %1, 2 store atomic i64 %2, i64* %p release, align 8 ret void } define void @add_64r(i64* %p, i64 %v) { ; X64-LABEL: add_64r: ; X64: # %bb.0: ; X64-NEXT: addq %rsi, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: add_64r: ; X32: # %bb.0: ; X32-NEXT: pushl %ebx ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: pushl %esi ; X32-NEXT: .cfi_def_cfa_offset 12 ; X32-NEXT: .cfi_offset %esi, -12 ; X32-NEXT: .cfi_offset %ebx, -8 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: xorl %eax, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl {{[0-9]+}}(%esp), %ebx ; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl (%esi), %eax ; X32-NEXT: movl 4(%esi), %edx ; X32-NEXT: .p2align 4, 0x90 ; X32-NEXT: .LBB15_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB15_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end ; X32-NEXT: popl %esi ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx ; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'addq'. 
%1 = load atomic i64, i64* %p acquire, align 8 %2 = add i64 %1, %v store atomic i64 %2, i64* %p release, align 8 ret void } define void @add_32i_seq_cst(i32* %p) { ; X64-LABEL: add_32i_seq_cst: ; X64: # %bb.0: ; X64-NEXT: movl (%rdi), %eax ; X64-NEXT: addl $2, %eax ; X64-NEXT: xchgl %eax, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: add_32i_seq_cst: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl (%eax), %ecx ; X32-NEXT: addl $2, %ecx ; X32-NEXT: xchgl %ecx, (%eax) ; X32-NEXT: retl %1 = load atomic i32, i32* %p monotonic, align 4 %2 = add i32 %1, 2 store atomic i32 %2, i32* %p seq_cst, align 4 ret void } define void @add_32r_seq_cst(i32* %p, i32 %v) { ; X64-LABEL: add_32r_seq_cst: ; X64: # %bb.0: ; X64-NEXT: movl (%rdi), %eax ; X64-NEXT: addl %esi, %eax ; X64-NEXT: xchgl %eax, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: add_32r_seq_cst: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl (%eax), %ecx ; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: xchgl %ecx, (%eax) ; X32-NEXT: retl %1 = load atomic i32, i32* %p monotonic, align 4 %2 = add i32 %1, %v store atomic i32 %2, i32* %p seq_cst, align 4 ret void } ; ----- SUB ----- define void @sub_8r(i8* %p, i8 %v) { ; X64-LABEL: sub_8r: ; X64: # %bb.0: ; X64-NEXT: subb %sil, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: sub_8r: ; X32: # %bb.0: ; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: subb %al, (%ecx) ; X32-NEXT: retl %1 = load atomic i8, i8* %p seq_cst, align 1 %2 = sub i8 %1, %v store atomic i8 %2, i8* %p release, align 1 ret void } define void @sub_16r(i16* %p, i16 %v) { ; X64-LABEL: sub_16r: ; X64: # %bb.0: ; X64-NEXT: subw %si, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: sub_16r: ; X32: # %bb.0: ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: subw %ax, (%ecx) ; X32-NEXT: retl %1 = load atomic i16, i16* %p acquire, align 2 %2 = sub i16 %1, %v store atomic i16 %2, i16* %p release, align 2 ret void } define void @sub_32r(i32* %p, i32 %v) { ; X64-LABEL: sub_32r: ; X64: # %bb.0: ; X64-NEXT: subl %esi, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: sub_32r: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: subl %eax, (%ecx) ; X32-NEXT: retl %1 = load atomic i32, i32* %p acquire, align 4 %2 = sub i32 %1, %v store atomic i32 %2, i32* %p monotonic, align 4 ret void } ; The following is a corner case where the load is subtracted from itself. The pattern ; matching should not fold this. We only test with 32-bit sub, but the same ; applies to other sizes and operations. define void @sub_32r_self(i32* %p) { ; X64-LABEL: sub_32r_self: ; X64: # %bb.0: ; X64-NEXT: movl (%rdi), %eax ; X64-NEXT: movl $0, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: sub_32r_self: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl (%eax), %ecx ; X32-NEXT: movl $0, (%eax) ; X32-NEXT: retl %1 = load atomic i32, i32* %p acquire, align 4 %2 = sub i32 %1, %1 store atomic i32 %2, i32* %p monotonic, align 4 ret void } ; The following is a corner case where the load's result is returned. The ; optimizer isn't allowed to duplicate the load because it's atomic.
define i32 @sub_32r_ret_load(i32* %p, i32 %v) { ; X64-LABEL: sub_32r_ret_load: ; X64: # %bb.0: ; X64-NEXT: movl (%rdi), %eax ; X64-NEXT: movl %eax, %ecx ; X64-NEXT: subl %esi, %ecx ; X64-NEXT: movl %ecx, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: sub_32r_ret_load: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl (%ecx), %eax ; X32-NEXT: movl %eax, %edx ; X32-NEXT: subl {{[0-9]+}}(%esp), %edx ; X32-NEXT: movl %edx, (%ecx) ; X32-NEXT: retl ; More code here, we just don't want it to load from P. %1 = load atomic i32, i32* %p acquire, align 4 %2 = sub i32 %1, %v store atomic i32 %2, i32* %p monotonic, align 4 ret i32 %1 } define void @sub_64r(i64* %p, i64 %v) { ; X64-LABEL: sub_64r: ; X64: # %bb.0: ; X64-NEXT: subq %rsi, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: sub_64r: ; X32: # %bb.0: ; X32-NEXT: pushl %ebx ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: pushl %esi ; X32-NEXT: .cfi_def_cfa_offset 12 ; X32-NEXT: .cfi_offset %esi, -12 ; X32-NEXT: .cfi_offset %ebx, -8 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: xorl %eax, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: subl {{[0-9]+}}(%esp), %ebx ; X32-NEXT: sbbl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl (%esi), %eax ; X32-NEXT: movl 4(%esi), %edx ; X32-NEXT: .p2align 4, 0x90 ; X32-NEXT: .LBB23_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB23_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end ; X32-NEXT: popl %esi ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx ; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'subq'. %1 = load atomic i64, i64* %p acquire, align 8 %2 = sub i64 %1, %v store atomic i64 %2, i64* %p release, align 8 ret void } define void @sub_32r_seq_cst(i32* %p, i32 %v) { ; X64-LABEL: sub_32r_seq_cst: ; X64: # %bb.0: ; X64-NEXT: movl (%rdi), %eax ; X64-NEXT: subl %esi, %eax ; X64-NEXT: xchgl %eax, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: sub_32r_seq_cst: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl (%eax), %ecx ; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: xchgl %ecx, (%eax) ; X32-NEXT: retl %1 = load atomic i32, i32* %p monotonic, align 4 %2 = sub i32 %1, %v store atomic i32 %2, i32* %p seq_cst, align 4 ret void } ; ----- AND ----- define void @and_8i(i8* %p) { ; X64-LABEL: and_8i: ; X64: # %bb.0: ; X64-NEXT: andb $2, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: and_8i: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: andb $2, (%eax) ; X32-NEXT: retl %1 = load atomic i8, i8* %p monotonic, align 1 %2 = and i8 %1, 2 store atomic i8 %2, i8* %p release, align 1 ret void } define void @and_8r(i8* %p, i8 %v) { ; X64-LABEL: and_8r: ; X64: # %bb.0: ; X64-NEXT: andb %sil, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: and_8r: ; X32: # %bb.0: ; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: andb %al, (%ecx) ; X32-NEXT: retl %1 = load atomic i8, i8* %p monotonic, align 1 %2 = and i8 %1, %v store atomic i8 %2, i8* %p release, align 1 ret void } define void @and_16i(i16* %p) { ; X64-LABEL: and_16i: ; X64: # %bb.0: ; X64-NEXT: andw $2, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: and_16i: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: andw $2, (%eax) ; X32-NEXT: retl %1 = load atomic i16, i16* %p acquire, align 2 %2 = and i16 %1, 2 store atomic i16 %2, i16* %p release, align 
2 ret void } define void @and_16r(i16* %p, i16 %v) { ; X64-LABEL: and_16r: ; X64: # %bb.0: ; X64-NEXT: andw %si, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: and_16r: ; X32: # %bb.0: ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: andw %ax, (%ecx) ; X32-NEXT: retl %1 = load atomic i16, i16* %p acquire, align 2 %2 = and i16 %1, %v store atomic i16 %2, i16* %p release, align 2 ret void } define void @and_32i(i32* %p) { ; X64-LABEL: and_32i: ; X64: # %bb.0: ; X64-NEXT: andl $2, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: and_32i: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: andl $2, (%eax) ; X32-NEXT: retl %1 = load atomic i32, i32* %p acquire, align 4 %2 = and i32 %1, 2 store atomic i32 %2, i32* %p release, align 4 ret void } define void @and_32r(i32* %p, i32 %v) { ; X64-LABEL: and_32r: ; X64: # %bb.0: ; X64-NEXT: andl %esi, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: and_32r: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: andl %eax, (%ecx) ; X32-NEXT: retl %1 = load atomic i32, i32* %p acquire, align 4 %2 = and i32 %1, %v store atomic i32 %2, i32* %p release, align 4 ret void } define void @and_64i(i64* %p) { ; X64-LABEL: and_64i: ; X64: # %bb.0: ; X64-NEXT: andq $2, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: and_64i: ; X32: # %bb.0: ; X32-NEXT: pushl %ebx ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: pushl %esi ; X32-NEXT: .cfi_def_cfa_offset 12 ; X32-NEXT: .cfi_offset %esi, -12 ; X32-NEXT: .cfi_offset %ebx, -8 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: xorl %eax, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: andl $2, %ebx ; X32-NEXT: movl (%esi), %eax ; X32-NEXT: movl 4(%esi), %edx ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: .p2align 4, 0x90 ; X32-NEXT: .LBB31_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB31_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end ; X32-NEXT: popl %esi ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx ; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'andq'. %1 = load atomic i64, i64* %p acquire, align 8 %2 = and i64 %1, 2 store atomic i64 %2, i64* %p release, align 8 ret void } define void @and_64r(i64* %p, i64 %v) { ; X64-LABEL: and_64r: ; X64: # %bb.0: ; X64-NEXT: andq %rsi, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: and_64r: ; X32: # %bb.0: ; X32-NEXT: pushl %ebx ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: pushl %esi ; X32-NEXT: .cfi_def_cfa_offset 12 ; X32-NEXT: .cfi_offset %esi, -12 ; X32-NEXT: .cfi_offset %ebx, -8 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: xorl %eax, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: andl {{[0-9]+}}(%esp), %ebx ; X32-NEXT: movl (%esi), %eax ; X32-NEXT: movl 4(%esi), %edx ; X32-NEXT: .p2align 4, 0x90 ; X32-NEXT: .LBB32_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB32_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end ; X32-NEXT: popl %esi ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx ; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'andq'. 
%1 = load atomic i64, i64* %p acquire, align 8 %2 = and i64 %1, %v store atomic i64 %2, i64* %p release, align 8 ret void } define void @and_32i_seq_cst(i32* %p) { ; X64-LABEL: and_32i_seq_cst: ; X64: # %bb.0: ; X64-NEXT: movl (%rdi), %eax ; X64-NEXT: andl $2, %eax ; X64-NEXT: xchgl %eax, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: and_32i_seq_cst: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl (%eax), %ecx ; X32-NEXT: andl $2, %ecx ; X32-NEXT: xchgl %ecx, (%eax) ; X32-NEXT: retl %1 = load atomic i32, i32* %p monotonic, align 4 %2 = and i32 %1, 2 store atomic i32 %2, i32* %p seq_cst, align 4 ret void } define void @and_32r_seq_cst(i32* %p, i32 %v) { ; X64-LABEL: and_32r_seq_cst: ; X64: # %bb.0: ; X64-NEXT: movl (%rdi), %eax ; X64-NEXT: andl %esi, %eax ; X64-NEXT: xchgl %eax, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: and_32r_seq_cst: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl (%eax), %ecx ; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: xchgl %ecx, (%eax) ; X32-NEXT: retl %1 = load atomic i32, i32* %p monotonic, align 4 %2 = and i32 %1, %v store atomic i32 %2, i32* %p seq_cst, align 4 ret void } ; ----- OR ----- define void @or_8i(i8* %p) { ; X64-LABEL: or_8i: ; X64: # %bb.0: ; X64-NEXT: orb $2, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: or_8i: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: orb $2, (%eax) ; X32-NEXT: retl %1 = load atomic i8, i8* %p acquire, align 1 %2 = or i8 %1, 2 store atomic i8 %2, i8* %p release, align 1 ret void } define void @or_8r(i8* %p, i8 %v) { ; X64-LABEL: or_8r: ; X64: # %bb.0: ; X64-NEXT: orb %sil, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: or_8r: ; X32: # %bb.0: ; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: orb %al, (%ecx) ; X32-NEXT: retl %1 = load atomic i8, i8* %p acquire, align 1 %2 = or i8 %1, %v store atomic i8 %2, i8* %p release, align 1 ret void } define void @or_16i(i16* %p) { ; X64-LABEL: or_16i: ; X64: # %bb.0: ; X64-NEXT: orw $2, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: or_16i: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: orw $2, (%eax) ; X32-NEXT: retl %1 = load atomic i16, i16* %p acquire, align 2 %2 = or i16 %1, 2 store atomic i16 %2, i16* %p release, align 2 ret void } define void @or_16r(i16* %p, i16 %v) { ; X64-LABEL: or_16r: ; X64: # %bb.0: ; X64-NEXT: orw %si, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: or_16r: ; X32: # %bb.0: ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: orw %ax, (%ecx) ; X32-NEXT: retl %1 = load atomic i16, i16* %p acquire, align 2 %2 = or i16 %1, %v store atomic i16 %2, i16* %p release, align 2 ret void } define void @or_32i(i32* %p) { ; X64-LABEL: or_32i: ; X64: # %bb.0: ; X64-NEXT: orl $2, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: or_32i: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: orl $2, (%eax) ; X32-NEXT: retl %1 = load atomic i32, i32* %p acquire, align 4 %2 = or i32 %1, 2 store atomic i32 %2, i32* %p release, align 4 ret void } define void @or_32r(i32* %p, i32 %v) { ; X64-LABEL: or_32r: ; X64: # %bb.0: ; X64-NEXT: orl %esi, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: or_32r: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: orl %eax, (%ecx) ; X32-NEXT: retl %1 = load atomic i32, i32* %p acquire, align 4 %2 = or i32 %1, %v store atomic i32 %2, i32* %p release, align 4 ret void } define void @or_64i(i64* %p) { ; X64-LABEL: or_64i: ; X64: # %bb.0: ; X64-NEXT: orq $2, (%rdi) 
; X64-NEXT: retq ; ; X32-LABEL: or_64i: ; X32: # %bb.0: ; X32-NEXT: pushl %ebx ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: pushl %esi ; X32-NEXT: .cfi_def_cfa_offset 12 ; X32-NEXT: .cfi_offset %esi, -12 ; X32-NEXT: .cfi_offset %ebx, -8 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: xorl %eax, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: orl $2, %ebx ; X32-NEXT: movl (%esi), %eax ; X32-NEXT: movl 4(%esi), %edx ; X32-NEXT: .p2align 4, 0x90 ; X32-NEXT: .LBB41_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB41_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end ; X32-NEXT: popl %esi ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx ; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'orq'. %1 = load atomic i64, i64* %p acquire, align 8 %2 = or i64 %1, 2 store atomic i64 %2, i64* %p release, align 8 ret void } define void @or_64r(i64* %p, i64 %v) { ; X64-LABEL: or_64r: ; X64: # %bb.0: ; X64-NEXT: orq %rsi, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: or_64r: ; X32: # %bb.0: ; X32-NEXT: pushl %ebx ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: pushl %esi ; X32-NEXT: .cfi_def_cfa_offset 12 ; X32-NEXT: .cfi_offset %esi, -12 ; X32-NEXT: .cfi_offset %ebx, -8 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: xorl %eax, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: orl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: orl {{[0-9]+}}(%esp), %ebx ; X32-NEXT: movl (%esi), %eax ; X32-NEXT: movl 4(%esi), %edx ; X32-NEXT: .p2align 4, 0x90 ; X32-NEXT: .LBB42_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB42_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end ; X32-NEXT: popl %esi ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx ; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'orq'. 
%1 = load atomic i64, i64* %p acquire, align 8 %2 = or i64 %1, %v store atomic i64 %2, i64* %p release, align 8 ret void } define void @or_32i_seq_cst(i32* %p) { ; X64-LABEL: or_32i_seq_cst: ; X64: # %bb.0: ; X64-NEXT: movl (%rdi), %eax ; X64-NEXT: orl $2, %eax ; X64-NEXT: xchgl %eax, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: or_32i_seq_cst: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl (%eax), %ecx ; X32-NEXT: orl $2, %ecx ; X32-NEXT: xchgl %ecx, (%eax) ; X32-NEXT: retl %1 = load atomic i32, i32* %p monotonic, align 4 %2 = or i32 %1, 2 store atomic i32 %2, i32* %p seq_cst, align 4 ret void } define void @or_32r_seq_cst(i32* %p, i32 %v) { ; X64-LABEL: or_32r_seq_cst: ; X64: # %bb.0: ; X64-NEXT: movl (%rdi), %eax ; X64-NEXT: orl %esi, %eax ; X64-NEXT: xchgl %eax, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: or_32r_seq_cst: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl (%eax), %ecx ; X32-NEXT: orl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: xchgl %ecx, (%eax) ; X32-NEXT: retl %1 = load atomic i32, i32* %p monotonic, align 4 %2 = or i32 %1, %v store atomic i32 %2, i32* %p seq_cst, align 4 ret void } ; ----- XOR ----- define void @xor_8i(i8* %p) { ; X64-LABEL: xor_8i: ; X64: # %bb.0: ; X64-NEXT: xorb $2, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: xor_8i: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: xorb $2, (%eax) ; X32-NEXT: retl %1 = load atomic i8, i8* %p acquire, align 1 %2 = xor i8 %1, 2 store atomic i8 %2, i8* %p release, align 1 ret void } define void @xor_8r(i8* %p, i8 %v) { ; X64-LABEL: xor_8r: ; X64: # %bb.0: ; X64-NEXT: xorb %sil, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: xor_8r: ; X32: # %bb.0: ; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: xorb %al, (%ecx) ; X32-NEXT: retl %1 = load atomic i8, i8* %p acquire, align 1 %2 = xor i8 %1, %v store atomic i8 %2, i8* %p release, align 1 ret void } define void @xor_16i(i16* %p) { ; X64-LABEL: xor_16i: ; X64: # %bb.0: ; X64-NEXT: xorw $2, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: xor_16i: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: xorw $2, (%eax) ; X32-NEXT: retl %1 = load atomic i16, i16* %p acquire, align 2 %2 = xor i16 %1, 2 store atomic i16 %2, i16* %p release, align 2 ret void } define void @xor_16r(i16* %p, i16 %v) { ; X64-LABEL: xor_16r: ; X64: # %bb.0: ; X64-NEXT: xorw %si, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: xor_16r: ; X32: # %bb.0: ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: xorw %ax, (%ecx) ; X32-NEXT: retl %1 = load atomic i16, i16* %p acquire, align 2 %2 = xor i16 %1, %v store atomic i16 %2, i16* %p release, align 2 ret void } define void @xor_32i(i32* %p) { ; X64-LABEL: xor_32i: ; X64: # %bb.0: ; X64-NEXT: xorl $2, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: xor_32i: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: xorl $2, (%eax) ; X32-NEXT: retl %1 = load atomic i32, i32* %p acquire, align 4 %2 = xor i32 %1, 2 store atomic i32 %2, i32* %p release, align 4 ret void } define void @xor_32r(i32* %p, i32 %v) { ; X64-LABEL: xor_32r: ; X64: # %bb.0: ; X64-NEXT: xorl %esi, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: xor_32r: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: xorl %eax, (%ecx) ; X32-NEXT: retl %1 = load atomic i32, i32* %p acquire, align 4 %2 = xor i32 %1, %v store atomic i32 %2, i32* %p release, align 4 ret void } define void @xor_64i(i64* %p) { ; X64-LABEL: xor_64i: ; X64: # %bb.0: 
; X64-NEXT: xorq $2, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: xor_64i: ; X32: # %bb.0: ; X32-NEXT: pushl %ebx ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: pushl %esi ; X32-NEXT: .cfi_def_cfa_offset 12 ; X32-NEXT: .cfi_offset %esi, -12 ; X32-NEXT: .cfi_offset %ebx, -8 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: xorl %eax, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: xorl $2, %ebx ; X32-NEXT: movl (%esi), %eax ; X32-NEXT: movl 4(%esi), %edx ; X32-NEXT: .p2align 4, 0x90 ; X32-NEXT: .LBB51_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB51_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end ; X32-NEXT: popl %esi ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx ; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'xorq'. %1 = load atomic i64, i64* %p acquire, align 8 %2 = xor i64 %1, 2 store atomic i64 %2, i64* %p release, align 8 ret void } define void @xor_64r(i64* %p, i64 %v) { ; X64-LABEL: xor_64r: ; X64: # %bb.0: ; X64-NEXT: xorq %rsi, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: xor_64r: ; X32: # %bb.0: ; X32-NEXT: pushl %ebx ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: pushl %esi ; X32-NEXT: .cfi_def_cfa_offset 12 ; X32-NEXT: .cfi_offset %esi, -12 ; X32-NEXT: .cfi_offset %ebx, -8 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: xorl %eax, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: xorl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: xorl {{[0-9]+}}(%esp), %ebx ; X32-NEXT: movl (%esi), %eax ; X32-NEXT: movl 4(%esi), %edx ; X32-NEXT: .p2align 4, 0x90 ; X32-NEXT: .LBB52_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB52_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end ; X32-NEXT: popl %esi ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx ; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'xorq'. 
%1 = load atomic i64, i64* %p acquire, align 8 %2 = xor i64 %1, %v store atomic i64 %2, i64* %p release, align 8 ret void } define void @xor_32i_seq_cst(i32* %p) { ; X64-LABEL: xor_32i_seq_cst: ; X64: # %bb.0: ; X64-NEXT: movl (%rdi), %eax ; X64-NEXT: xorl $2, %eax ; X64-NEXT: xchgl %eax, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: xor_32i_seq_cst: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl (%eax), %ecx ; X32-NEXT: xorl $2, %ecx ; X32-NEXT: xchgl %ecx, (%eax) ; X32-NEXT: retl %1 = load atomic i32, i32* %p monotonic, align 4 %2 = xor i32 %1, 2 store atomic i32 %2, i32* %p seq_cst, align 4 ret void } define void @xor_32r_seq_cst(i32* %p, i32 %v) { ; X64-LABEL: xor_32r_seq_cst: ; X64: # %bb.0: ; X64-NEXT: movl (%rdi), %eax ; X64-NEXT: xorl %esi, %eax ; X64-NEXT: xchgl %eax, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: xor_32r_seq_cst: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl (%eax), %ecx ; X32-NEXT: xorl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: xchgl %ecx, (%eax) ; X32-NEXT: retl %1 = load atomic i32, i32* %p monotonic, align 4 %2 = xor i32 %1, %v store atomic i32 %2, i32* %p seq_cst, align 4 ret void } ; ----- INC ----- define void @inc_8(i8* %p) { ; FAST_INC-LABEL: inc_8: ; FAST_INC: # %bb.0: ; FAST_INC-NEXT: incb (%rdi) ; FAST_INC-NEXT: retq ; ; X32-LABEL: inc_8: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: incb (%eax) ; X32-NEXT: retl ; ; SLOW_INC-LABEL: inc_8: ; SLOW_INC: # %bb.0: ; SLOW_INC-NEXT: addb $1, (%rdi) ; SLOW_INC-NEXT: retq %1 = load atomic i8, i8* %p seq_cst, align 1 %2 = add i8 %1, 1 store atomic i8 %2, i8* %p release, align 1 ret void } define void @inc_16(i16* %p) { ; FAST_INC-LABEL: inc_16: ; FAST_INC: # %bb.0: ; FAST_INC-NEXT: incw (%rdi) ; FAST_INC-NEXT: retq ; ; X32-LABEL: inc_16: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: incw (%eax) ; X32-NEXT: retl ; ; SLOW_INC-LABEL: inc_16: ; SLOW_INC: # %bb.0: ; SLOW_INC-NEXT: addw $1, (%rdi) ; SLOW_INC-NEXT: retq %1 = load atomic i16, i16* %p acquire, align 2 %2 = add i16 %1, 1 store atomic i16 %2, i16* %p release, align 2 ret void } define void @inc_32(i32* %p) { ; FAST_INC-LABEL: inc_32: ; FAST_INC: # %bb.0: ; FAST_INC-NEXT: incl (%rdi) ; FAST_INC-NEXT: retq ; ; X32-LABEL: inc_32: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: incl (%eax) ; X32-NEXT: retl ; ; SLOW_INC-LABEL: inc_32: ; SLOW_INC: # %bb.0: ; SLOW_INC-NEXT: addl $1, (%rdi) ; SLOW_INC-NEXT: retq %1 = load atomic i32, i32* %p acquire, align 4 %2 = add i32 %1, 1 store atomic i32 %2, i32* %p monotonic, align 4 ret void } define void @inc_64(i64* %p) { ; FAST_INC-LABEL: inc_64: ; FAST_INC: # %bb.0: ; FAST_INC-NEXT: incq (%rdi) ; FAST_INC-NEXT: retq ; ; X32-LABEL: inc_64: ; X32: # %bb.0: ; X32-NEXT: pushl %ebx ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: pushl %esi ; X32-NEXT: .cfi_def_cfa_offset 12 ; X32-NEXT: .cfi_offset %esi, -12 ; X32-NEXT: .cfi_offset %ebx, -8 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: xorl %eax, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl $1, %ebx ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: movl (%esi), %eax ; X32-NEXT: movl 4(%esi), %edx ; X32-NEXT: .p2align 4, 0x90 ; X32-NEXT: .LBB58_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB58_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end ; X32-NEXT: popl %esi ; X32-NEXT: 
.cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx ; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; ; SLOW_INC-LABEL: inc_64: ; SLOW_INC: # %bb.0: ; SLOW_INC-NEXT: addq $1, (%rdi) ; SLOW_INC-NEXT: retq ; We do not check X86-32 as it cannot do 'incq'. %1 = load atomic i64, i64* %p acquire, align 8 %2 = add i64 %1, 1 store atomic i64 %2, i64* %p release, align 8 ret void } define void @inc_32_seq_cst(i32* %p) { ; FAST_INC-LABEL: inc_32_seq_cst: ; FAST_INC: # %bb.0: ; FAST_INC-NEXT: movl (%rdi), %eax ; FAST_INC-NEXT: incl %eax ; FAST_INC-NEXT: xchgl %eax, (%rdi) ; FAST_INC-NEXT: retq ; ; X32-LABEL: inc_32_seq_cst: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl (%eax), %ecx ; X32-NEXT: incl %ecx ; X32-NEXT: xchgl %ecx, (%eax) ; X32-NEXT: retl ; ; SLOW_INC-LABEL: inc_32_seq_cst: ; SLOW_INC: # %bb.0: ; SLOW_INC-NEXT: movl (%rdi), %eax ; SLOW_INC-NEXT: addl $1, %eax ; SLOW_INC-NEXT: xchgl %eax, (%rdi) ; SLOW_INC-NEXT: retq %1 = load atomic i32, i32* %p monotonic, align 4 %2 = add i32 %1, 1 store atomic i32 %2, i32* %p seq_cst, align 4 ret void } ; ----- DEC ----- define void @dec_8(i8* %p) { ; FAST_INC-LABEL: dec_8: ; FAST_INC: # %bb.0: ; FAST_INC-NEXT: decb (%rdi) ; FAST_INC-NEXT: retq ; ; X32-LABEL: dec_8: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: decb (%eax) ; X32-NEXT: retl ; ; SLOW_INC-LABEL: dec_8: ; SLOW_INC: # %bb.0: ; SLOW_INC-NEXT: addb $-1, (%rdi) ; SLOW_INC-NEXT: retq %1 = load atomic i8, i8* %p seq_cst, align 1 %2 = sub i8 %1, 1 store atomic i8 %2, i8* %p release, align 1 ret void } define void @dec_16(i16* %p) { ; FAST_INC-LABEL: dec_16: ; FAST_INC: # %bb.0: ; FAST_INC-NEXT: decw (%rdi) ; FAST_INC-NEXT: retq ; ; X32-LABEL: dec_16: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: decw (%eax) ; X32-NEXT: retl ; ; SLOW_INC-LABEL: dec_16: ; SLOW_INC: # %bb.0: ; SLOW_INC-NEXT: addw $-1, (%rdi) ; SLOW_INC-NEXT: retq %1 = load atomic i16, i16* %p acquire, align 2 %2 = sub i16 %1, 1 store atomic i16 %2, i16* %p release, align 2 ret void } define void @dec_32(i32* %p) { ; FAST_INC-LABEL: dec_32: ; FAST_INC: # %bb.0: ; FAST_INC-NEXT: decl (%rdi) ; FAST_INC-NEXT: retq ; ; X32-LABEL: dec_32: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: decl (%eax) ; X32-NEXT: retl ; ; SLOW_INC-LABEL: dec_32: ; SLOW_INC: # %bb.0: ; SLOW_INC-NEXT: addl $-1, (%rdi) ; SLOW_INC-NEXT: retq %1 = load atomic i32, i32* %p acquire, align 4 %2 = sub i32 %1, 1 store atomic i32 %2, i32* %p monotonic, align 4 ret void } define void @dec_64(i64* %p) { ; FAST_INC-LABEL: dec_64: ; FAST_INC: # %bb.0: ; FAST_INC-NEXT: decq (%rdi) ; FAST_INC-NEXT: retq ; ; X32-LABEL: dec_64: ; X32: # %bb.0: ; X32-NEXT: pushl %ebx ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: pushl %esi ; X32-NEXT: .cfi_def_cfa_offset 12 ; X32-NEXT: .cfi_offset %esi, -12 ; X32-NEXT: .cfi_offset %ebx, -8 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: xorl %eax, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl $-1, %ebx ; X32-NEXT: adcl $-1, %ecx ; X32-NEXT: movl (%esi), %eax ; X32-NEXT: movl 4(%esi), %edx ; X32-NEXT: .p2align 4, 0x90 ; X32-NEXT: .LBB63_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB63_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end ; X32-NEXT: popl %esi ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx ; X32-NEXT: .cfi_def_cfa_offset 4 ; 
X32-NEXT: retl ; ; SLOW_INC-LABEL: dec_64: ; SLOW_INC: # %bb.0: ; SLOW_INC-NEXT: addq $-1, (%rdi) ; SLOW_INC-NEXT: retq ; We do not check X86-32 as it cannot do 'decq'. %1 = load atomic i64, i64* %p acquire, align 8 %2 = sub i64 %1, 1 store atomic i64 %2, i64* %p release, align 8 ret void } define void @dec_32_seq_cst(i32* %p) { ; FAST_INC-LABEL: dec_32_seq_cst: ; FAST_INC: # %bb.0: ; FAST_INC-NEXT: movl (%rdi), %eax ; FAST_INC-NEXT: decl %eax ; FAST_INC-NEXT: xchgl %eax, (%rdi) ; FAST_INC-NEXT: retq ; ; X32-LABEL: dec_32_seq_cst: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl (%eax), %ecx ; X32-NEXT: decl %ecx ; X32-NEXT: xchgl %ecx, (%eax) ; X32-NEXT: retl ; ; SLOW_INC-LABEL: dec_32_seq_cst: ; SLOW_INC: # %bb.0: ; SLOW_INC-NEXT: movl (%rdi), %eax ; SLOW_INC-NEXT: addl $-1, %eax ; SLOW_INC-NEXT: xchgl %eax, (%rdi) ; SLOW_INC-NEXT: retq %1 = load atomic i32, i32* %p monotonic, align 4 %2 = sub i32 %1, 1 store atomic i32 %2, i32* %p seq_cst, align 4 ret void } ; ----- NOT ----- define void @not_8(i8* %p) { ; X64-LABEL: not_8: ; X64: # %bb.0: ; X64-NEXT: notb (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: not_8: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: notb (%eax) ; X32-NEXT: retl %1 = load atomic i8, i8* %p seq_cst, align 1 %2 = xor i8 %1, -1 store atomic i8 %2, i8* %p release, align 1 ret void } define void @not_16(i16* %p) { ; X64-LABEL: not_16: ; X64: # %bb.0: ; X64-NEXT: notw (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: not_16: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: notw (%eax) ; X32-NEXT: retl %1 = load atomic i16, i16* %p acquire, align 2 %2 = xor i16 %1, -1 store atomic i16 %2, i16* %p release, align 2 ret void } define void @not_32(i32* %p) { ; X64-LABEL: not_32: ; X64: # %bb.0: ; X64-NEXT: notl (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: not_32: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: notl (%eax) ; X32-NEXT: retl %1 = load atomic i32, i32* %p acquire, align 4 %2 = xor i32 %1, -1 store atomic i32 %2, i32* %p monotonic, align 4 ret void } define void @not_64(i64* %p) { ; X64-LABEL: not_64: ; X64: # %bb.0: ; X64-NEXT: notq (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: not_64: ; X32: # %bb.0: ; X32-NEXT: pushl %ebx ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: pushl %esi ; X32-NEXT: .cfi_def_cfa_offset 12 ; X32-NEXT: .cfi_offset %esi, -12 ; X32-NEXT: .cfi_offset %ebx, -8 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: xorl %eax, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: notl %ecx ; X32-NEXT: notl %ebx ; X32-NEXT: movl (%esi), %eax ; X32-NEXT: movl 4(%esi), %edx ; X32-NEXT: .p2align 4, 0x90 ; X32-NEXT: .LBB68_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB68_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end ; X32-NEXT: popl %esi ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx ; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'notq'. 
%1 = load atomic i64, i64* %p acquire, align 8 %2 = xor i64 %1, -1 store atomic i64 %2, i64* %p release, align 8 ret void } define void @not_32_seq_cst(i32* %p) { ; X64-LABEL: not_32_seq_cst: ; X64: # %bb.0: ; X64-NEXT: movl (%rdi), %eax ; X64-NEXT: notl %eax ; X64-NEXT: xchgl %eax, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: not_32_seq_cst: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl (%eax), %ecx ; X32-NEXT: notl %ecx ; X32-NEXT: xchgl %ecx, (%eax) ; X32-NEXT: retl %1 = load atomic i32, i32* %p monotonic, align 4 %2 = xor i32 %1, -1 store atomic i32 %2, i32* %p seq_cst, align 4 ret void } ; ----- NEG ----- define void @neg_8(i8* %p) { ; X64-LABEL: neg_8: ; X64: # %bb.0: ; X64-NEXT: negb (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: neg_8: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: negb (%eax) ; X32-NEXT: retl %1 = load atomic i8, i8* %p seq_cst, align 1 %2 = sub i8 0, %1 store atomic i8 %2, i8* %p release, align 1 ret void } define void @neg_16(i16* %p) { ; X64-LABEL: neg_16: ; X64: # %bb.0: ; X64-NEXT: movzwl (%rdi), %eax ; X64-NEXT: negl %eax ; X64-NEXT: movw %ax, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: neg_16: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movzwl (%eax), %ecx ; X32-NEXT: negl %ecx ; X32-NEXT: movw %cx, (%eax) ; X32-NEXT: retl %1 = load atomic i16, i16* %p acquire, align 2 %2 = sub i16 0, %1 store atomic i16 %2, i16* %p release, align 2 ret void } define void @neg_32(i32* %p) { ; X64-LABEL: neg_32: ; X64: # %bb.0: ; X64-NEXT: negl (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: neg_32: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: negl (%eax) ; X32-NEXT: retl %1 = load atomic i32, i32* %p acquire, align 4 %2 = sub i32 0, %1 store atomic i32 %2, i32* %p monotonic, align 4 ret void } define void @neg_64(i64* %p) { ; X64-LABEL: neg_64: ; X64: # %bb.0: ; X64-NEXT: negq (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: neg_64: ; X32: # %bb.0: ; X32-NEXT: pushl %ebx ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: pushl %edi ; X32-NEXT: .cfi_def_cfa_offset 12 ; X32-NEXT: pushl %esi ; X32-NEXT: .cfi_def_cfa_offset 16 ; X32-NEXT: .cfi_offset %esi, -16 ; X32-NEXT: .cfi_offset %edi, -12 ; X32-NEXT: .cfi_offset %ebx, -8 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi ; X32-NEXT: xorl %esi, %esi ; X32-NEXT: xorl %eax, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: lock cmpxchg8b (%edi) ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: negl %ebx ; X32-NEXT: sbbl %edx, %esi ; X32-NEXT: movl (%edi), %eax ; X32-NEXT: movl 4(%edi), %edx ; X32-NEXT: .p2align 4, 0x90 ; X32-NEXT: .LBB73_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl %esi, %ecx ; X32-NEXT: lock cmpxchg8b (%edi) ; X32-NEXT: jne .LBB73_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end ; X32-NEXT: popl %esi ; X32-NEXT: .cfi_def_cfa_offset 12 ; X32-NEXT: popl %edi ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx ; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'negq'.
%1 = load atomic i64, i64* %p acquire, align 8 %2 = sub i64 0, %1 store atomic i64 %2, i64* %p release, align 8 ret void } define void @neg_32_seq_cst(i32* %p) { ; X64-LABEL: neg_32_seq_cst: ; X64: # %bb.0: ; X64-NEXT: movl (%rdi), %eax ; X64-NEXT: negl %eax ; X64-NEXT: xchgl %eax, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: neg_32_seq_cst: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl (%eax), %ecx ; X32-NEXT: negl %ecx ; X32-NEXT: xchgl %ecx, (%eax) ; X32-NEXT: retl %1 = load atomic i32, i32* %p monotonic, align 4 %2 = sub i32 0, %1 store atomic i32 %2, i32* %p seq_cst, align 4 ret void } ; ----- FADD ----- define void @fadd_32r(float* %loc, float %val) { ; X64-LABEL: fadd_32r: ; X64: # %bb.0: ; X64-NEXT: addss (%rdi), %xmm0 ; X64-NEXT: movss %xmm0, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: fadd_32r: ; X32: # %bb.0: ; X32-NEXT: subl $8, %esp ; X32-NEXT: .cfi_def_cfa_offset 12 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl (%eax), %ecx ; X32-NEXT: movl %ecx, (%esp) ; X32-NEXT: flds (%esp) ; X32-NEXT: fadds {{[0-9]+}}(%esp) ; X32-NEXT: fstps {{[0-9]+}}(%esp) ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl %ecx, (%eax) ; X32-NEXT: addl $8, %esp ; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; Don't check x86-32. ; LLVM's SSE handling is conservative on x86-32 even without using atomics. %floc = bitcast float* %loc to i32* %1 = load atomic i32, i32* %floc seq_cst, align 4 %2 = bitcast i32 %1 to float %add = fadd float %2, %val %3 = bitcast float %add to i32 store atomic i32 %3, i32* %floc release, align 4 ret void } define void @fadd_64r(double* %loc, double %val) { ; X64-LABEL: fadd_64r: ; X64: # %bb.0: ; X64-NEXT: addsd (%rdi), %xmm0 ; X64-NEXT: movsd %xmm0, (%rdi) ; X64-NEXT: retq ; ; X32-LABEL: fadd_64r: ; X32: # %bb.0: ; X32-NEXT: pushl %ebp ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_offset %ebp, -8 ; X32-NEXT: movl %esp, %ebp ; X32-NEXT: .cfi_def_cfa_register %ebp ; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %esi ; X32-NEXT: andl $-8, %esp ; X32-NEXT: subl $16, %esp ; X32-NEXT: .cfi_offset %esi, -16 ; X32-NEXT: .cfi_offset %ebx, -12 ; X32-NEXT: movl 8(%ebp), %esi ; X32-NEXT: xorl %eax, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X32-NEXT: fldl {{[0-9]+}}(%esp) ; X32-NEXT: faddl 12(%ebp) ; X32-NEXT: fstpl (%esp) ; X32-NEXT: movl (%esp), %ebx ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl (%esi), %eax ; X32-NEXT: movl 4(%esi), %edx ; X32-NEXT: .p2align 4, 0x90 ; X32-NEXT: .LBB76_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB76_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end ; X32-NEXT: leal -8(%ebp), %esp ; X32-NEXT: popl %esi ; X32-NEXT: popl %ebx ; X32-NEXT: popl %ebp ; X32-NEXT: .cfi_def_cfa %esp, 4 ; X32-NEXT: retl ; Don't check x86-32 (see comment above). %floc = bitcast double* %loc to i64* %1 = load atomic i64, i64* %floc seq_cst, align 8 %2 = bitcast i64 %1 to double %add = fadd double %2, %val %3 = bitcast double %add to i64 store atomic i64 %3, i64* %floc release, align 8 ret void } @glob32 = global float 0.000000e+00, align 4 @glob64 = global double 0.000000e+00, align 8 ; Floating-point add to a global using an immediate. 
define void @fadd_32g() { ; X64-LABEL: fadd_32g: ; X64: # %bb.0: ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X64-NEXT: addss {{.*}}(%rip), %xmm0 ; X64-NEXT: movss %xmm0, {{.*}}(%rip) ; X64-NEXT: retq ; ; X32-LABEL: fadd_32g: ; X32: # %bb.0: ; X32-NEXT: subl $8, %esp ; X32-NEXT: .cfi_def_cfa_offset 12 ; X32-NEXT: movl glob32, %eax ; X32-NEXT: movl %eax, (%esp) ; X32-NEXT: fld1 ; X32-NEXT: fadds (%esp) ; X32-NEXT: fstps {{[0-9]+}}(%esp) ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl %eax, glob32 ; X32-NEXT: addl $8, %esp ; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; Don't check x86-32 (see comment above). %i = load atomic i32, i32* bitcast (float* @glob32 to i32*) monotonic, align 4 %f = bitcast i32 %i to float %add = fadd float %f, 1.000000e+00 %s = bitcast float %add to i32 store atomic i32 %s, i32* bitcast (float* @glob32 to i32*) monotonic, align 4 ret void } define void @fadd_64g() { ; X64-LABEL: fadd_64g: ; X64: # %bb.0: ; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X64-NEXT: addsd {{.*}}(%rip), %xmm0 ; X64-NEXT: movsd %xmm0, {{.*}}(%rip) ; X64-NEXT: retq ; ; X32-LABEL: fadd_64g: ; X32: # %bb.0: ; X32-NEXT: pushl %ebp ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_offset %ebp, -8 ; X32-NEXT: movl %esp, %ebp ; X32-NEXT: .cfi_def_cfa_register %ebp ; X32-NEXT: pushl %ebx ; X32-NEXT: andl $-8, %esp ; X32-NEXT: subl $24, %esp ; X32-NEXT: .cfi_offset %ebx, -12 ; X32-NEXT: xorl %eax, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: lock cmpxchg8b glob64 ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X32-NEXT: fld1 ; X32-NEXT: faddl {{[0-9]+}}(%esp) ; X32-NEXT: fstpl (%esp) ; X32-NEXT: movl (%esp), %ebx ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl glob64+4, %edx ; X32-NEXT: movl glob64, %eax ; X32-NEXT: .p2align 4, 0x90 ; X32-NEXT: .LBB78_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: lock cmpxchg8b glob64 ; X32-NEXT: jne .LBB78_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end ; X32-NEXT: leal -4(%ebp), %esp ; X32-NEXT: popl %ebx ; X32-NEXT: popl %ebp ; X32-NEXT: .cfi_def_cfa %esp, 4 ; X32-NEXT: retl ; Don't check x86-32 (see comment above). %i = load atomic i64, i64* bitcast (double* @glob64 to i64*) monotonic, align 8 %f = bitcast i64 %i to double %add = fadd double %f, 1.000000e+00 %s = bitcast double %add to i64 store atomic i64 %s, i64* bitcast (double* @glob64 to i64*) monotonic, align 8 ret void } ; Floating-point add to a hard-coded immediate location using an immediate. define void @fadd_32imm() { ; X64-LABEL: fadd_32imm: ; X64: # %bb.0: ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X64-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF ; X64-NEXT: addss (%rax), %xmm0 ; X64-NEXT: movss %xmm0, (%rax) ; X64-NEXT: retq ; ; X32-LABEL: fadd_32imm: ; X32: # %bb.0: ; X32-NEXT: subl $8, %esp ; X32-NEXT: .cfi_def_cfa_offset 12 ; X32-NEXT: movl -559038737, %eax ; X32-NEXT: movl %eax, (%esp) ; X32-NEXT: fld1 ; X32-NEXT: fadds (%esp) ; X32-NEXT: fstps {{[0-9]+}}(%esp) ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl %eax, -559038737 ; X32-NEXT: addl $8, %esp ; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; Don't check x86-32 (see comment above). 
%i = load atomic i32, i32* inttoptr (i32 3735928559 to i32*) monotonic, align 4 %f = bitcast i32 %i to float %add = fadd float %f, 1.000000e+00 %s = bitcast float %add to i32 store atomic i32 %s, i32* inttoptr (i32 3735928559 to i32*) monotonic, align 4 ret void } define void @fadd_64imm() { ; X64-LABEL: fadd_64imm: ; X64: # %bb.0: ; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X64-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF ; X64-NEXT: addsd (%rax), %xmm0 ; X64-NEXT: movsd %xmm0, (%rax) ; X64-NEXT: retq ; ; X32-LABEL: fadd_64imm: ; X32: # %bb.0: ; X32-NEXT: pushl %ebp ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_offset %ebp, -8 ; X32-NEXT: movl %esp, %ebp ; X32-NEXT: .cfi_def_cfa_register %ebp ; X32-NEXT: pushl %ebx ; X32-NEXT: andl $-8, %esp ; X32-NEXT: subl $24, %esp ; X32-NEXT: .cfi_offset %ebx, -12 ; X32-NEXT: xorl %eax, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: lock cmpxchg8b -559038737 ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X32-NEXT: fld1 ; X32-NEXT: faddl {{[0-9]+}}(%esp) ; X32-NEXT: fstpl (%esp) ; X32-NEXT: movl (%esp), %ebx ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl -559038737, %eax ; X32-NEXT: movl -559038733, %edx ; X32-NEXT: .p2align 4, 0x90 ; X32-NEXT: .LBB80_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: lock cmpxchg8b -559038737 ; X32-NEXT: jne .LBB80_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end ; X32-NEXT: leal -4(%ebp), %esp ; X32-NEXT: popl %ebx ; X32-NEXT: popl %ebp ; X32-NEXT: .cfi_def_cfa %esp, 4 ; X32-NEXT: retl ; Don't check x86-32 (see comment above). %i = load atomic i64, i64* inttoptr (i64 3735928559 to i64*) monotonic, align 8 %f = bitcast i64 %i to double %add = fadd double %f, 1.000000e+00 %s = bitcast double %add to i64 store atomic i64 %s, i64* inttoptr (i64 3735928559 to i64*) monotonic, align 8 ret void } ; Floating-point add to a stack location. define void @fadd_32stack() { ; X64-LABEL: fadd_32stack: ; X64: # %bb.0: ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X64-NEXT: addss -{{[0-9]+}}(%rsp), %xmm0 ; X64-NEXT: movss %xmm0, -{{[0-9]+}}(%rsp) ; X64-NEXT: retq ; ; X32-LABEL: fadd_32stack: ; X32: # %bb.0: ; X32-NEXT: subl $12, %esp ; X32-NEXT: .cfi_def_cfa_offset 16 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl %eax, (%esp) ; X32-NEXT: fld1 ; X32-NEXT: fadds (%esp) ; X32-NEXT: fstps {{[0-9]+}}(%esp) ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X32-NEXT: addl $12, %esp ; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; Don't check x86-32 (see comment above). 
%ptr = alloca i32, align 4 %bc3 = bitcast i32* %ptr to float* %load = load atomic i32, i32* %ptr acquire, align 4 %bc0 = bitcast i32 %load to float %fadd = fadd float 1.000000e+00, %bc0 %bc1 = bitcast float %fadd to i32 store atomic i32 %bc1, i32* %ptr release, align 4 ret void } define void @fadd_64stack() { ; X64-LABEL: fadd_64stack: ; X64: # %bb.0: ; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X64-NEXT: addsd -{{[0-9]+}}(%rsp), %xmm0 ; X64-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp) ; X64-NEXT: retq ; ; X32-LABEL: fadd_64stack: ; X32: # %bb.0: ; X32-NEXT: pushl %ebp ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_offset %ebp, -8 ; X32-NEXT: movl %esp, %ebp ; X32-NEXT: .cfi_def_cfa_register %ebp ; X32-NEXT: pushl %ebx ; X32-NEXT: andl $-8, %esp ; X32-NEXT: subl $32, %esp ; X32-NEXT: .cfi_offset %ebx, -12 ; X32-NEXT: xorl %eax, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: lock cmpxchg8b (%esp) ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X32-NEXT: fld1 ; X32-NEXT: faddl {{[0-9]+}}(%esp) ; X32-NEXT: fstpl {{[0-9]+}}(%esp) ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl (%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NEXT: .p2align 4, 0x90 ; X32-NEXT: .LBB82_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: lock cmpxchg8b (%esp) ; X32-NEXT: jne .LBB82_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end ; X32-NEXT: leal -4(%ebp), %esp ; X32-NEXT: popl %ebx ; X32-NEXT: popl %ebp ; X32-NEXT: .cfi_def_cfa %esp, 4 ; X32-NEXT: retl ; Don't check x86-32 (see comment above). %ptr = alloca i64, align 8 %bc3 = bitcast i64* %ptr to double* %load = load atomic i64, i64* %ptr acquire, align 8 %bc0 = bitcast i64 %load to double %fadd = fadd double 1.000000e+00, %bc0 %bc1 = bitcast double %fadd to i64 store atomic i64 %bc1, i64* %ptr release, align 8 ret void } define void @fadd_array(i64* %arg, double %arg1, i64 %arg2) { ; X64-LABEL: fadd_array: ; X64: # %bb.0: # %bb ; X64-NEXT: addsd (%rdi,%rsi,8), %xmm0 ; X64-NEXT: movsd %xmm0, (%rdi,%rsi,8) ; X64-NEXT: retq ; ; X32-LABEL: fadd_array: ; X32: # %bb.0: # %bb ; X32-NEXT: pushl %ebp ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_offset %ebp, -8 ; X32-NEXT: movl %esp, %ebp ; X32-NEXT: .cfi_def_cfa_register %ebp ; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi ; X32-NEXT: andl $-8, %esp ; X32-NEXT: subl $24, %esp ; X32-NEXT: .cfi_offset %esi, -20 ; X32-NEXT: .cfi_offset %edi, -16 ; X32-NEXT: .cfi_offset %ebx, -12 ; X32-NEXT: movl 20(%ebp), %esi ; X32-NEXT: movl 8(%ebp), %edi ; X32-NEXT: xorl %eax, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: lock cmpxchg8b (%edi,%esi,8) ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X32-NEXT: fldl {{[0-9]+}}(%esp) ; X32-NEXT: faddl 12(%ebp) ; X32-NEXT: fstpl (%esp) ; X32-NEXT: movl (%esp), %ebx ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl (%edi,%esi,8), %eax ; X32-NEXT: movl 4(%edi,%esi,8), %edx ; X32-NEXT: .p2align 4, 0x90 ; X32-NEXT: .LBB83_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: lock cmpxchg8b (%edi,%esi,8) ; X32-NEXT: jne .LBB83_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end ; X32-NEXT: leal -12(%ebp), %esp ; X32-NEXT: popl %esi ; X32-NEXT: popl %edi ; X32-NEXT: popl %ebx ; X32-NEXT: popl %ebp ; X32-NEXT: .cfi_def_cfa %esp, 4 ; X32-NEXT: retl ; Don't check x86-32 (see comment above). 
bb: %tmp4 = getelementptr inbounds i64, i64* %arg, i64 %arg2 %tmp6 = load atomic i64, i64* %tmp4 monotonic, align 8 %tmp7 = bitcast i64 %tmp6 to double %tmp8 = fadd double %tmp7, %arg1 %tmp9 = bitcast double %tmp8 to i64 store atomic i64 %tmp9, i64* %tmp4 monotonic, align 8 ret void }
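; The floating-point tests above model an fadd sandwiched between an atomic
; integer load and an atomic integer store of the same bits. A rough C++
; equivalent is sketched below; it is an illustration only (the function name
; is hypothetical), not part of the test.
;
;   #include <atomic>
;   #include <bit>  // std::bit_cast, C++20
;
;   void fadd_atomic_bits(std::atomic<unsigned> &loc, float val) {
;     unsigned bits = loc.load(std::memory_order_seq_cst);
;     float sum = std::bit_cast<float>(bits) + val;
;     loc.store(std::bit_cast<unsigned>(sum), std::memory_order_release);
;   }
;
; On x86-64 this pattern is expected to fold into addss (mem), %xmm0 followed
; by movss %xmm0, (mem), matching the checks in @fadd_32r.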