1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 02:33:06 +01:00

[X86] Use FIST for i64 atomic stores on 32-bit targets without SSE.

This commit is contained in:
Craig Topper 2020-02-23 10:21:59 -08:00
parent b70c140e59
commit 48e959027f
5 changed files with 307 additions and 482 deletions

View File

@ -27518,14 +27518,14 @@ bool X86TargetLowering::needsCmpXchgNb(Type *MemType) const {
return false;
}
// TODO: In 32-bit mode, use FISTP when X87 is available?
bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
Type *MemType = SI->getValueOperand()->getType();
bool NoImplicitFloatOps =
SI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat);
if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&
!Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE1())
!Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
(Subtarget.hasSSE1() || Subtarget.hasX87()))
return false;
return needsCmpXchgNb(MemType);
@ -28286,28 +28286,52 @@ static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG,
if (VT == MVT::i64 && !IsTypeLegal) {
// For illegal i64 atomic_stores, we can try to use MOVQ or MOVLPS if SSE
// is enabled.
// FIXME: Use fist with X87.
bool NoImplicitFloatOps =
DAG.getMachineFunction().getFunction().hasFnAttribute(
Attribute::NoImplicitFloat);
if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
Subtarget.hasSSE1()) {
SDValue SclToVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
Node->getOperand(2));
MVT StVT = Subtarget.hasSSE2() ? MVT::v2i64 : MVT::v4f32;
SclToVec = DAG.getBitcast(StVT, SclToVec);
SDVTList Tys = DAG.getVTList(MVT::Other);
SDValue Ops[] = { Node->getChain(), SclToVec, Node->getBasePtr() };
SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::VEXTRACT_STORE, dl, Tys,
Ops, MVT::i64,
Node->getMemOperand());
if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps) {
SDValue Chain;
if (Subtarget.hasSSE1()) {
SDValue SclToVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
Node->getOperand(2));
MVT StVT = Subtarget.hasSSE2() ? MVT::v2i64 : MVT::v4f32;
SclToVec = DAG.getBitcast(StVT, SclToVec);
SDVTList Tys = DAG.getVTList(MVT::Other);
SDValue Ops[] = {Node->getChain(), SclToVec, Node->getBasePtr()};
Chain = DAG.getMemIntrinsicNode(X86ISD::VEXTRACT_STORE, dl, Tys, Ops,
MVT::i64, Node->getMemOperand());
} else if (Subtarget.hasX87()) {
// First load this into an 80-bit X87 register using a stack temporary.
// This will put the whole integer into the significand.
SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
MachinePointerInfo MPI =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
Chain =
DAG.getStore(Node->getChain(), dl, Node->getOperand(2), StackPtr,
MPI, /*Align*/ 0, MachineMemOperand::MOStore);
SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
SDValue LdOps[] = {Chain, StackPtr};
SDValue Value =
DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, LdOps, MVT::i64, MPI,
/*Align*/ 0, MachineMemOperand::MOLoad);
Chain = Value.getValue(1);
// If this is a sequentially consistent store, also emit an appropriate
// barrier.
if (IsSeqCst)
Chain = emitLockedStackOp(DAG, Subtarget, Chain, dl);
// Now use an FIST to do the atomic store.
SDValue StoreOps[] = {Chain, Value, Node->getBasePtr()};
Chain =
DAG.getMemIntrinsicNode(X86ISD::FIST, dl, DAG.getVTList(MVT::Other),
StoreOps, MVT::i64, Node->getMemOperand());
}
return Chain;
if (Chain) {
// If this is a sequentially consistent store, also emit an appropriate
// barrier.
if (IsSeqCst)
Chain = emitLockedStackOp(DAG, Subtarget, Chain, dl);
return Chain;
}
}
}

View File

@ -80,33 +80,25 @@ define void @fadd_64r(double* %loc, double %val) nounwind {
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: pushl %ebx
; X86-NOSSE-NEXT: pushl %esi
; X86-NOSSE-NEXT: andl $-8, %esp
; X86-NOSSE-NEXT: subl $24, %esp
; X86-NOSSE-NEXT: movl 8(%ebp), %esi
; X86-NOSSE-NEXT: fildll (%esi)
; X86-NOSSE-NEXT: subl $32, %esp
; X86-NOSSE-NEXT: movl 8(%ebp), %eax
; X86-NOSSE-NEXT: fildll (%eax)
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: faddl 12(%ebp)
; X86-NOSSE-NEXT: fstpl (%esp)
; X86-NOSSE-NEXT: movl (%esp), %ebx
; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl (%esi), %eax
; X86-NOSSE-NEXT: movl 4(%esi), %edx
; X86-NOSSE-NEXT: .p2align 4, 0x90
; X86-NOSSE-NEXT: .LBB1_1: # %atomicrmw.start
; X86-NOSSE-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NOSSE-NEXT: lock cmpxchg8b (%esi)
; X86-NOSSE-NEXT: jne .LBB1_1
; X86-NOSSE-NEXT: # %bb.2: # %atomicrmw.end
; X86-NOSSE-NEXT: leal -8(%ebp), %esp
; X86-NOSSE-NEXT: popl %esi
; X86-NOSSE-NEXT: popl %ebx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl %ecx, (%esp)
; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fildll (%esp)
; X86-NOSSE-NEXT: fistpll (%eax)
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
;
@ -256,7 +248,6 @@ define void @fadd_64g() nounwind {
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: pushl %ebx
; X86-NOSSE-NEXT: andl $-8, %esp
; X86-NOSSE-NEXT: subl $32, %esp
; X86-NOSSE-NEXT: fildll glob64
@ -267,19 +258,14 @@ define void @fadd_64g() nounwind {
; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fld1
; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fstpl (%esp)
; X86-NOSSE-NEXT: movl (%esp), %ebx
; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl glob64+4, %edx
; X86-NOSSE-NEXT: movl glob64, %eax
; X86-NOSSE-NEXT: .p2align 4, 0x90
; X86-NOSSE-NEXT: .LBB3_1: # %atomicrmw.start
; X86-NOSSE-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NOSSE-NEXT: lock cmpxchg8b glob64
; X86-NOSSE-NEXT: jne .LBB3_1
; X86-NOSSE-NEXT: # %bb.2: # %atomicrmw.end
; X86-NOSSE-NEXT: leal -4(%ebp), %esp
; X86-NOSSE-NEXT: popl %ebx
; X86-NOSSE-NEXT: movl %eax, (%esp)
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fildll (%esp)
; X86-NOSSE-NEXT: fistpll glob64
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
;
@ -426,7 +412,6 @@ define void @fadd_64imm() nounwind {
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: pushl %ebx
; X86-NOSSE-NEXT: andl $-8, %esp
; X86-NOSSE-NEXT: subl $32, %esp
; X86-NOSSE-NEXT: fildll -559038737
@ -437,19 +422,14 @@ define void @fadd_64imm() nounwind {
; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fld1
; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fstpl (%esp)
; X86-NOSSE-NEXT: movl (%esp), %ebx
; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl -559038737, %eax
; X86-NOSSE-NEXT: movl -559038733, %edx
; X86-NOSSE-NEXT: .p2align 4, 0x90
; X86-NOSSE-NEXT: .LBB5_1: # %atomicrmw.start
; X86-NOSSE-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NOSSE-NEXT: lock cmpxchg8b -559038737
; X86-NOSSE-NEXT: jne .LBB5_1
; X86-NOSSE-NEXT: # %bb.2: # %atomicrmw.end
; X86-NOSSE-NEXT: leal -4(%ebp), %esp
; X86-NOSSE-NEXT: popl %ebx
; X86-NOSSE-NEXT: movl %eax, (%esp)
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fildll (%esp)
; X86-NOSSE-NEXT: fistpll -559038737
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
;
@ -602,10 +582,9 @@ define void @fadd_64stack() nounwind {
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: pushl %ebx
; X86-NOSSE-NEXT: andl $-8, %esp
; X86-NOSSE-NEXT: subl $40, %esp
; X86-NOSSE-NEXT: fildll (%esp)
; X86-NOSSE-NEXT: fildll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
@ -614,18 +593,13 @@ define void @fadd_64stack() nounwind {
; X86-NOSSE-NEXT: fld1
; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl (%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: .p2align 4, 0x90
; X86-NOSSE-NEXT: .LBB7_1: # %atomicrmw.start
; X86-NOSSE-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NOSSE-NEXT: lock cmpxchg8b (%esp)
; X86-NOSSE-NEXT: jne .LBB7_1
; X86-NOSSE-NEXT: # %bb.2: # %atomicrmw.end
; X86-NOSSE-NEXT: leal -4(%ebp), %esp
; X86-NOSSE-NEXT: popl %ebx
; X86-NOSSE-NEXT: movl %eax, (%esp)
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fildll (%esp)
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %ebp, %esp
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
;
@ -709,36 +683,28 @@ define void @fadd_array(i64* %arg, double %arg1, i64 %arg2) nounwind {
; X86-NOSSE: # %bb.0: # %bb
; X86-NOSSE-NEXT: pushl %ebp
; X86-NOSSE-NEXT: movl %esp, %ebp
; X86-NOSSE-NEXT: pushl %ebx
; X86-NOSSE-NEXT: pushl %edi
; X86-NOSSE-NEXT: pushl %esi
; X86-NOSSE-NEXT: andl $-8, %esp
; X86-NOSSE-NEXT: subl $32, %esp
; X86-NOSSE-NEXT: movl 20(%ebp), %esi
; X86-NOSSE-NEXT: movl 8(%ebp), %edi
; X86-NOSSE-NEXT: fildll (%edi,%esi,8)
; X86-NOSSE-NEXT: subl $40, %esp
; X86-NOSSE-NEXT: movl 20(%ebp), %eax
; X86-NOSSE-NEXT: movl 8(%ebp), %ecx
; X86-NOSSE-NEXT: fildll (%ecx,%eax,8)
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: faddl 12(%ebp)
; X86-NOSSE-NEXT: fstpl (%esp)
; X86-NOSSE-NEXT: movl (%esp), %ebx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl (%edi,%esi,8), %eax
; X86-NOSSE-NEXT: movl 4(%edi,%esi,8), %edx
; X86-NOSSE-NEXT: .p2align 4, 0x90
; X86-NOSSE-NEXT: .LBB8_1: # %atomicrmw.start
; X86-NOSSE-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NOSSE-NEXT: lock cmpxchg8b (%edi,%esi,8)
; X86-NOSSE-NEXT: jne .LBB8_1
; X86-NOSSE-NEXT: # %bb.2: # %atomicrmw.end
; X86-NOSSE-NEXT: leal -12(%ebp), %esp
; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT: movl %edx, (%esp)
; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: fildll (%esp)
; X86-NOSSE-NEXT: fistpll (%ecx,%eax,8)
; X86-NOSSE-NEXT: leal -4(%ebp), %esp
; X86-NOSSE-NEXT: popl %esi
; X86-NOSSE-NEXT: popl %edi
; X86-NOSSE-NEXT: popl %ebx
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
;

View File

@ -16,27 +16,24 @@ define void @test1(i64* %ptr, i64 %val1) {
;
; NOSSE-LABEL: test1:
; NOSSE: # %bb.0:
; NOSSE-NEXT: pushl %ebx
; NOSSE-NEXT: pushl %ebp
; NOSSE-NEXT: .cfi_def_cfa_offset 8
; NOSSE-NEXT: pushl %esi
; NOSSE-NEXT: .cfi_def_cfa_offset 12
; NOSSE-NEXT: .cfi_offset %esi, -12
; NOSSE-NEXT: .cfi_offset %ebx, -8
; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx
; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; NOSSE-NEXT: movl (%esi), %eax
; NOSSE-NEXT: movl 4(%esi), %edx
; NOSSE-NEXT: .p2align 4, 0x90
; NOSSE-NEXT: .LBB0_1: # %atomicrmw.start
; NOSSE-NEXT: # =>This Inner Loop Header: Depth=1
; NOSSE-NEXT: lock cmpxchg8b (%esi)
; NOSSE-NEXT: jne .LBB0_1
; NOSSE-NEXT: # %bb.2: # %atomicrmw.end
; NOSSE-NEXT: popl %esi
; NOSSE-NEXT: .cfi_def_cfa_offset 8
; NOSSE-NEXT: popl %ebx
; NOSSE-NEXT: .cfi_def_cfa_offset 4
; NOSSE-NEXT: .cfi_offset %ebp, -8
; NOSSE-NEXT: movl %esp, %ebp
; NOSSE-NEXT: .cfi_def_cfa_register %ebp
; NOSSE-NEXT: andl $-8, %esp
; NOSSE-NEXT: subl $8, %esp
; NOSSE-NEXT: movl 8(%ebp), %eax
; NOSSE-NEXT: movl 12(%ebp), %ecx
; NOSSE-NEXT: movl 16(%ebp), %edx
; NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
; NOSSE-NEXT: movl %ecx, (%esp)
; NOSSE-NEXT: fildll (%esp)
; NOSSE-NEXT: fistpll (%eax)
; NOSSE-NEXT: lock orl $0, (%esp)
; NOSSE-NEXT: movl %ebp, %esp
; NOSSE-NEXT: popl %ebp
; NOSSE-NEXT: .cfi_def_cfa %esp, 4
; NOSSE-NEXT: retl
store atomic i64 %val1, i64* %ptr seq_cst, align 8
ret void

View File

@ -84,27 +84,21 @@ define void @store_atomic_imm_64(i64* %p) {
;
; X32-LABEL: store_atomic_imm_64:
; X32: # %bb.0:
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: pushl %esi
; X32-NEXT: .cfi_def_cfa_offset 12
; X32-NEXT: .cfi_offset %esi, -12
; X32-NEXT: .cfi_offset %ebx, -8
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: movl $42, %ebx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB3_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB3_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
; X32-NEXT: popl %esi
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: popl %ebx
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: movl 8(%ebp), %eax
; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
; X32-NEXT: movl $42, (%esp)
; X32-NEXT: fildll (%esp)
; X32-NEXT: fistpll (%eax)
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
; These were implemented with a CAS loop on 32 bit architectures; the X32 checks
; above now expect an x87 fildll/fistpll pair instead, but either way they
; cannot be optimized in the same way as the others.
@ -123,27 +117,21 @@ define void @store_atomic_imm_64_big(i64* %p) {
;
; X32-LABEL: store_atomic_imm_64_big:
; X32: # %bb.0:
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: pushl %esi
; X32-NEXT: .cfi_def_cfa_offset 12
; X32-NEXT: .cfi_offset %esi, -12
; X32-NEXT: .cfi_offset %ebx, -8
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: movl $23, %ecx
; X32-NEXT: movl $1215752192, %ebx # imm = 0x4876E800
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB4_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB4_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
; X32-NEXT: popl %esi
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: popl %ebx
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: movl 8(%ebp), %eax
; X32-NEXT: movl $23, {{[0-9]+}}(%esp)
; X32-NEXT: movl $1215752192, (%esp) # imm = 0x4876E800
; X32-NEXT: fildll (%esp)
; X32-NEXT: fistpll (%eax)
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
store atomic i64 100000000000, i64* %p monotonic, align 8
ret void
@ -336,30 +324,20 @@ define void @add_64i(i64* %p) {
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: .cfi_offset %esi, -16
; X32-NEXT: .cfi_offset %ebx, -12
; X32-NEXT: movl 8(%ebp), %esi
; X32-NEXT: fildll (%esi)
; X32-NEXT: fistpll (%esp)
; X32-NEXT: movl (%esp), %ebx
; X32-NEXT: subl $16, %esp
; X32-NEXT: movl 8(%ebp), %eax
; X32-NEXT: fildll (%eax)
; X32-NEXT: fistpll {{[0-9]+}}(%esp)
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl $2, %ebx
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB14_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB14_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: addl $2, %ecx
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %ecx, (%esp)
; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X32-NEXT: fildll (%esp)
; X32-NEXT: fistpll (%eax)
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
@ -383,30 +361,20 @@ define void @add_64r(i64* %p, i64 %v) {
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: .cfi_offset %esi, -16
; X32-NEXT: .cfi_offset %ebx, -12
; X32-NEXT: movl 8(%ebp), %esi
; X32-NEXT: fildll (%esi)
; X32-NEXT: fistpll (%esp)
; X32-NEXT: movl (%esp), %ebx
; X32-NEXT: subl $16, %esp
; X32-NEXT: movl 8(%ebp), %eax
; X32-NEXT: fildll (%eax)
; X32-NEXT: fistpll {{[0-9]+}}(%esp)
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl 12(%ebp), %ebx
; X32-NEXT: adcl 16(%ebp), %ecx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB15_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB15_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: addl 12(%ebp), %ecx
; X32-NEXT: adcl 16(%ebp), %edx
; X32-NEXT: movl %ecx, (%esp)
; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X32-NEXT: fildll (%esp)
; X32-NEXT: fistpll (%eax)
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
@ -576,30 +544,20 @@ define void @sub_64r(i64* %p, i64 %v) {
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: .cfi_offset %esi, -16
; X32-NEXT: .cfi_offset %ebx, -12
; X32-NEXT: movl 8(%ebp), %esi
; X32-NEXT: fildll (%esi)
; X32-NEXT: fistpll (%esp)
; X32-NEXT: movl (%esp), %ebx
; X32-NEXT: subl $16, %esp
; X32-NEXT: movl 8(%ebp), %eax
; X32-NEXT: fildll (%eax)
; X32-NEXT: fistpll {{[0-9]+}}(%esp)
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: subl 12(%ebp), %ebx
; X32-NEXT: sbbl 16(%ebp), %ecx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB23_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB23_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: subl 12(%ebp), %ecx
; X32-NEXT: sbbl 16(%ebp), %edx
; X32-NEXT: movl %ecx, (%esp)
; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X32-NEXT: fildll (%esp)
; X32-NEXT: fistpll (%eax)
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
@ -751,29 +709,18 @@ define void @and_64i(i64* %p) {
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: .cfi_offset %esi, -16
; X32-NEXT: .cfi_offset %ebx, -12
; X32-NEXT: movl 8(%ebp), %esi
; X32-NEXT: fildll (%esi)
; X32-NEXT: fistpll (%esp)
; X32-NEXT: movl (%esp), %ebx
; X32-NEXT: andl $2, %ebx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB31_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB31_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: subl $16, %esp
; X32-NEXT: movl 8(%ebp), %eax
; X32-NEXT: fildll (%eax)
; X32-NEXT: fistpll {{[0-9]+}}(%esp)
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: andl $2, %ecx
; X32-NEXT: movl %ecx, (%esp)
; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
; X32-NEXT: fildll (%esp)
; X32-NEXT: fistpll (%eax)
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
@ -797,30 +744,20 @@ define void @and_64r(i64* %p, i64 %v) {
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: .cfi_offset %esi, -16
; X32-NEXT: .cfi_offset %ebx, -12
; X32-NEXT: movl 8(%ebp), %esi
; X32-NEXT: fildll (%esi)
; X32-NEXT: fistpll (%esp)
; X32-NEXT: movl (%esp), %ebx
; X32-NEXT: subl $16, %esp
; X32-NEXT: movl 8(%ebp), %eax
; X32-NEXT: fildll (%eax)
; X32-NEXT: fistpll {{[0-9]+}}(%esp)
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: andl 16(%ebp), %ecx
; X32-NEXT: andl 12(%ebp), %ebx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB32_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB32_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: andl 16(%ebp), %edx
; X32-NEXT: andl 12(%ebp), %ecx
; X32-NEXT: movl %ecx, (%esp)
; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X32-NEXT: fildll (%esp)
; X32-NEXT: fistpll (%eax)
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
@ -993,29 +930,19 @@ define void @or_64i(i64* %p) {
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: .cfi_offset %esi, -16
; X32-NEXT: .cfi_offset %ebx, -12
; X32-NEXT: movl 8(%ebp), %esi
; X32-NEXT: fildll (%esi)
; X32-NEXT: fistpll (%esp)
; X32-NEXT: movl (%esp), %ebx
; X32-NEXT: subl $16, %esp
; X32-NEXT: movl 8(%ebp), %eax
; X32-NEXT: fildll (%eax)
; X32-NEXT: fistpll {{[0-9]+}}(%esp)
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: orl $2, %ebx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB41_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB41_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: orl $2, %ecx
; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X32-NEXT: movl %ecx, (%esp)
; X32-NEXT: fildll (%esp)
; X32-NEXT: fistpll (%eax)
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
@ -1039,30 +966,20 @@ define void @or_64r(i64* %p, i64 %v) {
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: .cfi_offset %esi, -16
; X32-NEXT: .cfi_offset %ebx, -12
; X32-NEXT: movl 8(%ebp), %esi
; X32-NEXT: fildll (%esi)
; X32-NEXT: fistpll (%esp)
; X32-NEXT: movl (%esp), %ebx
; X32-NEXT: subl $16, %esp
; X32-NEXT: movl 8(%ebp), %eax
; X32-NEXT: fildll (%eax)
; X32-NEXT: fistpll {{[0-9]+}}(%esp)
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: orl 16(%ebp), %ecx
; X32-NEXT: orl 12(%ebp), %ebx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB42_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB42_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: orl 16(%ebp), %edx
; X32-NEXT: orl 12(%ebp), %ecx
; X32-NEXT: movl %ecx, (%esp)
; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X32-NEXT: fildll (%esp)
; X32-NEXT: fistpll (%eax)
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
@ -1235,29 +1152,19 @@ define void @xor_64i(i64* %p) {
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: .cfi_offset %esi, -16
; X32-NEXT: .cfi_offset %ebx, -12
; X32-NEXT: movl 8(%ebp), %esi
; X32-NEXT: fildll (%esi)
; X32-NEXT: fistpll (%esp)
; X32-NEXT: movl (%esp), %ebx
; X32-NEXT: subl $16, %esp
; X32-NEXT: movl 8(%ebp), %eax
; X32-NEXT: fildll (%eax)
; X32-NEXT: fistpll {{[0-9]+}}(%esp)
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: xorl $2, %ebx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB51_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB51_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: xorl $2, %ecx
; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X32-NEXT: movl %ecx, (%esp)
; X32-NEXT: fildll (%esp)
; X32-NEXT: fistpll (%eax)
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
@ -1281,30 +1188,20 @@ define void @xor_64r(i64* %p, i64 %v) {
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: .cfi_offset %esi, -16
; X32-NEXT: .cfi_offset %ebx, -12
; X32-NEXT: movl 8(%ebp), %esi
; X32-NEXT: fildll (%esi)
; X32-NEXT: fistpll (%esp)
; X32-NEXT: movl (%esp), %ebx
; X32-NEXT: subl $16, %esp
; X32-NEXT: movl 8(%ebp), %eax
; X32-NEXT: fildll (%eax)
; X32-NEXT: fistpll {{[0-9]+}}(%esp)
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: xorl 16(%ebp), %ecx
; X32-NEXT: xorl 12(%ebp), %ebx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB52_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB52_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: xorl 16(%ebp), %edx
; X32-NEXT: xorl 12(%ebp), %ecx
; X32-NEXT: movl %ecx, (%esp)
; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X32-NEXT: fildll (%esp)
; X32-NEXT: fistpll (%eax)
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
@ -1438,30 +1335,20 @@ define void @inc_64(i64* %p) {
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: .cfi_offset %esi, -16
; X32-NEXT: .cfi_offset %ebx, -12
; X32-NEXT: movl 8(%ebp), %esi
; X32-NEXT: fildll (%esi)
; X32-NEXT: fistpll (%esp)
; X32-NEXT: movl (%esp), %ebx
; X32-NEXT: subl $16, %esp
; X32-NEXT: movl 8(%ebp), %eax
; X32-NEXT: fildll (%eax)
; X32-NEXT: fistpll {{[0-9]+}}(%esp)
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl $1, %ebx
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB58_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB58_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: addl $1, %ecx
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %ecx, (%esp)
; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X32-NEXT: fildll (%esp)
; X32-NEXT: fistpll (%eax)
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
@ -1586,30 +1473,20 @@ define void @dec_64(i64* %p) {
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: .cfi_offset %esi, -16
; X32-NEXT: .cfi_offset %ebx, -12
; X32-NEXT: movl 8(%ebp), %esi
; X32-NEXT: fildll (%esi)
; X32-NEXT: fistpll (%esp)
; X32-NEXT: movl (%esp), %ebx
; X32-NEXT: subl $16, %esp
; X32-NEXT: movl 8(%ebp), %eax
; X32-NEXT: fildll (%eax)
; X32-NEXT: fistpll {{[0-9]+}}(%esp)
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl $-1, %ebx
; X32-NEXT: adcl $-1, %ecx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB63_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB63_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: addl $-1, %ecx
; X32-NEXT: adcl $-1, %edx
; X32-NEXT: movl %ecx, (%esp)
; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X32-NEXT: fildll (%esp)
; X32-NEXT: fistpll (%eax)
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
@ -1719,30 +1596,20 @@ define void @not_64(i64* %p) {
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: .cfi_offset %esi, -16
; X32-NEXT: .cfi_offset %ebx, -12
; X32-NEXT: movl 8(%ebp), %esi
; X32-NEXT: fildll (%esi)
; X32-NEXT: fistpll (%esp)
; X32-NEXT: movl (%esp), %ebx
; X32-NEXT: subl $16, %esp
; X32-NEXT: movl 8(%ebp), %eax
; X32-NEXT: fildll (%eax)
; X32-NEXT: fistpll {{[0-9]+}}(%esp)
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: notl %edx
; X32-NEXT: notl %ecx
; X32-NEXT: notl %ebx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB68_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB68_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: movl %ecx, (%esp)
; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X32-NEXT: fildll (%esp)
; X32-NEXT: fistpll (%eax)
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
@ -1844,30 +1711,20 @@ define void @neg_64(i64* %p) {
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: .cfi_offset %esi, -16
; X32-NEXT: .cfi_offset %ebx, -12
; X32-NEXT: movl 8(%ebp), %esi
; X32-NEXT: fildll (%esi)
; X32-NEXT: fistpll (%esp)
; X32-NEXT: subl $16, %esp
; X32-NEXT: movl 8(%ebp), %eax
; X32-NEXT: fildll (%eax)
; X32-NEXT: fistpll {{[0-9]+}}(%esp)
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: xorl %ebx, %ebx
; X32-NEXT: subl (%esp), %ebx
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: subl {{[0-9]+}}(%esp), %edx
; X32-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB73_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB73_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: movl %edx, (%esp)
; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X32-NEXT: fildll (%esp)
; X32-NEXT: fistpll (%eax)
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl

View File

@ -77,26 +77,16 @@ define void @store_double(double* %fptr, double %v) {
;
; X86-NOSSE-LABEL: store_double:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: pushl %ebx
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT: pushl %esi
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 12
; X86-NOSSE-NEXT: .cfi_offset %esi, -12
; X86-NOSSE-NEXT: .cfi_offset %ebx, -8
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NOSSE-NEXT: subl $12, %esp
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 16
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl (%esi), %eax
; X86-NOSSE-NEXT: movl 4(%esi), %edx
; X86-NOSSE-NEXT: .p2align 4, 0x90
; X86-NOSSE-NEXT: .LBB2_1: # %atomicrmw.start
; X86-NOSSE-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NOSSE-NEXT: lock cmpxchg8b (%esi)
; X86-NOSSE-NEXT: jne .LBB2_1
; X86-NOSSE-NEXT: # %bb.2: # %atomicrmw.end
; X86-NOSSE-NEXT: popl %esi
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT: popl %ebx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %ecx, (%esp)
; X86-NOSSE-NEXT: fildll (%esp)
; X86-NOSSE-NEXT: fistpll (%eax)
; X86-NOSSE-NEXT: addl $12, %esp
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT: retl
;
@ -576,26 +566,17 @@ define void @store_double_seq_cst(double* %fptr, double %v) {
;
; X86-NOSSE-LABEL: store_double_seq_cst:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: pushl %ebx
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT: pushl %esi
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 12
; X86-NOSSE-NEXT: .cfi_offset %esi, -12
; X86-NOSSE-NEXT: .cfi_offset %ebx, -8
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NOSSE-NEXT: subl $12, %esp
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 16
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl (%esi), %eax
; X86-NOSSE-NEXT: movl 4(%esi), %edx
; X86-NOSSE-NEXT: .p2align 4, 0x90
; X86-NOSSE-NEXT: .LBB9_1: # %atomicrmw.start
; X86-NOSSE-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NOSSE-NEXT: lock cmpxchg8b (%esi)
; X86-NOSSE-NEXT: jne .LBB9_1
; X86-NOSSE-NEXT: # %bb.2: # %atomicrmw.end
; X86-NOSSE-NEXT: popl %esi
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT: popl %ebx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT: movl %ecx, (%esp)
; X86-NOSSE-NEXT: fildll (%esp)
; X86-NOSSE-NEXT: fistpll (%eax)
; X86-NOSSE-NEXT: lock orl $0, (%esp)
; X86-NOSSE-NEXT: addl $12, %esp
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT: retl
;