From 2e497d1ed4acb768ba3740cf289a988e2a7a1bc3 Mon Sep 17 00:00:00 2001
From: Evan Cheng <evan.cheng@apple.com>
Date: Fri, 8 Jan 2010 23:41:50 +0000
Subject: [PATCH] Fix a critical bug in 64-bit atomic operation lowering for
 32-bit. The results of the cmpxchg8b instructions are being thrown away when
 it branches back to the top of the checking loop. This means the loop always
 compares against the old value and this can result in a dead lock.

llvm-svn: 93028
---
 lib/Target/X86/X86ISelLowering.cpp         | 18 ++++++++------
 test/CodeGen/X86/2010-01-08-Atomic64Bug.ll | 29 ++++++++++++++++++++++
 2 files changed, 39 insertions(+), 8 deletions(-)
 create mode 100644 test/CodeGen/X86/2010-01-08-Atomic64Bug.ll
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index cf3c467ab64..f42a4d4a82a 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -7831,14 +7831,16 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
   BuildMI(newMBB, dl, TII->get(X86::PHI), dest2Oper.getReg())
     .addReg(t2).addMBB(thisMBB).addReg(t4).addMBB(newMBB);
 
-  unsigned tt1 = F->getRegInfo().createVirtualRegister(RC);
-  unsigned tt2 = F->getRegInfo().createVirtualRegister(RC);
+  // The subsequent operations should be using the destination registers of
+  //the PHI instructions.
   if (invSrc) {
-    MIB = BuildMI(newMBB, dl, TII->get(NotOpc), tt1).addReg(t1);
-    MIB = BuildMI(newMBB, dl, TII->get(NotOpc), tt2).addReg(t2);
+    t1 = F->getRegInfo().createVirtualRegister(RC);
+    t2 = F->getRegInfo().createVirtualRegister(RC);
+    MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t1).addReg(dest1Oper.getReg());
+    MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t2).addReg(dest2Oper.getReg());
   } else {
-    tt1 = t1;
-    tt2 = t2;
+    t1 = dest1Oper.getReg();
+    t2 = dest2Oper.getReg();
   }
 
   int valArgIndx = lastAddrIndx + 1;
@@ -7852,7 +7854,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
   else
     MIB = BuildMI(newMBB, dl, TII->get(immOpcL), t5);
   if (regOpcL != X86::MOV32rr)
-    MIB.addReg(tt1);
+    MIB.addReg(t1);
   (*MIB).addOperand(*argOpers[valArgIndx]);
   assert(argOpers[valArgIndx + 1]->isReg() ==
          argOpers[valArgIndx]->isReg());
@@ -7863,7 +7865,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
   else
     MIB = BuildMI(newMBB, dl, TII->get(immOpcH), t6);
   if (regOpcH != X86::MOV32rr)
-    MIB.addReg(tt2);
+    MIB.addReg(t2);
   (*MIB).addOperand(*argOpers[valArgIndx + 1]);
 
   MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EAX);
diff --git a/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll b/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll
new file mode 100644
index 00000000000..172e1c73d56
--- /dev/null
+++ b/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
+; rdar://r7512579
+
+; PHI defs in the atomic loop should be used by the add / adc
+; instructions. They should not be dead.
+
+define void @t(i64* nocapture %p) nounwind ssp {
+entry:
+; CHECK: t:
+; CHECK: movl $1
+; CHECK: movl (%ebp), %eax
+; CHECK: movl 4(%ebp), %edx
+; CHECK: LBB1_1:
+; CHECK-NOT: movl $1
+; CHECK-NOT: movl $0
+; CHECK: addl
+; CHECK: adcl
+; CHECK: lock
+; CHECK: cmpxchg8b
+; CHECK: jne
+  tail call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true)
+  %0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 1) ; <i64> [#uses=0]
+  tail call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true)
+  ret void
+}
+
+declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind
+
+declare i64 @llvm.atomic.load.add.i64.p0i64(i64* nocapture, i64) nounwind