Revert r265450 "[X86] Reuse EFLAGS and form LOCKed ops when only user is SETCC."

It caused ASan 32-bit tests to hang (PR27245). llvm-svn: 265559
2024-11-24 03:33:20 +01:00 · 2016-04-06 16:44:38 +00:00 · 2016-04-06 16:44:38 +00:00 · f357d442d6
commit f357d442d6
parent f8a916174c
2 changed files with 30 additions and 81 deletions
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@ -26135,56 +26135,6 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
  return SDValue();
 }

-/// Combine:
-///   (brcond/cmov/setcc .., (cmp (atomic_load_op ..), 0), cc)
-/// to:
-///   (brcond/cmov/setcc .., (LOCKed op ..), cc)
-/// i.e., reusing the EFLAGS produced by the LOCKed instruction.
-/// Note that this is only legal for some op/cc combinations.
-static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode CC,
-                                       SelectionDAG &DAG) {
-  // This combine only operates on CMP-like nodes.
-  if (!(Cmp.getOpcode() == X86ISD::CMP ||
-        (Cmp.getOpcode() == X86ISD::SUB && !Cmp->hasAnyUseOfValue(0))))
-    return SDValue();
-
-  SDValue LHS = Cmp.getOperand(0);
-  SDValue RHS = Cmp.getOperand(1);
-
-  if (!LHS.hasOneUse())
-    return SDValue();
-
-  // FIXME: We can do this for XOR/OR/AND as well, but only if they survive
-  // AtomicExpand. Currently, we choose to expand them to cmpxchg if they
-  // have any users. Could we relax that to ignore (icmp x,0) users?
-  switch (LHS->getOpcode()) {
-  case ISD::ATOMIC_LOAD_ADD:
-  case ISD::ATOMIC_LOAD_SUB:
-    break;
-  default:
-    return SDValue();
-  }
-
-  auto *C = dyn_cast<ConstantSDNode>(RHS);
-  if (!C || C->getZExtValue() != 0)
-    return SDValue();
-
-  // Don't do this for all condition codes, as OF/CF are cleared by (CMP x,0)
-  // but might be set by arithmetic. Furthermore, we might later select INC/DEC,
-  // which don't modify CF (though CCs using CF should have been optimized out).
-  // SF/ZF are safe as they are set the same way.
-  // Note that in theory, the transformation is also valid for P/NP.
-  if (CC != X86::COND_E && CC != X86::COND_NE && CC != X86::COND_S &&
-      CC != X86::COND_NS)
-    return SDValue();
-
-  SDValue LockOp = lowerAtomicArithWithLOCK(LHS, DAG);
-  DAG.ReplaceAllUsesOfValueWith(LHS.getValue(0),
-                                DAG.getUNDEF(LHS.getValueType()));
-  DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LockOp.getValue(1));
-  return LockOp;
-}
-
 // Check whether a boolean test is testing a boolean value generated by
 // X86ISD::SETCC. If so, return the operand of that SETCC and proper condition
 // code.
@ -26356,16 +26306,6 @@ static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0,
  return true;
 }

-/// Optimize an EFLAGS definition used according to the condition code \p CC
-/// into a simpler EFLAGS value, potentially returning a new \p CC and replacing
-/// uses of chain values.
-static SDValue combineSetCCEFLAGS(SDValue EFLAGS, X86::CondCode &CC,
-                                  SelectionDAG &DAG) {
-  if (SDValue R = checkBoolTestSetCCCombine(EFLAGS, CC))
-    return R;
-  return combineSetCCAtomicArith(EFLAGS, CC, DAG);
-}
-
 /// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
 static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
                           TargetLowering::DAGCombinerInfo &DCI,
@ -26392,14 +26332,15 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
    }
  }

-  // Try to simplify the EFLAGS and condition code operands.
-  // We can't always do this as FCMOV only supports a subset of X86 cond.
-  if (SDValue Flags = combineSetCCEFLAGS(Cond, CC, DAG)) {
-    if (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC)) {
-      SDValue Ops[] = {FalseOp, TrueOp, DAG.getConstant(CC, DL, MVT::i8),
-        Flags};
-      return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops);
-    }
+  SDValue Flags;
+
+  Flags = checkBoolTestSetCCCombine(Cond, CC);
+  if (Flags.getNode() &&
+      // Extra check as FCMOV only supports a subset of X86 cond.
+      (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC))) {
+    SDValue Ops[] = { FalseOp, TrueOp,
+                      DAG.getConstant(CC, DL, MVT::i8), Flags };
+    return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops);
  }

  // If this is a select between two integer constants, try to do some
@ -29325,8 +29266,7 @@ static SDValue combineX86SetCC(SDNode *N, SelectionDAG &DAG,
  if (CC == X86::COND_B)
    return MaterializeSETB(DL, EFLAGS, DAG, N->getSimpleValueType(0));

-  // Try to simplify the EFLAGS and condition code operands.
-  if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG)) {
+  if (SDValue Flags = checkBoolTestSetCCCombine(EFLAGS, CC)) {
    SDValue Cond = DAG.getConstant(CC, DL, MVT::i8);
    return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags);
  }
@ -29339,16 +29279,15 @@ static SDValue combineBrCond(SDNode *N, SelectionDAG &DAG,
                             TargetLowering::DAGCombinerInfo &DCI,
                             const X86Subtarget &Subtarget) {
  SDLoc DL(N);
+  SDValue Chain = N->getOperand(0);
+  SDValue Dest = N->getOperand(1);
  SDValue EFLAGS = N->getOperand(3);
  X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(2));

-  // Try to simplify the EFLAGS and condition code operands.
-  // Make sure to not keep references to operands, as combineSetCCEFLAGS can
-  // RAUW them under us.
-  if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG)) {
+  if (SDValue Flags = checkBoolTestSetCCCombine(EFLAGS, CC)) {
    SDValue Cond = DAG.getConstant(CC, DL, MVT::i8);
-    return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), N->getOperand(0),
-                       N->getOperand(1), Cond, Flags);
+    return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond,
+                       Flags);
  }

  return SDValue();
--- a/test/CodeGen/X86/atomic-eflags-reuse.ll
+++ b/test/CodeGen/X86/atomic-eflags-reuse.ll
@ -4,7 +4,9 @@
 define i8 @test_add_1_setcc_ne(i64* %p) #0 {
 ; CHECK-LABEL: test_add_1_setcc_ne:
 ; CHECK:       # BB#0: # %entry
-; CHECK-NEXT:    lock incq (%rdi)
+; CHECK-NEXT:    movl $1, %eax
+; CHECK-NEXT:    lock xaddq %rax, (%rdi)
+; CHECK-NEXT:    testq %rax, %rax
 ; CHECK-NEXT:    setne %al
 ; CHECK-NEXT:    retq
 entry:
@ -17,7 +19,9 @@ entry:
 define i8 @test_sub_1_setcc_eq(i64* %p) #0 {
 ; CHECK-LABEL: test_sub_1_setcc_eq:
 ; CHECK:       # BB#0: # %entry
-; CHECK-NEXT:    lock decq (%rdi)
+; CHECK-NEXT:    movq $-1, %rax
+; CHECK-NEXT:    lock xaddq %rax, (%rdi)
+; CHECK-NEXT:    testq %rax, %rax
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
 entry:
@ -45,7 +49,9 @@ entry:
 define i8 @test_sub_10_setcc_sge(i64* %p) #0 {
 ; CHECK-LABEL: test_sub_10_setcc_sge:
 ; CHECK:       # BB#0: # %entry
-; CHECK-NEXT:    lock addq $-10, (%rdi)
+; CHECK-NEXT:    movq $-10, %rax
+; CHECK-NEXT:    lock xaddq %rax, (%rdi)
+; CHECK-NEXT:    testq %rax, %rax
 ; CHECK-NEXT:    setns %al
 ; CHECK-NEXT:    retq
 entry:
@ -60,7 +66,9 @@ entry:
 define i32 @test_add_10_brcond_sge(i64* %p, i32 %a0, i32 %a1) #0 {
 ; CHECK-LABEL: test_add_10_brcond_sge:
 ; CHECK:       # BB#0: # %entry
-; CHECK-NEXT:    lock addq $10, (%rdi)
+; CHECK-NEXT:    movl $10, %eax
+; CHECK-NEXT:    lock xaddq %rax, (%rdi)
+; CHECK-NEXT:    testq %rax, %rax
 ; CHECK-NEXT:    js .LBB4_2
 ; CHECK-NEXT:  # BB#1: # %t
 ; CHECK-NEXT:    movl %esi, %eax
@ -81,7 +89,9 @@ f:
 define i32 @test_sub_1_cmov_slt(i64* %p, i32 %a0, i32 %a1) #0 {
 ; CHECK-LABEL: test_sub_1_cmov_slt:
 ; CHECK:       # BB#0: # %entry
-; CHECK-NEXT:    lock decq (%rdi)
+; CHECK-NEXT:    movq $-1, %rax
+; CHECK-NEXT:    lock xaddq %rax, (%rdi)
+; CHECK-NEXT:    testq %rax, %rax
 ; CHECK-NEXT:    cmovnsl %edx, %esi
 ; CHECK-NEXT:    movl %esi, %eax
 ; CHECK-NEXT:    retq