When adding the carry bit to another value on X86, exploit the fact that the carry-materialization

(sbbl x, x) sets the register to 0 or ~0. Combined with two's complement arithmetic, we can fold the intermediate AND and the ADD into a single SUB, since z + (c & 1) == z - c whenever c is 0 or ~0. This fixes <rdar://problem/8449754>. llvm-svn: 114460
2024-11-25 04:02:41 +01:00 · 2010-09-21 18:41:19 +00:00 · 2010-09-21 18:41:19 +00:00 · 97a8fdc19c
commit 97a8fdc19c
parent d09d4a9693
2 changed files with 37 additions and 0 deletions
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@ -1021,6 +1021,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
  setTargetDAGCombine(ISD::OR);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
+  setTargetDAGCombine(ISD::ADD);
  if (Subtarget->is64Bit())
    setTargetDAGCombine(ISD::MUL);

@ -10452,6 +10453,27 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
  return SDValue();
 }

+/// PerformAddCombine - Fold (add z, (and SETCC_CARRY, 1)) into (sub z, SETCC_CARRY); yields an empty SDValue when the pattern is absent.
+static SDValue PerformAddCombine(SDNode *N, SelectionDAG &DAG,
+                                 TargetLowering::DAGCombinerInfo &DCI) {
+  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+    return SDValue();  // Bail out before legalization and when invoked by the legalizer.
+  // Only operand 1 of the ADD is inspected for the (and ..., 1) pattern; operand 0 is passed through as-is.
+  EVT VT = N->getValueType(0);
+  SDValue Op1 = N->getOperand(1);
+  if (Op1->getOpcode() == ISD::AND) {
+    SDValue AndOp0 = Op1->getOperand(0);
+    ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(Op1->getOperand(1)); 
+    // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x)); sbbl x, x yields 0 or ~0, so z + (c & 1) == z - c.
+    if (AndOp0->getOpcode() == X86ISD::SETCC_CARRY &&
+        AndOp1 && AndOp1->getZExtValue() == 1) {  // AndOp1 is null when the mask is not a constant.
+      DebugLoc DL = N->getDebugLoc();
+      return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
+    }
+  }
+  // No match: return an empty SDValue (presumably read by the combiner as "no change" -- confirm against caller).
+  return SDValue();
+}

 /// PerformMulCombine - Optimize a single multiply with constant into two
 /// in order to implement it with two cheaper instructions, e.g.
@ -10938,6 +10960,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
                        return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
  case ISD::SELECT:         return PerformSELECTCombine(N, DAG, Subtarget);
  case X86ISD::CMOV:        return PerformCMOVCombine(N, DAG, DCI);
+  case ISD::ADD:            return PerformAddCombine(N, DAG, DCI);
  case ISD::MUL:            return PerformMulCombine(N, DAG, DCI);
  case ISD::SHL:
  case ISD::SRA:
--- a/test/CodeGen/X86/add-of-carry.ll
+++ b/test/CodeGen/X86/add-of-carry.ll
@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+; <rdar://problem/8449754>
+
+define i32 @add32carry(i32 %sum, i32 %x) nounwind readnone ssp { ; returns (x + sum) plus the unsigned carry out of that addition
+entry: ; the FileCheck directives below expect an sbbl, then a subl, with no addl in between
+; CHECK:	sbbl	%ecx, %ecx
+; CHECK-NOT: addl
+; CHECK: subl	%ecx, %eax
+  %add4 = add i32 %x, %sum ; wrapping 32-bit sum of the two arguments
+  %cmp = icmp ult i32 %add4, %x ; true iff the sum wrapped around, i.e. the add produced an unsigned carry
+  %inc = zext i1 %cmp to i32 ; carry widened to an i32 holding 0 or 1
+  %z.0 = add i32 %add4, %inc ; add the carry back in; this is the ADD the new combine rewrites
+  ret i32 %z.0
+}