Add sub/mul overflow intrinsics. This currently doesn't have a

target-independent way of determining overflow on multiplication. It's very tricky. Patch by Zoltan Varga! llvm-svn: 60800
2025-01-31 20:51:52 +01:00 · 2008-12-09 22:08:41 +00:00 · 2008-12-09 22:08:41 +00:00 · 4c8fb3a0cc
commit 4c8fb3a0cc
parent 23369496bc
15 changed files with 332 additions and 92 deletions
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@ -259,6 +259,12 @@ namespace ISD {
    // These nodes are generated from the llvm.[su]add.with.overflow intrinsics.
    SADDO, UADDO,

+    // Same for subtraction
+    SSUBO, USUBO,
+
+    // Same for multiplication
+    SMULO, UMULO,
+
    // Simple binary floating point operators.
    FADD, FSUB, FMUL, FDIV, FREM,

--- a/include/llvm/Intrinsics.td
+++ b/include/llvm/Intrinsics.td
@ -305,6 +305,16 @@ def int_sadd_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
 def int_uadd_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
                                       [LLVMMatchType<0>, LLVMMatchType<0>]>;

+def int_ssub_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+                                       [LLVMMatchType<0>, LLVMMatchType<0>]>;
+def int_usub_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+                                       [LLVMMatchType<0>, LLVMMatchType<0>]>;
+
+def int_smul_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+                                       [LLVMMatchType<0>, LLVMMatchType<0>]>;
+def int_umul_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+                                       [LLVMMatchType<0>, LLVMMatchType<0>]>;
+
 //===------------------------- Atomic Intrinsics --------------------------===//
 //
 def int_memory_barrier : Intrinsic<[llvm_void_ty],
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@ -1487,7 +1487,7 @@ private:
  MVT TransformToType[MVT::LAST_VALUETYPE];

  // Defines the capacity of the TargetLowering::OpActions table
-  static const int OpActionsCapacity = 218;
+  static const int OpActionsCapacity = 222;

  /// OpActions - For each operation and each value type, keep a LegalizeAction
  /// that indicates how instruction selection should deal with the operation.
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@ -4234,7 +4234,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
    break;
  }

-  case ISD::SADDO: {
+  case ISD::SADDO:
+  case ISD::SSUBO: {
    MVT VT = Node->getValueType(0);
    switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
    default: assert(0 && "This action not supported for this op yet!");
@ -4246,7 +4247,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
      SDValue LHS = LegalizeOp(Node->getOperand(0));
      SDValue RHS = LegalizeOp(Node->getOperand(1));

-      SDValue Sum = DAG.getNode(ISD::ADD, LHS.getValueType(), LHS, RHS);
+      SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ? 
+                                ISD::ADD : ISD::SUB, LHS.getValueType(),
+                                LHS, RHS);
      MVT OType = Node->getValueType(1);

      SDValue Zero = DAG.getConstant(0, LHS.getValueType());
@ -4255,16 +4258,21 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
      //   RHSSign -> RHS >= 0
      //   SumSign -> Sum >= 0
      //
+      //   Add:
      //   Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
+      //   Sub:
+      //   Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
      //
      SDValue LHSSign = DAG.getSetCC(OType, LHS, Zero, ISD::SETGE);
      SDValue RHSSign = DAG.getSetCC(OType, RHS, Zero, ISD::SETGE);
-      SDValue SignsEq = DAG.getSetCC(OType, LHSSign, RHSSign, ISD::SETEQ);
+      SDValue SignsMatch = DAG.getSetCC(OType, LHSSign, RHSSign, 
+                                        Node->getOpcode() == ISD::SADDO ? 
+                                        ISD::SETEQ : ISD::SETNE);

      SDValue SumSign = DAG.getSetCC(OType, Sum, Zero, ISD::SETGE);
      SDValue SumSignNE = DAG.getSetCC(OType, LHSSign, SumSign, ISD::SETNE);

-      SDValue Cmp = DAG.getNode(ISD::AND, OType, SignsEq, SumSignNE);
+      SDValue Cmp = DAG.getNode(ISD::AND, OType, SignsMatch, SumSignNE);

      MVT ValueVTs[] = { LHS.getValueType(), OType };
      SDValue Ops[] = { Sum, Cmp };
@ -4280,7 +4288,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {

    break;
  }
-  case ISD::UADDO: {
+  case ISD::UADDO:
+  case ISD::USUBO: {
    MVT VT = Node->getValueType(0);
    switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
    default: assert(0 && "This action not supported for this op yet!");
@ -4292,9 +4301,13 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
      SDValue LHS = LegalizeOp(Node->getOperand(0));
      SDValue RHS = LegalizeOp(Node->getOperand(1));

-      SDValue Sum = DAG.getNode(ISD::ADD, LHS.getValueType(), LHS, RHS);
+      SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::UADDO ?
+                                ISD::ADD : ISD::SUB, LHS.getValueType(),
+                                LHS, RHS);
      MVT OType = Node->getValueType(1);
-      SDValue Cmp = DAG.getSetCC(OType, Sum, LHS, ISD::SETULT);
+      SDValue Cmp = DAG.getSetCC(OType, Sum, LHS,
+                                 Node->getOpcode () == ISD::UADDO ? 
+                                 ISD::SETULT : ISD::SETUGT);

      MVT ValueVTs[] = { LHS.getValueType(), OType };
      SDValue Ops[] = { Sum, Cmp };
@ -4310,6 +4323,25 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {

    break;
  }
+  case ISD::SMULO:
+  case ISD::UMULO: {
+    MVT VT = Node->getValueType(0);
+    switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
+    default: assert(0 && "This action is not supported at all!");
+    case TargetLowering::Custom:
+      Result = TLI.LowerOperation(Op, DAG);
+      if (Result.getNode()) break;
+      // Fall Thru
+    case TargetLowering::Legal:
+      // FIXME: According to Hacker's Delight, this can be implemented in
+      // target independent lowering, but it would be inefficient, since it
+      // requires a division + a branch
+      assert(0 && "Target independent lowering is not supported for SMULO/UMULO!");	
+    break;
+    }
+    break;
+  }
+
  }
  
  assert(Result.getValueType() == Op.getValueType() &&
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@ -92,7 +92,11 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
  case ISD::UREM:        Result = PromoteIntRes_UDIV(N); break;

  case ISD::SADDO:
-  case ISD::UADDO:       Result = PromoteIntRes_XADDO(N, ResNo); break;
+  case ISD::UADDO:
+  case ISD::SSUBO:
+  case ISD::USUBO:
+  case ISD::SMULO:
+  case ISD::UMULO:       Result = PromoteIntRes_XALUO(N, ResNo); break;

  case ISD::ATOMIC_LOAD_ADD_8:
  case ISD::ATOMIC_LOAD_SUB_8:
@ -518,7 +522,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) {
  return DAG.getNode(N->getOpcode(), LHS.getValueType(), LHS, RHS);
 }

-SDValue DAGTypeLegalizer::PromoteIntRes_XADDO(SDNode *N, unsigned ResNo) {
+SDValue DAGTypeLegalizer::PromoteIntRes_XALUO(SDNode *N, unsigned ResNo) {
  assert(ResNo == 1 && "Only boolean result promotion currently supported!");

  // Simply change the return type of the boolean result.
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@ -270,7 +270,7 @@ private:
  SDValue PromoteIntRes_UDIV(SDNode *N);
  SDValue PromoteIntRes_UNDEF(SDNode *N);
  SDValue PromoteIntRes_VAARG(SDNode *N);
-  SDValue PromoteIntRes_XADDO(SDNode *N, unsigned ResNo);
+  SDValue PromoteIntRes_XALUO(SDNode *N, unsigned ResNo);

  // Integer Operand Promotion.
  bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo);
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@ -1511,6 +1511,10 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
    return;
  case ISD::SADDO:
  case ISD::UADDO:
+  case ISD::SSUBO:
+  case ISD::USUBO:
+  case ISD::SMULO:
+  case ISD::UMULO:
    if (Op.getResNo() != 1)
      return;
    // The boolean result conforms to getBooleanContents.  Fall through.
@ -1919,6 +1923,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{

  case ISD::SADDO:
  case ISD::UADDO:
+  case ISD::SSUBO:
+  case ISD::USUBO:
+  case ISD::SMULO:
+  case ISD::UMULO:
    if (Op.getResNo() != 1)
      break;
    // The boolean result conforms to getBooleanContents.  Fall through.
@ -5216,6 +5224,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
  case ISD::ADDE:        return "adde";
  case ISD::SADDO:       return "saddo";
  case ISD::UADDO:       return "uaddo";
+  case ISD::SSUBO:       return "ssubo";
+  case ISD::USUBO:       return "usubo";
+  case ISD::SMULO:       return "smulo";
+  case ISD::UMULO:       return "umulo";
  case ISD::SUBC:        return "subc";
  case ISD::SUBE:        return "sube";
  case ISD::SHL_PARTS:   return "shl_parts";
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
@ -2968,6 +2968,23 @@ SelectionDAGLowering::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) {
  return 0;
 }

+// implVisitAluOverflow - Lower an overflow instrinsics
+const char *
+SelectionDAGLowering::implVisitAluOverflow(CallInst &I, ISD::NodeType Op) {
+    SDValue Op1 = getValue(I.getOperand(1));
+    SDValue Op2 = getValue(I.getOperand(2));
+
+    MVT ValueVTs[] = { Op1.getValueType(), MVT::i1 };
+    SDValue Ops[] = { Op1, Op2 };
+
+    SDValue Result =
+      DAG.getNode(Op,
+                  DAG.getVTList(&ValueVTs[0], 2), &Ops[0], 2);
+
+    setValue(&I, Result);
+    return 0;
+  }
+
 /// visitExp - Lower an exp intrinsic. Handles the special sequences for
 /// limited-precision mode.
 void
@ -4097,21 +4114,17 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
  }

  case Intrinsic::uadd_with_overflow:
-  case Intrinsic::sadd_with_overflow: {
-    SDValue Op1 = getValue(I.getOperand(1));
-    SDValue Op2 = getValue(I.getOperand(2));
-
-    MVT ValueVTs[] = { Op1.getValueType(), MVT::i1 };
-    SDValue Ops[] = { Op1, Op2 };
-
-    SDValue Result =
-      DAG.getNode((Intrinsic == Intrinsic::sadd_with_overflow) ?
-                    ISD::SADDO : ISD::UADDO,
-                  DAG.getVTList(&ValueVTs[0], 2), &Ops[0], 2);
-
-    setValue(&I, Result);
-    return 0;
-  }
+    return implVisitAluOverflow(I, ISD::UADDO);
+  case Intrinsic::sadd_with_overflow:
+    return implVisitAluOverflow(I, ISD::SADDO);
+  case Intrinsic::usub_with_overflow:
+    return implVisitAluOverflow(I, ISD::USUBO);
+  case Intrinsic::ssub_with_overflow:
+    return implVisitAluOverflow(I, ISD::SSUBO);
+  case Intrinsic::umul_with_overflow:
+    return implVisitAluOverflow(I, ISD::UMULO);
+  case Intrinsic::smul_with_overflow:
+    return implVisitAluOverflow(I, ISD::SMULO);

  case Intrinsic::prefetch: {
    SDValue Ops[4];
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
@ -530,6 +530,7 @@ private:
  }
  
  const char *implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op);
+  const char *implVisitAluOverflow(CallInst &I, ISD::NodeType Op);
 };

 /// AddCatchInfo - Extract the personality and type infos from an eh.selector
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@ -780,11 +780,19 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

-  // Add with overflow operations are custom lowered.
+  // Add/Sub/Mul with overflow operations are custom lowered.
  setOperationAction(ISD::SADDO, MVT::i32, Custom);
  setOperationAction(ISD::SADDO, MVT::i64, Custom);
  setOperationAction(ISD::UADDO, MVT::i32, Custom);
  setOperationAction(ISD::UADDO, MVT::i64, Custom);
+  setOperationAction(ISD::SSUBO, MVT::i32, Custom);
+  setOperationAction(ISD::SSUBO, MVT::i64, Custom);
+  setOperationAction(ISD::USUBO, MVT::i32, Custom);
+  setOperationAction(ISD::USUBO, MVT::i64, Custom);
+  setOperationAction(ISD::SMULO, MVT::i32, Custom);
+  setOperationAction(ISD::SMULO, MVT::i64, Custom);
+  setOperationAction(ISD::UMULO, MVT::i32, Custom);
+  setOperationAction(ISD::UMULO, MVT::i64, Custom);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
@ -5202,8 +5210,10 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) {

  if (Cond.getOpcode() == ISD::SETCC)
    Cond = LowerSETCC(Cond, DAG);
-  else if (Cond.getOpcode() == ISD::SADDO || Cond.getOpcode() == ISD::UADDO)
-    Cond = LowerXADDO(Cond, DAG);
+  else if (Cond.getOpcode() == ISD::SADDO || Cond.getOpcode() == ISD::UADDO ||
+           Cond.getOpcode() == ISD::SSUBO || Cond.getOpcode() == ISD::USUBO ||
+           Cond.getOpcode() == ISD::SMULO || Cond.getOpcode() == ISD::UMULO)
+    Cond = LowerXALUO(Cond, DAG);

  // If condition flag is set by a X86ISD::CMP, then use it as the condition
  // setting operand in place of the X86ISD::SETCC.
@ -6118,23 +6128,52 @@ SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
  return Op;
 }

-SDValue X86TargetLowering::LowerXADDO(SDValue Op, SelectionDAG &DAG) {
-  // Lower the "add with overflow" instruction into a regular "add" plus a
-  // "setcc" instruction that checks the overflow flag. The "brcond" lowering
+SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) {
+  // Lower the "add/sub/mul with overflow" instruction into a regular ins plus
+  // a "setcc" instruction that checks the overflow flag. The "brcond" lowering
  // looks for this combo and may remove the "setcc" instruction if the "setcc"
  // has only one use.
  SDNode *N = Op.getNode();
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
+  unsigned BaseOp = 0;
+  unsigned Cond = 0;
+
+  switch (Op.getOpcode()) {
+  default: assert(0 && "Unknown ovf instruction!");
+  case ISD::SADDO:
+    BaseOp = ISD::ADD;
+    Cond = X86::COND_O;
+    break;
+  case ISD::UADDO:
+    BaseOp = ISD::ADD;
+    Cond = X86::COND_C;
+    break;
+  case ISD::SSUBO:
+    BaseOp = ISD::SUB;
+    Cond = X86::COND_O;
+    break;
+  case ISD::USUBO:
+    BaseOp = ISD::SUB;
+    Cond = X86::COND_C;
+    break;
+  case ISD::SMULO:
+    BaseOp = ISD::MUL;
+    Cond = X86::COND_O;
+    break;
+  case ISD::UMULO:
+    BaseOp = ISD::MUL;
+    Cond = X86::COND_C;
+    break;
+  }

  // Also sets EFLAGS.
  SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
-  SDValue Sum = DAG.getNode(ISD::ADD, VTs, LHS, RHS);
+  SDValue Sum = DAG.getNode(BaseOp, VTs, LHS, RHS);

  SDValue SetCC =
    DAG.getNode(X86ISD::SETCC, N->getValueType(1),
-                DAG.getConstant((Op.getOpcode() == ISD::SADDO) ?
-                                  X86::COND_O : X86::COND_C,
+                DAG.getConstant(Cond,
                                MVT::i32), SDValue(Sum.getNode(), 1));

  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC);
@ -6259,8 +6298,12 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
  case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
  case ISD::CTLZ:               return LowerCTLZ(Op, DAG);
  case ISD::CTTZ:               return LowerCTTZ(Op, DAG);
-  case ISD::SADDO:              return LowerXADDO(Op, DAG);
-  case ISD::UADDO:              return LowerXADDO(Op, DAG);
+  case ISD::SADDO:
+  case ISD::UADDO:
+  case ISD::SSUBO:
+  case ISD::USUBO:
+  case ISD::SMULO:
+  case ISD::UMULO:              return LowerXALUO(Op, DAG);
  case ISD::READCYCLECOUNTER:   return LowerREADCYCLECOUNTER(Op, DAG);
  }
 }
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@ -593,7 +593,7 @@ namespace llvm {
    SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG);
    SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG);
    SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG);
-    SDValue LowerXADDO(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG);

    SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG);
    SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG);
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@ -379,29 +379,36 @@ def ADC64mi8 : RIi8<0x83, MRM2m, (outs), (ins i64mem:$dst, i64i8imm :$src2),
 let isTwoAddress = 1 in {
 def SUB64rr  : RI<0x29, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
                  "sub{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, (sub GR64:$src1, GR64:$src2))]>;
+                  [(set GR64:$dst, (sub GR64:$src1, GR64:$src2)),
+                   (implicit EFLAGS)]>;

 def SUB64rm  : RI<0x2B, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
                  "sub{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, (sub GR64:$src1, (load addr:$src2)))]>;
+                  [(set GR64:$dst, (sub GR64:$src1, (load addr:$src2))),
+                   (implicit EFLAGS)]>;

 def SUB64ri32 : RIi32<0x81, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
                      "sub{q}\t{$src2, $dst|$dst, $src2}",
-                      [(set GR64:$dst, (sub GR64:$src1, i64immSExt32:$src2))]>;
+                      [(set GR64:$dst, (sub GR64:$src1, i64immSExt32:$src2)),
+                   (implicit EFLAGS)]>;
 def SUB64ri8 : RIi8<0x83, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
                    "sub{q}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR64:$dst, (sub GR64:$src1, i64immSExt8:$src2))]>;
+                    [(set GR64:$dst, (sub GR64:$src1, i64immSExt8:$src2)),
+                   (implicit EFLAGS)]>;
 } // isTwoAddress

 def SUB64mr  : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), 
                  "sub{q}\t{$src2, $dst|$dst, $src2}",
-                  [(store (sub (load addr:$dst), GR64:$src2), addr:$dst)]>;
+                  [(store (sub (load addr:$dst), GR64:$src2), addr:$dst),
+                   (implicit EFLAGS)]>;
 def SUB64mi32 : RIi32<0x81, MRM5m, (outs), (ins i64mem:$dst, i64i32imm:$src2), 
                      "sub{q}\t{$src2, $dst|$dst, $src2}",
-               [(store (sub (load addr:$dst), i64immSExt32:$src2), addr:$dst)]>;
+               [(store (sub (load addr:$dst), i64immSExt32:$src2), addr:$dst),
+                   (implicit EFLAGS)]>;
 def SUB64mi8 : RIi8<0x83, MRM5m, (outs), (ins i64mem:$dst, i64i8imm :$src2), 
                    "sub{q}\t{$src2, $dst|$dst, $src2}",
-                [(store (sub (load addr:$dst), i64immSExt8:$src2), addr:$dst)]>;
+                [(store (sub (load addr:$dst), i64immSExt8:$src2), addr:$dst),
+                   (implicit EFLAGS)]>;

 let Uses = [EFLAGS] in {
 let isTwoAddress = 1 in {
@ -454,30 +461,36 @@ let isTwoAddress = 1 in {
 let isCommutable = 1 in
 def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
                  "imul{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, (mul GR64:$src1, GR64:$src2))]>, TB;
+                  [(set GR64:$dst, (mul GR64:$src1, GR64:$src2)),
+                   (implicit EFLAGS)]>, TB;

 def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
                  "imul{q}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR64:$dst, (mul GR64:$src1, (load addr:$src2)))]>, TB;
+                 [(set GR64:$dst, (mul GR64:$src1, (load addr:$src2))),
+                  (implicit EFLAGS)]>, TB;
 } // isTwoAddress

 // Suprisingly enough, these are not two address instructions!
 def IMUL64rri32 : RIi32<0x69, MRMSrcReg,                    // GR64 = GR64*I32
                        (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
                        "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                       [(set GR64:$dst, (mul GR64:$src1, i64immSExt32:$src2))]>;
+                       [(set GR64:$dst, (mul GR64:$src1, i64immSExt32:$src2)),
+                        (implicit EFLAGS)]>;
 def IMUL64rri8 : RIi8<0x6B, MRMSrcReg,                      // GR64 = GR64*I8
                      (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
                      "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                      [(set GR64:$dst, (mul GR64:$src1, i64immSExt8:$src2))]>;
+                      [(set GR64:$dst, (mul GR64:$src1, i64immSExt8:$src2)),
+                       (implicit EFLAGS)]>;
 def IMUL64rmi32 : RIi32<0x69, MRMSrcMem,                   // GR64 = [mem64]*I32
                        (outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2),
                        "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                [(set GR64:$dst, (mul (load addr:$src1), i64immSExt32:$src2))]>;
+                [(set GR64:$dst, (mul (load addr:$src1), i64immSExt32:$src2)),
+                 (implicit EFLAGS)]>;
 def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem,                      // GR64 = [mem64]*I8
                      (outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2),
                      "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                 [(set GR64:$dst, (mul (load addr:$src1), i64immSExt8:$src2))]>;
+                 [(set GR64:$dst, (mul (load addr:$src1), i64immSExt8:$src2)),
+                  (implicit EFLAGS)]>;
 } // Defs = [EFLAGS]

 // Unsigned division / remainder
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@ -709,10 +709,10 @@ def MUL8r  : I<0xF6, MRM4r, (outs),  (ins GR8:$src), "mul{b}\t$src",
               // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
               // This probably ought to be moved to a def : Pat<> if the
               // syntax can be accepted.
-               [(set AL, (mul AL, GR8:$src))]>;               // AL,AH = AL*GR8
+               [(set AL, (mul AL, GR8:$src))]>;     // AL,AH = AL*GR8
 let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in
-def MUL16r : I<0xF7, MRM4r, (outs),  (ins GR16:$src), "mul{w}\t$src", []>,
-             OpSize;    // AX,DX = AX*GR16
+def MUL16r : I<0xF7, MRM4r, (outs),  (ins GR16:$src), "mul{w}\t$src", 
+             []>, OpSize;    // AX,DX = AX*GR16
 let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in
 def MUL32r : I<0xF7, MRM4r, (outs),  (ins GR32:$src), "mul{l}\t$src", []>;
                       // EAX,EDX = EAX*GR32
@ -2054,67 +2054,82 @@ let isTwoAddress = 0 in {

 def SUB8rr   : I<0x28, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2),
                 "sub{b}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR8:$dst, (sub GR8:$src1, GR8:$src2))]>;
+                 [(set GR8:$dst, (sub GR8:$src1, GR8:$src2)),
+                  (implicit EFLAGS)]>;
 def SUB16rr  : I<0x29, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
                 "sub{w}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR16:$dst, (sub GR16:$src1, GR16:$src2))]>, OpSize;
+                 [(set GR16:$dst, (sub GR16:$src1, GR16:$src2)),
+                  (implicit EFLAGS)]>, OpSize;
 def SUB32rr  : I<0x29, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
                 "sub{l}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR32:$dst, (sub GR32:$src1, GR32:$src2))]>;
+                 [(set GR32:$dst, (sub GR32:$src1, GR32:$src2)),
+                  (implicit EFLAGS)]>;
 def SUB8rm   : I<0x2A, MRMSrcMem, (outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2),
                 "sub{b}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR8:$dst, (sub GR8:$src1, (load addr:$src2)))]>;
+                 [(set GR8:$dst, (sub GR8:$src1, (load addr:$src2))),
+                  (implicit EFLAGS)]>;
 def SUB16rm  : I<0x2B, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
                 "sub{w}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR16:$dst, (sub GR16:$src1, (load addr:$src2)))]>, OpSize;
+                 [(set GR16:$dst, (sub GR16:$src1, (load addr:$src2))),
+                  (implicit EFLAGS)]>, OpSize;
 def SUB32rm  : I<0x2B, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
                 "sub{l}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR32:$dst, (sub GR32:$src1, (load addr:$src2)))]>;
+                 [(set GR32:$dst, (sub GR32:$src1, (load addr:$src2))),
+                  (implicit EFLAGS)]>;

 def SUB8ri   : Ii8 <0x80, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
                    "sub{b}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR8:$dst, (sub GR8:$src1, imm:$src2))]>;
+                    [(set GR8:$dst, (sub GR8:$src1, imm:$src2)),
+                    (implicit EFLAGS)]>;
 def SUB16ri  : Ii16<0x81, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
                    "sub{w}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR16:$dst, (sub GR16:$src1, imm:$src2))]>, OpSize;
+                    [(set GR16:$dst, (sub GR16:$src1, imm:$src2)),
+                    (implicit EFLAGS)]>, OpSize;
 def SUB32ri  : Ii32<0x81, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
                    "sub{l}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR32:$dst, (sub GR32:$src1, imm:$src2))]>;
+                    [(set GR32:$dst, (sub GR32:$src1, imm:$src2)),
+                    (implicit EFLAGS)]>;
 def SUB16ri8 : Ii8<0x83, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
                   "sub{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, (sub GR16:$src1, i16immSExt8:$src2))]>,
-               OpSize;
+                   [(set GR16:$dst, (sub GR16:$src1, i16immSExt8:$src2)),
+                   (implicit EFLAGS)]>, OpSize;
 def SUB32ri8 : Ii8<0x83, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
                   "sub{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, (sub GR32:$src1, i32immSExt8:$src2))]>;
+                   [(set GR32:$dst, (sub GR32:$src1, i32immSExt8:$src2)),
+                   (implicit EFLAGS)]>;
 let isTwoAddress = 0 in {
  def SUB8mr   : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2),
                   "sub{b}\t{$src2, $dst|$dst, $src2}",
-                   [(store (sub (load addr:$dst), GR8:$src2), addr:$dst)]>;
+                   [(store (sub (load addr:$dst), GR8:$src2), addr:$dst),
+                   (implicit EFLAGS)]>;
  def SUB16mr  : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
                   "sub{w}\t{$src2, $dst|$dst, $src2}",
-                   [(store (sub (load addr:$dst), GR16:$src2), addr:$dst)]>,
-                 OpSize;
+                   [(store (sub (load addr:$dst), GR16:$src2), addr:$dst),
+                   (implicit EFLAGS)]>, OpSize;
  def SUB32mr  : I<0x29, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), 
                   "sub{l}\t{$src2, $dst|$dst, $src2}",
-                   [(store (sub (load addr:$dst), GR32:$src2), addr:$dst)]>;
+                   [(store (sub (load addr:$dst), GR32:$src2), addr:$dst),
+                   (implicit EFLAGS)]>;
  def SUB8mi   : Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2), 
                     "sub{b}\t{$src2, $dst|$dst, $src2}",
-                   [(store (sub (loadi8 addr:$dst), imm:$src2), addr:$dst)]>;
+                   [(store (sub (loadi8 addr:$dst), imm:$src2), addr:$dst),
+                   (implicit EFLAGS)]>;
  def SUB16mi  : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2), 
                      "sub{w}\t{$src2, $dst|$dst, $src2}",
-                  [(store (sub (loadi16 addr:$dst), imm:$src2), addr:$dst)]>,
-                 OpSize;
+                  [(store (sub (loadi16 addr:$dst), imm:$src2), addr:$dst),
+                  (implicit EFLAGS)]>, OpSize;
  def SUB32mi  : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2), 
                      "sub{l}\t{$src2, $dst|$dst, $src2}",
-                  [(store (sub (loadi32 addr:$dst), imm:$src2), addr:$dst)]>;
+                  [(store (sub (loadi32 addr:$dst), imm:$src2), addr:$dst),
+                  (implicit EFLAGS)]>;
  def SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2), 
                     "sub{w}\t{$src2, $dst|$dst, $src2}",
-                [(store (sub (load addr:$dst), i16immSExt8:$src2), addr:$dst)]>,
-                 OpSize;
+                [(store (sub (load addr:$dst), i16immSExt8:$src2), addr:$dst),
+                  (implicit EFLAGS)]>, OpSize;
  def SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2), 
                     "sub{l}\t{$src2, $dst|$dst, $src2}",
-                [(store (sub (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>;
+                [(store (sub (load addr:$dst), i32immSExt8:$src2), addr:$dst),
+                (implicit EFLAGS)]>;
 }

 let Uses = [EFLAGS] in {
@ -2152,18 +2167,22 @@ let Defs = [EFLAGS] in {
 let isCommutable = 1 in {  // X = IMUL Y, Z --> X = IMUL Z, Y
 def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
                 "imul{w}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR16:$dst, (mul GR16:$src1, GR16:$src2))]>, TB, OpSize;
+                 [(set GR16:$dst, (mul GR16:$src1, GR16:$src2)),
+                 (implicit EFLAGS)]>, TB, OpSize;
 def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
                 "imul{l}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR32:$dst, (mul GR32:$src1, GR32:$src2))]>, TB;
+                 [(set GR32:$dst, (mul GR32:$src1, GR32:$src2)),
+                 (implicit EFLAGS)]>, TB;
 }
 def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
                 "imul{w}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR16:$dst, (mul GR16:$src1, (load addr:$src2)))]>,
+                 [(set GR16:$dst, (mul GR16:$src1, (load addr:$src2))),
+                 (implicit EFLAGS)]>,
                 TB, OpSize;
 def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
                 "imul{l}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR32:$dst, (mul GR32:$src1, (load addr:$src2)))]>, TB;
+                 [(set GR32:$dst, (mul GR32:$src1, (load addr:$src2))),
+                 (implicit EFLAGS)]>, TB;
 } // Defs = [EFLAGS]
 } // end Two Address instructions

@ -2172,39 +2191,44 @@ let Defs = [EFLAGS] in {
 def IMUL16rri  : Ii16<0x69, MRMSrcReg,                      // GR16 = GR16*I16
                      (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
                      "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                      [(set GR16:$dst, (mul GR16:$src1, imm:$src2))]>, OpSize;
+                      [(set GR16:$dst, (mul GR16:$src1, imm:$src2)),
+                      (implicit EFLAGS)]>, OpSize;
 def IMUL32rri  : Ii32<0x69, MRMSrcReg,                      // GR32 = GR32*I32
                      (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
                      "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                      [(set GR32:$dst, (mul GR32:$src1, imm:$src2))]>;
+                      [(set GR32:$dst, (mul GR32:$src1, imm:$src2)),
+                      (implicit EFLAGS)]>;
 def IMUL16rri8 : Ii8<0x6B, MRMSrcReg,                       // GR16 = GR16*I8
                     (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
                     "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                     [(set GR16:$dst, (mul GR16:$src1, i16immSExt8:$src2))]>,
-                     OpSize;
+                     [(set GR16:$dst, (mul GR16:$src1, i16immSExt8:$src2)),
+                     (implicit EFLAGS)]>, OpSize;
 def IMUL32rri8 : Ii8<0x6B, MRMSrcReg,                       // GR32 = GR32*I8
                     (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
                     "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                     [(set GR32:$dst, (mul GR32:$src1, i32immSExt8:$src2))]>;
+                     [(set GR32:$dst, (mul GR32:$src1, i32immSExt8:$src2)),
+                     (implicit EFLAGS)]>;

 def IMUL16rmi  : Ii16<0x69, MRMSrcMem,                      // GR16 = [mem16]*I16
                      (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
                      "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                      [(set GR16:$dst, (mul (load addr:$src1), imm:$src2))]>,
-                      OpSize;
+                      [(set GR16:$dst, (mul (load addr:$src1), imm:$src2)),
+                      (implicit EFLAGS)]>, OpSize;
 def IMUL32rmi  : Ii32<0x69, MRMSrcMem,                      // GR32 = [mem32]*I32
                      (outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2),
                      "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                      [(set GR32:$dst, (mul (load addr:$src1), imm:$src2))]>;
+                      [(set GR32:$dst, (mul (load addr:$src1), imm:$src2)),
+                      (implicit EFLAGS)]>;
 def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem,                       // GR16 = [mem16]*I8
                     (outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2),
                     "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                  [(set GR16:$dst, (mul (load addr:$src1), i16immSExt8:$src2))]>,
-                     OpSize;
+                  [(set GR16:$dst, (mul (load addr:$src1), i16immSExt8:$src2)),
+                  (implicit EFLAGS)]>, OpSize;
 def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem,                       // GR32 = [mem32]*I8
                     (outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2),
                     "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                  [(set GR32:$dst, (mul (load addr:$src1), i32immSExt8:$src2))]>;
+                  [(set GR32:$dst, (mul (load addr:$src1), i32immSExt8:$src2)),
+                  (implicit EFLAGS)]>;
 } // Defs = [EFLAGS]

 //===----------------------------------------------------------------------===//
--- a/test/CodeGen/X86/mul-with-overflow.ll
+++ b/test/CodeGen/X86/mul-with-overflow.ll
@ -0,0 +1,41 @@
+; RUN: llvm-as < %s | llc -march=x86 | grep {jo} | count 1
+; RUN: llvm-as < %s | llc -march=x86 | grep {jc} | count 1
+
+@ok = internal constant [4 x i8] c"%d\0A\00"
+@no = internal constant [4 x i8] c"no\0A\00"
+
+define i1 @func1(i32 %v1, i32 %v2) nounwind {
+entry:
+  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
+  %sum = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  br i1 %obit, label %overflow, label %normal
+
+normal:
+  %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
+  ret i1 true
+
+overflow:
+  %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
+  ret i1 false
+}
+
+define i1 @func2(i32 %v1, i32 %v2) nounwind {
+entry:
+  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
+  %sum = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  br i1 %obit, label %carry, label %normal
+
+normal:
+  %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
+  ret i1 true
+
+carry:
+  %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
+  ret i1 false
+}
+
+declare i32 @printf(i8*, ...) nounwind
+declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32)
+declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32)
--- a/test/CodeGen/X86/sub-with-overflow.ll
+++ b/test/CodeGen/X86/sub-with-overflow.ll
@ -0,0 +1,41 @@
+; RUN: llvm-as < %s | llc -march=x86 | grep {jo} | count 1
+; RUN: llvm-as < %s | llc -march=x86 | grep {jc} | count 1
+
+@ok = internal constant [4 x i8] c"%d\0A\00"
+@no = internal constant [4 x i8] c"no\0A\00"
+
+define i1 @func1(i32 %v1, i32 %v2) nounwind {
+entry:
+  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
+  %sum = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  br i1 %obit, label %overflow, label %normal
+
+normal:
+  %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
+  ret i1 true
+
+overflow:
+  %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
+  ret i1 false
+}
+
+define i1 @func2(i32 %v1, i32 %v2) nounwind {
+entry:
+  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
+  %sum = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  br i1 %obit, label %carry, label %normal
+
+normal:
+  %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
+  ret i1 true
+
+carry:
+  %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
+  ret i1 false
+}
+
+declare i32 @printf(i8*, ...) nounwind
+declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32)
+declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32)