[SelectionDAG] Expand ADD/SUBCARRY

This patch allows for expansion of ADDCARRY and SUBCARRY when the target does not support them. Differential Revision: https://reviews.llvm.org/D61411 llvm-svn: 360303
2024-11-26 04:32:44 +01:00 · 2019-05-09 01:17:48 +00:00 · 2019-05-09 01:17:48 +00:00 · 60b495cadb
commit 60b495cadb
parent bcdb0ac11d
2 changed files with 86 additions and 0 deletions
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@ -3273,6 +3273,48 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
  case ISD::UMULFIX:
    Results.push_back(TLI.expandFixedPointMul(Node, DAG));
    break;
+  case ISD::ADDCARRY:
+  case ISD::SUBCARRY: { // Expand using ADD/SUB, SETCC, AND and OR nodes.
+    SDValue LHS = Node->getOperand(0);
+    SDValue RHS = Node->getOperand(1);
+    SDValue Carry = Node->getOperand(2); // Incoming carry (borrow for SUB).
+
+    bool IsAdd = Node->getOpcode() == ISD::ADDCARRY;
+
+    // Step 1: combine the two operands, ignoring the incoming carry for now.
+    unsigned Op = IsAdd ? ISD::ADD : ISD::SUB;
+    EVT VT = LHS.getValueType();
+    SDValue Sum = DAG.getNode(Op, dl, VT, LHS, RHS);
+
+    // First overflow check: ADD wrapped iff Sum <u LHS, SUB iff Sum >u LHS.
+    EVT CarryType = Node->getValueType(1);
+    EVT SetCCType = getSetCCResultType(Node->getValueType(0));
+    ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
+    SDValue Overflow = DAG.getSetCC(dl, SetCCType, Sum, LHS, CC);
+
+    // Apply the carry, reduced to its low bit in VT, with the same Op as above.
+    SDValue CarryExt =
+        DAG.getZeroExtendInReg(DAG.getZExtOrTrunc(Carry, dl, VT), dl, MVT::i1);
+    SDValue Sum2 = DAG.getNode(Op, dl, VT, Sum, CarryExt);
+
+    // Second check for overflow. If we are adding, we can only overflow if the
+    // initial sum is all 1s and the carry is set, resulting in a new sum of 0.
+    // If we are subtracting, we can only overflow if the initial sum is 0 and
+    // the carry is set, resulting in a new sum of all 1s.
+    SDValue Zero = DAG.getConstant(0, dl, VT);
+    SDValue Overflow2 =
+        IsAdd ? DAG.getSetCC(dl, SetCCType, Sum2, Zero, ISD::SETEQ)
+              : DAG.getSetCC(dl, SetCCType, Sum, Zero, ISD::SETEQ);
+    Overflow2 = DAG.getNode(ISD::AND, dl, SetCCType, Overflow2,
+                            DAG.getZExtOrTrunc(Carry, dl, SetCCType)); // Gate.
+
+    SDValue ResultCarry = // Carry-out is set if either check fired.
+        DAG.getNode(ISD::OR, dl, SetCCType, Overflow, Overflow2);
+    // Results: value 0 is the final sum, value 1 the carry-out as CarryType.
+    Results.push_back(Sum2);
+    Results.push_back(DAG.getBoolExtOrTrunc(ResultCarry, dl, CarryType, VT));
+    break;
+  }
  case ISD::SADDO:
  case ISD::SSUBO: {
    SDValue LHS = Node->getOperand(0);
--- a/test/CodeGen/RISCV/addcarry.ll
+++ b/test/CodeGen/RISCV/addcarry.ll
@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=riscv32 -mattr=+m | FileCheck %s --check-prefix=RISCV32
+
+; Test ADDCARRY node expansion on a target that does not currently support ADDCARRY.
+; Signed fixed point multiplication eventually expands down to an ADDCARRY.
+
+declare  i64 @llvm.smul.fix.i64  (i64, i64, i32)
+
+define i64 @addcarry(i64 %x, i64 %y) {
+; RISCV32-LABEL: addcarry:
+; RISCV32:       # %bb.0:
+; RISCV32-NEXT:    mul a4, a0, a3
+; RISCV32-NEXT:    mulhu a5, a0, a2
+; RISCV32-NEXT:    add a4, a5, a4
+; RISCV32-NEXT:    sltu a6, a4, a5
+; RISCV32-NEXT:    mulhu a5, a0, a3
+; RISCV32-NEXT:    add a6, a5, a6
+; RISCV32-NEXT:    mulhu a5, a1, a2
+; RISCV32-NEXT:    add a7, a6, a5
+; RISCV32-NEXT:    mul a5, a1, a2
+; RISCV32-NEXT:    add a6, a4, a5
+; RISCV32-NEXT:    sltu a4, a6, a4
+; RISCV32-NEXT:    add a4, a7, a4
+; RISCV32-NEXT:    mul a5, a1, a3
+; RISCV32-NEXT:    add a5, a4, a5
+; RISCV32-NEXT:    bgez a1, .LBB0_2
+; RISCV32-NEXT:  # %bb.1:
+; RISCV32-NEXT:    sub a5, a5, a2
+; RISCV32-NEXT:  .LBB0_2:
+; RISCV32-NEXT:    bgez a3, .LBB0_4
+; RISCV32-NEXT:  # %bb.3:
+; RISCV32-NEXT:    sub a5, a5, a0
+; RISCV32-NEXT:  .LBB0_4:
+; RISCV32-NEXT:    mul a0, a0, a2
+; RISCV32-NEXT:    srli a0, a0, 2
+; RISCV32-NEXT:    slli a1, a6, 30
+; RISCV32-NEXT:    or a0, a0, a1
+; RISCV32-NEXT:    srli a1, a6, 2
+; RISCV32-NEXT:    slli a2, a5, 30
+; RISCV32-NEXT:    or a1, a1, a2
+; RISCV32-NEXT:    ret
+  %tmp = call i64 @llvm.smul.fix.i64(i64 %x, i64 %y, i32 2);
+  ret i64 %tmp;
+}