From b807b2714512de904d0e9a07074c73e80715d779 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 15 Mar 2021 09:20:26 -0700
Subject: [PATCH] [RISCV] Improve legalization of i32 UADDO/USUBO on RV64.

The default legalization uses zero extends that require a pair of
shifts on RISCV. Instead we can take advantage of the fact that
unsigned compares work equally well on sign extended inputs. This
allows us to use addw/subw and sext.w.

Reviewed By: luismarques

Differential Revision: https://reviews.llvm.org/D98233
---
 lib/Target/RISCV/RISCVISelLowering.cpp |  28 +++++++
 test/CodeGen/RISCV/xaluo.ll            | 102 ++++++++-----------------
 2 files changed, 60 insertions(+), 70 deletions(-)

diff --git a/lib/Target/RISCV/RISCVISelLowering.cpp b/lib/Target/RISCV/RISCVISelLowering.cpp
index 069e90c0b6c..e93e61549c0 100644
--- a/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -203,6 +203,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::SHL, MVT::i32, Custom);
     setOperationAction(ISD::SRA, MVT::i32, Custom);
     setOperationAction(ISD::SRL, MVT::i32, Custom);
+
+    setOperationAction(ISD::UADDO, MVT::i32, Custom);
+    setOperationAction(ISD::USUBO, MVT::i32, Custom);
   }
 
   if (!Subtarget.hasStdExtM()) {
@@ -3468,6 +3471,31 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
     Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
     break;
   }
+  case ISD::UADDO:
+  case ISD::USUBO: {
+    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+           "Unexpected custom legalisation");
+    bool IsAdd = N->getOpcode() == ISD::UADDO;
+    SDLoc DL(N);
+    // Create an ADDW or SUBW.
+    SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
+    SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
+    SDValue Res =
+        DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
+    Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
+                      DAG.getValueType(MVT::i32));
+
+    // Sign extend the LHS and perform an unsigned compare with the ADDW result.
+    // Since the inputs are sign extended from i32, this is equivalent to
+    // comparing the lower 32 bits.
+    LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
+    SDValue Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
+                                    IsAdd ? ISD::SETULT : ISD::SETUGT);
+
+    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
+    Results.push_back(Overflow);
+    return;
+  }
   case ISD::BITCAST: {
     assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
              Subtarget.hasStdExtF()) ||
diff --git a/test/CodeGen/RISCV/xaluo.ll b/test/CodeGen/RISCV/xaluo.ll
index 04824074faa..43b27d45501 100644
--- a/test/CodeGen/RISCV/xaluo.ll
+++ b/test/CodeGen/RISCV/xaluo.ll
@@ -215,16 +215,12 @@ define zeroext i1 @uaddo.i32(i32 %v1, i32 %v2, i32* %res) {
 ;
 ; RV64-LABEL: uaddo.i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    srli a1, a1, 32
-; RV64-NEXT:    slli a0, a0, 32
-; RV64-NEXT:    srli a0, a0, 32
-; RV64-NEXT:    add a1, a0, a1
-; RV64-NEXT:    slli a0, a1, 32
-; RV64-NEXT:    srli a0, a0, 32
-; RV64-NEXT:    xor a0, a0, a1
-; RV64-NEXT:    snez a0, a0
-; RV64-NEXT:    sw a1, 0(a2)
+; RV64-NEXT:    addw a3, a0, a1
+; RV64-NEXT:    sext.w a4, a0
+; RV64-NEXT:    sltu a3, a3, a4
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    sw a0, 0(a2)
+; RV64-NEXT:    mv a0, a3
 ; RV64-NEXT:    ret
 entry:
   %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
@@ -358,16 +354,12 @@ define zeroext i1 @usubo.i32(i32 %v1, i32 %v2, i32* %res) {
 ;
 ; RV64-LABEL: usubo.i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    srli a1, a1, 32
-; RV64-NEXT:    slli a0, a0, 32
-; RV64-NEXT:    srli a0, a0, 32
-; RV64-NEXT:    sub a1, a0, a1
-; RV64-NEXT:    slli a0, a1, 32
-; RV64-NEXT:    srli a0, a0, 32
-; RV64-NEXT:    xor a0, a0, a1
-; RV64-NEXT:    snez a0, a0
-; RV64-NEXT:    sw a1, 0(a2)
+; RV64-NEXT:    subw a3, a0, a1
+; RV64-NEXT:    sext.w a4, a0
+; RV64-NEXT:    sltu a3, a4, a3
+; RV64-NEXT:    sub a0, a0, a1
+; RV64-NEXT:    sw a0, 0(a2)
+; RV64-NEXT:    mv a0, a3
 ; RV64-NEXT:    ret
 entry:
   %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
@@ -821,14 +813,9 @@ define i32 @uaddo.select.i32(i32 %v1, i32 %v2) {
 ;
 ; RV64-LABEL: uaddo.select.i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    slli a2, a1, 32
-; RV64-NEXT:    srli a2, a2, 32
-; RV64-NEXT:    slli a3, a0, 32
-; RV64-NEXT:    srli a3, a3, 32
-; RV64-NEXT:    add a2, a3, a2
-; RV64-NEXT:    slli a3, a2, 32
-; RV64-NEXT:    srli a3, a3, 32
-; RV64-NEXT:    bne a3, a2, .LBB26_2
+; RV64-NEXT:    addw a2, a0, a1
+; RV64-NEXT:    sext.w a3, a0
+; RV64-NEXT:    bltu a2, a3, .LBB26_2
 ; RV64-NEXT:  # %bb.1: # %entry
 ; RV64-NEXT:    mv a0, a1
 ; RV64-NEXT:  .LBB26_2: # %entry
@@ -850,15 +837,10 @@ define i1 @uaddo.not.i32(i32 %v1, i32 %v2) {
 ;
 ; RV64-LABEL: uaddo.not.i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    srli a1, a1, 32
-; RV64-NEXT:    slli a0, a0, 32
-; RV64-NEXT:    srli a0, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    srli a1, a1, 32
-; RV64-NEXT:    xor a0, a1, a0
-; RV64-NEXT:    seqz a0, a0
+; RV64-NEXT:    addw a1, a0, a1
+; RV64-NEXT:    sext.w a0, a0
+; RV64-NEXT:    sltu a0, a1, a0
+; RV64-NEXT:    xori a0, a0, 1
 ; RV64-NEXT:    ret
 entry:
   %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
@@ -1058,14 +1040,9 @@ define i32 @usubo.select.i32(i32 %v1, i32 %v2) {
 ;
 ; RV64-LABEL: usubo.select.i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    slli a2, a1, 32
-; RV64-NEXT:    srli a2, a2, 32
-; RV64-NEXT:    slli a3, a0, 32
-; RV64-NEXT:    srli a3, a3, 32
-; RV64-NEXT:    sub a2, a3, a2
-; RV64-NEXT:    slli a3, a2, 32
-; RV64-NEXT:    srli a3, a3, 32
-; RV64-NEXT:    bne a3, a2, .LBB34_2
+; RV64-NEXT:    subw a2, a0, a1
+; RV64-NEXT:    sext.w a3, a0
+; RV64-NEXT:    bltu a3, a2, .LBB34_2
 ; RV64-NEXT:  # %bb.1: # %entry
 ; RV64-NEXT:    mv a0, a1
 ; RV64-NEXT:  .LBB34_2: # %entry
@@ -1087,15 +1064,10 @@ define i1 @usubo.not.i32(i32 %v1, i32 %v2) {
 ;
 ; RV64-LABEL: usubo.not.i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    srli a1, a1, 32
-; RV64-NEXT:    slli a0, a0, 32
-; RV64-NEXT:    srli a0, a0, 32
-; RV64-NEXT:    sub a0, a0, a1
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    srli a1, a1, 32
-; RV64-NEXT:    xor a0, a1, a0
-; RV64-NEXT:    seqz a0, a0
+; RV64-NEXT:    subw a1, a0, a1
+; RV64-NEXT:    sext.w a0, a0
+; RV64-NEXT:    sltu a0, a0, a1
+; RV64-NEXT:    xori a0, a0, 1
 ; RV64-NEXT:    ret
 entry:
   %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
@@ -1545,14 +1517,9 @@ define zeroext i1 @uaddo.br.i32(i32 %v1, i32 %v2) {
 ;
 ; RV64-LABEL: uaddo.br.i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    srli a1, a1, 32
-; RV64-NEXT:    slli a0, a0, 32
-; RV64-NEXT:    srli a0, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    srli a1, a1, 32
-; RV64-NEXT:    beq a1, a0, .LBB48_2
+; RV64-NEXT:    addw a1, a0, a1
+; RV64-NEXT:    sext.w a0, a0
+; RV64-NEXT:    bgeu a1, a0, .LBB48_2
 ; RV64-NEXT:  # %bb.1: # %overflow
 ; RV64-NEXT:    mv a0, zero
 ; RV64-NEXT:    ret
@@ -1712,14 +1679,9 @@ define zeroext i1 @usubo.br.i32(i32 %v1, i32 %v2) {
 ;
 ; RV64-LABEL: usubo.br.i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    srli a1, a1, 32
-; RV64-NEXT:    slli a0, a0, 32
-; RV64-NEXT:    srli a0, a0, 32
-; RV64-NEXT:    sub a0, a0, a1
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    srli a1, a1, 32
-; RV64-NEXT:    beq a1, a0, .LBB52_2
+; RV64-NEXT:    subw a1, a0, a1
+; RV64-NEXT:    sext.w a0, a0
+; RV64-NEXT:    bgeu a0, a1, .LBB52_2
 ; RV64-NEXT:  # %bb.1: # %overflow
 ; RV64-NEXT:    mv a0, zero
 ; RV64-NEXT:    ret
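
The commit message's claim that unsigned compares work equally well on sign extended inputs can be checked in isolation. Below is a minimal standalone C++ sketch of the uaddo.i32 sequence shown above (addw, sext.w, sltu); it is not part of the patch, and the helper name uaddOverflowViaSext and the test values are invented for illustration. Sign extension maps 32-bit values to 64-bit values while preserving their unsigned order, so an unsigned compare of the sign-extended sum against the sign-extended LHS yields the same overflow bit as comparing the zero-extended values.

// Standalone sketch (not part of the patch): mirrors the RV64 sequence
// addw a3, a0, a1 / sext.w a4, a0 / sltu a3, a3, a4 emitted for uaddo.i32.
#include <cassert>
#include <cstdint>

static bool uaddOverflowViaSext(uint32_t A, uint32_t B, uint32_t &Sum) {
  Sum = A + B;                          // truncated 32-bit result
  int64_t Res = (int64_t)(int32_t)Sum;  // addw: 32-bit add, sign extended
  int64_t Lhs = (int64_t)(int32_t)A;    // sext.w: sign extend the LHS
  // An unsigned compare of the sign-extended values is equivalent to an
  // unsigned compare of the low 32 bits, so this is the overflow bit.
  return (uint64_t)Res < (uint64_t)Lhs; // sltu
}

int main() {
  uint32_t Sum;
  assert(uaddOverflowViaSext(0xffffffffu, 1u, Sum) && Sum == 0u); // wraps
  assert(!uaddOverflowViaSext(123u, 456u, Sum) && Sum == 579u);   // in range
  return 0;
}

For USUBO the compare direction simply flips (ISD::SETUGT in the C++ hunk, sltu a3, a4, a3 in usubo.i32 above): a 32-bit unsigned subtract borrows exactly when the truncated difference is greater than the original left operand.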