
[RISCV] Improve legalization of i32 UADDO/USUBO on RV64.

The default legalization uses zero extends, which require a pair of
shifts on RISC-V. Instead, we can take advantage of the fact that
unsigned compares work equally well on sign-extended inputs. This
allows us to use addw/subw and sext.w.
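
For illustration (not from the patch): sign extension from i32 to i64
preserves unsigned order, so a 64-bit unsigned compare of sign-extended
values agrees with a 32-bit unsigned compare of the originals. A minimal
C++ check of that fact:

  #include <cassert>
  #include <cstdint>

  int main() {
    // Boundary values around 0, INT32_MAX, and UINT32_MAX.
    const uint32_t Vals[] = {0, 1, 0x7fffffffu, 0x80000000u, 0xffffffffu};
    for (uint32_t X : Vals)
      for (uint32_t Y : Vals) {
        bool Cmp32 = X < Y;                          // compare low 32 bits
        uint64_t SX = (uint64_t)(int64_t)(int32_t)X; // sign-extend to 64 bits
        uint64_t SY = (uint64_t)(int64_t)(int32_t)Y;
        assert(Cmp32 == (SX < SY)); // 64-bit unsigned compare agrees
      }
    return 0;
  }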

Reviewed By: luismarques

Differential Revision: https://reviews.llvm.org/D98233
Craig Topper 2021-03-15 09:20:26 -07:00
parent ae2fffa9ef
commit b807b27145
2 changed files with 60 additions and 70 deletions

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

@@ -203,6 +203,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::SHL, MVT::i32, Custom);
     setOperationAction(ISD::SRA, MVT::i32, Custom);
     setOperationAction(ISD::SRL, MVT::i32, Custom);
+
+    setOperationAction(ISD::UADDO, MVT::i32, Custom);
+    setOperationAction(ISD::USUBO, MVT::i32, Custom);
   }
   if (!Subtarget.hasStdExtM()) {
@@ -3468,6 +3471,31 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
     Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
     break;
   }
+  case ISD::UADDO:
+  case ISD::USUBO: {
+    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+           "Unexpected custom legalisation");
+    bool IsAdd = N->getOpcode() == ISD::UADDO;
+    SDLoc DL(N);
+    // Create an ADDW or SUBW.
+    SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
+    SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
+    SDValue Res =
+        DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
+    Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
+                      DAG.getValueType(MVT::i32));
+
+    // Sign extend the LHS and perform an unsigned compare with the ADDW result.
+    // Since the inputs are sign extended from i32, this is equivalent to
+    // comparing the lower 32 bits.
+    LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
+    SDValue Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
+                                    IsAdd ? ISD::SETULT : ISD::SETUGT);
+
+    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
+    Results.push_back(Overflow);
+    return;
+  }
   case ISD::BITCAST: {
     assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
              Subtarget.hasStdExtF()) ||
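
To see why SETULT and SETUGT are the right conditions, here is a
standalone C++ model of the sequence this case emits (a sketch only;
uaddo32/usubo32 are invented names, not LLVM API). Each line maps onto
an instruction in the RV64 output below:

  #include <cstdint>

  static bool uaddo32(uint32_t A, uint32_t B, uint32_t &Sum) {
    int64_t Res = (int32_t)(A + B);       // addw: 32-bit add, sign-extended
    int64_t LHS = (int32_t)A;             // sext.w: sign-extend the LHS
    Sum = (uint32_t)Res;                  // truncate back to i32
    return (uint64_t)Res < (uint64_t)LHS; // sltu (SETULT): overflow bit
  }

  static bool usubo32(uint32_t A, uint32_t B, uint32_t &Diff) {
    int64_t Res = (int32_t)(A - B);       // subw: 32-bit sub, sign-extended
    int64_t LHS = (int32_t)A;             // sext.w
    Diff = (uint32_t)Res;
    return (uint64_t)Res > (uint64_t)LHS; // sltu, operands swapped (SETUGT)
  }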

llvm/test/CodeGen/RISCV/xaluo.ll

@@ -215,16 +215,12 @@ define zeroext i1 @uaddo.i32(i32 %v1, i32 %v2, i32* %res) {
 ;
 ; RV64-LABEL: uaddo.i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    srli a1, a1, 32
-; RV64-NEXT:    slli a0, a0, 32
-; RV64-NEXT:    srli a0, a0, 32
-; RV64-NEXT:    add a1, a0, a1
-; RV64-NEXT:    slli a0, a1, 32
-; RV64-NEXT:    srli a0, a0, 32
-; RV64-NEXT:    xor a0, a0, a1
-; RV64-NEXT:    snez a0, a0
-; RV64-NEXT:    sw a1, 0(a2)
+; RV64-NEXT:    addw a3, a0, a1
+; RV64-NEXT:    sext.w a4, a0
+; RV64-NEXT:    sltu a3, a3, a4
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    sw a0, 0(a2)
+; RV64-NEXT:    mv a0, a3
 ; RV64-NEXT:    ret
 entry:
   %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
@@ -358,16 +354,12 @@ define zeroext i1 @usubo.i32(i32 %v1, i32 %v2, i32* %res) {
 ;
 ; RV64-LABEL: usubo.i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    srli a1, a1, 32
-; RV64-NEXT:    slli a0, a0, 32
-; RV64-NEXT:    srli a0, a0, 32
-; RV64-NEXT:    sub a1, a0, a1
-; RV64-NEXT:    slli a0, a1, 32
-; RV64-NEXT:    srli a0, a0, 32
-; RV64-NEXT:    xor a0, a0, a1
-; RV64-NEXT:    snez a0, a0
-; RV64-NEXT:    sw a1, 0(a2)
+; RV64-NEXT:    subw a3, a0, a1
+; RV64-NEXT:    sext.w a4, a0
+; RV64-NEXT:    sltu a3, a4, a3
+; RV64-NEXT:    sub a0, a0, a1
+; RV64-NEXT:    sw a0, 0(a2)
+; RV64-NEXT:    mv a0, a3
 ; RV64-NEXT:    ret
 entry:
   %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
@@ -821,14 +813,9 @@ define i32 @uaddo.select.i32(i32 %v1, i32 %v2) {
 ;
 ; RV64-LABEL: uaddo.select.i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    slli a2, a1, 32
-; RV64-NEXT:    srli a2, a2, 32
-; RV64-NEXT:    slli a3, a0, 32
-; RV64-NEXT:    srli a3, a3, 32
-; RV64-NEXT:    add a2, a3, a2
-; RV64-NEXT:    slli a3, a2, 32
-; RV64-NEXT:    srli a3, a3, 32
-; RV64-NEXT:    bne a3, a2, .LBB26_2
+; RV64-NEXT:    addw a2, a0, a1
+; RV64-NEXT:    sext.w a3, a0
+; RV64-NEXT:    bltu a2, a3, .LBB26_2
 ; RV64-NEXT:  # %bb.1: # %entry
 ; RV64-NEXT:    mv a0, a1
 ; RV64-NEXT:  .LBB26_2: # %entry
@@ -850,15 +837,10 @@ define i1 @uaddo.not.i32(i32 %v1, i32 %v2) {
 ;
 ; RV64-LABEL: uaddo.not.i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    srli a1, a1, 32
-; RV64-NEXT:    slli a0, a0, 32
-; RV64-NEXT:    srli a0, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    srli a1, a1, 32
-; RV64-NEXT:    xor a0, a1, a0
-; RV64-NEXT:    seqz a0, a0
+; RV64-NEXT:    addw a1, a0, a1
+; RV64-NEXT:    sext.w a0, a0
+; RV64-NEXT:    sltu a0, a1, a0
+; RV64-NEXT:    xori a0, a0, 1
 ; RV64-NEXT:    ret
 entry:
   %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
@@ -1058,14 +1040,9 @@ define i32 @usubo.select.i32(i32 %v1, i32 %v2) {
 ;
 ; RV64-LABEL: usubo.select.i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    slli a2, a1, 32
-; RV64-NEXT:    srli a2, a2, 32
-; RV64-NEXT:    slli a3, a0, 32
-; RV64-NEXT:    srli a3, a3, 32
-; RV64-NEXT:    sub a2, a3, a2
-; RV64-NEXT:    slli a3, a2, 32
-; RV64-NEXT:    srli a3, a3, 32
-; RV64-NEXT:    bne a3, a2, .LBB34_2
+; RV64-NEXT:    subw a2, a0, a1
+; RV64-NEXT:    sext.w a3, a0
+; RV64-NEXT:    bltu a3, a2, .LBB34_2
 ; RV64-NEXT:  # %bb.1: # %entry
 ; RV64-NEXT:    mv a0, a1
 ; RV64-NEXT:  .LBB34_2: # %entry
@@ -1087,15 +1064,10 @@ define i1 @usubo.not.i32(i32 %v1, i32 %v2) {
 ;
 ; RV64-LABEL: usubo.not.i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    srli a1, a1, 32
-; RV64-NEXT:    slli a0, a0, 32
-; RV64-NEXT:    srli a0, a0, 32
-; RV64-NEXT:    sub a0, a0, a1
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    srli a1, a1, 32
-; RV64-NEXT:    xor a0, a1, a0
-; RV64-NEXT:    seqz a0, a0
+; RV64-NEXT:    subw a1, a0, a1
+; RV64-NEXT:    sext.w a0, a0
+; RV64-NEXT:    sltu a0, a0, a1
+; RV64-NEXT:    xori a0, a0, 1
 ; RV64-NEXT:    ret
 entry:
   %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
@@ -1545,14 +1517,9 @@ define zeroext i1 @uaddo.br.i32(i32 %v1, i32 %v2) {
 ;
 ; RV64-LABEL: uaddo.br.i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    srli a1, a1, 32
-; RV64-NEXT:    slli a0, a0, 32
-; RV64-NEXT:    srli a0, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    srli a1, a1, 32
-; RV64-NEXT:    beq a1, a0, .LBB48_2
+; RV64-NEXT:    addw a1, a0, a1
+; RV64-NEXT:    sext.w a0, a0
+; RV64-NEXT:    bgeu a1, a0, .LBB48_2
 ; RV64-NEXT:  # %bb.1: # %overflow
 ; RV64-NEXT:    mv a0, zero
 ; RV64-NEXT:    ret
@@ -1712,14 +1679,9 @@ define zeroext i1 @usubo.br.i32(i32 %v1, i32 %v2) {
 ;
 ; RV64-LABEL: usubo.br.i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    srli a1, a1, 32
-; RV64-NEXT:    slli a0, a0, 32
-; RV64-NEXT:    srli a0, a0, 32
-; RV64-NEXT:    sub a0, a0, a1
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    srli a1, a1, 32
-; RV64-NEXT:    beq a1, a0, .LBB52_2
+; RV64-NEXT:    subw a1, a0, a1
+; RV64-NEXT:    sext.w a0, a0
+; RV64-NEXT:    bgeu a0, a1, .LBB52_2
 ; RV64-NEXT:  # %bb.1: # %overflow
 ; RV64-NEXT:    mv a0, zero
 ; RV64-NEXT:    ret
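
These CHECK lines follow the usual LLVM test workflow. Assuming the file
above is llvm/test/CodeGen/RISCV/xaluo.ll (its function names match that
test), the RV64 output can be reproduced and the checks regenerated with:

  llc -mtriple=riscv64 -o - llvm/test/CodeGen/RISCV/xaluo.ll
  llvm/utils/update_llc_test_checks.py llvm/test/CodeGen/RISCV/xaluo.ll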