
[RISCV] Add DAG combine to detect opportunities to replace (i64 (any_extend (i32 X))) with sign_extend.

If type legalization is going to insert a sign_extend for other users
of X and we can fold the sign_extend into ADDW/MULW/SUBW, it is
better to replace the ANY_EXTEND so we don't end up with a separate
ADD/MUL/SUB instruction for the users of the ANY_EXTEND.
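
In DAG terms the rewrite itself is a single node replacement; most of the code in the patch below decides when it pays off. As a condensed sketch (these are real SelectionDAG APIs, but the helper name is illustrative, and the profitability check and setcc promotion are omitted):

  // Sketch only: rewrite (i64 (any_extend X)) as (i64 (sign_extend X)) so
  // every user of the extend sees the sign-extended value.
  static SDValue anyExtendToSignExtend(SDNode *N,
                                       TargetLowering::DAGCombinerInfo &DCI) {
    SelectionDAG &DAG = DCI.DAG;
    SDValue Src = N->getOperand(0); // the i32 X
    SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), MVT::i64, Src);
    DCI.CombineTo(N, SExt); // rewires N's users to the sign_extend
    return SDValue(N, 0);
  }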

I'm only handling setcc uses right now, but there are other
instructions that force sign_extend, such as ashr.

There are probably other *W instructions we could use in addition
to ADDW/SUBW/MULW.

My motivating case was a loop-terminating compare and a phi use,
as seen in the new test file.
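
At the source level the pattern looks roughly like the following (hypothetical C++ mirroring the IR in the new test; the names quux/hoge come from that test). The incremented value feeds both the loop-terminating compare, which type legalization sign-extends on RV64, and the phi for the next iteration, which previously got an any_extend:

  void hoge();

  void quux(int arg, int arg1) {
    // "i + 1" is used by the compare (sign-extended) and by the next
    // iteration's phi; with this change both share one addiw.
    for (int i = arg; i != arg1; ++i)
      hoge();
  }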

Reviewed By: asb

Differential Revision: https://reviews.llvm.org/D104581
Craig Topper 2021-06-25 12:58:31 -07:00
parent 7492c6b2fb
commit c2e96e7cf2
3 changed files with 130 additions and 3 deletions


@@ -836,6 +836,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   setTargetDAGCombine(ISD::AND);
   setTargetDAGCombine(ISD::OR);
   setTargetDAGCombine(ISD::XOR);
+  setTargetDAGCombine(ISD::ANY_EXTEND);
   if (Subtarget.hasStdExtV()) {
     setTargetDAGCombine(ISD::FCOPYSIGN);
     setTargetDAGCombine(ISD::MGATHER);
@@ -5606,6 +5607,83 @@ static SDValue performXORCombine(SDNode *N,
   return combineSelectCCAndUseCommutative(N, DAG, false);
 }
 
+// Attempt to turn ANY_EXTEND into SIGN_EXTEND if the input to the ANY_EXTEND
+// has users that require SIGN_EXTEND and the SIGN_EXTEND can be done for free
+// by an instruction like ADDW/SUBW/MULW. Without this the ANY_EXTEND would be
+// removed during type legalization leaving an ADD/SUB/MUL use that won't use
+// ADDW/SUBW/MULW.
+static SDValue performANY_EXTENDCombine(SDNode *N,
+                                        TargetLowering::DAGCombinerInfo &DCI,
+                                        const RISCVSubtarget &Subtarget) {
+  if (!Subtarget.is64Bit())
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+
+  SDValue Src = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+  if (VT != MVT::i64 || Src.getValueType() != MVT::i32)
+    return SDValue();
+
+  // The opcode must be one that can implicitly sign_extend.
+  // FIXME: Additional opcodes.
+  switch (Src.getOpcode()) {
+  default:
+    return SDValue();
+  case ISD::MUL:
+    if (!Subtarget.hasStdExtM())
+      return SDValue();
+    LLVM_FALLTHROUGH;
+  case ISD::ADD:
+  case ISD::SUB:
+    break;
+  }
+
+  SmallVector<SDNode *, 4> SetCCs;
+  for (SDNode::use_iterator UI = Src.getNode()->use_begin(),
+                            UE = Src.getNode()->use_end();
+       UI != UE; ++UI) {
+    SDNode *User = *UI;
+    if (User == N)
+      continue;
+    if (UI.getUse().getResNo() != Src.getResNo())
+      continue;
+    // All i32 setccs are legalized by sign extending operands.
+    if (User->getOpcode() == ISD::SETCC) {
+      SetCCs.push_back(User);
+      continue;
+    }
+    // We don't know if we can extend this user.
+    break;
+  }
+
+  // If we don't have any SetCCs, this isn't worthwhile.
+  if (SetCCs.empty())
+    return SDValue();
+
+  SDLoc DL(N);
+  SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Src);
+  DCI.CombineTo(N, SExt);
+
+  // Promote all the setccs.
+  for (SDNode *SetCC : SetCCs) {
+    SmallVector<SDValue, 4> Ops;
+
+    for (unsigned j = 0; j != 2; ++j) {
+      SDValue SOp = SetCC->getOperand(j);
+      if (SOp == Src)
+        Ops.push_back(SExt);
+      else
+        Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, SOp));
+    }
+
+    Ops.push_back(SetCC->getOperand(2));
+    DCI.CombineTo(SetCC,
+                  DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
+  }
+  return SDValue(N, 0);
+}
+
 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -5830,6 +5908,8 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
     return performORCombine(N, DCI, Subtarget);
   case ISD::XOR:
     return performXORCombine(N, DCI, Subtarget);
+  case ISD::ANY_EXTEND:
+    return performANY_EXTENDCombine(N, DCI, Subtarget);
   case RISCVISD::SELECT_CC: {
     // Transform
     SDValue LHS = N->getOperand(0);


@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64I
+
+; Make sure we don't generate an addi in the loop in
+; addition to the addiw. Previously we type legalized the
+; setcc use using signext and the phi use using anyext.
+; We now detect when it would be beneficial to replace
+; anyext with signext.
+
+define void @quux(i32 signext %arg, i32 signext %arg1) nounwind {
+; RV64I-LABEL: quux:
+; RV64I:       # %bb.0: # %bb
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    beq a0, a1, .LBB0_3
+; RV64I-NEXT:  # %bb.1: # %bb2.preheader
+; RV64I-NEXT:    mv s0, a1
+; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:  .LBB0_2: # %bb2
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    call hoge@plt
+; RV64I-NEXT:    addiw s1, s1, 1
+; RV64I-NEXT:    bne s1, s0, .LBB0_2
+; RV64I-NEXT:  .LBB0_3: # %bb6
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    ret
+bb:
+  %tmp = icmp eq i32 %arg, %arg1
+  br i1 %tmp, label %bb6, label %bb2
+
+bb2:                                              ; preds = %bb2, %bb
+  %tmp3 = phi i32 [ %tmp4, %bb2 ], [ %arg, %bb ]
+  tail call void @hoge()
+  %tmp4 = add nsw i32 %tmp3, 1
+  %tmp5 = icmp eq i32 %tmp4, %arg1
+  br i1 %tmp5, label %bb6, label %bb2
+
+bb6:                                              ; preds = %bb2, %bb
+  ret void
+}
+
+declare void @hoge()


@@ -193,12 +193,11 @@ define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
 ; RV64I-NEXT:    addi sp, sp, -16
 ; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    addiw a2, a0, -1
+; RV64I-NEXT:    addiw a0, a0, -1
 ; RV64I-NEXT:    addi s0, zero, 32
 ; RV64I-NEXT:    addi a1, zero, 32
-; RV64I-NEXT:    beqz a2, .LBB2_2
+; RV64I-NEXT:    beqz a0, .LBB2_2
 ; RV64I-NEXT:    # %bb.1: # %cond.false
-; RV64I-NEXT:    addi a0, a0, -1
 ; RV64I-NEXT:    srliw a1, a0, 1
 ; RV64I-NEXT:    slli a0, a0, 32
 ; RV64I-NEXT:    srli a0, a0, 32