mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 20:23:11 +01:00
[RISCV] Add DAG combine to detect opportunities to replace (i64 (any_extend (i32 X))) with sign_extend.
If type legalization is going to insert a sign_extend for other users of X and we can fold the sign_extend into ADDW/MULW/SUBW, it is better to replace the ANY_EXTEND so we don't end up with a separate ADD/MUL/SUB instruction for the users of the ANY_EXTEND. I'm only handling setcc uses right now, but there are other instructions that force sign_extends like ashr. There are probably other *W instructions we could use in addition to ADDW/SUBW/MULW. My motivating case was a loop terminating compare and a phi use as seen in the new test file. Reviewed By: asb Differential Revision: https://reviews.llvm.org/D104581
This commit is contained in:
parent
7492c6b2fb
commit
c2e96e7cf2
@ -836,6 +836,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
|
||||
setTargetDAGCombine(ISD::AND);
|
||||
setTargetDAGCombine(ISD::OR);
|
||||
setTargetDAGCombine(ISD::XOR);
|
||||
setTargetDAGCombine(ISD::ANY_EXTEND);
|
||||
if (Subtarget.hasStdExtV()) {
|
||||
setTargetDAGCombine(ISD::FCOPYSIGN);
|
||||
setTargetDAGCombine(ISD::MGATHER);
|
||||
@ -5606,6 +5607,83 @@ static SDValue performXORCombine(SDNode *N,
|
||||
return combineSelectCCAndUseCommutative(N, DAG, false);
|
||||
}
|
||||
|
||||
// Attempt to turn ANY_EXTEND into SIGN_EXTEND if the input to the ANY_EXTEND
|
||||
// has users that require SIGN_EXTEND and the SIGN_EXTEND can be done for free
|
||||
// by an instruction like ADDW/SUBW/MULW. Without this the ANY_EXTEND would be
|
||||
// removed during type legalization leaving an ADD/SUB/MUL use that won't use
|
||||
// ADDW/SUBW/MULW.
|
||||
static SDValue performANY_EXTENDCombine(SDNode *N,
|
||||
TargetLowering::DAGCombinerInfo &DCI,
|
||||
const RISCVSubtarget &Subtarget) {
|
||||
if (!Subtarget.is64Bit())
|
||||
return SDValue();
|
||||
|
||||
SelectionDAG &DAG = DCI.DAG;
|
||||
|
||||
SDValue Src = N->getOperand(0);
|
||||
EVT VT = N->getValueType(0);
|
||||
if (VT != MVT::i64 || Src.getValueType() != MVT::i32)
|
||||
return SDValue();
|
||||
|
||||
// The opcode must be one that can implicitly sign_extend.
|
||||
// FIXME: Additional opcodes.
|
||||
switch (Src.getOpcode()) {
|
||||
default:
|
||||
return SDValue();
|
||||
case ISD::MUL:
|
||||
if (!Subtarget.hasStdExtM())
|
||||
return SDValue();
|
||||
LLVM_FALLTHROUGH;
|
||||
case ISD::ADD:
|
||||
case ISD::SUB:
|
||||
break;
|
||||
}
|
||||
|
||||
SmallVector<SDNode *, 4> SetCCs;
|
||||
for (SDNode::use_iterator UI = Src.getNode()->use_begin(),
|
||||
UE = Src.getNode()->use_end();
|
||||
UI != UE; ++UI) {
|
||||
SDNode *User = *UI;
|
||||
if (User == N)
|
||||
continue;
|
||||
if (UI.getUse().getResNo() != Src.getResNo())
|
||||
continue;
|
||||
// All i32 setccs are legalized by sign extending operands.
|
||||
if (User->getOpcode() == ISD::SETCC) {
|
||||
SetCCs.push_back(User);
|
||||
continue;
|
||||
}
|
||||
// We don't know if we can extend this user.
|
||||
break;
|
||||
}
|
||||
|
||||
// If we don't have any SetCCs, this isn't worthwhile.
|
||||
if (SetCCs.empty())
|
||||
return SDValue();
|
||||
|
||||
SDLoc DL(N);
|
||||
SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Src);
|
||||
DCI.CombineTo(N, SExt);
|
||||
|
||||
// Promote all the setccs.
|
||||
for (SDNode *SetCC : SetCCs) {
|
||||
SmallVector<SDValue, 4> Ops;
|
||||
|
||||
for (unsigned j = 0; j != 2; ++j) {
|
||||
SDValue SOp = SetCC->getOperand(j);
|
||||
if (SOp == Src)
|
||||
Ops.push_back(SExt);
|
||||
else
|
||||
Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, SOp));
|
||||
}
|
||||
|
||||
Ops.push_back(SetCC->getOperand(2));
|
||||
DCI.CombineTo(SetCC,
|
||||
DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
|
||||
}
|
||||
return SDValue(N, 0);
|
||||
}
|
||||
|
||||
SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
|
||||
DAGCombinerInfo &DCI) const {
|
||||
SelectionDAG &DAG = DCI.DAG;
|
||||
@ -5830,6 +5908,8 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
|
||||
return performORCombine(N, DCI, Subtarget);
|
||||
case ISD::XOR:
|
||||
return performXORCombine(N, DCI, Subtarget);
|
||||
case ISD::ANY_EXTEND:
|
||||
return performANY_EXTENDCombine(N, DCI, Subtarget);
|
||||
case RISCVISD::SELECT_CC: {
|
||||
// Transform
|
||||
SDValue LHS = N->getOperand(0);
|
||||
|
48
test/CodeGen/RISCV/aext-to-sext.ll
Normal file
48
test/CodeGen/RISCV/aext-to-sext.ll
Normal file
@ -0,0 +1,48 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
|
||||
; RUN: | FileCheck %s -check-prefix=RV64I
|
||||
|
||||
; Make sure we don't generate an addi in the loop in
|
||||
; addition to the addiw. Previously we type legalized the
|
||||
; setcc use using signext and the phi use using anyext.
|
||||
; We now detect when it would be beneficial to replace
|
||||
; anyext with signext.
|
||||
|
||||
define void @quux(i32 signext %arg, i32 signext %arg1) nounwind {
|
||||
; RV64I-LABEL: quux:
|
||||
; RV64I: # %bb.0: # %bb
|
||||
; RV64I-NEXT: addi sp, sp, -32
|
||||
; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
|
||||
; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
|
||||
; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
|
||||
; RV64I-NEXT: beq a0, a1, .LBB0_3
|
||||
; RV64I-NEXT: # %bb.1: # %bb2.preheader
|
||||
; RV64I-NEXT: mv s0, a1
|
||||
; RV64I-NEXT: mv s1, a0
|
||||
; RV64I-NEXT: .LBB0_2: # %bb2
|
||||
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; RV64I-NEXT: call hoge@plt
|
||||
; RV64I-NEXT: addiw s1, s1, 1
|
||||
; RV64I-NEXT: bne s1, s0, .LBB0_2
|
||||
; RV64I-NEXT: .LBB0_3: # %bb6
|
||||
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
|
||||
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
|
||||
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
|
||||
; RV64I-NEXT: addi sp, sp, 32
|
||||
; RV64I-NEXT: ret
|
||||
bb:
|
||||
%tmp = icmp eq i32 %arg, %arg1
|
||||
br i1 %tmp, label %bb6, label %bb2
|
||||
|
||||
bb2: ; preds = %bb2, %bb
|
||||
%tmp3 = phi i32 [ %tmp4, %bb2 ], [ %arg, %bb ]
|
||||
tail call void @hoge()
|
||||
%tmp4 = add nsw i32 %tmp3, 1
|
||||
%tmp5 = icmp eq i32 %tmp4, %arg1
|
||||
br i1 %tmp5, label %bb6, label %bb2
|
||||
|
||||
bb6: ; preds = %bb2, %bb
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @hoge()
|
@ -193,12 +193,11 @@ define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
|
||||
; RV64I-NEXT: addi sp, sp, -16
|
||||
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
|
||||
; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
|
||||
; RV64I-NEXT: addiw a2, a0, -1
|
||||
; RV64I-NEXT: addiw a0, a0, -1
|
||||
; RV64I-NEXT: addi s0, zero, 32
|
||||
; RV64I-NEXT: addi a1, zero, 32
|
||||
; RV64I-NEXT: beqz a2, .LBB2_2
|
||||
; RV64I-NEXT: beqz a0, .LBB2_2
|
||||
; RV64I-NEXT: # %bb.1: # %cond.false
|
||||
; RV64I-NEXT: addi a0, a0, -1
|
||||
; RV64I-NEXT: srliw a1, a0, 1
|
||||
; RV64I-NEXT: slli a0, a0, 32
|
||||
; RV64I-NEXT: srli a0, a0, 32
|
||||
|
Loading…
Reference in New Issue
Block a user