[RISCV] Add RISCVISD::ROLW/RORW and use them for custom legalizing i32 rotl/rotr on RV64IZbb.

This should result in better utilization of RORIW since we
don't need to look for a SIGN_EXTEND_INREG that may not exist.

Also remove rotl/rotr isel matching to GREVI and just prefer RORI.
This keeps things consistent so we don't also have to match ROLW/RORW
to GREVIW. I expect RORI/RORIW performance to be the same as or
better than GREVI.

Differential Revision: https://reviews.llvm.org/D91449
Craig Topper 2020-11-20 10:11:34 -08:00
parent 7d25b5a0e9
commit 2a19bf1e6f
8 changed files with 58 additions and 110 deletions
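For context, the kind of code this change targets is an i32 rotate whose result is never sign extended, for example one that is only stored. Before this patch the RORIW pattern was rooted at a sext_inreg, so such rotates missed RORIW; with the ROLW/RORW nodes that no longer matters. A minimal C++ illustration (hypothetical example, not part of the patch):

// Hypothetical example (not from this commit): an i32 rotate whose result is
// only stored, so no SIGN_EXTEND_INREG appears in the DAG. With the new
// ROLW/RORW legalization this can still select roriw on RV64IZbb.
#include <cstdint>

void rotl7_store(uint32_t a, uint32_t *p) {
  // Recognized by the compiler as an i32 rotate left by 7.
  *p = (a << 7) | (a >> 25);
}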


@@ -376,62 +376,6 @@ bool RISCVDAGToDAGISel::SelectSROIW(SDValue N, SDValue &RS1, SDValue &Shamt) {
return true;
}
// Check that it is a RORIW (i32 Right Rotate Immediate on RV64).
// We first check that it is the right node tree:
//
// (SIGN_EXTEND_INREG (OR (SHL RS1, VC2),
//                        (SRL (AND RS1, VC3), VC1)))
//
// Then we check that the constant operands respect these constraints:
//
// VC2 == 32 - VC1
// VC3 | maskTrailingOnes<uint64_t>(VC1) == 0xffffffff
//
// being VC1 the Shamt we need, VC2 the complementary of Shamt over 32
// and VC3 being 0xffffffff after accounting for SimplifyDemandedBits removing
// some bits due to the right shift.
bool RISCVDAGToDAGISel::SelectRORIW(SDValue N, SDValue &RS1, SDValue &Shamt) {
  if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      Subtarget->getXLenVT() == MVT::i64 &&
      cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) {
    if (N.getOperand(0).getOpcode() == ISD::OR) {
      SDValue Or = N.getOperand(0);
      SDValue Shl = Or.getOperand(0);
      SDValue Srl = Or.getOperand(1);
      // OR is commutable so canonicalize SHL to LHS.
      if (Srl.getOpcode() == ISD::SHL)
        std::swap(Shl, Srl);
      if (Shl.getOpcode() == ISD::SHL && Srl.getOpcode() == ISD::SRL) {
        if (Srl.getOperand(0).getOpcode() == ISD::AND) {
          SDValue And = Srl.getOperand(0);
          if (And.getOperand(0) == Shl.getOperand(0) &&
              isa<ConstantSDNode>(Srl.getOperand(1)) &&
              isa<ConstantSDNode>(Shl.getOperand(1)) &&
              isa<ConstantSDNode>(And.getOperand(1))) {
            uint64_t VC1 = Srl.getConstantOperandVal(1);
            uint64_t VC2 = Shl.getConstantOperandVal(1);
            uint64_t VC3 = And.getConstantOperandVal(1);
            // The mask needs to be 0xffffffff, but SimplifyDemandedBits may
            // have removed lower bits that aren't necessary due to the right
            // shift.
            if (VC2 == (32 - VC1) &&
                (VC3 | maskTrailingOnes<uint64_t>(VC1)) == 0xffffffff) {
              RS1 = Shl.getOperand(0);
              Shamt = CurDAG->getTargetConstant(VC1, SDLoc(N),
                                                Srl.getOperand(1).getValueType());
              return true;
            }
          }
        }
      }
    }
  }
  return false;
}
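As a worked illustration of the constraints the removed code checked (constants chosen for illustration, not taken from the patch): for a rotate right by 7, VC1 = 7, VC2 = 25, and an AND mask of 0xffffff80 is still accepted because the low 7 bits are shifted out anyway.

// Standalone sketch of the removed constraint check, using a local stand-in
// for llvm::maskTrailingOnes. Constants are illustrative only.
#include <cassert>
#include <cstdint>

static uint64_t maskTrailingOnes64(unsigned N) {
  return N == 0 ? 0 : (~0ULL >> (64 - N));
}

int main() {
  uint64_t VC1 = 7;          // SRL amount (the rotate amount we want)
  uint64_t VC2 = 32 - VC1;   // SHL amount, must equal 32 - VC1
  uint64_t VC3 = 0xffffff80; // AND mask; the low VC1 bits may have been dropped
  assert(VC2 == (32 - VC1));
  assert((VC3 | maskTrailingOnes64(VC1)) == 0xffffffff);
  return 0;
}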
// Merge an ADDI into the offset of a load/store instruction where possible.
// (load (addi base, off1), off2) -> (load base, off1+off2)
// (store val, (addi base, off1), off2) -> (store val, base, off1+off2)


@@ -50,7 +50,6 @@ public:
bool SelectSLLIUW(SDValue N, SDValue &RS1, SDValue &Shamt);
bool SelectSLOIW(SDValue N, SDValue &RS1, SDValue &Shamt);
bool SelectSROIW(SDValue N, SDValue &RS1, SDValue &Shamt);
bool SelectRORIW(SDValue N, SDValue &RS1, SDValue &Shamt);
// Include the pieces autogenerated from the target description.
#include "RISCVGenDAGISel.inc"


@@ -151,7 +151,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
if (!(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp())) {
if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
if (Subtarget.is64Bit()) {
setOperationAction(ISD::ROTL, MVT::i32, Custom);
setOperationAction(ISD::ROTR, MVT::i32, Custom);
}
} else {
setOperationAction(ISD::ROTL, XLenVT, Expand);
setOperationAction(ISD::ROTR, XLenVT, Expand);
}
@@ -908,6 +913,10 @@ static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
return RISCVISD::DIVUW;
case ISD::UREM:
return RISCVISD::REMUW;
case ISD::ROTL:
return RISCVISD::ROLW;
case ISD::ROTR:
return RISCVISD::RORW;
case RISCVISD::GREVI:
return RISCVISD::GREVIW;
case RISCVISD::GORCI:
@@ -1013,6 +1022,12 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
return;
Results.push_back(customLegalizeToWOp(N, DAG));
break;
case ISD::ROTL:
case ISD::ROTR:
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
Results.push_back(customLegalizeToWOp(N, DAG));
break;
case ISD::SDIV:
case ISD::UDIV:
case ISD::UREM:
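customLegalizeToWOp itself is not shown in this diff. For orientation, a sketch of the usual shape of that helper (assumed, not verbatim from RISCVISelLowering.cpp):

// Assumed sketch: wrap an illegal i32 node in its 64-bit *W counterpart and
// truncate the result back to i32 so ReplaceNodeResults keeps the type.
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
  // Any-extend is fine: the *W nodes only read the low 32 bits of the value
  // operand and the low 5 bits of the shift/rotate amount.
  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
  SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
}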
@@ -1267,7 +1282,9 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
}
case RISCVISD::SLLW:
case RISCVISD::SRAW:
case RISCVISD::SRLW: {
case RISCVISD::SRLW:
case RISCVISD::ROLW:
case RISCVISD::RORW: {
// Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
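The rest of this combine is cut off by the diff context; its usual shape (assumed, not verbatim from the file) is to hand exactly those demanded bits to SimplifyDemandedBits:

// Assumed continuation of the case above: request only the bits the W node
// actually reads so the computations feeding the other bits can be pruned.
APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
if (SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI) ||
    SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI))
  return SDValue(N, 0);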
@@ -1392,6 +1409,8 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
case RISCVISD::DIVW:
case RISCVISD::DIVUW:
case RISCVISD::REMUW:
case RISCVISD::ROLW:
case RISCVISD::RORW:
case RISCVISD::GREVIW:
case RISCVISD::GORCIW:
// TODO: As the result is sign-extended, this is conservatively correct. A
@@ -2829,6 +2848,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(DIVW)
NODE_NAME_CASE(DIVUW)
NODE_NAME_CASE(REMUW)
NODE_NAME_CASE(ROLW)
NODE_NAME_CASE(RORW)
NODE_NAME_CASE(FMV_W_X_RV64)
NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
NODE_NAME_CASE(READ_CYCLE_WIDE)


@@ -42,6 +42,10 @@ enum NodeType : unsigned {
DIVW,
DIVUW,
REMUW,
// RV64IB rotates, directly matching the semantics of the named RISC-V
// instructions.
ROLW,
RORW,
// FPR32<->GPR transfer operations for RV64. Needed as an i32<->f32 bitcast
// is not legal on RV64. FMV_W_X_RV64 matches the semantics of the FMV.W.X.
// FMV_X_ANYEXTW_RV64 is similar to FMV.X.W but has an any-extended result.
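For reference, a plain C++ model of the semantics the new nodes (and the underlying ROLW/RORW/RORIW instructions) carry: operate on the low 32 bits and sign-extend the 32-bit result, with rol by n equivalent to ror by (32 - n) mod 32, which is what the immediate-rotate pattern later in this patch relies on. Illustration only, not part of the patch:

// Illustrative-only model of RORW/ROLW: rotate the low 32 bits of rs1 by
// rs2[4:0], then sign-extend bit 31 of the result to 64 bits.
#include <cstdint>

int64_t rorw(int64_t rs1, int64_t rs2) {
  uint32_t x = static_cast<uint32_t>(rs1);
  unsigned sh = static_cast<unsigned>(rs2) & 31;
  uint32_t r = sh ? ((x >> sh) | (x << (32 - sh))) : x;
  return static_cast<int32_t>(r); // sign-extend to 64 bits
}

int64_t rolw(int64_t rs1, int64_t rs2) {
  // rol by n == ror by (32 - n) mod 32; this is also why an immediate rol
  // can be selected as RORIW with a complemented shift amount.
  return rorw(rs1, (32 - (static_cast<unsigned>(rs2) & 31)) & 31);
}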


@@ -17,6 +17,9 @@
// Operand and SDNode transformation definitions.
//===----------------------------------------------------------------------===//
def riscv_rolw : SDNode<"RISCVISD::ROLW", SDTIntShiftOp>;
def riscv_rorw : SDNode<"RISCVISD::RORW", SDTIntShiftOp>;
def UImmLog2XLenHalfAsmOperand : AsmOperandClass {
let Name = "UImmLog2XLenHalf";
let RenderMethod = "addImmOperands";
@@ -655,7 +658,6 @@ def SROIPat : ComplexPattern<XLenVT, 2, "SelectSROI", [or]>;
def SLLIUWPat : ComplexPattern<i64, 2, "SelectSLLIUW", [and]>;
def SLOIWPat : ComplexPattern<i64, 2, "SelectSLOIW", [sext_inreg]>;
def SROIWPat : ComplexPattern<i64, 2, "SelectSROIW", [or]>;
def RORIWPat : ComplexPattern<i64, 2, "SelectRORIW", [sext_inreg]>;
let Predicates = [HasStdExtZbbOrZbp] in {
def : Pat<(and GPR:$rs1, (not GPR:$rs2)), (ANDN GPR:$rs1, GPR:$rs2)>;
@@ -724,17 +726,11 @@ def : Pat<(riscv_gorci GPR:$rs1, timm:$shamt), (GORCI GPR:$rs1, timm:$shamt)>;
let Predicates = [HasStdExtZbp, IsRV32] in {
def : Pat<(rotr (bswap GPR:$rs1), (i32 16)), (GREVI GPR:$rs1, (i32 8))>;
// FIXME: Is grev better than rori?
def : Pat<(rotl GPR:$rs1, (i32 16)), (GREVI GPR:$rs1, (i32 16))>;
def : Pat<(rotr GPR:$rs1, (i32 16)), (GREVI GPR:$rs1, (i32 16))>;
def : Pat<(bswap GPR:$rs1), (GREVI GPR:$rs1, (i32 24))>;
def : Pat<(bitreverse GPR:$rs1), (GREVI GPR:$rs1, (i32 31))>;
} // Predicates = [HasStdExtZbp, IsRV32]
let Predicates = [HasStdExtZbp, IsRV64] in {
// FIXME: Is grev better than rori?
def : Pat<(rotl GPR:$rs1, (i64 32)), (GREVI GPR:$rs1, (i64 32))>;
def : Pat<(rotr GPR:$rs1, (i64 32)), (GREVI GPR:$rs1, (i64 32))>;
def : Pat<(bswap GPR:$rs1), (GREVI GPR:$rs1, (i64 56))>;
def : Pat<(bitreverse GPR:$rs1), (GREVI GPR:$rs1, (i64 63))>;
} // Predicates = [HasStdExtZbp, IsRV64]
@@ -890,12 +886,14 @@ def : Pat<(not (riscv_srlw (not GPR:$rs1), GPR:$rs2)),
} // Predicates = [HasStdExtZbb, IsRV64]
let Predicates = [HasStdExtZbbOrZbp, IsRV64] in {
def : Pat<(or (riscv_sllw GPR:$rs1, GPR:$rs2),
(riscv_srlw GPR:$rs1, (ineg GPR:$rs2))),
def : Pat<(riscv_rolw GPR:$rs1, GPR:$rs2),
(ROLW GPR:$rs1, GPR:$rs2)>;
def : Pat<(or (riscv_sllw GPR:$rs1, (ineg GPR:$rs2)),
(riscv_srlw GPR:$rs1, GPR:$rs2)),
def : Pat<(riscv_rorw GPR:$rs1, GPR:$rs2),
(RORW GPR:$rs1, GPR:$rs2)>;
def : Pat<(riscv_rorw GPR:$rs1, uimm5:$rs2),
(RORIW GPR:$rs1, uimm5:$rs2)>;
def : Pat<(riscv_rolw GPR:$rs1, uimm5:$rs2),
(RORIW GPR:$rs1, (ImmROTL2RW uimm5:$rs2))>;
} // Predicates = [HasStdExtZbbOrZbp, IsRV64]
let Predicates = [HasStdExtZbs, IsRV64] in {
@@ -916,10 +914,6 @@ def : Pat<(SROIWPat GPR:$rs1, uimmlog2xlen:$shamt),
(SROIW GPR:$rs1, uimmlog2xlen:$shamt)>;
} // Predicates = [HasStdExtZbb, IsRV64]
let Predicates = [HasStdExtZbbOrZbp, IsRV64] in
def : Pat<(RORIWPat GPR:$rs1, uimmlog2xlen:$shamt),
(RORIW GPR:$rs1, uimmlog2xlen:$shamt)>;
let Predicates = [HasStdExtZbp, IsRV64] in {
def : Pat<(riscv_greviw GPR:$rs1, timm:$shamt), (GREVIW GPR:$rs1, timm:$shamt)>;
def : Pat<(riscv_gorciw GPR:$rs1, timm:$shamt), (GORCIW GPR:$rs1, timm:$shamt)>;


@@ -1126,12 +1126,12 @@ define i32 @grev16_i32(i32 %a) nounwind {
;
; RV32IB-LABEL: grev16_i32:
; RV32IB: # %bb.0:
; RV32IB-NEXT: rev16 a0, a0
; RV32IB-NEXT: rori a0, a0, 16
; RV32IB-NEXT: ret
;
; RV32IBP-LABEL: grev16_i32:
; RV32IBP: # %bb.0:
; RV32IBP-NEXT: rev16 a0, a0
; RV32IBP-NEXT: rori a0, a0, 16
; RV32IBP-NEXT: ret
%shl = shl i32 %a, 16
%shr = lshr i32 %a, 16
@@ -1152,12 +1152,12 @@ define signext i32 @grev16_i32_fshl(i32 signext %a) nounwind {
;
; RV32IB-LABEL: grev16_i32_fshl:
; RV32IB: # %bb.0:
; RV32IB-NEXT: rev16 a0, a0
; RV32IB-NEXT: rori a0, a0, 16
; RV32IB-NEXT: ret
;
; RV32IBP-LABEL: grev16_i32_fshl:
; RV32IBP: # %bb.0:
; RV32IBP-NEXT: rev16 a0, a0
; RV32IBP-NEXT: rori a0, a0, 16
; RV32IBP-NEXT: ret
%or = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 16)
ret i32 %or
@@ -1173,12 +1173,12 @@ define signext i32 @grev16_i32_fshr(i32 signext %a) nounwind {
;
; RV32IB-LABEL: grev16_i32_fshr:
; RV32IB: # %bb.0:
; RV32IB-NEXT: rev16 a0, a0
; RV32IB-NEXT: rori a0, a0, 16
; RV32IB-NEXT: ret
;
; RV32IBP-LABEL: grev16_i32_fshr:
; RV32IBP: # %bb.0:
; RV32IBP-NEXT: rev16 a0, a0
; RV32IBP-NEXT: rori a0, a0, 16
; RV32IBP-NEXT: ret
%or = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 16)
ret i32 %or
@@ -1197,14 +1197,14 @@ define i64 @grev16_i64(i64 %a) nounwind {
;
; RV32IB-LABEL: grev16_i64:
; RV32IB: # %bb.0:
; RV32IB-NEXT: rev16 a0, a0
; RV32IB-NEXT: rev16 a1, a1
; RV32IB-NEXT: rori a0, a0, 16
; RV32IB-NEXT: rori a1, a1, 16
; RV32IB-NEXT: ret
;
; RV32IBP-LABEL: grev16_i64:
; RV32IBP: # %bb.0:
; RV32IBP-NEXT: rev16 a0, a0
; RV32IBP-NEXT: rev16 a1, a1
; RV32IBP-NEXT: rori a0, a0, 16
; RV32IBP-NEXT: rori a1, a1, 16
; RV32IBP-NEXT: ret
%and = shl i64 %a, 16
%shl = and i64 %and, -281470681808896


@@ -374,7 +374,6 @@ define signext i32 @rori_i32_fshl(i32 signext %a) nounwind {
}
; Similar to rori_i32_fshl, but doesn't sign extend the result.
; FIXME: We should be using RORIW, but we need a sext_inreg.
define void @rori_i32_fshl_nosext(i32 signext %a, i32* %x) nounwind {
; RV64I-LABEL: rori_i32_fshl_nosext:
; RV64I: # %bb.0:
@@ -386,25 +385,19 @@ define void @rori_i32_fshl_nosext(i32 signext %a, i32* %x) nounwind {
;
; RV64IB-LABEL: rori_i32_fshl_nosext:
; RV64IB: # %bb.0:
; RV64IB-NEXT: srliw a2, a0, 1
; RV64IB-NEXT: slli a0, a0, 31
; RV64IB-NEXT: or a0, a0, a2
; RV64IB-NEXT: roriw a0, a0, 1
; RV64IB-NEXT: sw a0, 0(a1)
; RV64IB-NEXT: ret
;
; RV64IBB-LABEL: rori_i32_fshl_nosext:
; RV64IBB: # %bb.0:
; RV64IBB-NEXT: srliw a2, a0, 1
; RV64IBB-NEXT: slli a0, a0, 31
; RV64IBB-NEXT: or a0, a0, a2
; RV64IBB-NEXT: roriw a0, a0, 1
; RV64IBB-NEXT: sw a0, 0(a1)
; RV64IBB-NEXT: ret
;
; RV64IBP-LABEL: rori_i32_fshl_nosext:
; RV64IBP: # %bb.0:
; RV64IBP-NEXT: srliw a2, a0, 1
; RV64IBP-NEXT: slli a0, a0, 31
; RV64IBP-NEXT: or a0, a0, a2
; RV64IBP-NEXT: roriw a0, a0, 1
; RV64IBP-NEXT: sw a0, 0(a1)
; RV64IBP-NEXT: ret
%1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 31)
@@ -440,7 +433,6 @@ define signext i32 @rori_i32_fshr(i32 signext %a) nounwind {
}
; Similar to rori_i32_fshr, but doesn't sign extend the result.
; FIXME: We should be using RORIW, but we need a sext_inreg.
define void @rori_i32_fshr_nosext(i32 signext %a, i32* %x) nounwind {
; RV64I-LABEL: rori_i32_fshr_nosext:
; RV64I: # %bb.0:
@@ -452,25 +444,19 @@ define void @rori_i32_fshr_nosext(i32 signext %a, i32* %x) nounwind {
;
; RV64IB-LABEL: rori_i32_fshr_nosext:
; RV64IB: # %bb.0:
; RV64IB-NEXT: slli a2, a0, 1
; RV64IB-NEXT: srliw a0, a0, 31
; RV64IB-NEXT: or a0, a0, a2
; RV64IB-NEXT: roriw a0, a0, 31
; RV64IB-NEXT: sw a0, 0(a1)
; RV64IB-NEXT: ret
;
; RV64IBB-LABEL: rori_i32_fshr_nosext:
; RV64IBB: # %bb.0:
; RV64IBB-NEXT: slli a2, a0, 1
; RV64IBB-NEXT: srliw a0, a0, 31
; RV64IBB-NEXT: or a0, a0, a2
; RV64IBB-NEXT: roriw a0, a0, 31
; RV64IBB-NEXT: sw a0, 0(a1)
; RV64IBB-NEXT: ret
;
; RV64IBP-LABEL: rori_i32_fshr_nosext:
; RV64IBP: # %bb.0:
; RV64IBP-NEXT: slli a2, a0, 1
; RV64IBP-NEXT: srliw a0, a0, 31
; RV64IBP-NEXT: or a0, a0, a2
; RV64IBP-NEXT: roriw a0, a0, 31
; RV64IBP-NEXT: sw a0, 0(a1)
; RV64IBP-NEXT: ret
%1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 31)


@@ -1377,12 +1377,12 @@ define i64 @grev32(i64 %a) nounwind {
;
; RV64IB-LABEL: grev32:
; RV64IB: # %bb.0:
; RV64IB-NEXT: rev32 a0, a0
; RV64IB-NEXT: rori a0, a0, 32
; RV64IB-NEXT: ret
;
; RV64IBP-LABEL: grev32:
; RV64IBP: # %bb.0:
; RV64IBP-NEXT: rev32 a0, a0
; RV64IBP-NEXT: rori a0, a0, 32
; RV64IBP-NEXT: ret
%shl = shl i64 %a, 32
%shr = lshr i64 %a, 32
@@ -1403,12 +1403,12 @@ define i64 @grev32_fshl(i64 %a) nounwind {
;
; RV64IB-LABEL: grev32_fshl:
; RV64IB: # %bb.0:
; RV64IB-NEXT: rev32 a0, a0
; RV64IB-NEXT: rori a0, a0, 32
; RV64IB-NEXT: ret
;
; RV64IBP-LABEL: grev32_fshl:
; RV64IBP: # %bb.0:
; RV64IBP-NEXT: rev32 a0, a0
; RV64IBP-NEXT: rori a0, a0, 32
; RV64IBP-NEXT: ret
%or = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 32)
ret i64 %or
@@ -1424,12 +1424,12 @@ define i64 @grev32_fshr(i64 %a) nounwind {
;
; RV64IB-LABEL: grev32_fshr:
; RV64IB: # %bb.0:
; RV64IB-NEXT: rev32 a0, a0
; RV64IB-NEXT: rori a0, a0, 32
; RV64IB-NEXT: ret
;
; RV64IBP-LABEL: grev32_fshr:
; RV64IBP: # %bb.0:
; RV64IBP-NEXT: rev32 a0, a0
; RV64IBP-NEXT: rori a0, a0, 32
; RV64IBP-NEXT: ret
%or = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 32)
ret i64 %or