1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 13:11:39 +01:00

[RISCV] Add missing patterns for rotr with immediate for Zbb/Zbp extensions.

DAGCombine doesn't canonicalize rotl/rotr with immediate so we
need patterns for both.

Remove the custom matcher for rotl to RORI and just use a SDNodeXForm
to convert the immediate instead. Doing this gives priority to the
rev32/rev16 versions of grevi over rori, since a pattern with an
explicit immediate is more specific than one matching any immediate.
I also added rotr patterns for
rev32/rev16. And removed the (or (shl), (shr)) patterns that should be
combined to rotl by DAG combine.

There is at least one other grev pattern that probably needs
another rotr pattern, but we need more test coverage first.

Differential Revision: https://reviews.llvm.org/D90575
This commit is contained in:
Craig Topper 2020-11-03 09:33:06 -08:00
parent 95f20bec93
commit b3e56d6425
7 changed files with 39 additions and 79 deletions

View File

@ -284,44 +284,6 @@ bool RISCVDAGToDAGISel::SelectSROI(SDValue N, SDValue &RS1, SDValue &Shamt) {
return false;
}
// Match a constant-amount rotate for selection as RORI (Rotate Right
// Immediate). The node tree we look for is:
//
//    (ROTL RS1, VC)
//
// The compiler canonicalizes the rotateright32/rotateright64 intrinsics'
// immediate right-rotations into left-rotations. Because a left rotation
// is trivially a right rotation by the complementary amount, the 'B'
// extension provides no ROLI encoding; we therefore select the constant
// left rotation as RORI with:
//
//    Shamt == XLen - VC
bool RISCVDAGToDAGISel::SelectRORI(SDValue N, SDValue &RS1, SDValue &Shamt) {
  // Only a rotate-left whose amount is a compile-time constant qualifies.
  if (N.getOpcode() != ISD::ROTL || !isa<ConstantSDNode>(N.getOperand(1)))
    return false;
  MVT XLenVT = Subtarget->getXLenVT();
  if (XLenVT == MVT::i64) {
    uint64_t RotAmt = N.getConstantOperandVal(1);
    RS1 = N.getOperand(0);
    // Complement the amount: rotl x, C == rotr x, (64 - C).
    Shamt = CurDAG->getTargetConstant(64 - RotAmt, SDLoc(N),
                                      N.getOperand(1).getValueType());
    return true;
  }
  if (XLenVT == MVT::i32) {
    uint32_t RotAmt = N.getConstantOperandVal(1);
    RS1 = N.getOperand(0);
    // Complement the amount: rotl x, C == rotr x, (32 - C).
    Shamt = CurDAG->getTargetConstant(32 - RotAmt, SDLoc(N),
                                      N.getOperand(1).getValueType());
    return true;
  }
  return false;
}
// Check that it is a SLLIUW (Shift Logical Left Immediate Unsigned i32
// on RV64).
// SLLIUW is the same as SLLI except for the fact that it clears the bits

View File

@ -47,7 +47,6 @@ public:
bool SelectSLOI(SDValue N, SDValue &RS1, SDValue &Shamt);
bool SelectSROI(SDValue N, SDValue &RS1, SDValue &Shamt);
bool SelectRORI(SDValue N, SDValue &RS1, SDValue &Shamt);
bool SelectSLLIUW(SDValue N, SDValue &RS1, SDValue &Shamt);
bool SelectSLOIW(SDValue N, SDValue &RS1, SDValue &Shamt);
bool SelectSROIW(SDValue N, SDValue &RS1, SDValue &Shamt);

View File

@ -46,6 +46,14 @@ def ImmSub32 : SDNodeXForm<imm, [{
N->getValueType(0));
}]>;
// Convert a rotl immediate to the equivalent rotr immediate:
// rotl x, C == rotr x, (XLen - C). This lets rotl-by-constant be
// selected as RORI, since the 'B' extension has no ROLI encoding.
def ImmROTL2R : SDNodeXForm<imm, [{
uint64_t XLen = Subtarget->getXLen();
return CurDAG->getTargetConstant(XLen - N->getZExtValue(), SDLoc(N),
N->getValueType(0));
}]>;
//===----------------------------------------------------------------------===//
// Instruction class templates
//===----------------------------------------------------------------------===//
@ -644,7 +652,6 @@ def : CompressPat<(PACK GPRC:$rs1, GPRC:$rs1, X0),
//===----------------------------------------------------------------------===//
def SLOIPat : ComplexPattern<XLenVT, 2, "SelectSLOI", [or]>;
def SROIPat : ComplexPattern<XLenVT, 2, "SelectSROI", [or]>;
def RORIPat : ComplexPattern<XLenVT, 2, "SelectRORI", [rotl]>;
def SLLIUWPat : ComplexPattern<i64, 2, "SelectSLLIUW", [and]>;
def SLOIWPat : ComplexPattern<i64, 2, "SelectSLOIW", [sext_inreg]>;
def SROIWPat : ComplexPattern<i64, 2, "SelectSROIW", [or]>;
@ -709,10 +716,12 @@ def : Pat<(SROIPat GPR:$rs1, uimmlog2xlen:$shamt),
// There's no encoding for roli in the current version of the 'B' extension
// (v0.92) as it can be implemented with rori by negating the immediate.
// For this reason we pattern-match only against rori[w].
let Predicates = [HasStdExtZbbOrZbp] in
def : Pat<(RORIPat GPR:$rs1, uimmlog2xlen:$shamt),
let Predicates = [HasStdExtZbbOrZbp] in {
def : Pat<(rotr GPR:$rs1, uimmlog2xlen:$shamt),
(RORI GPR:$rs1, uimmlog2xlen:$shamt)>;
def : Pat<(rotl GPR:$rs1, uimmlog2xlen:$shamt),
(RORI GPR:$rs1, (ImmROTL2R uimmlog2xlen:$shamt))>;
}
// We don't pattern-match sbclri[w], sbseti[w], sbinvi[w] because they are
// pattern-matched by simple andi, ori, and xori.
@ -778,9 +787,9 @@ def : Pat<(or (and (shl GPR:$rs1, (i32 8)), (i32 0xFF00FF00)),
(and (srl GPR:$rs1, (i32 8)), (i32 0x00FF00FF))),
(GREVI GPR:$rs1, (i32 8))>;
def : Pat<(rotr (bswap GPR:$rs1), (i32 16)), (GREVI GPR:$rs1, (i32 8))>;
def : Pat<(or (shl GPR:$rs1, (i32 16)), (srl GPR:$rs1, (i32 16))),
(GREVI GPR:$rs1, (i32 16))>;
// FIXME: Is grev better than rori?
def : Pat<(rotl GPR:$rs1, (i32 16)), (GREVI GPR:$rs1, (i32 16))>;
def : Pat<(rotr GPR:$rs1, (i32 16)), (GREVI GPR:$rs1, (i32 16))>;
def : Pat<(bswap GPR:$rs1), (GREVI GPR:$rs1, (i32 24))>;
def : Pat<(bitreverse GPR:$rs1), (GREVI GPR:$rs1, (i32 31))>;
} // Predicates = [HasStdExtZbp, IsRV32]
@ -801,9 +810,9 @@ def : Pat<(or (and (shl GPR:$rs1, (i64 8)), (i64 0xFF00FF00FF00FF00)),
def : Pat<(or (and (shl GPR:$rs1, (i64 16)), (i64 0xFFFF0000FFFF0000)),
(and (srl GPR:$rs1, (i64 16)), (i64 0x0000FFFF0000FFFF))),
(GREVI GPR:$rs1, (i64 16))>;
def : Pat<(or (shl GPR:$rs1, (i64 32)), (srl GPR:$rs1, (i64 32))),
(GREVI GPR:$rs1, (i64 32))>;
// FIXME: Is grev better than rori?
def : Pat<(rotl GPR:$rs1, (i64 32)), (GREVI GPR:$rs1, (i64 32))>;
def : Pat<(rotr GPR:$rs1, (i64 32)), (GREVI GPR:$rs1, (i64 32))>;
def : Pat<(bswap GPR:$rs1), (GREVI GPR:$rs1, (i64 56))>;
def : Pat<(bitreverse GPR:$rs1), (GREVI GPR:$rs1, (i64 63))>;
} // Predicates = [HasStdExtZbp, IsRV64]

View File

@ -663,20 +663,17 @@ define i32 @rori_i32_fshr(i32 %a) nounwind {
;
; RV32IB-LABEL: rori_i32_fshr:
; RV32IB: # %bb.0:
; RV32IB-NEXT: addi a1, zero, 31
; RV32IB-NEXT: ror a0, a0, a1
; RV32IB-NEXT: rori a0, a0, 31
; RV32IB-NEXT: ret
;
; RV32IBB-LABEL: rori_i32_fshr:
; RV32IBB: # %bb.0:
; RV32IBB-NEXT: addi a1, zero, 31
; RV32IBB-NEXT: ror a0, a0, a1
; RV32IBB-NEXT: rori a0, a0, 31
; RV32IBB-NEXT: ret
;
; RV32IBP-LABEL: rori_i32_fshr:
; RV32IBP: # %bb.0:
; RV32IBP-NEXT: addi a1, zero, 31
; RV32IBP-NEXT: ror a0, a0, a1
; RV32IBP-NEXT: rori a0, a0, 31
; RV32IBP-NEXT: ret
%1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 31)
ret i32 %1

View File

@ -646,12 +646,12 @@ define i32 @grev16_i32(i32 %a) nounwind {
;
; RV32IB-LABEL: grev16_i32:
; RV32IB: # %bb.0:
; RV32IB-NEXT: rori a0, a0, 16
; RV32IB-NEXT: rev16 a0, a0
; RV32IB-NEXT: ret
;
; RV32IBP-LABEL: grev16_i32:
; RV32IBP: # %bb.0:
; RV32IBP-NEXT: rori a0, a0, 16
; RV32IBP-NEXT: rev16 a0, a0
; RV32IBP-NEXT: ret
%shl = shl i32 %a, 16
%shr = lshr i32 %a, 16
@ -672,12 +672,12 @@ define signext i32 @grev16_i32_fshl(i32 signext %a) nounwind {
;
; RV32IB-LABEL: grev16_i32_fshl:
; RV32IB: # %bb.0:
; RV32IB-NEXT: rori a0, a0, 16
; RV32IB-NEXT: rev16 a0, a0
; RV32IB-NEXT: ret
;
; RV32IBP-LABEL: grev16_i32_fshl:
; RV32IBP: # %bb.0:
; RV32IBP-NEXT: rori a0, a0, 16
; RV32IBP-NEXT: rev16 a0, a0
; RV32IBP-NEXT: ret
%or = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 16)
ret i32 %or
@ -693,14 +693,12 @@ define signext i32 @grev16_i32_fshr(i32 signext %a) nounwind {
;
; RV32IB-LABEL: grev16_i32_fshr:
; RV32IB: # %bb.0:
; RV32IB-NEXT: addi a1, zero, 16
; RV32IB-NEXT: ror a0, a0, a1
; RV32IB-NEXT: rev16 a0, a0
; RV32IB-NEXT: ret
;
; RV32IBP-LABEL: grev16_i32_fshr:
; RV32IBP: # %bb.0:
; RV32IBP-NEXT: addi a1, zero, 16
; RV32IBP-NEXT: ror a0, a0, a1
; RV32IBP-NEXT: rev16 a0, a0
; RV32IBP-NEXT: ret
%or = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 16)
ret i32 %or
@ -719,14 +717,14 @@ define i64 @grev16_i64(i64 %a) nounwind {
;
; RV32IB-LABEL: grev16_i64:
; RV32IB: # %bb.0:
; RV32IB-NEXT: rori a0, a0, 16
; RV32IB-NEXT: rori a1, a1, 16
; RV32IB-NEXT: rev16 a0, a0
; RV32IB-NEXT: rev16 a1, a1
; RV32IB-NEXT: ret
;
; RV32IBP-LABEL: grev16_i64:
; RV32IBP: # %bb.0:
; RV32IBP-NEXT: rori a0, a0, 16
; RV32IBP-NEXT: rori a1, a1, 16
; RV32IBP-NEXT: rev16 a0, a0
; RV32IBP-NEXT: rev16 a1, a1
; RV32IBP-NEXT: ret
%and = shl i64 %a, 16
%shl = and i64 %and, -281470681808896

View File

@ -456,20 +456,17 @@ define i64 @rori_i64_fshr(i64 %a) nounwind {
;
; RV64IB-LABEL: rori_i64_fshr:
; RV64IB: # %bb.0:
; RV64IB-NEXT: addi a1, zero, 63
; RV64IB-NEXT: ror a0, a0, a1
; RV64IB-NEXT: rori a0, a0, 63
; RV64IB-NEXT: ret
;
; RV64IBB-LABEL: rori_i64_fshr:
; RV64IBB: # %bb.0:
; RV64IBB-NEXT: addi a1, zero, 63
; RV64IBB-NEXT: ror a0, a0, a1
; RV64IBB-NEXT: rori a0, a0, 63
; RV64IBB-NEXT: ret
;
; RV64IBP-LABEL: rori_i64_fshr:
; RV64IBP: # %bb.0:
; RV64IBP-NEXT: addi a1, zero, 63
; RV64IBP-NEXT: ror a0, a0, a1
; RV64IBP-NEXT: rori a0, a0, 63
; RV64IBP-NEXT: ret
%1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 63)
ret i64 %1

View File

@ -840,12 +840,12 @@ define i64 @grev32(i64 %a) nounwind {
;
; RV64IB-LABEL: grev32:
; RV64IB: # %bb.0:
; RV64IB-NEXT: rori a0, a0, 32
; RV64IB-NEXT: rev32 a0, a0
; RV64IB-NEXT: ret
;
; RV64IBP-LABEL: grev32:
; RV64IBP: # %bb.0:
; RV64IBP-NEXT: rori a0, a0, 32
; RV64IBP-NEXT: rev32 a0, a0
; RV64IBP-NEXT: ret
%shl = shl i64 %a, 32
%shr = lshr i64 %a, 32
@ -866,12 +866,12 @@ define i64 @grev32_fshl(i64 %a) nounwind {
;
; RV64IB-LABEL: grev32_fshl:
; RV64IB: # %bb.0:
; RV64IB-NEXT: rori a0, a0, 32
; RV64IB-NEXT: rev32 a0, a0
; RV64IB-NEXT: ret
;
; RV64IBP-LABEL: grev32_fshl:
; RV64IBP: # %bb.0:
; RV64IBP-NEXT: rori a0, a0, 32
; RV64IBP-NEXT: rev32 a0, a0
; RV64IBP-NEXT: ret
%or = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 32)
ret i64 %or
@ -887,14 +887,12 @@ define i64 @grev32_fshr(i64 %a) nounwind {
;
; RV64IB-LABEL: grev32_fshr:
; RV64IB: # %bb.0:
; RV64IB-NEXT: addi a1, zero, 32
; RV64IB-NEXT: ror a0, a0, a1
; RV64IB-NEXT: rev32 a0, a0
; RV64IB-NEXT: ret
;
; RV64IBP-LABEL: grev32_fshr:
; RV64IBP: # %bb.0:
; RV64IBP-NEXT: addi a1, zero, 32
; RV64IBP-NEXT: ror a0, a0, a1
; RV64IBP-NEXT: rev32 a0, a0
; RV64IBP-NEXT: ret
%or = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 32)
ret i64 %or