1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 13:11:39 +01:00

[RISCV] Add missing patterns for rotr with immediate for Zbb/Zbp extensions.

DAGCombine doesn't canonicalize rotl/rotr with immediate so we
need patterns for both.

Remove the custom matcher for rotl to RORI and just use a SDNodeXForm
to convert the immediate instead. Doing this gives priority to the
rev32/rev16 versions of grevi over rori, since a pattern with an
explicit immediate is more specific than one matching any immediate.
I also added rotr patterns for
rev32/rev16. And removed the (or (shl), (shr)) patterns that should be
combined to rotl by DAG combine.

There is at least one other grev pattern that probably needs
another rotr pattern, but we need more test coverage first.

Differential Revision: https://reviews.llvm.org/D90575
This commit is contained in:
Craig Topper 2020-11-03 09:33:06 -08:00
parent 95f20bec93
commit b3e56d6425
7 changed files with 39 additions and 79 deletions

View File

@ -284,44 +284,6 @@ bool RISCVDAGToDAGISel::SelectSROI(SDValue N, SDValue &RS1, SDValue &Shamt) {
return false;
}
// Match a constant-amount rotate for selection as RORI (Rotate Right
// Immediate). The node tree we look for is:
//
//    (ROTL RS1, VC)
//
// The compiler canonicalizes the rotateright32/rotateright64 intrinsics'
// immediate right-rotations into left-rotations. Because a left rotation
// is trivially a right rotation by the complementary amount, the 'B'
// extension provides no ROLI encoding; we therefore select the constant
// left rotation as RORI with:
//
//    Shamt == XLen - VC
bool RISCVDAGToDAGISel::SelectRORI(SDValue N, SDValue &RS1, SDValue &Shamt) {
  // Only a rotate-left whose amount is a compile-time constant qualifies.
  if (N.getOpcode() != ISD::ROTL || !isa<ConstantSDNode>(N.getOperand(1)))
    return false;
  MVT XLenVT = Subtarget->getXLenVT();
  if (XLenVT == MVT::i64) {
    uint64_t RotAmt = N.getConstantOperandVal(1);
    RS1 = N.getOperand(0);
    // Complement the amount: rotl x, C == rotr x, (64 - C).
    Shamt = CurDAG->getTargetConstant(64 - RotAmt, SDLoc(N),
                                      N.getOperand(1).getValueType());
    return true;
  }
  if (XLenVT == MVT::i32) {
    uint32_t RotAmt = N.getConstantOperandVal(1);
    RS1 = N.getOperand(0);
    // Complement the amount: rotl x, C == rotr x, (32 - C).
    Shamt = CurDAG->getTargetConstant(32 - RotAmt, SDLoc(N),
                                      N.getOperand(1).getValueType());
    return true;
  }
  return false;
}
// Check that it is a SLLIUW (Shift Logical Left Immediate Unsigned i32
// on RV64).
// SLLIUW is the same as SLLI except for the fact that it clears the bits

View File

@ -47,7 +47,6 @@ public:
bool SelectSLOI(SDValue N, SDValue &RS1, SDValue &Shamt);
bool SelectSROI(SDValue N, SDValue &RS1, SDValue &Shamt);
bool SelectRORI(SDValue N, SDValue &RS1, SDValue &Shamt);
bool SelectSLLIUW(SDValue N, SDValue &RS1, SDValue &Shamt);
bool SelectSLOIW(SDValue N, SDValue &RS1, SDValue &Shamt);
bool SelectSROIW(SDValue N, SDValue &RS1, SDValue &Shamt);

View File

@ -46,6 +46,14 @@ def ImmSub32 : SDNodeXForm<imm, [{
N->getValueType(0));
}]>;
// Convert a rotl immediate to the equivalent rotr immediate:
// rotl x, C == rotr x, (XLen - C). This lets rotl-by-constant be
// selected as RORI, since the 'B' extension has no ROLI encoding.
def ImmROTL2R : SDNodeXForm<imm, [{
uint64_t XLen = Subtarget->getXLen();
return CurDAG->getTargetConstant(XLen - N->getZExtValue(), SDLoc(N),
N->getValueType(0));
}]>;
//===----------------------------------------------------------------------===//
// Instruction class templates
//===----------------------------------------------------------------------===//
@ -644,7 +652,6 @@ def : CompressPat<(PACK GPRC:$rs1, GPRC:$rs1, X0),
//===----------------------------------------------------------------------===//
def SLOIPat : ComplexPattern<XLenVT, 2, "SelectSLOI", [or]>;
def SROIPat : ComplexPattern<XLenVT, 2, "SelectSROI", [or]>;
def RORIPat : ComplexPattern<XLenVT, 2, "SelectRORI", [rotl]>;
def SLLIUWPat : ComplexPattern<i64, 2, "SelectSLLIUW", [and]>;
def SLOIWPat : ComplexPattern<i64, 2, "SelectSLOIW", [sext_inreg]>;
def SROIWPat : ComplexPattern<i64, 2, "SelectSROIW", [or]>;
@ -709,10 +716,12 @@ def : Pat<(SROIPat GPR:$rs1, uimmlog2xlen:$shamt),
// There's no encoding for roli in the current version of the 'B' extension
// (v0.92) as it can be implemented with rori by negating the immediate.
// For this reason we pattern-match only against rori[w].
let Predicates = [HasStdExtZbbOrZbp] in
def : Pat<(RORIPat GPR:$rs1, uimmlog2xlen:$shamt),
let Predicates = [HasStdExtZbbOrZbp] in {
def : Pat<(rotr GPR:$rs1, uimmlog2xlen:$shamt),
(RORI GPR:$rs1, uimmlog2xlen:$shamt)>;
def : Pat<(rotl GPR:$rs1, uimmlog2xlen:$shamt),
(RORI GPR:$rs1, (ImmROTL2R uimmlog2xlen:$shamt))>;
}
// We don't pattern-match sbclri[w], sbseti[w], sbinvi[w] because they are
// pattern-matched by simple andi, ori, and xori.
@ -778,9 +787,9 @@ def : Pat<(or (and (shl GPR:$rs1, (i32 8)), (i32 0xFF00FF00)),
(and (srl GPR:$rs1, (i32 8)), (i32 0x00FF00FF))),
(GREVI GPR:$rs1, (i32 8))>;
def : Pat<(rotr (bswap GPR:$rs1), (i32 16)), (GREVI GPR:$rs1, (i32 8))>;
def : Pat<(or (shl GPR:$rs1, (i32 16)), (srl GPR:$rs1, (i32 16))),
(GREVI GPR:$rs1, (i32 16))>;
// FIXME: Is grev better than rori?
def : Pat<(rotl GPR:$rs1, (i32 16)), (GREVI GPR:$rs1, (i32 16))>;
def : Pat<(rotr GPR:$rs1, (i32 16)), (GREVI GPR:$rs1, (i32 16))>;
def : Pat<(bswap GPR:$rs1), (GREVI GPR:$rs1, (i32 24))>;
def : Pat<(bitreverse GPR:$rs1), (GREVI GPR:$rs1, (i32 31))>;
} // Predicates = [HasStdExtZbp, IsRV32]
@ -801,9 +810,9 @@ def : Pat<(or (and (shl GPR:$rs1, (i64 8)), (i64 0xFF00FF00FF00FF00)),
def : Pat<(or (and (shl GPR:$rs1, (i64 16)), (i64 0xFFFF0000FFFF0000)),
(and (srl GPR:$rs1, (i64 16)), (i64 0x0000FFFF0000FFFF))),
(GREVI GPR:$rs1, (i64 16))>;
def : Pat<(or (shl GPR:$rs1, (i64 32)), (srl GPR:$rs1, (i64 32))),
(GREVI GPR:$rs1, (i64 32))>;
// FIXME: Is grev better than rori?
def : Pat<(rotl GPR:$rs1, (i64 32)), (GREVI GPR:$rs1, (i64 32))>;
def : Pat<(rotr GPR:$rs1, (i64 32)), (GREVI GPR:$rs1, (i64 32))>;
def : Pat<(bswap GPR:$rs1), (GREVI GPR:$rs1, (i64 56))>;
def : Pat<(bitreverse GPR:$rs1), (GREVI GPR:$rs1, (i64 63))>;
} // Predicates = [HasStdExtZbp, IsRV64]

View File

@ -663,20 +663,17 @@ define i32 @rori_i32_fshr(i32 %a) nounwind {
;
; RV32IB-LABEL: rori_i32_fshr:
; RV32IB: # %bb.0:
; RV32IB-NEXT: addi a1, zero, 31
; RV32IB-NEXT: ror a0, a0, a1
; RV32IB-NEXT: rori a0, a0, 31
; RV32IB-NEXT: ret
;
; RV32IBB-LABEL: rori_i32_fshr:
; RV32IBB: # %bb.0:
; RV32IBB-NEXT: addi a1, zero, 31
; RV32IBB-NEXT: ror a0, a0, a1
; RV32IBB-NEXT: rori a0, a0, 31
; RV32IBB-NEXT: ret
;
; RV32IBP-LABEL: rori_i32_fshr:
; RV32IBP: # %bb.0:
; RV32IBP-NEXT: addi a1, zero, 31
; RV32IBP-NEXT: ror a0, a0, a1
; RV32IBP-NEXT: rori a0, a0, 31
; RV32IBP-NEXT: ret
%1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 31)
ret i32 %1

View File

@ -646,12 +646,12 @@ define i32 @grev16_i32(i32 %a) nounwind {
;
; RV32IB-LABEL: grev16_i32:
; RV32IB: # %bb.0:
; RV32IB-NEXT: rori a0, a0, 16
; RV32IB-NEXT: rev16 a0, a0
; RV32IB-NEXT: ret
;
; RV32IBP-LABEL: grev16_i32:
; RV32IBP: # %bb.0:
; RV32IBP-NEXT: rori a0, a0, 16
; RV32IBP-NEXT: rev16 a0, a0
; RV32IBP-NEXT: ret
%shl = shl i32 %a, 16
%shr = lshr i32 %a, 16
@ -672,12 +672,12 @@ define signext i32 @grev16_i32_fshl(i32 signext %a) nounwind {
;
; RV32IB-LABEL: grev16_i32_fshl:
; RV32IB: # %bb.0:
; RV32IB-NEXT: rori a0, a0, 16
; RV32IB-NEXT: rev16 a0, a0
; RV32IB-NEXT: ret
;
; RV32IBP-LABEL: grev16_i32_fshl:
; RV32IBP: # %bb.0:
; RV32IBP-NEXT: rori a0, a0, 16
; RV32IBP-NEXT: rev16 a0, a0
; RV32IBP-NEXT: ret
%or = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 16)
ret i32 %or
@ -693,14 +693,12 @@ define signext i32 @grev16_i32_fshr(i32 signext %a) nounwind {
;
; RV32IB-LABEL: grev16_i32_fshr:
; RV32IB: # %bb.0:
; RV32IB-NEXT: addi a1, zero, 16
; RV32IB-NEXT: ror a0, a0, a1
; RV32IB-NEXT: rev16 a0, a0
; RV32IB-NEXT: ret
;
; RV32IBP-LABEL: grev16_i32_fshr:
; RV32IBP: # %bb.0:
; RV32IBP-NEXT: addi a1, zero, 16
; RV32IBP-NEXT: ror a0, a0, a1
; RV32IBP-NEXT: rev16 a0, a0
; RV32IBP-NEXT: ret
%or = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 16)
ret i32 %or
@ -719,14 +717,14 @@ define i64 @grev16_i64(i64 %a) nounwind {
;
; RV32IB-LABEL: grev16_i64:
; RV32IB: # %bb.0:
; RV32IB-NEXT: rori a0, a0, 16
; RV32IB-NEXT: rori a1, a1, 16
; RV32IB-NEXT: rev16 a0, a0
; RV32IB-NEXT: rev16 a1, a1
; RV32IB-NEXT: ret
;
; RV32IBP-LABEL: grev16_i64:
; RV32IBP: # %bb.0:
; RV32IBP-NEXT: rori a0, a0, 16
; RV32IBP-NEXT: rori a1, a1, 16
; RV32IBP-NEXT: rev16 a0, a0
; RV32IBP-NEXT: rev16 a1, a1
; RV32IBP-NEXT: ret
%and = shl i64 %a, 16
%shl = and i64 %and, -281470681808896

View File

@ -456,20 +456,17 @@ define i64 @rori_i64_fshr(i64 %a) nounwind {
;
; RV64IB-LABEL: rori_i64_fshr:
; RV64IB: # %bb.0:
; RV64IB-NEXT: addi a1, zero, 63
; RV64IB-NEXT: ror a0, a0, a1
; RV64IB-NEXT: rori a0, a0, 63
; RV64IB-NEXT: ret
;
; RV64IBB-LABEL: rori_i64_fshr:
; RV64IBB: # %bb.0:
; RV64IBB-NEXT: addi a1, zero, 63
; RV64IBB-NEXT: ror a0, a0, a1
; RV64IBB-NEXT: rori a0, a0, 63
; RV64IBB-NEXT: ret
;
; RV64IBP-LABEL: rori_i64_fshr:
; RV64IBP: # %bb.0:
; RV64IBP-NEXT: addi a1, zero, 63
; RV64IBP-NEXT: ror a0, a0, a1
; RV64IBP-NEXT: rori a0, a0, 63
; RV64IBP-NEXT: ret
%1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 63)
ret i64 %1

View File

@ -840,12 +840,12 @@ define i64 @grev32(i64 %a) nounwind {
;
; RV64IB-LABEL: grev32:
; RV64IB: # %bb.0:
; RV64IB-NEXT: rori a0, a0, 32
; RV64IB-NEXT: rev32 a0, a0
; RV64IB-NEXT: ret
;
; RV64IBP-LABEL: grev32:
; RV64IBP: # %bb.0:
; RV64IBP-NEXT: rori a0, a0, 32
; RV64IBP-NEXT: rev32 a0, a0
; RV64IBP-NEXT: ret
%shl = shl i64 %a, 32
%shr = lshr i64 %a, 32
@ -866,12 +866,12 @@ define i64 @grev32_fshl(i64 %a) nounwind {
;
; RV64IB-LABEL: grev32_fshl:
; RV64IB: # %bb.0:
; RV64IB-NEXT: rori a0, a0, 32
; RV64IB-NEXT: rev32 a0, a0
; RV64IB-NEXT: ret
;
; RV64IBP-LABEL: grev32_fshl:
; RV64IBP: # %bb.0:
; RV64IBP-NEXT: rori a0, a0, 32
; RV64IBP-NEXT: rev32 a0, a0
; RV64IBP-NEXT: ret
%or = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 32)
ret i64 %or
@ -887,14 +887,12 @@ define i64 @grev32_fshr(i64 %a) nounwind {
;
; RV64IB-LABEL: grev32_fshr:
; RV64IB: # %bb.0:
; RV64IB-NEXT: addi a1, zero, 32
; RV64IB-NEXT: ror a0, a0, a1
; RV64IB-NEXT: rev32 a0, a0
; RV64IB-NEXT: ret
;
; RV64IBP-LABEL: grev32_fshr:
; RV64IBP: # %bb.0:
; RV64IBP-NEXT: addi a1, zero, 32
; RV64IBP-NEXT: ror a0, a0, a1
; RV64IBP-NEXT: rev32 a0, a0
; RV64IBP-NEXT: ret
%or = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 32)
ret i64 %or