
[RISCV] Copy isUnneededShiftMask from X86.

In d2927f786e877410d90c1e6f0e0c7d99524529c5, I added patterns
to remove (and X, 31) from sllw/srlw/sraw shift amounts.

There is code in SelectionDAGISel.cpp that knows to use
computeKnownBits to fill in bits of the mask that were removed
by SimplifyDemandedBits based on bits being known zero.

The non-W shift patterns use immbottomxlenset, which allows the
mask to have more than log2(xlen) trailing ones but has no call to
computeKnownBits to fill in mask bits that SimplifyDemandedBits may
have cleared.

This patch copies code from X86 to handle more than log2(xlen)
bottom bits set and uses computeKnownBits to fill in missing bits
before counting.
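
To make that check concrete, here is a minimal standalone sketch in plain
integers rather than LLVM's APInt/KnownBits machinery (helper names and
values are illustrative, not the patch's API):

#include <cassert>
#include <cstdint>

// Count the trailing one bits of V.
static int countTrailingOnes(uint64_t V) {
  int N = 0;
  while (V & 1) {
    V >>= 1;
    ++N;
  }
  return N;
}

// A mask on a shift amount is removable when its trailing ones cover every
// bit the shift reads (Width bits), possibly after treating known-zero bits
// of the shift amount as if the mask had kept them.
static bool isUnneededShiftMask(uint64_t Mask, uint64_t KnownZero,
                                unsigned Width) {
  if (countTrailingOnes(Mask) >= (int)Width)
    return true;
  return countTrailingOnes(Mask | KnownZero) >= (int)Width;
}

int main() {
  assert(isUnneededShiftMask(31, 0, 5));  // (b & 31) on RV32 is removable.
  assert(!isUnneededShiftMask(15, 0, 5)); // (b & 15) really clamps.
  // If b = (c << 1), bit 0 of b is known zero, so (b & 30) is removable too.
  assert(isUnneededShiftMask(30, 1, 5));
  return 0;
}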

Reviewed By: luismarques

Differential Revision: https://reviews.llvm.org/D95422
Craig Topper 2021-01-27 20:36:21 -08:00
parent 78c4743b21
commit ae2c374959
6 changed files with 401 additions and 308 deletions


@@ -17,6 +17,7 @@
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@@ -821,6 +822,21 @@ bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) {
return false;
}

// Helper to detect unneeded and instructions on shift amounts. Called
// from PatFrags in tablegen.
bool RISCVDAGToDAGISel::isUnneededShiftMask(SDNode *N, unsigned Width) const {
  assert(N->getOpcode() == ISD::AND && "Unexpected opcode");
  assert(Width >= 5 && N->getValueSizeInBits(0) >= (1 << Width) &&
         "Unexpected width");
  const APInt &Val = N->getConstantOperandAPInt(1);

  if (Val.countTrailingOnes() >= Width)
    return true;

  // Bits of the shift amount that are already known zero act as if the mask
  // had kept them, so fold them in before recounting the trailing ones.
  APInt Mask = Val | CurDAG->computeKnownBits(N->getOperand(0)).Zero;
  return Mask.countTrailingOnes() >= Width;
}

// Match (srl (and val, mask), imm) where the result would be a
// zero-extended 32-bit integer. i.e. the mask is 0xffffffff or the result
// is equivalent to this (SimplifyDemandedBits may have removed lower bits


@@ -45,6 +45,8 @@ public:
bool SelectAddrFI(SDValue Addr, SDValue &Base);
bool isUnneededShiftMask(SDNode *N, unsigned Width) const;
bool MatchSRLIW(SDNode *N) const;
bool MatchSLOI(SDNode *N) const;
bool MatchSROI(SDNode *N) const;


@@ -289,12 +289,6 @@ def ixlenimm_li : Operand<XLenVT> {
// Standalone (codegen-only) immleaf patterns.
def simm32 : ImmLeaf<XLenVT, [{return isInt<32>(Imm);}]>;
def simm32hi20 : ImmLeaf<XLenVT, [{return isShiftedInt<20, 12>(Imm);}]>;
// A mask value that won't affect significant shift bits.
def immbottomxlenset : ImmLeaf<XLenVT, [{
  if (Subtarget->is64Bit())
    return countTrailingOnes<uint64_t>(Imm) >= 6;
  return countTrailingOnes<uint64_t>(Imm) >= 5;
}]>;
// A 6-bit constant greater than 32.
def uimm6gt32 : ImmLeaf<XLenVT, [{
@@ -901,14 +895,21 @@ def : PatGprUimmLog2XLen<sra, SRAI>;
// typically introduced when the legalizer promotes the shift amount and
// zero-extends it). For RISC-V, the mask is unnecessary as shifts in the base
// ISA only read the least significant 5 bits (RV32I) or 6 bits (RV64I).
def shiftMaskXLen : PatFrag<(ops node:$lhs), (and node:$lhs, imm), [{
  return isUnneededShiftMask(N, Subtarget->is64Bit() ? 6 : 5);
}]>;
def shiftMask32 : PatFrag<(ops node:$lhs), (and node:$lhs, imm), [{
  return isUnneededShiftMask(N, 5);
}]>;
class shiftop<SDPatternOperator operator>
: PatFrags<(ops node:$val, node:$count),
[(operator node:$val, node:$count),
(operator node:$val, (and node:$count, immbottomxlenset))]>;
(operator node:$val, (shiftMaskXLen node:$count))]>;
class shiftopw<SDPatternOperator operator>
: PatFrags<(ops node:$val, node:$count),
[(operator node:$val, node:$count),
(operator node:$val, (and node:$count, (XLenVT 31)))]>;
(operator node:$val, (shiftMask32 node:$count))]>;
def : PatGprGpr<shiftop<shl>, SLL>;
def : PatGprGpr<shiftop<srl>, SRL>;
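
The comment above states the key semantic fact behind these patterns; a
self-contained check of it (sllRV32 is an illustrative model of the RV32I
instruction, not compiler code):

#include <cassert>
#include <cstdint>

// RV32I SLL reads only the low 5 bits of the amount register, so an
// explicit (and amt, 31) ahead of the shift changes nothing.
static uint32_t sllRV32(uint32_t Val, uint32_t Amt) {
  return Val << (Amt & 31); // hardware semantics
}

int main() {
  for (uint32_t Amt = 0; Amt < 256; ++Amt)
    assert(sllRV32(0x0dedbeefu, Amt) == sllRV32(0x0dedbeefu, Amt & 31));
  return 0;
}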


@@ -26,7 +26,6 @@ define void @cmpxchg_i8_monotonic_monotonic(i8* %ptr, i8 %cmp, i8 %val) nounwind
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: addi a4, zero, 255
; RV32IA-NEXT: sll a4, a4, a0
; RV32IA-NEXT: andi a1, a1, 255
@@ -103,7 +102,6 @@ define void @cmpxchg_i8_acquire_monotonic(i8* %ptr, i8 %cmp, i8 %val) nounwind {
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: addi a4, zero, 255
; RV32IA-NEXT: sll a4, a4, a0
; RV32IA-NEXT: andi a1, a1, 255
@@ -180,7 +178,6 @@ define void @cmpxchg_i8_acquire_acquire(i8* %ptr, i8 %cmp, i8 %val) nounwind {
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: addi a4, zero, 255
; RV32IA-NEXT: sll a4, a4, a0
; RV32IA-NEXT: andi a1, a1, 255
@@ -257,7 +254,6 @@ define void @cmpxchg_i8_release_monotonic(i8* %ptr, i8 %cmp, i8 %val) nounwind {
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: addi a4, zero, 255
; RV32IA-NEXT: sll a4, a4, a0
; RV32IA-NEXT: andi a1, a1, 255
@@ -334,7 +330,6 @@ define void @cmpxchg_i8_release_acquire(i8* %ptr, i8 %cmp, i8 %val) nounwind {
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: addi a4, zero, 255
; RV32IA-NEXT: sll a4, a4, a0
; RV32IA-NEXT: andi a1, a1, 255
@@ -411,7 +406,6 @@ define void @cmpxchg_i8_acq_rel_monotonic(i8* %ptr, i8 %cmp, i8 %val) nounwind {
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: addi a4, zero, 255
; RV32IA-NEXT: sll a4, a4, a0
; RV32IA-NEXT: andi a1, a1, 255
@@ -488,7 +482,6 @@ define void @cmpxchg_i8_acq_rel_acquire(i8* %ptr, i8 %cmp, i8 %val) nounwind {
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: addi a4, zero, 255
; RV32IA-NEXT: sll a4, a4, a0
; RV32IA-NEXT: andi a1, a1, 255
@@ -565,7 +558,6 @@ define void @cmpxchg_i8_seq_cst_monotonic(i8* %ptr, i8 %cmp, i8 %val) nounwind {
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: addi a4, zero, 255
; RV32IA-NEXT: sll a4, a4, a0
; RV32IA-NEXT: andi a1, a1, 255
@@ -642,7 +634,6 @@ define void @cmpxchg_i8_seq_cst_acquire(i8* %ptr, i8 %cmp, i8 %val) nounwind {
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: addi a4, zero, 255
; RV32IA-NEXT: sll a4, a4, a0
; RV32IA-NEXT: andi a1, a1, 255
@@ -719,7 +710,6 @@ define void @cmpxchg_i8_seq_cst_seq_cst(i8* %ptr, i8 %cmp, i8 %val) nounwind {
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: addi a4, zero, 255
; RV32IA-NEXT: sll a4, a4, a0
; RV32IA-NEXT: andi a1, a1, 255
@@ -796,7 +786,6 @@ define void @cmpxchg_i16_monotonic_monotonic(i16* %ptr, i16 %cmp, i16 %val) noun
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: lui a4, 16
; RV32IA-NEXT: addi a4, a4, -1
; RV32IA-NEXT: sll a5, a4, a0
@@ -875,7 +864,6 @@ define void @cmpxchg_i16_acquire_monotonic(i16* %ptr, i16 %cmp, i16 %val) nounwi
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: lui a4, 16
; RV32IA-NEXT: addi a4, a4, -1
; RV32IA-NEXT: sll a5, a4, a0
@@ -954,7 +942,6 @@ define void @cmpxchg_i16_acquire_acquire(i16* %ptr, i16 %cmp, i16 %val) nounwind
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: lui a4, 16
; RV32IA-NEXT: addi a4, a4, -1
; RV32IA-NEXT: sll a5, a4, a0
@@ -1033,7 +1020,6 @@ define void @cmpxchg_i16_release_monotonic(i16* %ptr, i16 %cmp, i16 %val) nounwi
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: lui a4, 16
; RV32IA-NEXT: addi a4, a4, -1
; RV32IA-NEXT: sll a5, a4, a0
@@ -1112,7 +1098,6 @@ define void @cmpxchg_i16_release_acquire(i16* %ptr, i16 %cmp, i16 %val) nounwind
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: lui a4, 16
; RV32IA-NEXT: addi a4, a4, -1
; RV32IA-NEXT: sll a5, a4, a0
@@ -1191,7 +1176,6 @@ define void @cmpxchg_i16_acq_rel_monotonic(i16* %ptr, i16 %cmp, i16 %val) nounwi
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: lui a4, 16
; RV32IA-NEXT: addi a4, a4, -1
; RV32IA-NEXT: sll a5, a4, a0
@@ -1270,7 +1254,6 @@ define void @cmpxchg_i16_acq_rel_acquire(i16* %ptr, i16 %cmp, i16 %val) nounwind
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: lui a4, 16
; RV32IA-NEXT: addi a4, a4, -1
; RV32IA-NEXT: sll a5, a4, a0
@@ -1349,7 +1332,6 @@ define void @cmpxchg_i16_seq_cst_monotonic(i16* %ptr, i16 %cmp, i16 %val) nounwi
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: lui a4, 16
; RV32IA-NEXT: addi a4, a4, -1
; RV32IA-NEXT: sll a5, a4, a0
@@ -1428,7 +1410,6 @@ define void @cmpxchg_i16_seq_cst_acquire(i16* %ptr, i16 %cmp, i16 %val) nounwind
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: lui a4, 16
; RV32IA-NEXT: addi a4, a4, -1
; RV32IA-NEXT: sll a5, a4, a0
@@ -1507,7 +1488,6 @@ define void @cmpxchg_i16_seq_cst_seq_cst(i16* %ptr, i16 %cmp, i16 %val) nounwind
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: lui a4, 16
; RV32IA-NEXT: addi a4, a4, -1
; RV32IA-NEXT: sll a5, a4, a0
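
The dropped "andi a0, a0, 24" instructions above are exactly the case the
known-bits fill handles: the bit offset produced by "slli a0, a0, 3" has its
low three bits known zero, so mask 24 plus those known zeros covers all five
bits RV32's sll reads. A small sketch of that arithmetic (the pointer value
is hypothetical):

#include <cassert>
#include <cstdint>

int main() {
  // Byte pointer to bit offset within the aligned word: slli a0, a0, 3
  // leaves the low 3 bits of the offset zero.
  uint32_t BitOff = 0x1006u << 3;
  uint32_t KnownZero = 7;  // bits cleared by the left shift
  uint32_t Mask = 24;      // the andi a0, a0, 24 being removed
  // Mask | KnownZero covers all 5 bits RV32 SLL reads, so the AND is a no-op.
  assert(((Mask | KnownZero) & 31u) == 31u);
  assert((BitOff & Mask) == (BitOff & 31u));
  return 0;
}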

File diff suppressed because it is too large


@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV32I
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV64I
; This test checks that unnecessary masking of shift amount operands is
; eliminated during instruction selection. The test needs to ensure that the
@@ -11,6 +13,11 @@ define i32 @sll_redundant_mask(i32 %a, i32 %b) nounwind {
; RV32I: # %bb.0:
; RV32I-NEXT: sll a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: sll_redundant_mask:
; RV64I: # %bb.0:
; RV64I-NEXT: sllw a0, a0, a1
; RV64I-NEXT: ret
%1 = and i32 %b, 31
%2 = shl i32 %a, %1
ret i32 %2
@@ -22,6 +29,12 @@ define i32 @sll_non_redundant_mask(i32 %a, i32 %b) nounwind {
; RV32I-NEXT: andi a1, a1, 15
; RV32I-NEXT: sll a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: sll_non_redundant_mask:
; RV64I: # %bb.0:
; RV64I-NEXT: andi a1, a1, 15
; RV64I-NEXT: sllw a0, a0, a1
; RV64I-NEXT: ret
%1 = and i32 %b, 15
%2 = shl i32 %a, %1
ret i32 %2
@@ -32,6 +45,11 @@ define i32 @srl_redundant_mask(i32 %a, i32 %b) nounwind {
; RV32I: # %bb.0:
; RV32I-NEXT: srl a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: srl_redundant_mask:
; RV64I: # %bb.0:
; RV64I-NEXT: srlw a0, a0, a1
; RV64I-NEXT: ret
%1 = and i32 %b, 4095
%2 = lshr i32 %a, %1
ret i32 %2
@@ -43,6 +61,12 @@ define i32 @srl_non_redundant_mask(i32 %a, i32 %b) nounwind {
; RV32I-NEXT: andi a1, a1, 7
; RV32I-NEXT: srl a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: srl_non_redundant_mask:
; RV64I: # %bb.0:
; RV64I-NEXT: andi a1, a1, 7
; RV64I-NEXT: srlw a0, a0, a1
; RV64I-NEXT: ret
%1 = and i32 %b, 7
%2 = lshr i32 %a, %1
ret i32 %2
@@ -53,6 +77,11 @@ define i32 @sra_redundant_mask(i32 %a, i32 %b) nounwind {
; RV32I: # %bb.0:
; RV32I-NEXT: sra a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: sra_redundant_mask:
; RV64I: # %bb.0:
; RV64I-NEXT: sraw a0, a0, a1
; RV64I-NEXT: ret
%1 = and i32 %b, 65535
%2 = ashr i32 %a, %1
ret i32 %2
@@ -64,7 +93,162 @@ define i32 @sra_non_redundant_mask(i32 %a, i32 %b) nounwind {
; RV32I-NEXT: andi a1, a1, 32
; RV32I-NEXT: sra a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: sra_non_redundant_mask:
; RV64I: # %bb.0:
; RV64I-NEXT: sraw a0, a0, zero
; RV64I-NEXT: ret
%1 = and i32 %b, 32
%2 = ashr i32 %a, %1
ret i32 %2
}
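
A note on the RV64 output above: (b & 32) leaves the low five bits clear, and
sraw reads only bits [4:0] of its amount operand, so the selected shift amount
is always zero and the mask collapses to the zero register. A quick check of
that arithmetic:

#include <cassert>
#include <cstdint>

int main() {
  // (b & 32) has bits [4:0] clear; sraw consumes only bits [4:0], so the
  // effective shift amount is zero for every b.
  for (uint32_t B = 0; B < 1024; ++B)
    assert(((B & 32u) & 31u) == 0u);
  return 0;
}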
define i32 @sll_redundant_mask_zeros(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: sll_redundant_mask_zeros:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a1, a1, 1
; RV32I-NEXT: sll a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: sll_redundant_mask_zeros:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a1, 1
; RV64I-NEXT: sllw a0, a0, a1
; RV64I-NEXT: ret
%1 = shl i32 %b, 1
%2 = and i32 %1, 30
%3 = shl i32 %a, %2
ret i32 %3
}
define i32 @srl_redundant_mask_zeros(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: srl_redundant_mask_zeros:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a1, a1, 2
; RV32I-NEXT: srl a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: srl_redundant_mask_zeros:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a1, 2
; RV64I-NEXT: srlw a0, a0, a1
; RV64I-NEXT: ret
%1 = shl i32 %b, 2
%2 = and i32 %1, 28
%3 = lshr i32 %a, %2
ret i32 %3
}
define i32 @sra_redundant_mask_zeros(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: sra_redundant_mask_zeros:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a1, a1, 3
; RV32I-NEXT: sra a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: sra_redundant_mask_zeros:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a1, 3
; RV64I-NEXT: sraw a0, a0, a1
; RV64I-NEXT: ret
%1 = shl i32 %b, 3
%2 = and i32 %1, 24
%3 = ashr i32 %a, %2
ret i32 %3
}
define i64 @sll_redundant_mask_zeros_i64(i64 %a, i64 %b) nounwind {
; RV32I-LABEL: sll_redundant_mask_zeros_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a2, a2, 2
; RV32I-NEXT: andi a3, a2, 60
; RV32I-NEXT: addi a4, a3, -32
; RV32I-NEXT: bltz a4, .LBB9_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sll a1, a0, a4
; RV32I-NEXT: mv a0, zero
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB9_2:
; RV32I-NEXT: sll a1, a1, a2
; RV32I-NEXT: addi a4, zero, 31
; RV32I-NEXT: sub a3, a4, a3
; RV32I-NEXT: srli a4, a0, 1
; RV32I-NEXT: srl a3, a4, a3
; RV32I-NEXT: or a1, a1, a3
; RV32I-NEXT: sll a0, a0, a2
; RV32I-NEXT: ret
;
; RV64I-LABEL: sll_redundant_mask_zeros_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a1, 2
; RV64I-NEXT: sll a0, a0, a1
; RV64I-NEXT: ret
%1 = shl i64 %b, 2
%2 = and i64 %1, 60
%3 = shl i64 %a, %2
ret i64 %3
}
define i64 @srl_redundant_mask_zeros_i64(i64 %a, i64 %b) nounwind {
; RV32I-LABEL: srl_redundant_mask_zeros_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a2, a2, 3
; RV32I-NEXT: andi a3, a2, 56
; RV32I-NEXT: addi a4, a3, -32
; RV32I-NEXT: bltz a4, .LBB10_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: srl a0, a1, a4
; RV32I-NEXT: mv a1, zero
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB10_2:
; RV32I-NEXT: srl a0, a0, a2
; RV32I-NEXT: addi a4, zero, 31
; RV32I-NEXT: sub a3, a4, a3
; RV32I-NEXT: slli a4, a1, 1
; RV32I-NEXT: sll a3, a4, a3
; RV32I-NEXT: or a0, a0, a3
; RV32I-NEXT: srl a1, a1, a2
; RV32I-NEXT: ret
;
; RV64I-LABEL: srl_redundant_mask_zeros_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a1, 3
; RV64I-NEXT: srl a0, a0, a1
; RV64I-NEXT: ret
%1 = shl i64 %b, 3
%2 = and i64 %1, 56
%3 = lshr i64 %a, %2
ret i64 %3
}
define i64 @sra_redundant_mask_zeros_i64(i64 %a, i64 %b) nounwind {
; RV32I-LABEL: sra_redundant_mask_zeros_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a2, a2, 4
; RV32I-NEXT: andi a3, a2, 48
; RV32I-NEXT: addi a4, a3, -32
; RV32I-NEXT: bltz a4, .LBB11_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sra a0, a1, a4
; RV32I-NEXT: srai a1, a1, 31
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB11_2:
; RV32I-NEXT: srl a0, a0, a2
; RV32I-NEXT: addi a4, zero, 31
; RV32I-NEXT: sub a3, a4, a3
; RV32I-NEXT: slli a4, a1, 1
; RV32I-NEXT: sll a3, a4, a3
; RV32I-NEXT: or a0, a0, a3
; RV32I-NEXT: sra a1, a1, a2
; RV32I-NEXT: ret
;
; RV64I-LABEL: sra_redundant_mask_zeros_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a1, 4
; RV64I-NEXT: sra a0, a0, a1
; RV64I-NEXT: ret
%1 = shl i64 %b, 4
%2 = and i64 %1, 48
%3 = ashr i64 %a, %2
ret i64 %3
}