
[RISCV] Copy isUnneededShiftMask from X86.

In d2927f786e877410d90c1e6f0e0c7d99524529c5, I added patterns
to remove (and X, 31) from sllw/srlw/sraw shift amounts.

There is code in SelectionDAGISel.cpp that uses computeKnownBits to fill
bits of the mask back in when SimplifyDemandedBits has removed them
because they were known to be zero.

The non-W shift patterns use immbottomxlenset, which allows the mask to
have more than log2(xlen) trailing ones, but it has no corresponding
computeKnownBits call to restore mask bits that SimplifyDemandedBits may
have cleared.

This patch copies code from X86 to handle more than log2(xlen)
bottom bits set and uses computeKnownBits to fill in missing bits
before counting.

Reviewed By: luismarques

Differential Revision: https://reviews.llvm.org/D95422
Commit ae2c374959 (parent 78c4743b21)
Author: Craig Topper
Date: 2021-01-27 20:36:21 -08:00
6 changed files with 401 additions and 308 deletions
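
Before the file diffs, a minimal standalone sketch of the check this patch adds (an editor's illustration, not code from the patch): plain C++, with uint64_t standing in for APInt and a KnownZero parameter standing in for CurDAG->computeKnownBits(N->getOperand(0)).Zero. The example values are taken from the tests below.

```cpp
// Illustrative sketch only -- not code from this patch. uint64_t stands
// in for APInt; KnownZero stands in for the known-zero bits of the
// shift-amount operand.
#include <cassert>
#include <cstdint>

// Count the number of consecutive one bits starting at bit 0.
static unsigned countTrailingOnes(uint64_t V) {
  unsigned N = 0;
  for (; V & 1; V >>= 1)
    ++N;
  return N;
}

// Width is the number of shift-amount bits the instruction reads:
// 6 for RV64 shifts, 5 for RV32 shifts (and the RV64 *W instructions).
static bool isUnneededShiftMask(uint64_t Mask, uint64_t KnownZero,
                                unsigned Width) {
  // The old immbottomxlenset check: enough trailing ones in the mask alone.
  if (countTrailingOnes(Mask) >= Width)
    return true;
  // The new part: restore mask bits that SimplifyDemandedBits may have
  // cleared, using bits known to be zero in the shift amount.
  return countTrailingOnes(Mask | KnownZero) >= Width;
}

int main() {
  // From sll_redundant_mask_zeros_i64: (and (shl %b, 2), 60) on RV64.
  // 60 = 0b111100 has no trailing ones, but the shl makes the low two
  // bits known zero, so 60 | 0b11 = 63 has 6 trailing ones: AND removable.
  assert(isUnneededShiftMask(60, 0b11, 6));
  // From srl_redundant_mask: (and %b, 4095) on RV32 has 12 trailing ones.
  assert(isUnneededShiftMask(4095, 0, 5));
  // From sll_non_redundant_mask: (and %b, 15) has only 4 trailing ones
  // and no known-zero bits to help, so the mask must be kept.
  assert(!isUnneededShiftMask(15, 0, 5));
  return 0;
}
```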

lib/Target/RISCV/RISCVISelDAGToDAG.cpp

@@ -17,6 +17,7 @@
 #include "llvm/IR/IntrinsicsRISCV.h"
 #include "llvm/Support/Alignment.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"

@@ -821,6 +822,21 @@ bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) {
   return false;
 }

+// Helper to detect unneeded and instructions on shift amounts. Called
+// from PatFrags in tablegen.
+bool RISCVDAGToDAGISel::isUnneededShiftMask(SDNode *N, unsigned Width) const {
+  assert(N->getOpcode() == ISD::AND && "Unexpected opcode");
+  assert(Width >= 5 && N->getValueSizeInBits(0) >= (1 << Width) &&
+         "Unexpected width");
+  const APInt &Val = N->getConstantOperandAPInt(1);
+
+  if (Val.countTrailingOnes() >= Width)
+    return true;
+
+  APInt Mask = Val | CurDAG->computeKnownBits(N->getOperand(0)).Zero;
+  return Mask.countTrailingOnes() >= Width;
+}
+
 // Match (srl (and val, mask), imm) where the result would be a
 // zero-extended 32-bit integer. i.e. the mask is 0xffffffff or the result
 // is equivalent to this (SimplifyDemandedBits may have removed lower bits

lib/Target/RISCV/RISCVISelDAGToDAG.h

@@ -45,6 +45,8 @@ public:
   bool SelectAddrFI(SDValue Addr, SDValue &Base);

+  bool isUnneededShiftMask(SDNode *N, unsigned Width) const;
+
   bool MatchSRLIW(SDNode *N) const;
   bool MatchSLOI(SDNode *N) const;
   bool MatchSROI(SDNode *N) const;

lib/Target/RISCV/RISCVInstrInfo.td

@@ -289,12 +289,6 @@ def ixlenimm_li : Operand<XLenVT> {
 // Standalone (codegen-only) immleaf patterns.
 def simm32 : ImmLeaf<XLenVT, [{return isInt<32>(Imm);}]>;
 def simm32hi20 : ImmLeaf<XLenVT, [{return isShiftedInt<20, 12>(Imm);}]>;
-// A mask value that won't affect significant shift bits.
-def immbottomxlenset : ImmLeaf<XLenVT, [{
-  if (Subtarget->is64Bit())
-    return countTrailingOnes<uint64_t>(Imm) >= 6;
-  return countTrailingOnes<uint64_t>(Imm) >= 5;
-}]>;

 // A 6-bit constant greater than 32.
 def uimm6gt32 : ImmLeaf<XLenVT, [{

@@ -901,14 +895,21 @@ def : PatGprUimmLog2XLen<sra, SRAI>;
 // typically introduced when the legalizer promotes the shift amount and
 // zero-extends it). For RISC-V, the mask is unnecessary as shifts in the base
 // ISA only read the least significant 5 bits (RV32I) or 6 bits (RV64I).
+def shiftMaskXLen : PatFrag<(ops node:$lhs), (and node:$lhs, imm), [{
+  return isUnneededShiftMask(N, Subtarget->is64Bit() ? 6 : 5);
+}]>;
+def shiftMask32 : PatFrag<(ops node:$lhs), (and node:$lhs, imm), [{
+  return isUnneededShiftMask(N, 5);
+}]>;
+
 class shiftop<SDPatternOperator operator>
   : PatFrags<(ops node:$val, node:$count),
             [(operator node:$val, node:$count),
-             (operator node:$val, (and node:$count, immbottomxlenset))]>;
+             (operator node:$val, (shiftMaskXLen node:$count))]>;
 class shiftopw<SDPatternOperator operator>
   : PatFrags<(ops node:$val, node:$count),
             [(operator node:$val, node:$count),
-             (operator node:$val, (and node:$count, (XLenVT 31)))]>;
+             (operator node:$val, (shiftMask32 node:$count))]>;

 def : PatGprGpr<shiftop<shl>, SLL>;
 def : PatGprGpr<shiftop<srl>, SRL>;

test/CodeGen/RISCV/atomic-cmpxchg.ll

@@ -26,7 +26,6 @@ define void @cmpxchg_i8_monotonic_monotonic(i8* %ptr, i8 %cmp, i8 %val) nounwind
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: andi a3, a0, -4
 ; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: andi a0, a0, 24
 ; RV32IA-NEXT: addi a4, zero, 255
 ; RV32IA-NEXT: sll a4, a4, a0
 ; RV32IA-NEXT: andi a1, a1, 255

@@ -103,7 +102,6 @@ define void @cmpxchg_i8_acquire_monotonic(i8* %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: andi a3, a0, -4
 ; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: andi a0, a0, 24
 ; RV32IA-NEXT: addi a4, zero, 255
 ; RV32IA-NEXT: sll a4, a4, a0
 ; RV32IA-NEXT: andi a1, a1, 255

@@ -180,7 +178,6 @@ define void @cmpxchg_i8_acquire_acquire(i8* %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: andi a3, a0, -4
 ; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: andi a0, a0, 24
 ; RV32IA-NEXT: addi a4, zero, 255
 ; RV32IA-NEXT: sll a4, a4, a0
 ; RV32IA-NEXT: andi a1, a1, 255

@@ -257,7 +254,6 @@ define void @cmpxchg_i8_release_monotonic(i8* %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: andi a3, a0, -4
 ; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: andi a0, a0, 24
 ; RV32IA-NEXT: addi a4, zero, 255
 ; RV32IA-NEXT: sll a4, a4, a0
 ; RV32IA-NEXT: andi a1, a1, 255

@@ -334,7 +330,6 @@ define void @cmpxchg_i8_release_acquire(i8* %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: andi a3, a0, -4
 ; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: andi a0, a0, 24
 ; RV32IA-NEXT: addi a4, zero, 255
 ; RV32IA-NEXT: sll a4, a4, a0
 ; RV32IA-NEXT: andi a1, a1, 255

@@ -411,7 +406,6 @@ define void @cmpxchg_i8_acq_rel_monotonic(i8* %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: andi a3, a0, -4
 ; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: andi a0, a0, 24
 ; RV32IA-NEXT: addi a4, zero, 255
 ; RV32IA-NEXT: sll a4, a4, a0
 ; RV32IA-NEXT: andi a1, a1, 255

@@ -488,7 +482,6 @@ define void @cmpxchg_i8_acq_rel_acquire(i8* %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: andi a3, a0, -4
 ; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: andi a0, a0, 24
 ; RV32IA-NEXT: addi a4, zero, 255
 ; RV32IA-NEXT: sll a4, a4, a0
 ; RV32IA-NEXT: andi a1, a1, 255

@@ -565,7 +558,6 @@ define void @cmpxchg_i8_seq_cst_monotonic(i8* %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: andi a3, a0, -4
 ; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: andi a0, a0, 24
 ; RV32IA-NEXT: addi a4, zero, 255
 ; RV32IA-NEXT: sll a4, a4, a0
 ; RV32IA-NEXT: andi a1, a1, 255

@@ -642,7 +634,6 @@ define void @cmpxchg_i8_seq_cst_acquire(i8* %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: andi a3, a0, -4
 ; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: andi a0, a0, 24
 ; RV32IA-NEXT: addi a4, zero, 255
 ; RV32IA-NEXT: sll a4, a4, a0
 ; RV32IA-NEXT: andi a1, a1, 255

@@ -719,7 +710,6 @@ define void @cmpxchg_i8_seq_cst_seq_cst(i8* %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: andi a3, a0, -4
 ; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: andi a0, a0, 24
 ; RV32IA-NEXT: addi a4, zero, 255
 ; RV32IA-NEXT: sll a4, a4, a0
 ; RV32IA-NEXT: andi a1, a1, 255

@@ -796,7 +786,6 @@ define void @cmpxchg_i16_monotonic_monotonic(i16* %ptr, i16 %cmp, i16 %val) nounwind
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: andi a3, a0, -4
 ; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: andi a0, a0, 24
 ; RV32IA-NEXT: lui a4, 16
 ; RV32IA-NEXT: addi a4, a4, -1
 ; RV32IA-NEXT: sll a5, a4, a0

@@ -875,7 +864,6 @@ define void @cmpxchg_i16_acquire_monotonic(i16* %ptr, i16 %cmp, i16 %val) nounwind
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: andi a3, a0, -4
 ; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: andi a0, a0, 24
 ; RV32IA-NEXT: lui a4, 16
 ; RV32IA-NEXT: addi a4, a4, -1
 ; RV32IA-NEXT: sll a5, a4, a0

@@ -954,7 +942,6 @@ define void @cmpxchg_i16_acquire_acquire(i16* %ptr, i16 %cmp, i16 %val) nounwind
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: andi a3, a0, -4
 ; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: andi a0, a0, 24
 ; RV32IA-NEXT: lui a4, 16
 ; RV32IA-NEXT: addi a4, a4, -1
 ; RV32IA-NEXT: sll a5, a4, a0

@@ -1033,7 +1020,6 @@ define void @cmpxchg_i16_release_monotonic(i16* %ptr, i16 %cmp, i16 %val) nounwind
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: andi a3, a0, -4
 ; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: andi a0, a0, 24
 ; RV32IA-NEXT: lui a4, 16
 ; RV32IA-NEXT: addi a4, a4, -1
 ; RV32IA-NEXT: sll a5, a4, a0

@@ -1112,7 +1098,6 @@ define void @cmpxchg_i16_release_acquire(i16* %ptr, i16 %cmp, i16 %val) nounwind
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: andi a3, a0, -4
 ; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: andi a0, a0, 24
 ; RV32IA-NEXT: lui a4, 16
 ; RV32IA-NEXT: addi a4, a4, -1
 ; RV32IA-NEXT: sll a5, a4, a0

@@ -1191,7 +1176,6 @@ define void @cmpxchg_i16_acq_rel_monotonic(i16* %ptr, i16 %cmp, i16 %val) nounwind
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: andi a3, a0, -4
 ; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: andi a0, a0, 24
 ; RV32IA-NEXT: lui a4, 16
 ; RV32IA-NEXT: addi a4, a4, -1
 ; RV32IA-NEXT: sll a5, a4, a0

@@ -1270,7 +1254,6 @@ define void @cmpxchg_i16_acq_rel_acquire(i16* %ptr, i16 %cmp, i16 %val) nounwind
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: andi a3, a0, -4
 ; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: andi a0, a0, 24
 ; RV32IA-NEXT: lui a4, 16
 ; RV32IA-NEXT: addi a4, a4, -1
 ; RV32IA-NEXT: sll a5, a4, a0

@@ -1349,7 +1332,6 @@ define void @cmpxchg_i16_seq_cst_monotonic(i16* %ptr, i16 %cmp, i16 %val) nounwind
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: andi a3, a0, -4
 ; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: andi a0, a0, 24
 ; RV32IA-NEXT: lui a4, 16
 ; RV32IA-NEXT: addi a4, a4, -1
 ; RV32IA-NEXT: sll a5, a4, a0

@@ -1428,7 +1410,6 @@ define void @cmpxchg_i16_seq_cst_acquire(i16* %ptr, i16 %cmp, i16 %val) nounwind
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: andi a3, a0, -4
 ; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: andi a0, a0, 24
 ; RV32IA-NEXT: lui a4, 16
 ; RV32IA-NEXT: addi a4, a4, -1
 ; RV32IA-NEXT: sll a5, a4, a0

@@ -1507,7 +1488,6 @@ define void @cmpxchg_i16_seq_cst_seq_cst(i16* %ptr, i16 %cmp, i16 %val) nounwind
 ; RV32IA: # %bb.0:
 ; RV32IA-NEXT: andi a3, a0, -4
 ; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: andi a0, a0, 24
 ; RV32IA-NEXT: lui a4, 16
 ; RV32IA-NEXT: addi a4, a4, -1
 ; RV32IA-NEXT: sll a5, a4, a0

(File diff suppressed because it is too large.)

test/CodeGen/RISCV/shift-masked-shamt.ll

@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64I

 ; This test checks that unnecessary masking of shift amount operands is
 ; eliminated during instruction selection. The test needs to ensure that the
@@ -11,6 +13,11 @@ define i32 @sll_redundant_mask(i32 %a, i32 %b) nounwind {
 ; RV32I: # %bb.0:
 ; RV32I-NEXT: sll a0, a0, a1
 ; RV32I-NEXT: ret
+;
+; RV64I-LABEL: sll_redundant_mask:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sllw a0, a0, a1
+; RV64I-NEXT: ret
   %1 = and i32 %b, 31
   %2 = shl i32 %a, %1
   ret i32 %2
@@ -22,6 +29,12 @@ define i32 @sll_non_redundant_mask(i32 %a, i32 %b) nounwind {
 ; RV32I-NEXT: andi a1, a1, 15
 ; RV32I-NEXT: sll a0, a0, a1
 ; RV32I-NEXT: ret
+;
+; RV64I-LABEL: sll_non_redundant_mask:
+; RV64I: # %bb.0:
+; RV64I-NEXT: andi a1, a1, 15
+; RV64I-NEXT: sllw a0, a0, a1
+; RV64I-NEXT: ret
   %1 = and i32 %b, 15
   %2 = shl i32 %a, %1
   ret i32 %2
@@ -32,6 +45,11 @@ define i32 @srl_redundant_mask(i32 %a, i32 %b) nounwind {
 ; RV32I: # %bb.0:
 ; RV32I-NEXT: srl a0, a0, a1
 ; RV32I-NEXT: ret
+;
+; RV64I-LABEL: srl_redundant_mask:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srlw a0, a0, a1
+; RV64I-NEXT: ret
   %1 = and i32 %b, 4095
   %2 = lshr i32 %a, %1
   ret i32 %2
@@ -43,6 +61,12 @@ define i32 @srl_non_redundant_mask(i32 %a, i32 %b) nounwind {
 ; RV32I-NEXT: andi a1, a1, 7
 ; RV32I-NEXT: srl a0, a0, a1
 ; RV32I-NEXT: ret
+;
+; RV64I-LABEL: srl_non_redundant_mask:
+; RV64I: # %bb.0:
+; RV64I-NEXT: andi a1, a1, 7
+; RV64I-NEXT: srlw a0, a0, a1
+; RV64I-NEXT: ret
   %1 = and i32 %b, 7
   %2 = lshr i32 %a, %1
   ret i32 %2
@@ -53,6 +77,11 @@ define i32 @sra_redundant_mask(i32 %a, i32 %b) nounwind {
 ; RV32I: # %bb.0:
 ; RV32I-NEXT: sra a0, a0, a1
 ; RV32I-NEXT: ret
+;
+; RV64I-LABEL: sra_redundant_mask:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sraw a0, a0, a1
+; RV64I-NEXT: ret
   %1 = and i32 %b, 65535
   %2 = ashr i32 %a, %1
   ret i32 %2
@@ -64,7 +93,162 @@ define i32 @sra_non_redundant_mask(i32 %a, i32 %b) nounwind {
 ; RV32I-NEXT: andi a1, a1, 32
 ; RV32I-NEXT: sra a0, a0, a1
 ; RV32I-NEXT: ret
+;
+; RV64I-LABEL: sra_non_redundant_mask:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sraw a0, a0, zero
+; RV64I-NEXT: ret
   %1 = and i32 %b, 32
   %2 = ashr i32 %a, %1
   ret i32 %2
 }
+
+define i32 @sll_redundant_mask_zeros(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: sll_redundant_mask_zeros:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a1, a1, 1
+; RV32I-NEXT: sll a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: sll_redundant_mask_zeros:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a1, a1, 1
+; RV64I-NEXT: sllw a0, a0, a1
+; RV64I-NEXT: ret
+  %1 = shl i32 %b, 1
+  %2 = and i32 %1, 30
+  %3 = shl i32 %a, %2
+  ret i32 %3
+}
+
+define i32 @srl_redundant_mask_zeros(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: srl_redundant_mask_zeros:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a1, a1, 2
+; RV32I-NEXT: srl a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: srl_redundant_mask_zeros:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a1, a1, 2
+; RV64I-NEXT: srlw a0, a0, a1
+; RV64I-NEXT: ret
+  %1 = shl i32 %b, 2
+  %2 = and i32 %1, 28
+  %3 = lshr i32 %a, %2
+  ret i32 %3
+}
+
+define i32 @sra_redundant_mask_zeros(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: sra_redundant_mask_zeros:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a1, a1, 3
+; RV32I-NEXT: sra a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: sra_redundant_mask_zeros:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a1, a1, 3
+; RV64I-NEXT: sraw a0, a0, a1
+; RV64I-NEXT: ret
+  %1 = shl i32 %b, 3
+  %2 = and i32 %1, 24
+  %3 = ashr i32 %a, %2
+  ret i32 %3
+}
+
+define i64 @sll_redundant_mask_zeros_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: sll_redundant_mask_zeros_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a2, a2, 2
+; RV32I-NEXT: andi a3, a2, 60
+; RV32I-NEXT: addi a4, a3, -32
+; RV32I-NEXT: bltz a4, .LBB9_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: sll a1, a0, a4
+; RV32I-NEXT: mv a0, zero
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB9_2:
+; RV32I-NEXT: sll a1, a1, a2
+; RV32I-NEXT: addi a4, zero, 31
+; RV32I-NEXT: sub a3, a4, a3
+; RV32I-NEXT: srli a4, a0, 1
+; RV32I-NEXT: srl a3, a4, a3
+; RV32I-NEXT: or a1, a1, a3
+; RV32I-NEXT: sll a0, a0, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: sll_redundant_mask_zeros_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a1, a1, 2
+; RV64I-NEXT: sll a0, a0, a1
+; RV64I-NEXT: ret
+  %1 = shl i64 %b, 2
+  %2 = and i64 %1, 60
+  %3 = shl i64 %a, %2
+  ret i64 %3
+}
+
+define i64 @srl_redundant_mask_zeros_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: srl_redundant_mask_zeros_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a2, a2, 3
+; RV32I-NEXT: andi a3, a2, 56
+; RV32I-NEXT: addi a4, a3, -32
+; RV32I-NEXT: bltz a4, .LBB10_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: srl a0, a1, a4
+; RV32I-NEXT: mv a1, zero
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB10_2:
+; RV32I-NEXT: srl a0, a0, a2
+; RV32I-NEXT: addi a4, zero, 31
+; RV32I-NEXT: sub a3, a4, a3
+; RV32I-NEXT: slli a4, a1, 1
+; RV32I-NEXT: sll a3, a4, a3
+; RV32I-NEXT: or a0, a0, a3
+; RV32I-NEXT: srl a1, a1, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: srl_redundant_mask_zeros_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a1, a1, 3
+; RV64I-NEXT: srl a0, a0, a1
+; RV64I-NEXT: ret
+  %1 = shl i64 %b, 3
+  %2 = and i64 %1, 56
+  %3 = lshr i64 %a, %2
+  ret i64 %3
+}
+
+define i64 @sra_redundant_mask_zeros_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: sra_redundant_mask_zeros_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a2, a2, 4
+; RV32I-NEXT: andi a3, a2, 48
+; RV32I-NEXT: addi a4, a3, -32
+; RV32I-NEXT: bltz a4, .LBB11_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: sra a0, a1, a4
+; RV32I-NEXT: srai a1, a1, 31
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB11_2:
+; RV32I-NEXT: srl a0, a0, a2
+; RV32I-NEXT: addi a4, zero, 31
+; RV32I-NEXT: sub a3, a4, a3
+; RV32I-NEXT: slli a4, a1, 1
+; RV32I-NEXT: sll a3, a4, a3
+; RV32I-NEXT: or a0, a0, a3
+; RV32I-NEXT: sra a1, a1, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: sra_redundant_mask_zeros_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a1, a1, 4
+; RV64I-NEXT: sra a0, a0, a1
+; RV64I-NEXT: ret
+  %1 = shl i64 %b, 4
+  %2 = and i64 %1, 48
+  %3 = ashr i64 %a, %2
+  ret i64 %3
+}