Mirror of https://github.com/RPCS3/llvm-mirror.git, synced 2024-11-23 03:02:36 +01:00
[RISCV] Teach targetShrinkDemandedConstant to preserve (and X, 0xffffffff).
We look for this pattern frequently in isel patterns, so it's a good idea to try to preserve it. This also lets us remove our special isel handling for srliw and use a direct pattern match of (srl (and X, 0xffffffff), C), since no bits will be removed from the and mask.

Differential Revision: https://reviews.llvm.org/D99042
parent bd8a561807 · commit cc233d8df8
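For context, the hook's contract: an AND mask may be replaced by any NewMask that keeps every demanded bit of the original mask and adds only undemanded bits. Below is a minimal standalone sketch of that legality test and of the case this commit adds, using plain uint64_t in place of llvm::APInt (the helper name is illustrative, not LLVM's):

#include <cstdint>
#include <cstdio>

// A NewMask is usable iff ShrunkMask is a subset of NewMask and NewMask is
// a subset of ExpandedMask: it keeps all demanded set bits and only adds
// bits the user never reads.
bool canUseMask(uint64_t Mask, uint64_t DemandedBits, uint64_t NewMask) {
  uint64_t ShrunkMask = Mask & DemandedBits;    // bits that must stay set
  uint64_t ExpandedMask = Mask | ~DemandedBits; // bits that may be set
  return (ShrunkMask & ~NewMask) == 0 && (NewMask & ~ExpandedMask) == 0;
}

int main() {
  // (or (and X, 0xfffffffe), 1): the or makes bit 0 undemanded, so the
  // mask can be widened back to 0xffffffff and isel still sees the
  // (zext_inreg X, i32) idiom.
  printf("%d\n", canUseMask(0xfffffffe, ~uint64_t(1), 0xffffffff)); // 1
  return 0;
}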
@@ -1143,27 +1143,6 @@ bool RISCVDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) {
   return false;
 }
 
-// Match (srl (and val, mask), imm) where the result would be a
-// zero-extended 32-bit integer. i.e. the mask is 0xffffffff or the result
-// is equivalent to this (SimplifyDemandedBits may have removed lower bits
-// from the mask that aren't necessary due to the right-shifting).
-bool RISCVDAGToDAGISel::MatchSRLIW(SDNode *N) const {
-  assert(N->getOpcode() == ISD::SRL);
-  assert(N->getOperand(0).getOpcode() == ISD::AND);
-  assert(isa<ConstantSDNode>(N->getOperand(1)));
-  assert(isa<ConstantSDNode>(N->getOperand(0).getOperand(1)));
-
-  // The IsRV64 predicate is checked after PatFrag predicates so we can get
-  // here even on RV32.
-  if (!Subtarget->is64Bit())
-    return false;
-
-  SDValue And = N->getOperand(0);
-  uint64_t ShAmt = N->getConstantOperandVal(1);
-  uint64_t Mask = And.getConstantOperandVal(1);
-  return (Mask | maskTrailingOnes<uint64_t>(ShAmt)) == 0xffffffff;
-}
-
 // Check that it is a SLLIUW (Shift Logical Left Immediate Unsigned i32
 // on RV64).
 // SLLIUW is the same as SLLI except for the fact that it clears the bits
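For reference, the predicate being deleted above boils down to one integer identity; a standalone sketch with plain integers (the first helper is an assumed stand-in mirroring llvm::maskTrailingOnes):

#include <cstdint>

// Low N bits set; N constrained to [0, 63] here.
uint64_t maskTrailingOnes64(unsigned N) {
  return N == 0 ? 0 : (~uint64_t(0) >> (64 - N));
}

// (srl (and X, Mask), ShAmt) behaves as SRLIW exactly when OR-ing back the
// low ShAmt bits (which the shift discards anyway) turns Mask into
// 0xffffffff, i.e. the AND is effectively a 32-bit zero-extend.
bool isSRLIWEquivalent(uint64_t Mask, uint64_t ShAmt) {
  return (Mask | maskTrailingOnes64(unsigned(ShAmt))) == 0xffffffff;
}
// Example: Mask = 0xfffffff0, ShAmt = 4 -> 0xfffffff0 | 0xf == 0xffffffff.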
@@ -57,7 +57,6 @@ public:
   bool selectSExti32(SDValue N, SDValue &Val);
   bool selectZExti32(SDValue N, SDValue &Val);
 
-  bool MatchSRLIW(SDNode *N) const;
   bool MatchSLLIUW(SDNode *N) const;
 
   bool selectVLOp(SDValue N, SDValue &VL);
@@ -4954,16 +4954,36 @@ bool RISCVTargetLowering::targetShrinkDemandedConstant(
   // Clear all non-demanded bits initially.
   APInt ShrunkMask = Mask & DemandedBits;
 
+  // Try to make a smaller immediate by setting undemanded bits.
+
+  APInt ExpandedMask = Mask | ~DemandedBits;
+
+  auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
+    return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
+  };
+  auto UseMask = [Mask, Op, VT, &TLO](const APInt &NewMask) -> bool {
+    if (NewMask == Mask)
+      return true;
+    SDLoc DL(Op);
+    SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
+    SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
+    return TLO.CombineTo(Op, NewOp);
+  };
+
   // If the shrunk mask fits in sign extended 12 bits, let the target
   // independent code apply it.
   if (ShrunkMask.isSignedIntN(12))
     return false;
 
-  // Try to make a smaller immediate by setting undemanded bits.
+  // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
+  if (VT == MVT::i64) {
+    APInt NewMask = APInt(64, 0xffffffff);
+    if (IsLegalMask(NewMask))
+      return UseMask(NewMask);
+  }
 
-  // We need to be able to make a negative number through a combination of mask
-  // and undemanded bits.
-  APInt ExpandedMask = Mask | ~DemandedBits;
+  // For the remaining optimizations, we need to be able to make a negative
+  // number through a combination of mask and undemanded bits.
   if (!ExpandedMask.isNegative())
     return false;
 
@@ -4981,18 +5001,8 @@ bool RISCVTargetLowering::targetShrinkDemandedConstant(
     return false;
 
-  // Sanity check that our new mask is a subset of the demanded mask.
-  assert(NewMask.isSubsetOf(ExpandedMask));
-
-  // If we aren't changing the mask, just return true to keep it and prevent
-  // the caller from optimizing.
-  if (NewMask == Mask)
-    return true;
-
-  // Replace the constant with the new mask.
-  SDLoc DL(Op);
-  SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
-  SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
-  return TLO.CombineTo(Op, NewOp);
+  assert(IsLegalMask(NewMask));
+  return UseMask(NewMask);
 }
 
 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
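A note on the early-out that survives the rewrite: RISC-V ANDI takes a sign-extended 12-bit immediate, so a shrunk mask in [-2048, 2047] is already free and the hook defers to generic code. A tiny illustrative check (hypothetical helper, not an LLVM API):

#include <cstdint>

// Mirrors APInt::isSignedIntN(12) for a 64-bit value: does V fit in a
// sign-extended 12-bit immediate such as ANDI's?
bool fitsInSignedImm12(int64_t V) {
  return V >= -2048 && V <= 2047;
}
// Example: 0xff fits (a plain ANDI is enough, return false); 0xfffffff0
// does not, so the hook next tries to widen the mask to 0xffffffff before
// the negative-mask heuristics run.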
@@ -847,11 +847,6 @@ def assertzexti32 : PatFrag<(ops node:$src), (assertzext node:$src), [{
 }]>;
 def zexti32 : ComplexPattern<i64, 1, "selectZExti32">;
 
-def SRLIWPat : PatFrag<(ops node:$A, node:$B),
-                       (srl (and node:$A, imm), node:$B), [{
-  return MatchSRLIW(N);
-}]>;
-
 // Check that it is a SLLIUW (Shift Logical Left Immediate Unsigned i32
 // on RV64). Also used to optimize the same sequence without SLLIUW.
 def SLLIUWPat : PatFrag<(ops node:$A, node:$B),
@@ -1164,7 +1159,7 @@ def : Pat<(sext_inreg (sub GPR:$rs1, GPR:$rs2), i32),
           (SUBW GPR:$rs1, GPR:$rs2)>;
 def : Pat<(sext_inreg (shl GPR:$rs1, uimm5:$shamt), i32),
           (SLLIW GPR:$rs1, uimm5:$shamt)>;
-def : Pat<(i64 (SRLIWPat GPR:$rs1, uimm5:$shamt)),
+def : Pat<(i64 (srl (and GPR:$rs1, 0xffffffff), uimm5:$shamt)),
           (SRLIW GPR:$rs1, uimm5:$shamt)>;
 def : Pat<(i64 (srl (shl GPR:$rs1, (i64 32)), uimm6gt32:$shamt)),
           (SRLIW GPR:$rs1, (ImmSub32 uimm6gt32:$shamt))>;
@@ -871,6 +871,6 @@ def : Pat<(i64 (sext_inreg (or (shl GPR:$rs2, (i64 16)),
                            i32)),
           (PACKW GPR:$rs1, GPR:$rs2)>;
 def : Pat<(i64 (or (and (assertsexti32 GPR:$rs2), 0xFFFFFFFFFFFF0000),
-                   (SRLIWPat GPR:$rs1, (i64 16)))),
+                   (srl (and GPR:$rs1, 0xFFFFFFFF), (i64 16)))),
           (PACKUW GPR:$rs1, GPR:$rs2)>;
 } // Predicates = [HasStdExtZbp, IsRV64]
@@ -129,8 +129,8 @@ define i32 @srli(i32 %a) nounwind {
   ret i32 %1
 }
 
-; FIXME: This should use srliw on RV64, but SimplifyDemandedBits breaks the
-; (and X, 0xffffffff) that type legalization inserts.
+; This makes sure SimplifyDemandedBits doesn't prevent us from matching SRLIW
+; on RV64.
 define i32 @srli_demandedbits(i32 %0) {
 ; RV32I-LABEL: srli_demandedbits:
 ; RV32I:       # %bb.0:
@@ -140,11 +140,7 @@ define i32 @srli_demandedbits(i32 %0) {
 ;
 ; RV64I-LABEL: srli_demandedbits:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addi a1, zero, 1
-; RV64I-NEXT:    slli a1, a1, 32
-; RV64I-NEXT:    addi a1, a1, -16
-; RV64I-NEXT:    and a0, a0, a1
-; RV64I-NEXT:    srli a0, a0, 3
+; RV64I-NEXT:    srliw a0, a0, 3
 ; RV64I-NEXT:    ori a0, a0, 1
 ; RV64I-NEXT:    ret
   %2 = lshr i32 %0, 3
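Why (srl (and X, 0xffffffff), C) can be selected directly as SRLIW, which the test change above relies on: a small model of SRLIW's RV64 semantics (an assumption based on the RISC-V spec, not code from this commit):

#include <cstdint>

// SRLIW: logically shift the low 32 bits of rs1 right by shamt, then
// sign-extend the 32-bit result to 64 bits.
int64_t srliw(int64_t Rs1, unsigned Shamt) { // Shamt in [0, 31]
  uint32_t Lo = uint32_t(Rs1);               // the AND with 0xffffffff
  return int64_t(int32_t(Lo >> Shamt));      // shift, then sign-extend
}
// For Shamt >= 1, bit 31 of the 32-bit result is zero, so the sign
// extension is a zero extension and the value equals
// (X & 0xffffffff) >> Shamt, i.e. the DAG pattern the commit now matches.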
@@ -126,34 +126,26 @@ define i64 @zextw_i64(i64 %a) nounwind {
   ret i64 %and
 }
 
-; FIXME: This can use zext.w, but we need targetShrinkDemandedConstant to
-; to adjust the immediate.
+; This makes sure targetShrinkDemandedConstant changes the and immediate to
+; allow zext.w or slli+srli.
 define i64 @zextw_demandedbits_i64(i64 %0) {
 ; RV64I-LABEL: zextw_demandedbits_i64:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addi a1, zero, 1
-; RV64I-NEXT:    slli a1, a1, 32
-; RV64I-NEXT:    addi a1, a1, -2
-; RV64I-NEXT:    and a0, a0, a1
 ; RV64I-NEXT:    ori a0, a0, 1
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
 ; RV64I-NEXT:    ret
 ;
 ; RV64IB-LABEL: zextw_demandedbits_i64:
 ; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    addi a1, zero, 1
-; RV64IB-NEXT:    slli a1, a1, 32
-; RV64IB-NEXT:    addi a1, a1, -2
-; RV64IB-NEXT:    and a0, a0, a1
 ; RV64IB-NEXT:    ori a0, a0, 1
+; RV64IB-NEXT:    zext.w a0, a0
 ; RV64IB-NEXT:    ret
 ;
 ; RV64IBA-LABEL: zextw_demandedbits_i64:
 ; RV64IBA:       # %bb.0:
-; RV64IBA-NEXT:    addi a1, zero, 1
-; RV64IBA-NEXT:    slli a1, a1, 32
-; RV64IBA-NEXT:    addi a1, a1, -2
-; RV64IBA-NEXT:    and a0, a0, a1
 ; RV64IBA-NEXT:    ori a0, a0, 1
+; RV64IBA-NEXT:    zext.w a0, a0
 ; RV64IBA-NEXT:    ret
   %2 = and i64 %0, 4294967294
   %3 = or i64 %2, 1