1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 10:42:39 +01:00

[SelectionDAG] Add FSHL/FSHR support to computeKnownBits

This also exposed (and fixes) an issue in DAGCombiner::visitFunnelShift, where we were assuming the shift amount had the result type (after legalization it will have the target's shift amount type).

llvm-svn: 349298
This commit is contained in:
Simon Pilgrim 2018-12-16 13:33:37 +00:00
parent 3c38377cf1
commit a3996e5f78
3 changed files with 41 additions and 16 deletions

View File

@ -6966,7 +6966,9 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
// fold (fshl N0, N1, 0) -> N0 // fold (fshl N0, N1, 0) -> N0
// fold (fshr N0, N1, 0) -> N1 // fold (fshr N0, N1, 0) -> N1
if (DAG.MaskedValueIsZero(N2, APInt::getAllOnesValue(BitWidth))) if (isPowerOf2_32(BitWidth))
if (DAG.MaskedValueIsZero(
N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
return IsFSHL ? N0 : N1; return IsFSHL ? N0 : N1;
// fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth) // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)

View File

@ -2679,6 +2679,39 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known.One.ashrInPlace(Shift); Known.One.ashrInPlace(Shift);
} }
break; break;
case ISD::FSHL:
case ISD::FSHR:
if (ConstantSDNode *C =
isConstOrDemandedConstSplat(Op.getOperand(2), DemandedElts)) {
unsigned Amt = C->getAPIntValue().urem(BitWidth);
// For fshl, 0-shift returns the 1st arg.
// For fshr, 0-shift returns the 2nd arg.
if (Amt == 0) {
Known = computeKnownBits(Op.getOperand(Opcode == ISD::FSHL ? 0 : 1),
DemandedElts, Depth + 1);
break;
}
// fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
// fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
if (Opcode == ISD::FSHL) {
Known.One <<= Amt;
Known.Zero <<= Amt;
Known2.One.lshrInPlace(BitWidth - Amt);
Known2.Zero.lshrInPlace(BitWidth - Amt);
} else {
Known.One <<= BitWidth - Amt;
Known.Zero <<= BitWidth - Amt;
Known2.One.lshrInPlace(Amt);
Known2.Zero.lshrInPlace(Amt);
}
Known.One |= Known2.One;
Known.Zero |= Known2.Zero;
}
break;
case ISD::SIGN_EXTEND_INREG: { case ISD::SIGN_EXTEND_INREG: {
EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
unsigned EBits = EVT.getScalarSizeInBits(); unsigned EBits = EVT.getScalarSizeInBits();

View File

@ -302,17 +302,12 @@ declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone
define i32 @knownbits_fshl(i32 %a0) nounwind { define i32 @knownbits_fshl(i32 %a0) nounwind {
; X32-LABEL: knownbits_fshl: ; X32-LABEL: knownbits_fshl:
; X32: # %bb.0: ; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl $3, %eax
; X32-NEXT: movl $-1, %eax
; X32-NEXT: shrdl $27, %ecx, %eax
; X32-NEXT: andl $3, %eax
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: knownbits_fshl: ; X64-LABEL: knownbits_fshl:
; X64: # %bb.0: ; X64: # %bb.0:
; X64-NEXT: movl $-1, %eax ; X64-NEXT: movl $3, %eax
; X64-NEXT: shrdl $27, %edi, %eax
; X64-NEXT: andl $3, %eax
; X64-NEXT: retq ; X64-NEXT: retq
%1 = tail call i32 @llvm.fshl.i32(i32 %a0, i32 -1, i32 5) %1 = tail call i32 @llvm.fshl.i32(i32 %a0, i32 -1, i32 5)
%2 = and i32 %1, 3 %2 = and i32 %1, 3
@ -322,17 +317,12 @@ define i32 @knownbits_fshl(i32 %a0) nounwind {
define i32 @knownbits_fshr(i32 %a0) nounwind { define i32 @knownbits_fshr(i32 %a0) nounwind {
; X32-LABEL: knownbits_fshr: ; X32-LABEL: knownbits_fshr:
; X32: # %bb.0: ; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl $3, %eax
; X32-NEXT: movl $-1, %eax
; X32-NEXT: shrdl $5, %ecx, %eax
; X32-NEXT: andl $3, %eax
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: knownbits_fshr: ; X64-LABEL: knownbits_fshr:
; X64: # %bb.0: ; X64: # %bb.0:
; X64-NEXT: movl $-1, %eax ; X64-NEXT: movl $3, %eax
; X64-NEXT: shrdl $5, %edi, %eax
; X64-NEXT: andl $3, %eax
; X64-NEXT: retq ; X64-NEXT: retq
%1 = tail call i32 @llvm.fshr.i32(i32 %a0, i32 -1, i32 5) %1 = tail call i32 @llvm.fshr.i32(i32 %a0, i32 -1, i32 5)
%2 = and i32 %1, 3 %2 = and i32 %1, 3