From a3996e5f78e5f3f0cc6116b0fda983a0d1d58f41 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 16 Dec 2018 13:33:37 +0000 Subject: [PATCH] [SelectionDAG] Add FSHL/FSHR support to computeKnownBits Also exposes an issue in DAGCombiner::visitFunnelShift where we were assuming the shift amount had the result type (after legalization it'll have the targets shift amount type). llvm-svn: 349298 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 +++-- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 33 +++++++++++++++++++++++ test/CodeGen/X86/known-bits.ll | 18 +++---------- 3 files changed, 41 insertions(+), 16 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index cad130ecea6..93a1ab1dcff 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6966,8 +6966,10 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) { // fold (fshl N0, N1, 0) -> N0 // fold (fshr N0, N1, 0) -> N1 - if (DAG.MaskedValueIsZero(N2, APInt::getAllOnesValue(BitWidth))) - return IsFSHL ? N0 : N1; + if (isPowerOf2_32(BitWidth)) + if (DAG.MaskedValueIsZero( + N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1))) + return IsFSHL ? N0 : N1; // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth) if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index ceb86898946..6241af1332f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2679,6 +2679,39 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known.One.ashrInPlace(Shift); } break; + case ISD::FSHL: + case ISD::FSHR: + if (ConstantSDNode *C = + isConstOrDemandedConstSplat(Op.getOperand(2), DemandedElts)) { + unsigned Amt = C->getAPIntValue().urem(BitWidth); + + // For fshl, 0-shift returns the 1st arg. + // For fshr, 0-shift returns the 2nd arg. 
+      if (Amt == 0) {
+        Known = computeKnownBits(Op.getOperand(Opcode == ISD::FSHL ? 0 : 1),
+                                 DemandedElts, Depth + 1);
+        break;
+      }
+
+      // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
+      // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
+      Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+      Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+      if (Opcode == ISD::FSHL) {
+        Known.One <<= Amt;
+        Known.Zero <<= Amt;
+        Known2.One.lshrInPlace(BitWidth - Amt);
+        Known2.Zero.lshrInPlace(BitWidth - Amt);
+      } else {
+        Known.One <<= BitWidth - Amt;
+        Known.Zero <<= BitWidth - Amt;
+        Known2.One.lshrInPlace(Amt);
+        Known2.Zero.lshrInPlace(Amt);
+      }
+      Known.One |= Known2.One;
+      Known.Zero |= Known2.Zero;
+    }
+    break;
   case ISD::SIGN_EXTEND_INREG: {
     EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
     unsigned EBits = EVT.getScalarSizeInBits();
diff --git a/test/CodeGen/X86/known-bits.ll b/test/CodeGen/X86/known-bits.ll
index 3f7e127c072..8f3b983251d 100644
--- a/test/CodeGen/X86/known-bits.ll
+++ b/test/CodeGen/X86/known-bits.ll
@@ -302,17 +302,12 @@ declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone
 define i32 @knownbits_fshl(i32 %a0) nounwind {
 ; X32-LABEL: knownbits_fshl:
 ; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl $-1, %eax
-; X32-NEXT:    shrdl $27, %ecx, %eax
-; X32-NEXT:    andl $3, %eax
+; X32-NEXT:    movl $3, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: knownbits_fshl:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl $-1, %eax
-; X64-NEXT:    shrdl $27, %edi, %eax
-; X64-NEXT:    andl $3, %eax
+; X64-NEXT:    movl $3, %eax
 ; X64-NEXT:    retq
   %1 = tail call i32 @llvm.fshl.i32(i32 %a0, i32 -1, i32 5)
   %2 = and i32 %1, 3
@@ -322,17 +317,12 @@ define i32 @knownbits_fshl(i32 %a0) nounwind {
 define i32 @knownbits_fshr(i32 %a0) nounwind {
 ; X32-LABEL: knownbits_fshr:
 ; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl $-1, %eax
-; X32-NEXT:    shrdl $5, %ecx, %eax
-; X32-NEXT:    andl $3, %eax
+; X32-NEXT:    movl $3, %eax
+; 
X32-NEXT: retl ; ; X64-LABEL: knownbits_fshr: ; X64: # %bb.0: -; X64-NEXT: movl $-1, %eax -; X64-NEXT: shrdl $5, %edi, %eax -; X64-NEXT: andl $3, %eax +; X64-NEXT: movl $3, %eax ; X64-NEXT: retq %1 = tail call i32 @llvm.fshr.i32(i32 %a0, i32 -1, i32 5) %2 = and i32 %1, 3