[SelectionDAG] Add FSHL/FSHR support to computeKnownBits

Also exposes (and fixes) an issue in DAGCombiner::visitFunnelShift where we were assuming the shift amount had the result type (after legalization it will have the target's shift amount type).

llvm-svn: 349298
2024-11-22 02:33:06 +01:00 · 2018-12-16 13:33:37 +00:00 · 2018-12-16 13:33:37 +00:00 · a3996e5f78
commit a3996e5f78
parent 3c38377cf1
3 changed files with 41 additions and 16 deletions
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@ -6966,8 +6966,10 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {

  // fold (fshl N0, N1, 0) -> N0
  // fold (fshr N0, N1, 0) -> N1
-  if (DAG.MaskedValueIsZero(N2, APInt::getAllOnesValue(BitWidth)))
-    return IsFSHL ? N0 : N1;
+  if (isPowerOf2_32(BitWidth))
+    if (DAG.MaskedValueIsZero(
+            N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
+      return IsFSHL ? N0 : N1;

  // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
  if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@ -2679,6 +2679,39 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
      Known.One.ashrInPlace(Shift);
    }
    break;
+  case ISD::FSHL:
+  case ISD::FSHR:
+    if (ConstantSDNode *C =
+            isConstOrDemandedConstSplat(Op.getOperand(2), DemandedElts)) {
+      unsigned Amt = C->getAPIntValue().urem(BitWidth);
+
+      // For fshl, 0-shift returns the 1st arg.
+      // For fshr, 0-shift returns the 2nd arg.
+      if (Amt == 0) {
+        Known = computeKnownBits(Op.getOperand(Opcode == ISD::FSHL ? 0 : 1),
+                                 DemandedElts, Depth + 1);
+        break;
+      }
+
+      // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
+      // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
+      Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+      Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+      if (Opcode == ISD::FSHL) {
+        Known.One <<= Amt;
+        Known.Zero <<= Amt;
+        Known2.One.lshrInPlace(BitWidth - Amt);
+        Known2.Zero.lshrInPlace(BitWidth - Amt);
+      } else {
+        Known.One <<= BitWidth - Amt;
+        Known.Zero <<= BitWidth - Amt;
+        Known2.One.lshrInPlace(Amt);
+        Known2.Zero.lshrInPlace(Amt);
+      }
+      Known.One |= Known2.One;
+      Known.Zero |= Known2.Zero;
+    }
+    break;
  case ISD::SIGN_EXTEND_INREG: {
    EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned EBits = EVT.getScalarSizeInBits();
--- a/test/CodeGen/X86/known-bits.ll
+++ b/test/CodeGen/X86/known-bits.ll
@ -302,17 +302,12 @@ declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone
 define i32 @knownbits_fshl(i32 %a0) nounwind {
 ; X32-LABEL: knownbits_fshl:
 ; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl $-1, %eax
-; X32-NEXT:    shrdl $27, %ecx, %eax
-; X32-NEXT:    andl $3, %eax
+; X32-NEXT:    movl $3, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: knownbits_fshl:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl $-1, %eax
-; X64-NEXT:    shrdl $27, %edi, %eax
-; X64-NEXT:    andl $3, %eax
+; X64-NEXT:    movl $3, %eax
 ; X64-NEXT:    retq
  %1 = tail call i32 @llvm.fshl.i32(i32 %a0, i32 -1, i32 5)
  %2 = and i32 %1, 3
@ -322,17 +317,12 @@ define i32 @knownbits_fshl(i32 %a0) nounwind {
 define i32 @knownbits_fshr(i32 %a0) nounwind {
 ; X32-LABEL: knownbits_fshr:
 ; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl $-1, %eax
-; X32-NEXT:    shrdl $5, %ecx, %eax
-; X32-NEXT:    andl $3, %eax
+; X32-NEXT:    movl $3, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: knownbits_fshr:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl $-1, %eax
-; X64-NEXT:    shrdl $5, %edi, %eax
-; X64-NEXT:    andl $3, %eax
+; X64-NEXT:    movl $3, %eax
 ; X64-NEXT:    retq
  %1 = tail call i32 @llvm.fshr.i32(i32 %a0, i32 -1, i32 5)
  %2 = and i32 %1, 3