[DAG] computeKnownBits - Move (most) ISD::SHL handling into KnownBits::shl

As discussed on D90527, we should be be trying to move shift handling functionality into KnownBits to avoid code duplication in SelectionDAG/GlobalISel/ValueTracking. The refactor to use the KnownBits fixed/min/max constant helpers allows us to hit a couple of cases that we were missing before. We still need the getValidMinimumShiftAmountConstant case as KnownBits doesn't handle per-element vector cases.
2024-11-23 03:02:36 +01:00 · 2020-11-03 13:49:00 +00:00 · 2020-11-03 13:49:00 +00:00 · fc543f8a95
commit fc543f8a95
parent f3c13cd376
5 changed files with 47 additions and 32 deletions
--- a/include/llvm/Support/KnownBits.h
+++ b/include/llvm/Support/KnownBits.h
@ -270,6 +270,10 @@ public:
  /// Compute known bits for smin(LHS, RHS).
  static KnownBits smin(const KnownBits &LHS, const KnownBits &RHS);

+  /// Compute known bits for shl(LHS, RHS).
+  /// NOTE: RHS (shift amount) bitwidth doesn't need to be the same as LHS.
+  static KnownBits shl(const KnownBits &LHS, const KnownBits &RHS);
+
  /// Insert the bits from a smaller known bits starting at bitPosition.
  void insertBits(const KnownBits &SubBits, unsigned BitPosition) {
    Zero.insertBits(SubBits.Zero, BitPosition);
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@ -2960,19 +2960,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
  }
  case ISD::SHL:
    Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
-
-    if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) {
-      unsigned Shift = ShAmt->getZExtValue();
-      Known.Zero <<= Shift;
-      Known.One <<= Shift;
-      // Low bits are known zero.
-      Known.Zero.setLowBits(Shift);
-      break;
-    }
-
-    // No matter the shift amount, the trailing zeros will stay zero.
-    Known.Zero = APInt::getLowBitsSet(BitWidth, Known.countMinTrailingZeros());
-    Known.One.clearAllBits();
+    Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+    Known = KnownBits::shl(Known, Known2);

    // Minimum shift low bits are known zero.
    if (const APInt *ShMinAmt =
--- a/lib/Support/KnownBits.cpp
+++ b/lib/Support/KnownBits.cpp
@ -145,6 +145,30 @@ KnownBits KnownBits::smin(const KnownBits &LHS, const KnownBits &RHS) {
  return Flip(umax(Flip(LHS), Flip(RHS)));
 }

+KnownBits KnownBits::shl(const KnownBits &LHS, const KnownBits &RHS) {
+  unsigned BitWidth = LHS.getBitWidth();
+  KnownBits Known(BitWidth);
+
+  // If the shift amount is a valid constant then transform LHS directly.
+  if (RHS.isConstant() && RHS.getConstant().ult(BitWidth)) {
+    unsigned Shift = RHS.getConstant().getZExtValue();
+    Known = LHS;
+    Known.Zero <<= Shift;
+    Known.One <<= Shift;
+    // Low bits are known zero.
+    Known.Zero.setLowBits(Shift);
+    return Known;
+  }
+
+  // Minimum shift amount low bits are known zero.
+  if (RHS.getMinValue().ult(BitWidth))
+    Known.Zero.setLowBits(RHS.getMinValue().getZExtValue());
+
+  // No matter the shift amount, the trailing zeros will stay zero.
+  Known.Zero.setLowBits(LHS.countMinTrailingZeros());
+  return Known;
+}
+
 KnownBits KnownBits::abs() const {
  // If the source's MSB is zero then we know the rest of the bits already.
  if (isNonNegative())
--- a/test/CodeGen/PowerPC/pr44183.ll
+++ b/test/CodeGen/PowerPC/pr44183.ll
@ -1,9 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN:     -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s
+
 %struct.m.2.5.8.11 = type { %struct.l.0.3.6.9, [7 x i8], %struct.a.1.4.7.10 }
 %struct.l.0.3.6.9 = type { i8 }
 %struct.a.1.4.7.10 = type { [27 x i8], [0 x i32], [4 x i8] }
+
 define void @_ZN1m1nEv(%struct.m.2.5.8.11* %this) local_unnamed_addr nounwind align 2 {
 ; CHECK-LABEL: _ZN1m1nEv:
 ; CHECK:       # %bb.0: # %entry
@ -12,15 +14,12 @@ define void @_ZN1m1nEv(%struct.m.2.5.8.11* %this) local_unnamed_addr nounwind al
 ; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -48(r1)
 ; CHECK-NEXT:    mr r30, r3
-; CHECK-NEXT:    ld r4, 16(r30)
-; CHECK-NEXT:    ld r5, 8(r30)
-; CHECK-NEXT:    lwz r6, 36(r30)
-; CHECK-NEXT:    rldicl r5, r5, 60, 4
-; CHECK-NEXT:    sldi r4, r4, 60
-; CHECK-NEXT:    or r4, r4, r5
-; CHECK-NEXT:    rlwinm r3, r4, 31, 0, 0
-; CHECK-NEXT:    clrlwi r4, r6, 31
-; CHECK-NEXT:    or r4, r4, r3
+; CHECK-NEXT:    ld r3, 8(r3)
+; CHECK-NEXT:    lwz r4, 36(r30)
+; CHECK-NEXT:    rldicl r3, r3, 60, 4
+; CHECK-NEXT:    slwi r3, r3, 31
+; CHECK-NEXT:    clrlwi r4, r4, 31
+; CHECK-NEXT:    rlwimi r4, r3, 0, 0, 0
 ; CHECK-NEXT:    bl _ZN1llsE1d
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    ld r3, 16(r30)
@ -29,7 +28,7 @@ define void @_ZN1m1nEv(%struct.m.2.5.8.11* %this) local_unnamed_addr nounwind al
 ; CHECK-NEXT:    sldi r3, r3, 60
 ; CHECK-NEXT:    or r3, r3, r4
 ; CHECK-NEXT:    sldi r3, r3, 31
-; CHECK-NEXT:    clrldi r4, r3, 32
+; CHECK-NEXT:    rlwinm r4, r3, 0, 0, 0
 ; CHECK-NEXT:    bl _ZN1llsE1d
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    addi r1, r1, 48
--- a/test/CodeGen/X86/pr32282.ll
+++ b/test/CodeGen/X86/pr32282.ll
@ -13,18 +13,17 @@ define void @foo(i64 %x) nounwind {
 ; X86-LABEL: foo:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %eax
-; X86-NEXT:    movl d, %eax
+; X86-NEXT:    movl d+4, %eax
 ; X86-NEXT:    notl %eax
-; X86-NEXT:    movl d+4, %ecx
+; X86-NEXT:    movl d, %ecx
 ; X86-NEXT:    notl %ecx
-; X86-NEXT:    andl $701685459, %ecx # imm = 0x29D2DED3
-; X86-NEXT:    andl $-566231040, %eax # imm = 0xDE400000
-; X86-NEXT:    shrdl $21, %ecx, %eax
-; X86-NEXT:    shrl $21, %ecx
-; X86-NEXT:    addl $7, %eax
-; X86-NEXT:    adcl $0, %ecx
-; X86-NEXT:    pushl %ecx
+; X86-NEXT:    andl $-566231040, %ecx # imm = 0xDE400000
+; X86-NEXT:    andl $701685459, %eax # imm = 0x29D2DED3
+; X86-NEXT:    shrdl $21, %eax, %ecx
+; X86-NEXT:    shrl $21, %eax
+; X86-NEXT:    addl $7, %ecx
 ; X86-NEXT:    pushl %eax
+; X86-NEXT:    pushl %ecx
 ; X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NEXT:    calll __divdi3