1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00

[SelectionDAG] rot(x, y) --> x iff ComputeNumSignBits(x) == BitWidth(x)

Rotating a 0/-1 value by any amount will always result in the same 0/-1 value.
This commit is contained in:
Simon Pilgrim 2020-01-24 10:35:19 +00:00
parent c46fcc1c0b
commit c336c5c2b0
5 changed files with 49 additions and 38 deletions

View File

@ -7414,6 +7414,10 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
} }
} }
// Simplify the operands using demanded-bits information.
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))). // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE && if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND) { N1.getOperand(0).getOpcode() == ISD::AND) {

View File

@ -3743,6 +3743,12 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
} }
case ISD::ROTL: case ISD::ROTL:
case ISD::ROTR: case ISD::ROTR:
Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
// If we're rotating an 0/-1 value, then it stays an 0/-1 value.
if (Tmp == VTBits)
return VTBits;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
unsigned RotAmt = C->getAPIntValue().urem(VTBits); unsigned RotAmt = C->getAPIntValue().urem(VTBits);
@ -3752,7 +3758,6 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// If we aren't rotating out all of the known-in sign bits, return the // If we aren't rotating out all of the known-in sign bits, return the
// number that are left. This handles rotl(sext(x), 1) for example. // number that are left. This handles rotl(sext(x), 1) for example.
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
if (Tmp > (RotAmt + 1)) return (Tmp - RotAmt); if (Tmp > (RotAmt + 1)) return (Tmp - RotAmt);
} }
break; break;

View File

@ -1586,6 +1586,15 @@ bool TargetLowering::SimplifyDemandedBits(
} }
break; break;
} }
case ISD::ROTL:
case ISD::ROTR: {
SDValue Op0 = Op.getOperand(0);
// If we're rotating an 0/-1 value, then it stays an 0/-1 value.
if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
return TLO.CombineTo(Op, Op0);
break;
}
case ISD::BITREVERSE: { case ISD::BITREVERSE: {
SDValue Src = Op.getOperand(0); SDValue Src = Op.getOperand(0);
APInt DemandedSrcBits = DemandedBits.reverseBits(); APInt DemandedSrcBits = DemandedBits.reverseBits();

View File

@ -87,17 +87,10 @@ define <4 x i32> @rot_v4i32_zero_non_splat(<4 x i32> %x) {
} }
define <4 x i32> @rot_v4i32_allsignbits(<4 x i32> %x, <4 x i32> %y) { define <4 x i32> @rot_v4i32_allsignbits(<4 x i32> %x, <4 x i32> %y) {
; XOP-LABEL: rot_v4i32_allsignbits: ; CHECK-LABEL: rot_v4i32_allsignbits:
; XOP: # %bb.0: ; CHECK: # %bb.0:
; XOP-NEXT: vpsrad $31, %xmm0, %xmm0 ; CHECK-NEXT: vpsrad $31, %xmm0, %xmm0
; XOP-NEXT: vprotd %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq
; XOP-NEXT: retq
;
; AVX512-LABEL: rot_v4i32_allsignbits:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX512-NEXT: vprolvd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31> %1 = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
%2 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %1, <4 x i32> %1, <4 x i32> %y) %2 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %1, <4 x i32> %1, <4 x i32> %y)
ret <4 x i32> %2 ret <4 x i32> %2

View File

@ -314,52 +314,52 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X86-NEXT: andl $-8, %esp ; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $72, %esp ; X86-NEXT: subl $72, %esp
; X86-NEXT: movl 8(%ebp), %ecx ; X86-NEXT: movl 8(%ebp), %ecx
; X86-NEXT: movl 12(%ebp), %edx ; X86-NEXT: movl 12(%ebp), %eax
; X86-NEXT: movl 20(%ebp), %ebx ; X86-NEXT: movl 20(%ebp), %edx
; X86-NEXT: sarl $31, %ebx ; X86-NEXT: movl %edx, %esi
; X86-NEXT: movl %edx, %eax ; X86-NEXT: sarl $31, %esi
; X86-NEXT: movl %eax, %edi
; X86-NEXT: sarl $31, %edi
; X86-NEXT: movl %edi, %ebx
; X86-NEXT: shldl $31, %eax, %ebx
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: shldl $31, %ecx, %eax ; X86-NEXT: shldl $31, %ecx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: shll $31, %ecx ; X86-NEXT: shll $31, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %edx, %esi ; X86-NEXT: pushl %esi
; X86-NEXT: sarl $31, %esi ; X86-NEXT: pushl %esi
; X86-NEXT: movl %esi, %edi ; X86-NEXT: pushl %edx
; X86-NEXT: shldl $31, %edx, %esi
; X86-NEXT: leal {{[0-9]+}}(%esp), %edx
; X86-NEXT: rorl %edi
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl 20(%ebp)
; X86-NEXT: pushl 16(%ebp) ; X86-NEXT: pushl 16(%ebp)
; X86-NEXT: pushl %edi ; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi ; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %eax ; X86-NEXT: pushl %eax
; X86-NEXT: pushl %ecx ; X86-NEXT: pushl %ecx
; X86-NEXT: pushl %edx ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl %eax
; X86-NEXT: calll __divti3 ; X86-NEXT: calll __divti3
; X86-NEXT: addl $32, %esp ; X86-NEXT: addl $32, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: subl $1, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: subl $1, %ecx ; X86-NEXT: sbbl $0, %ebx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: testl %esi, %esi
; X86-NEXT: sbbl $0, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: testl %ebx, %ebx
; X86-NEXT: sets %al ; X86-NEXT: sets %al
; X86-NEXT: testl %edi, %edi ; X86-NEXT: testl %edi, %edi
; X86-NEXT: sets %cl ; X86-NEXT: sets %cl
; X86-NEXT: xorb %al, %cl ; X86-NEXT: xorb %al, %cl
; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %esi
; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %esi
; X86-NEXT: pushl 20(%ebp) ; X86-NEXT: pushl 20(%ebp)
; X86-NEXT: pushl 16(%ebp) ; X86-NEXT: pushl 16(%ebp)
; X86-NEXT: pushl %edi ; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi ; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-NEXT: pushl %eax ; X86-NEXT: pushl %eax
@ -374,8 +374,8 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X86-NEXT: testb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload ; X86-NEXT: testb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X86-NEXT: movl %ebx, %edx
; X86-NEXT: leal -12(%ebp), %esp ; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi ; X86-NEXT: popl %esi
; X86-NEXT: popl %edi ; X86-NEXT: popl %edi