
Revert rL357745: [SelectionDAG] Compute known bits of CopyFromReg

Certain optimisations from ConstantHoisting and CGP rely on Selection DAG not
seeing through to the constant in other blocks. Revert this patch while we come
up with a better way to handle that.

I will try to follow this up with some better tests.

llvm-svn: 358113
David Green 2019-04-10 18:00:41 +00:00
parent 84a834c086
commit 9d042ff7dc
8 changed files with 27 additions and 42 deletions
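
For background on the first change below: ConstantHoisting deliberately materialises an expensive constant once, as an identity bitcast in a dominating block, so that it is not re-folded into every user. rL357745 let computeKnownBits look through the CopyFromReg that carries such a value into other blocks, which could expose the constant again and undo that decision. A hand-reduced sketch of the shape involved (hypothetical IR, names invented, not taken from the regressing workloads):

; %const stands in for a large constant kept alive by ConstantHoisting.
; With rL357745, SelectionDAG could recover its value through the
; CopyFromReg into %use and %other, re-folding it into each user.
define i32 @hoisted_constant(i1 %c, i32 %x) {
entry:
  %const = bitcast i32 65537 to i32
  br i1 %c, label %use, label %other

use:
  %a = add i32 %x, %const
  ret i32 %a

other:
  %b = mul i32 %x, %const
  ret i32 %b
}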


@@ -31,7 +31,6 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RuntimeLibcalls.h"
 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -3208,25 +3207,6 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     Known.One &= Known2.One;
     break;
   }
-  case ISD::CopyFromReg: {
-    auto R = cast<RegisterSDNode>(Op.getOperand(1));
-    const unsigned Reg = R->getReg();
-
-    const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
-    if (!TRI->isVirtualRegister(Reg))
-      break;
-
-    const MachineRegisterInfo *MRI = &MF->getRegInfo();
-    if (!MRI->hasOneDef(Reg))
-      break;
-
-    const FunctionLoweringInfo::LiveOutInfo *LOI = FLI->GetLiveOutRegInfo(Reg);
-    if (!LOI || LOI->Known.getBitWidth() != BitWidth)
-      break;
-
-    Known = LOI->Known;
-    break;
-  }
   case ISD::FrameIndex:
   case ISD::TargetFrameIndex:
     TLI->computeKnownBitsForFrameIndex(Op, Known, DemandedElts, *this, Depth);


@@ -19595,10 +19595,10 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
       DAG.MaskedValueIsZero(BitNo, APInt(BitNo.getValueSizeInBits(), 32)))
     Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
 
-  // If the operand types disagree, extend or truncate the shift amount to match.
-  // Since BT ignores high bits (like shifts) we can use anyextend for the extension.
+  // If the operand types disagree, extend the shift amount to match. Since
+  // BT ignores high bits (like shifts) we can use anyextend.
   if (Src.getValueType() != BitNo.getValueType())
-    BitNo = DAG.getAnyExtOrTrunc(BitNo, dl, Src.getValueType());
+    BitNo = DAG.getNode(ISD::ANY_EXTEND, dl, Src.getValueType(), BitNo);
 
   X86CC = DAG.getConstant(CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B,
                           dl, MVT::i8);
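
The hunk above restores the ANY_EXTEND form of LowerAndToBT. For context, the pattern this function lowers is the single-bit test sketched below (hypothetical IR, reduced by hand). BT only reads the low bits of the bit-index register, so when the operand types disagree an anyextend of the index is safe; the truncate case handled by getAnyExtOrTrunc was tied to the stronger known-bits analysis that this commit reverts.

; icmp ne (and %val, (shl 1, %n)), 0 is the canonical bit-test shape that
; LowerAndToBT turns into an x86 BT instruction.
define i1 @bit_test(i64 %val, i64 %n) {
  %mask = shl i64 1, %n
  %and = and i64 %val, %mask
  %cmp = icmp ne i64 %and, 0
  ret i1 %cmp
}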


@@ -110,8 +110,8 @@ main_body:
 
 ;CHECK-LABEL: {{^}}s_buffer_load_index_across_bb:
 ;CHECK-NOT: s_waitcnt;
-;CHECK-NOT: v_or_b32
-;CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen offset:8
+;CHECK: v_or_b32
+;CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
 define amdgpu_ps void @s_buffer_load_index_across_bb(<4 x i32> inreg %desc, i32 %index) {
 main_body:
   %tmp = shl i32 %index, 4
@@ -127,8 +127,10 @@ bb1: ; preds = %main_body
 
 ;CHECK-LABEL: {{^}}s_buffer_load_index_across_bb_merged:
 ;CHECK-NOT: s_waitcnt;
-;CHECK-NOT: v_or_b32
-;CHECK: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen offset:8
+;CHECK: v_or_b32
+;CHECK: v_or_b32
+;CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
+;CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
 define amdgpu_ps void @s_buffer_load_index_across_bb_merged(<4 x i32> inreg %desc, i32 %index) {
 main_body:
   %tmp = shl i32 %index, 4
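
These checks go back to expecting a v_or_b32 and no folded offset: without the CopyFromReg case in computeKnownBits, SelectionDAG can no longer prove across the block boundary that the low bits of the shifted index are zero, so the OR with 8 is not folded into the buffer offset. A hand-reduced sketch of the pattern (hypothetical IR, simplified from the test above):

; %tmp has its low four bits known zero, but it is defined in %main_body and
; consumed in %bb1; folding the OR into an 'offset:8' addressing mode needs
; those known bits to survive the CopyFromReg between the two blocks.
define i32 @known_bits_across_bb(i32 %index, i1 %cond) {
main_body:
  %tmp = shl i32 %index, 4
  br i1 %cond, label %bb1, label %exit

bb1:
  %off = or i32 %tmp, 8
  ret i32 %off

exit:
  ret i32 %tmp
}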


@@ -183,11 +183,11 @@ entry:
   ret void
 }
 
-define void @func2(i16 %int_val) nounwind {
+define void @func2() nounwind {
 entry:
   %val = alloca i16
   %old = alloca i16
-  store i16 %int_val, i16* %val
+  store i16 31, i16* %val
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
@@ -197,7 +197,7 @@ entry:
   ; CHECK-BAREMETAL-NOT: __sync
   %0 = atomicrmw umin i16* %val, i16 16 monotonic
   store i16 %0, i16* %old
-  %uneg = sub i16 0, 2
+  %uneg = sub i16 0, 1
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
@@ -249,7 +249,7 @@ entry:
   ; CHECK-T1-M0: bl ___sync_fetch_and_umin_1
   ; CHECK-BAREMETAL: cmp
   ; CHECK-BAREMETAL-NOT: __sync
-  %uneg = sub i8 0, 2
+  %uneg = sub i8 0, 1
   %1 = atomicrmw umin i8* %val, i8 %uneg monotonic
   store i8 %1, i8* %old
   ; CHECK: ldrex


@@ -6,14 +6,16 @@
 ; Function Attrs: nounwind
 define void @ec_GFp_nistp256_points_mul() {
 ; CHECK-LABEL: ec_GFp_nistp256_points_mul:
-; CHECK: ld 4, 0(3)
-; CHECK: li 3, 0
-; CHECK: subfic 5, 4, 0
-; CHECK: subfze 5, 3
+; CHECK: ld 5, 0(3)
+; CHECK: li 3, 127
+; CHECK: li 4, 0
+; CHECK: subfic 6, 5, 0
+; CHECK: subfze 6, 4
+; CHECK: sradi 7, 6, 63
+; CHECK: srad 6, 6, 3
+; CHECK: subfc 5, 5, 7
+; CHECK: subfe 5, 4, 6
 ; CHECK: sradi 5, 5, 63
 ; CHECK: subfc 4, 4, 5
 ; CHECK: subfe 4, 3, 5
 ; CHECK: sradi 4, 4, 63
 
 ; With MemorySSA, everything is taken out of the loop by licm.
 ; Loads and stores to undef are treated as non-aliasing.
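
The trailing comment refers to LICM under MemorySSA: loads and stores through an undef pointer are treated as not aliasing anything, so LICM may move them out of the loop entirely. Roughly the following shape (hypothetical IR, not from the test):

; With MemorySSA-based LICM, the load and store below are considered
; non-aliasing because they go through undef, so both can be moved out
; of the loop.
define void @licm_undef(i64 %n) {
entry:
  br label %loop

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %v = load i64, i64* undef
  store i64 %v, i64* undef
  %i.next = add i64 %i, 1
  %done = icmp eq i64 %i.next, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}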


@@ -1,7 +1,7 @@
 ; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -disable-early-taildup -disable-cgp -systemz-subreg-liveness < %s | FileCheck %s
 
 ; Check for successful compilation.
-; CHECK: lhi {{%r[0-9]+}}, -5
+; CHECK: lhi %r0, -5
 
 target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
 target triple = "s390x-ibm-linux"


@@ -6,8 +6,8 @@ target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
 target triple = "i386--netbsd"
 
 ; CHECK-LABEL: fn1
-; CHECK: addl {{.*#+}} 4-byte Folded Reload
-; CHECK: orl {{.*#+}} 4-byte Folded Reload
+; CHECK: addl {{.*#+}} 4-byte Folded Reload
+; CHECK: xorl {{.*#+}} 4-byte Folded Reload
+; CHECK: xorl {{.*#+}} 4-byte Folded Reload
 ; CHECK: retl


@@ -11,8 +11,9 @@
 define void @extractelt_mismatch_vector_element_type(i32 %arg, i1 %x) {
 ; CHECK-LABEL: extractelt_mismatch_vector_element_type:
 ; CHECK:       # %bb.0: # %bb
-; CHECK-NEXT:    movb $1, (%rax)
-; CHECK-NEXT:    movb $1, (%rax)
+; CHECK-NEXT:    movb $1, %al
+; CHECK-NEXT:    movb %al, (%rax)
+; CHECK-NEXT:    movb %al, (%rax)
 ; CHECK-NEXT:    retq
 bb:
   %tmp = icmp ult i32 %arg, 0