
Revert rL357745: [SelectionDAG] Compute known bits of CopyFromReg

Certain optimisations from ConstantHoisting and CGP rely on Selection DAG not
seeing through to the constant in other blocks. Revert this patch while we come
up with a better way to handle that.

I will try to follow this up with some better tests.

llvm-svn: 358113
David Green 2019-04-10 18:00:41 +00:00
parent 84a834c086
commit 9d042ff7dc
8 changed files with 27 additions and 42 deletions
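
For background on the first change below: ConstantHoisting deliberately materialises an expensive constant once, as an identity bitcast in a dominating block, so that it is not re-folded into every user. rL357745 let computeKnownBits look through the CopyFromReg that carries such a value into other blocks, which could expose the constant again and undo that decision. A hand-reduced sketch of the shape involved (hypothetical IR, names invented, not taken from the regressing workloads):

; %const stands in for a large constant kept alive by ConstantHoisting.
; With rL357745, SelectionDAG could recover its value through the
; CopyFromReg into %use and %other, re-folding it into each user.
define i32 @hoisted_constant(i1 %c, i32 %x) {
entry:
  %const = bitcast i32 65537 to i32
  br i1 %c, label %use, label %other

use:
  %a = add i32 %x, %const
  ret i32 %a

other:
  %b = mul i32 %x, %const
  ret i32 %b
}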


@@ -31,7 +31,6 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RuntimeLibcalls.h"
 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -3208,25 +3207,6 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     Known.One &= Known2.One;
     break;
   }
-  case ISD::CopyFromReg: {
-    auto R = cast<RegisterSDNode>(Op.getOperand(1));
-    const unsigned Reg = R->getReg();
-
-    const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
-    if (!TRI->isVirtualRegister(Reg))
-      break;
-
-    const MachineRegisterInfo *MRI = &MF->getRegInfo();
-    if (!MRI->hasOneDef(Reg))
-      break;
-
-    const FunctionLoweringInfo::LiveOutInfo *LOI = FLI->GetLiveOutRegInfo(Reg);
-    if (!LOI || LOI->Known.getBitWidth() != BitWidth)
-      break;
-
-    Known = LOI->Known;
-    break;
-  }
   case ISD::FrameIndex:
   case ISD::TargetFrameIndex:
     TLI->computeKnownBitsForFrameIndex(Op, Known, DemandedElts, *this, Depth);


@@ -19595,10 +19595,10 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
       DAG.MaskedValueIsZero(BitNo, APInt(BitNo.getValueSizeInBits(), 32)))
     Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
 
-  // If the operand types disagree, extend or truncate the shift amount to match.
-  // Since BT ignores high bits (like shifts) we can use anyextend for the extension.
+  // If the operand types disagree, extend the shift amount to match. Since
+  // BT ignores high bits (like shifts) we can use anyextend.
   if (Src.getValueType() != BitNo.getValueType())
-    BitNo = DAG.getAnyExtOrTrunc(BitNo, dl, Src.getValueType());
+    BitNo = DAG.getNode(ISD::ANY_EXTEND, dl, Src.getValueType(), BitNo);
 
   X86CC = DAG.getConstant(CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B,
                           dl, MVT::i8);
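
The hunk above restores the ANY_EXTEND form of LowerAndToBT. For context, the pattern this function lowers is the single-bit test sketched below (hypothetical IR, reduced by hand). BT only reads the low bits of the bit-index register, so when the operand types disagree an anyextend of the index is safe; the truncate case handled by getAnyExtOrTrunc was tied to the stronger known-bits analysis that this commit reverts.

; icmp ne (and %val, (shl 1, %n)), 0 is the canonical bit-test shape that
; LowerAndToBT turns into an x86 BT instruction.
define i1 @bit_test(i64 %val, i64 %n) {
  %mask = shl i64 1, %n
  %and = and i64 %val, %mask
  %cmp = icmp ne i64 %and, 0
  ret i1 %cmp
}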


@@ -110,8 +110,8 @@ main_body:
 
 ;CHECK-LABEL: {{^}}s_buffer_load_index_across_bb:
 ;CHECK-NOT: s_waitcnt;
-;CHECK-NOT: v_or_b32
-;CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen offset:8
+;CHECK: v_or_b32
+;CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
 define amdgpu_ps void @s_buffer_load_index_across_bb(<4 x i32> inreg %desc, i32 %index) {
 main_body:
   %tmp = shl i32 %index, 4
@@ -127,8 +127,10 @@ bb1: ; preds = %main_body
 
 ;CHECK-LABEL: {{^}}s_buffer_load_index_across_bb_merged:
 ;CHECK-NOT: s_waitcnt;
-;CHECK-NOT: v_or_b32
-;CHECK: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen offset:8
+;CHECK: v_or_b32
+;CHECK: v_or_b32
+;CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
+;CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
 define amdgpu_ps void @s_buffer_load_index_across_bb_merged(<4 x i32> inreg %desc, i32 %index) {
 main_body:
   %tmp = shl i32 %index, 4
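
These checks go back to expecting a v_or_b32 and no folded offset: without the CopyFromReg case in computeKnownBits, SelectionDAG can no longer prove across the block boundary that the low bits of the shifted index are zero, so the OR with 8 is not folded into the buffer offset. A hand-reduced sketch of the pattern (hypothetical IR, simplified from the test above):

; %tmp has its low four bits known zero, but it is defined in %main_body and
; consumed in %bb1; folding the OR into an 'offset:8' addressing mode needs
; those known bits to survive the CopyFromReg between the two blocks.
define i32 @known_bits_across_bb(i32 %index, i1 %cond) {
main_body:
  %tmp = shl i32 %index, 4
  br i1 %cond, label %bb1, label %exit

bb1:
  %off = or i32 %tmp, 8
  ret i32 %off

exit:
  ret i32 %tmp
}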


@@ -183,11 +183,11 @@ entry:
   ret void
 }
 
-define void @func2(i16 %int_val) nounwind {
+define void @func2() nounwind {
 entry:
   %val = alloca i16
   %old = alloca i16
-  store i16 %int_val, i16* %val
+  store i16 31, i16* %val
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
@@ -197,7 +197,7 @@ entry:
   ; CHECK-BAREMETAL-NOT: __sync
   %0 = atomicrmw umin i16* %val, i16 16 monotonic
   store i16 %0, i16* %old
-  %uneg = sub i16 0, 2
+  %uneg = sub i16 0, 1
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
@@ -249,7 +249,7 @@ entry:
   ; CHECK-T1-M0: bl ___sync_fetch_and_umin_1
   ; CHECK-BAREMETAL: cmp
   ; CHECK-BAREMETAL-NOT: __sync
-  %uneg = sub i8 0, 2
+  %uneg = sub i8 0, 1
   %1 = atomicrmw umin i8* %val, i8 %uneg monotonic
   store i8 %1, i8* %old
   ; CHECK: ldrex


@@ -6,14 +6,16 @@
 ; Function Attrs: nounwind
 define void @ec_GFp_nistp256_points_mul() {
 ; CHECK-LABEL: ec_GFp_nistp256_points_mul:
-; CHECK: ld 4, 0(3)
-; CHECK: li 3, 0
-; CHECK: subfic 5, 4, 0
-; CHECK: subfze 5, 3
+; CHECK: ld 5, 0(3)
+; CHECK: li 3, 127
+; CHECK: li 4, 0
+; CHECK: subfic 6, 5, 0
+; CHECK: subfze 6, 4
+; CHECK: sradi 7, 6, 63
+; CHECK: srad 6, 6, 3
+; CHECK: subfc 5, 5, 7
+; CHECK: subfe 5, 4, 6
 ; CHECK: sradi 5, 5, 63
 ; CHECK: subfc 4, 4, 5
 ; CHECK: subfe 4, 3, 5
 ; CHECK: sradi 4, 4, 63
 
 ; With MemorySSA, everything is taken out of the loop by licm.
 ; Loads and stores to undef are treated as non-aliasing.
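
The trailing comment refers to LICM under MemorySSA: loads and stores through an undef pointer are treated as not aliasing anything, so LICM may move them out of the loop entirely. Roughly the following shape (hypothetical IR, not from the test):

; With MemorySSA-based LICM, the load and store below are considered
; non-aliasing because they go through undef, so both can be moved out
; of the loop.
define void @licm_undef(i64 %n) {
entry:
  br label %loop

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %v = load i64, i64* undef
  store i64 %v, i64* undef
  %i.next = add i64 %i, 1
  %done = icmp eq i64 %i.next, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}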


@@ -1,7 +1,7 @@
 ; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -disable-early-taildup -disable-cgp -systemz-subreg-liveness < %s | FileCheck %s
 
 ; Check for successful compilation.
-; CHECK: lhi {{%r[0-9]+}}, -5
+; CHECK: lhi %r0, -5
 
 target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
 target triple = "s390x-ibm-linux"


@@ -6,8 +6,8 @@ target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
 target triple = "i386--netbsd"
 
 ; CHECK-LABEL: fn1
-; CHECK: addl {{.*#+}} 4-byte Folded Reload
-; CHECK: orl {{.*#+}} 4-byte Folded Reload
+; CHECK: addl {{.*#+}} 4-byte Folded Reload
+; CHECK: xorl {{.*#+}} 4-byte Folded Reload
+; CHECK: xorl {{.*#+}} 4-byte Folded Reload
 ; CHECK: retl


@@ -11,8 +11,9 @@
 define void @extractelt_mismatch_vector_element_type(i32 %arg, i1 %x) {
 ; CHECK-LABEL: extractelt_mismatch_vector_element_type:
 ; CHECK:       # %bb.0: # %bb
-; CHECK-NEXT:    movb $1, (%rax)
-; CHECK-NEXT:    movb $1, (%rax)
+; CHECK-NEXT:    movb $1, %al
+; CHECK-NEXT:    movb %al, (%rax)
+; CHECK-NEXT:    movb %al, (%rax)
 ; CHECK-NEXT:    retq
 bb:
   %tmp = icmp ult i32 %arg, 0