mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
Revert rL357745: [SelectionDAG] Compute known bits of CopyFromReg
Certain optimisations from ConstantHoisting and CodeGenPrepare (CGP) rely on SelectionDAG not seeing through to the constant in other blocks. Revert this patch while we come up with a better way to handle that. I will try to follow this up with some better tests. llvm-svn: 358113
This commit is contained in:
parent
84a834c086
commit
9d042ff7dc
@ -31,7 +31,6 @@
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineMemOperand.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/RuntimeLibcalls.h"
|
||||
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
|
||||
#include "llvm/CodeGen/SelectionDAGNodes.h"
|
||||
@ -3208,25 +3207,6 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
|
||||
Known.One &= Known2.One;
|
||||
break;
|
||||
}
|
||||
case ISD::CopyFromReg: {
|
||||
auto R = cast<RegisterSDNode>(Op.getOperand(1));
|
||||
const unsigned Reg = R->getReg();
|
||||
|
||||
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
|
||||
if (!TRI->isVirtualRegister(Reg))
|
||||
break;
|
||||
|
||||
const MachineRegisterInfo *MRI = &MF->getRegInfo();
|
||||
if (!MRI->hasOneDef(Reg))
|
||||
break;
|
||||
|
||||
const FunctionLoweringInfo::LiveOutInfo *LOI = FLI->GetLiveOutRegInfo(Reg);
|
||||
if (!LOI || LOI->Known.getBitWidth() != BitWidth)
|
||||
break;
|
||||
|
||||
Known = LOI->Known;
|
||||
break;
|
||||
}
|
||||
case ISD::FrameIndex:
|
||||
case ISD::TargetFrameIndex:
|
||||
TLI->computeKnownBitsForFrameIndex(Op, Known, DemandedElts, *this, Depth);
|
||||
|
@ -19595,10 +19595,10 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
|
||||
DAG.MaskedValueIsZero(BitNo, APInt(BitNo.getValueSizeInBits(), 32)))
|
||||
Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
|
||||
|
||||
// If the operand types disagree, extend or truncate the shift amount to match.
|
||||
// Since BT ignores high bits (like shifts) we can use anyextend for the extension.
|
||||
// If the operand types disagree, extend the shift amount to match. Since
|
||||
// BT ignores high bits (like shifts) we can use anyextend.
|
||||
if (Src.getValueType() != BitNo.getValueType())
|
||||
BitNo = DAG.getAnyExtOrTrunc(BitNo, dl, Src.getValueType());
|
||||
BitNo = DAG.getNode(ISD::ANY_EXTEND, dl, Src.getValueType(), BitNo);
|
||||
|
||||
X86CC = DAG.getConstant(CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B,
|
||||
dl, MVT::i8);
|
||||
|
@ -110,8 +110,8 @@ main_body:
|
||||
|
||||
;CHECK-LABEL: {{^}}s_buffer_load_index_across_bb:
|
||||
;CHECK-NOT: s_waitcnt;
|
||||
;CHECK-NOT: v_or_b32
|
||||
;CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen offset:8
|
||||
;CHECK: v_or_b32
|
||||
;CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
|
||||
define amdgpu_ps void @s_buffer_load_index_across_bb(<4 x i32> inreg %desc, i32 %index) {
|
||||
main_body:
|
||||
%tmp = shl i32 %index, 4
|
||||
@ -127,8 +127,10 @@ bb1: ; preds = %main_body
|
||||
|
||||
;CHECK-LABEL: {{^}}s_buffer_load_index_across_bb_merged:
|
||||
;CHECK-NOT: s_waitcnt;
|
||||
;CHECK-NOT: v_or_b32
|
||||
;CHECK: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen offset:8
|
||||
;CHECK: v_or_b32
|
||||
;CHECK: v_or_b32
|
||||
;CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
|
||||
;CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
|
||||
define amdgpu_ps void @s_buffer_load_index_across_bb_merged(<4 x i32> inreg %desc, i32 %index) {
|
||||
main_body:
|
||||
%tmp = shl i32 %index, 4
|
||||
|
@ -183,11 +183,11 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @func2(i16 %int_val) nounwind {
|
||||
define void @func2() nounwind {
|
||||
entry:
|
||||
%val = alloca i16
|
||||
%old = alloca i16
|
||||
store i16 %int_val, i16* %val
|
||||
store i16 31, i16* %val
|
||||
; CHECK: ldrex
|
||||
; CHECK: cmp
|
||||
; CHECK: strex
|
||||
@ -197,7 +197,7 @@ entry:
|
||||
; CHECK-BAREMETAL-NOT: __sync
|
||||
%0 = atomicrmw umin i16* %val, i16 16 monotonic
|
||||
store i16 %0, i16* %old
|
||||
%uneg = sub i16 0, 2
|
||||
%uneg = sub i16 0, 1
|
||||
; CHECK: ldrex
|
||||
; CHECK: cmp
|
||||
; CHECK: strex
|
||||
@ -249,7 +249,7 @@ entry:
|
||||
; CHECK-T1-M0: bl ___sync_fetch_and_umin_1
|
||||
; CHECK-BAREMETAL: cmp
|
||||
; CHECK-BAREMETAL-NOT: __sync
|
||||
%uneg = sub i8 0, 2
|
||||
%uneg = sub i8 0, 1
|
||||
%1 = atomicrmw umin i8* %val, i8 %uneg monotonic
|
||||
store i8 %1, i8* %old
|
||||
; CHECK: ldrex
|
||||
|
@ -6,14 +6,16 @@
|
||||
; Function Attrs: nounwind
|
||||
define void @ec_GFp_nistp256_points_mul() {
|
||||
; CHECK-LABEL: ec_GFp_nistp256_points_mul:
|
||||
; CHECK: ld 4, 0(3)
|
||||
; CHECK: li 3, 0
|
||||
; CHECK: subfic 5, 4, 0
|
||||
; CHECK: subfze 5, 3
|
||||
; CHECK: ld 5, 0(3)
|
||||
; CHECK: li 3, 127
|
||||
; CHECK: li 4, 0
|
||||
; CHECK: subfic 6, 5, 0
|
||||
; CHECK: subfze 6, 4
|
||||
; CHECK: sradi 7, 6, 63
|
||||
; CHECK: srad 6, 6, 3
|
||||
; CHECK: subfc 5, 5, 7
|
||||
; CHECK: subfe 5, 4, 6
|
||||
; CHECK: sradi 5, 5, 63
|
||||
; CHECK: subfc 4, 4, 5
|
||||
; CHECK: subfe 4, 3, 5
|
||||
; CHECK: sradi 4, 4, 63
|
||||
|
||||
; With MemorySSA, everything is taken out of the loop by licm.
|
||||
; Loads and stores to undef are treated as non-aliasing.
|
||||
|
@ -1,7 +1,7 @@
|
||||
; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -disable-early-taildup -disable-cgp -systemz-subreg-liveness < %s | FileCheck %s
|
||||
|
||||
; Check for successful compilation.
|
||||
; CHECK: lhi {{%r[0-9]+}}, -5
|
||||
; CHECK: lhi %r0, -5
|
||||
|
||||
target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
|
||||
target triple = "s390x-ibm-linux"
|
||||
|
@ -6,8 +6,8 @@ target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
|
||||
target triple = "i386--netbsd"
|
||||
|
||||
; CHECK-LABEL: fn1
|
||||
; CHECK: addl {{.*#+}} 4-byte Folded Reload
|
||||
; CHECK: orl {{.*#+}} 4-byte Folded Reload
|
||||
; CHECK: addl {{.*#+}} 4-byte Folded Reload
|
||||
; CHECK: xorl {{.*#+}} 4-byte Folded Reload
|
||||
; CHECK: xorl {{.*#+}} 4-byte Folded Reload
|
||||
; CHECK: retl
|
||||
|
@ -11,8 +11,9 @@
|
||||
define void @extractelt_mismatch_vector_element_type(i32 %arg, i1 %x) {
|
||||
; CHECK-LABEL: extractelt_mismatch_vector_element_type:
|
||||
; CHECK: # %bb.0: # %bb
|
||||
; CHECK-NEXT: movb $1, (%rax)
|
||||
; CHECK-NEXT: movb $1, (%rax)
|
||||
; CHECK-NEXT: movb $1, %al
|
||||
; CHECK-NEXT: movb %al, (%rax)
|
||||
; CHECK-NEXT: movb %al, (%rax)
|
||||
; CHECK-NEXT: retq
|
||||
bb:
|
||||
%tmp = icmp ult i32 %arg, 0
|
||||
|
Loading…
x
Reference in New Issue
Block a user