mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 02:33:06 +01:00
Temporarily revert "[SCEV] Improve modelling for (null) pointer constants"
This appears to have broken the ubsan bot: https://lab.llvm.org/buildbot/#/builders/85/builds/3062 (see also https://reviews.llvm.org/D98147#2623549). It looks like LSR needs some kind of change around insertion point handling. Reverting until I have a fix. This reverts commit 61f006ac655431bd44b9e089f74c73bec0c1a48c.
This commit is contained in:
parent
dab4c85276
commit
7c83c3a8e7
@ -1065,23 +1065,15 @@ const SCEV *ScalarEvolution::getPtrToIntExpr(const SCEV *Op, Type *Ty,
|
||||
return getTruncateOrZeroExtend(S, Ty);
|
||||
|
||||
// If not, is this expression something we can't reduce any further?
|
||||
if (auto *U = dyn_cast<SCEVUnknown>(Op)) {
|
||||
if (isa<SCEVUnknown>(Op)) {
|
||||
// Create an explicit cast node.
|
||||
// We can reuse the existing insert position since if we get here,
|
||||
// we won't have made any changes which would invalidate it.
|
||||
Type *IntPtrTy = getDataLayout().getIntPtrType(Op->getType());
|
||||
assert(getDataLayout().getTypeSizeInBits(getEffectiveSCEVType(
|
||||
Op->getType())) == getDataLayout().getTypeSizeInBits(IntPtrTy) &&
|
||||
"We can only model ptrtoint if SCEV's effective (integer) type is "
|
||||
"sufficiently wide to represent all possible pointer values.");
|
||||
|
||||
// Perform some basic constant folding. If the operand of the ptr2int cast
|
||||
// is a null pointer, don't create a ptr2int SCEV expression (that will be
|
||||
// left as-is), but produce a zero constant.
|
||||
// NOTE: We could handle a more general case, but lack motivational cases.
|
||||
if (isa<ConstantPointerNull>(U->getValue()))
|
||||
return getZero(Ty);
|
||||
|
||||
// Create an explicit cast node.
|
||||
// We can reuse the existing insert position since if we get here,
|
||||
// we won't have made any changes which would invalidate it.
|
||||
SCEV *S = new (SCEVAllocator)
|
||||
SCEVPtrToIntExpr(ID.Intern(SCEVAllocator), Op, IntPtrTy);
|
||||
UniqueSCEVs.InsertNode(S, IP);
|
||||
@ -6374,6 +6366,9 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
|
||||
return getUnknown(UndefValue::get(V->getType()));
|
||||
} else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
|
||||
return getConstant(CI);
|
||||
else if (isa<ConstantPointerNull>(V))
|
||||
// FIXME: we shouldn't special-case null pointer constant.
|
||||
return getZero(V->getType());
|
||||
else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
|
||||
return GA->isInterposable() ? getUnknown(V) : getSCEV(GA->getAliasee());
|
||||
else if (!isa<ConstantExpr>(V))
|
||||
@ -6713,6 +6708,11 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
|
||||
Value *Ptr = U->getOperand(0);
|
||||
const SCEV *Op = getSCEV(Ptr);
|
||||
Type *DstIntTy = U->getType();
|
||||
// SCEV doesn't have constant pointer expression type, but it supports
|
||||
// nullptr constant (and only that one), which is modelled in SCEV as a
|
||||
// zero integer constant. So just skip the ptrtoint cast for constants.
|
||||
if (isa<SCEVConstant>(Op))
|
||||
return getTruncateOrZeroExtend(Op, DstIntTy);
|
||||
Type *PtrTy = Ptr->getType();
|
||||
Type *IntPtrTy = getDataLayout().getIntPtrType(PtrTy);
|
||||
// But only if effective SCEV (integer) type is wide enough to represent
|
||||
|
@ -82,7 +82,7 @@ define i32 @test2() nounwind uwtable readonly {
|
||||
; CHECK-NEXT: %next = getelementptr inbounds %struct.ListNode, %struct.ListNode* %n.01, i64 0, i32 0
|
||||
; CHECK-NEXT: --> %n.01 U: full-set S: full-set Exits: @node1 LoopDispositions: { %for.body: Variant }
|
||||
; CHECK-NEXT: %1 = load %struct.ListNode*, %struct.ListNode** %next, align 8
|
||||
; CHECK-NEXT: --> %1 U: full-set S: full-set Exits: null LoopDispositions: { %for.body: Variant }
|
||||
; CHECK-NEXT: --> %1 U: full-set S: full-set Exits: 0 LoopDispositions: { %for.body: Variant }
|
||||
; CHECK-NEXT: Determining loop execution counts for: @test2
|
||||
; CHECK-NEXT: Loop %for.body: backedge-taken count is 4
|
||||
; CHECK-NEXT: Loop %for.body: max backedge-taken count is 4
|
||||
|
@ -531,17 +531,17 @@ define void @crash(i8* %ptr) {
|
||||
; CHECK-LABEL: 'crash'
|
||||
; CHECK-NEXT: Classifying expressions for: @crash
|
||||
; CHECK-NEXT: %text.addr.5 = phi i8* [ %incdec.ptr112, %while.cond111 ], [ null, %while.body ]
|
||||
; CHECK-NEXT: --> {null,+,-1}<nw><%while.cond111> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %while.cond111: Computable, %while.body: Variant }
|
||||
; CHECK-NEXT: --> {0,+,-1}<nw><%while.cond111> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %while.cond111: Computable, %while.body: Variant }
|
||||
; CHECK-NEXT: %incdec.ptr112 = getelementptr inbounds i8, i8* %text.addr.5, i64 -1
|
||||
; CHECK-NEXT: --> {(-1 + null)<nuw><nsw>,+,-1}<nw><%while.cond111> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %while.cond111: Computable, %while.body: Variant }
|
||||
; CHECK-NEXT: --> {-1,+,-1}<nw><%while.cond111> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %while.cond111: Computable, %while.body: Variant }
|
||||
; CHECK-NEXT: %lastout.2271 = phi i8* [ %incdec.ptr126, %while.body125 ], [ %ptr, %while.end117 ]
|
||||
; CHECK-NEXT: --> {%ptr,+,1}<nuw><%while.body125> U: full-set S: full-set Exits: {(-2 + null)<nuw><nsw>,+,-1}<nw><%while.cond111> LoopDispositions: { %while.body125: Computable }
|
||||
; CHECK-NEXT: --> {%ptr,+,1}<nuw><%while.body125> U: full-set S: full-set Exits: {-2,+,-1}<nw><%while.cond111> LoopDispositions: { %while.body125: Computable }
|
||||
; CHECK-NEXT: %incdec.ptr126 = getelementptr inbounds i8, i8* %lastout.2271, i64 1
|
||||
; CHECK-NEXT: --> {(1 + %ptr)<nuw>,+,1}<nuw><%while.body125> U: [1,0) S: [1,0) Exits: {(-1 + null)<nuw><nsw>,+,-1}<nw><%while.cond111> LoopDispositions: { %while.body125: Computable }
|
||||
; CHECK-NEXT: --> {(1 + %ptr)<nuw>,+,1}<nuw><%while.body125> U: [1,0) S: [1,0) Exits: {-1,+,-1}<nw><%while.cond111> LoopDispositions: { %while.body125: Computable }
|
||||
; CHECK-NEXT: Determining loop execution counts for: @crash
|
||||
; CHECK-NEXT: Loop %while.body125: backedge-taken count is {(-2 + (-1 * %ptr) + null),+,-1}<nw><%while.cond111>
|
||||
; CHECK-NEXT: Loop %while.body125: backedge-taken count is {(-2 + (-1 * %ptr)),+,-1}<nw><%while.cond111>
|
||||
; CHECK-NEXT: Loop %while.body125: max backedge-taken count is -1
|
||||
; CHECK-NEXT: Loop %while.body125: Predicated backedge-taken count is {(-2 + (-1 * %ptr) + null),+,-1}<nw><%while.cond111>
|
||||
; CHECK-NEXT: Loop %while.body125: Predicated backedge-taken count is {(-2 + (-1 * %ptr)),+,-1}<nw><%while.cond111>
|
||||
; CHECK-NEXT: Predicates:
|
||||
; CHECK: Loop %while.body125: Trip multiple is 1
|
||||
; CHECK-NEXT: Loop %while.cond111: Unpredictable backedge-taken count.
|
||||
|
@ -6,7 +6,7 @@ define void @a(<vscale x 1 x i64> *%p) {
|
||||
; CHECK-LABEL: 'a'
|
||||
; CHECK-NEXT: Classifying expressions for: @a
|
||||
; CHECK-NEXT: %1 = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* null, i32 3
|
||||
; CHECK-NEXT: --> ((3 * sizeof(<vscale x 4 x i32>)) + null) U: [0,-15) S: [-9223372036854775808,9223372036854775793)
|
||||
; CHECK-NEXT: --> (3 * sizeof(<vscale x 4 x i32>)) U: [0,-15) S: [-9223372036854775808,9223372036854775793)
|
||||
; CHECK-NEXT: %2 = getelementptr <vscale x 1 x i64>, <vscale x 1 x i64>* %p, i32 1
|
||||
; CHECK-NEXT: --> (sizeof(<vscale x 1 x i64>) + %p) U: full-set S: full-set
|
||||
; CHECK-NEXT: Determining loop execution counts for: @a
|
||||
|
@ -185,7 +185,7 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
|
||||
; CHECK: [[S_ADD_I32_15:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM4]], -467, implicit-def dead $scc
|
||||
; CHECK: undef %453.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_6]], implicit-def $scc
|
||||
; CHECK: %453.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_6]], implicit-def dead $scc, implicit $scc
|
||||
; CHECK: %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %453, 0, 0, 0 :: (load 8 from %ir.308, addrspace 4)
|
||||
; CHECK: %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %453, 0, 0, 0 :: (load 8 from %ir.304, addrspace 4)
|
||||
; CHECK: [[BUFFER_LOAD_DWORD_OFFSET3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM18]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
|
||||
; CHECK: [[S_LOAD_DWORDX4_IMM19:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %327, 0, 0, 0 :: (load 16 from %ir.223, addrspace 4)
|
||||
; CHECK: [[S_LOAD_DWORDX4_IMM20:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %335, 0, 0, 0 :: (load 16 from %ir.230, addrspace 4)
|
||||
@ -202,16 +202,16 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
|
||||
; CHECK: [[S_ADD_I32_16:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM5]], -468, implicit-def dead $scc
|
||||
; CHECK: undef %468.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_7]], implicit-def $scc
|
||||
; CHECK: %468.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_7]], implicit-def dead $scc, implicit $scc
|
||||
; CHECK: %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %468, 0, 0, 0 :: (load 8 from %ir.320, addrspace 4)
|
||||
; CHECK: %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %468, 0, 0, 0 :: (load 8 from %ir.316, addrspace 4)
|
||||
; CHECK: %71.sub1:sgpr_128 = S_AND_B32 %71.sub1, [[S_MOV_B32_]], implicit-def dead $scc
|
||||
; CHECK: [[S_BUFFER_LOAD_DWORD_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 0, 0, 0 :: (dereferenceable invariant load 4)
|
||||
; CHECK: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %411, 0, 0, 0 :: (load 16 from %ir.282, addrspace 4)
|
||||
; CHECK: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %411, 0, 0, 0 :: (load 16 from %ir.278, addrspace 4)
|
||||
; CHECK: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %488:sreg_64, 0, 0, 0 :: (load 4 from `i32 addrspace(4)* undef`, addrspace 4)
|
||||
; CHECK: KILL %411.sub0, %411.sub1
|
||||
; CHECK: KILL undef %488:sreg_64
|
||||
; CHECK: KILL %71.sub0_sub1
|
||||
; CHECK: [[S_LSHL_B32_8:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY11]], 3, implicit-def dead $scc
|
||||
; CHECK: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %425, 0, 0, 0 :: (load 16 from %ir.291, addrspace 4)
|
||||
; CHECK: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %425, 0, 0, 0 :: (load 16 from %ir.287, addrspace 4)
|
||||
; CHECK: [[S_ASHR_I32_8:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_8]], 31, implicit-def dead $scc
|
||||
; CHECK: [[S_ADD_I32_17:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM6]], -469, implicit-def dead $scc
|
||||
; CHECK: undef %485.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_8]], implicit-def $scc
|
||||
@ -234,13 +234,13 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
|
||||
; CHECK: [[S_ADDC_U32_5:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %33:sreg_32, 0, implicit-def dead $scc, implicit $scc
|
||||
; CHECK: undef %514.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_]], implicit-def $scc
|
||||
; CHECK: %514.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
|
||||
; CHECK: [[S_LOAD_DWORDX4_IMM25:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %514, 0, 0, 0 :: (load 16 from %ir.351, addrspace 4)
|
||||
; CHECK: [[S_LOAD_DWORDX4_IMM25:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %514, 0, 0, 0 :: (load 16 from %ir.347, addrspace 4)
|
||||
; CHECK: undef %522.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_1]], implicit-def $scc
|
||||
; CHECK: %522.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
|
||||
; CHECK: [[S_LOAD_DWORDX4_IMM26:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %522, 0, 0, 0 :: (load 16 from %ir.357, addrspace 4)
|
||||
; CHECK: [[S_LOAD_DWORDX4_IMM26:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %522, 0, 0, 0 :: (load 16 from %ir.353, addrspace 4)
|
||||
; CHECK: undef %530.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_2]], implicit-def $scc
|
||||
; CHECK: %530.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
|
||||
; CHECK: [[S_LOAD_DWORDX4_IMM27:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %530, 0, 0, 0 :: (load 16 from %ir.363, addrspace 4)
|
||||
; CHECK: [[S_LOAD_DWORDX4_IMM27:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %530, 0, 0, 0 :: (load 16 from %ir.359, addrspace 4)
|
||||
; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN23:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM25]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
|
||||
; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN24:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM26]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
|
||||
; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN25:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM27]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
|
||||
|
@ -19,14 +19,15 @@ define dso_local void @test(i64 %arg, i64 %arg1) {
|
||||
; CHECK-NEXT: std r0, 16(r1)
|
||||
; CHECK-NEXT: stdu r1, -64(r1)
|
||||
; CHECK-NEXT: sub r30, r4, r3
|
||||
; CHECK-NEXT: li r29, -4
|
||||
; CHECK-NEXT: li r29, 0
|
||||
; CHECK-NEXT: .p2align 5
|
||||
; CHECK-NEXT: .LBB0_3: # %bb5
|
||||
; CHECK-NEXT: #
|
||||
; CHECK-NEXT: lfsu f1, 4(r29)
|
||||
; CHECK-NEXT: lfsx f1, 0, r29
|
||||
; CHECK-NEXT: bl lrint
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK-NEXT: addi r30, r30, -1
|
||||
; CHECK-NEXT: addi r29, r29, 4
|
||||
; CHECK-NEXT: cmpldi r30, 0
|
||||
; CHECK-NEXT: bne cr0, .LBB0_3
|
||||
; CHECK-NEXT: # %bb.4: # %bb15
|
||||
|
@ -265,17 +265,16 @@ define void @func_48785(half %arg) #0 {
|
||||
; CHECK-NEXT: stdu r1, -64(r1)
|
||||
; CHECK-NEXT: fmr f31, f1
|
||||
; CHECK-NEXT: li r30, 0
|
||||
; CHECK-NEXT: li r29, 0
|
||||
; CHECK-NEXT: .p2align 5
|
||||
; CHECK-NEXT: .LBB3_1: # %bb1
|
||||
; CHECK-NEXT: #
|
||||
; CHECK-NEXT: fmr f1, f31
|
||||
; CHECK-NEXT: sldi r29, r30, 1
|
||||
; CHECK-NEXT: bl __gnu_f2h_ieee
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK-NEXT: addi r29, r29, -12
|
||||
; CHECK-NEXT: sth r3, 0(r30)
|
||||
; CHECK-NEXT: addi r30, r30, 24
|
||||
; CHECK-NEXT: cmpldi r29, 0
|
||||
; CHECK-NEXT: addi r30, r30, 12
|
||||
; CHECK-NEXT: sth r3, 0(r29)
|
||||
; CHECK-NEXT: cmpldi r30, 0
|
||||
; CHECK-NEXT: bne+ cr0, .LBB3_1
|
||||
; CHECK-NEXT: # %bb.2: # %bb5
|
||||
;
|
||||
|
@ -4,11 +4,11 @@
|
||||
; RUN: >/dev/null | FileCheck %s
|
||||
define dso_local void @sha512() #0 {
|
||||
;CHECK: prolog:
|
||||
;CHECK: %18:g8rc = ADD8 %24:g8rc, %23:g8rc
|
||||
;CHECK: %16:g8rc = ADD8 %21:g8rc, %20:g8rc
|
||||
;CHECK: epilog:
|
||||
;CHECK: %28:g8rc_and_g8rc_nox0 = PHI %6:g8rc_and_g8rc_nox0, %bb.3, %22:g8rc_and_g8rc_nox0, %bb.4
|
||||
;CHECK-NEXT: %29:g8rc = PHI %12:g8rc, %bb.3, %16:g8rc, %bb.4
|
||||
;CHECK-NEXT: %30:g8rc = PHI %15:g8rc, %bb.3, %19:g8rc, %bb.4
|
||||
;CHECK: %23:g8rc_and_g8rc_nox0 = PHI %5:g8rc_and_g8rc_nox0, %bb.3, %18:g8rc_and_g8rc_nox0, %bb.4
|
||||
;CHECK-NEXT: %24:g8rc = PHI %6:g8rc, %bb.3, %16:g8rc, %bb.4
|
||||
;CHECK-NEXT: %25:g8rc = PHI %6:g8rc, %bb.3, %19:g8rc, %bb.4
|
||||
br label %1
|
||||
|
||||
1: ; preds = %1, %0
|
||||
|
@ -192,9 +192,9 @@
|
||||
; SCEV: %t = bitcast i1* getelementptr (i1, i1* inttoptr (i32 1 to i1*), i32 -2) to i1*
|
||||
; SCEV: --> (-2 + inttoptr (i32 1 to i1*))
|
||||
; SCEV: Classifying expressions for: @hoo8
|
||||
; SCEV: --> (-1 + null)<nuw><nsw> U: [-1,0) S: [-1,0)
|
||||
; SCEV: --> -1
|
||||
; SCEV: Classifying expressions for: @hoo1
|
||||
; SCEV: --> (-1 + null)<nuw><nsw> U: [-1,0) S: [-1,0)
|
||||
; SCEV: --> -1
|
||||
|
||||
define i8* @goo8() nounwind {
|
||||
%t = bitcast i8* getelementptr (i8, i8* inttoptr (i32 1 to i8*), i32 -1) to i8*
|
||||
@ -408,13 +408,13 @@ define i64 @fi() nounwind {
|
||||
; TO: }
|
||||
; SCEV: Classifying expressions for: @fM
|
||||
; SCEV: %t = bitcast i64* getelementptr (i64, i64* null, i32 1) to i64*
|
||||
; SCEV: --> (8 + null)<nuw><nsw> U: [8,9) S: [8,9)
|
||||
; SCEV: --> 8
|
||||
; SCEV: Classifying expressions for: @fN
|
||||
; SCEV: %t = bitcast i64* getelementptr ({ i64, i64 }, { i64, i64 }* null, i32 0, i32 1) to i64*
|
||||
; SCEV: --> (8 + null)<nuw><nsw> U: [8,9) S: [8,9)
|
||||
; SCEV: --> 8
|
||||
; SCEV: Classifying expressions for: @fO
|
||||
; SCEV: %t = bitcast i64* getelementptr ([2 x i64], [2 x i64]* null, i32 0, i32 1) to i64*
|
||||
; SCEV: --> (8 + null)<nuw><nsw> U: [8,9) S: [8,9)
|
||||
; SCEV: --> 8
|
||||
|
||||
define i64* @fM() nounwind {
|
||||
%t = bitcast i64* getelementptr (i64, i64* null, i32 1) to i64*
|
||||
|
@ -150,13 +150,13 @@ define i8 @testnullptrint(i8* %buf, i8* %end) nounwind {
|
||||
; PTR64-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], [[BI]]
|
||||
; PTR64-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
|
||||
; PTR64-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1
|
||||
; PTR64-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* null, i64 [[TMP4]]
|
||||
; PTR64-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to i8*
|
||||
; PTR64-NEXT: br label [[LOOP:%.*]]
|
||||
; PTR64: loop:
|
||||
; PTR64-NEXT: [[P_01_US_US:%.*]] = phi i8* [ null, [[PREHEADER]] ], [ [[GEP:%.*]], [[LOOP]] ]
|
||||
; PTR64-NEXT: [[GEP]] = getelementptr inbounds i8, i8* [[P_01_US_US]], i64 1
|
||||
; PTR64-NEXT: [[SNEXT:%.*]] = load i8, i8* [[GEP]], align 1
|
||||
; PTR64-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[SCEVGEP]]
|
||||
; PTR64-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[TMP5]]
|
||||
; PTR64-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
|
||||
; PTR64: exit.loopexit:
|
||||
; PTR64-NEXT: [[SNEXT_LCSSA:%.*]] = phi i8 [ [[SNEXT]], [[LOOP]] ]
|
||||
@ -171,16 +171,16 @@ define i8 @testnullptrint(i8* %buf, i8* %end) nounwind {
|
||||
; PTR32-NEXT: [[BI:%.*]] = ptrtoint i8* [[BUF:%.*]] to i32
|
||||
; PTR32-NEXT: [[EI:%.*]] = ptrtoint i8* [[END:%.*]] to i32
|
||||
; PTR32-NEXT: [[CNT:%.*]] = sub i32 [[EI]], [[BI]]
|
||||
; PTR32-NEXT: [[CNT1:%.*]] = inttoptr i32 [[CNT]] to i8*
|
||||
; PTR32-NEXT: [[GUARD:%.*]] = icmp ult i32 0, [[CNT]]
|
||||
; PTR32-NEXT: br i1 [[GUARD]], label [[PREHEADER:%.*]], label [[EXIT:%.*]]
|
||||
; PTR32: preheader:
|
||||
; PTR32-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* null, i32 [[CNT]]
|
||||
; PTR32-NEXT: br label [[LOOP:%.*]]
|
||||
; PTR32: loop:
|
||||
; PTR32-NEXT: [[P_01_US_US:%.*]] = phi i8* [ null, [[PREHEADER]] ], [ [[GEP:%.*]], [[LOOP]] ]
|
||||
; PTR32-NEXT: [[GEP]] = getelementptr inbounds i8, i8* [[P_01_US_US]], i64 1
|
||||
; PTR32-NEXT: [[SNEXT:%.*]] = load i8, i8* [[GEP]], align 1
|
||||
; PTR32-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[SCEVGEP]]
|
||||
; PTR32-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[CNT1]]
|
||||
; PTR32-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
|
||||
; PTR32: exit.loopexit:
|
||||
; PTR32-NEXT: [[SNEXT_LCSSA:%.*]] = phi i8 [ [[SNEXT]], [[LOOP]] ]
|
||||
|
@ -11,12 +11,13 @@ define dso_local void @Widen_i32_i8ptr() local_unnamed_addr {
|
||||
; CHECK-NEXT: store i8** [[ARRAYDECAY2032]], i8*** inttoptr (i64 8 to i8***), align 8
|
||||
; CHECK-NEXT: br label [[FOR_COND2106:%.*]]
|
||||
; CHECK: for.cond2106:
|
||||
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_COND2106]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: [[GID_0:%.*]] = phi i8* [ null, [[ENTRY]] ], [ [[INCDEC_PTR:%.*]], [[FOR_COND2106]] ]
|
||||
; CHECK-NEXT: [[GID_0:%.*]] = phi i8* [ null, [[ENTRY:%.*]] ], [ [[INCDEC_PTR:%.*]], [[FOR_COND2106]] ]
|
||||
; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC2117:%.*]], [[FOR_COND2106]] ]
|
||||
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[GID_0]], i64 1
|
||||
; CHECK-NEXT: [[ARRAYIDX2115:%.*]] = getelementptr inbounds [15 x i8*], [15 x i8*]* [[PTRIDS]], i64 0, i64 [[INDVARS_IV]]
|
||||
; CHECK-NEXT: [[IDXPROM2114:%.*]] = zext i32 [[I_0]] to i64
|
||||
; CHECK-NEXT: [[ARRAYIDX2115:%.*]] = getelementptr inbounds [15 x i8*], [15 x i8*]* [[PTRIDS]], i64 0, i64 [[IDXPROM2114]]
|
||||
; CHECK-NEXT: store i8* [[GID_0]], i8** [[ARRAYIDX2115]], align 8
|
||||
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw i64 [[INDVARS_IV]], 1
|
||||
; CHECK-NEXT: [[INC2117]] = add nuw nsw i32 [[I_0]], 1
|
||||
; CHECK-NEXT: br label [[FOR_COND2106]]
|
||||
;
|
||||
entry:
|
||||
|
@ -16,8 +16,9 @@ define i8* @test1() {
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br label [[LOOP:%.*]]
|
||||
; CHECK: loop:
|
||||
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i8* [ [[SCEVGEP:%.*]], [[LOOP]] ], [ null, [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, i8* [[LSR_IV]], i64 1
|
||||
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], 1
|
||||
; CHECK-NEXT: [[LSR_IV_NEXT1:%.*]] = inttoptr i64 [[LSR_IV_NEXT]] to i8*
|
||||
; CHECK-NEXT: br i1 false, label [[LOOP]], label [[LOOPEXIT:%.*]]
|
||||
; CHECK: loopexit:
|
||||
; CHECK-NEXT: br i1 false, label [[BBA:%.*]], label [[BBB:%.*]]
|
||||
@ -36,7 +37,7 @@ define i8* @test1() {
|
||||
; CHECK: bbB.bb89_crit_edge:
|
||||
; CHECK-NEXT: br label [[BB89]]
|
||||
; CHECK: bb89:
|
||||
; CHECK-NEXT: [[TMP75PHI:%.*]] = phi i8* [ [[SCEVGEP]], [[BBA_BB89_CRIT_EDGE]] ], [ [[SCEVGEP]], [[BBB_BB89_CRIT_EDGE]] ]
|
||||
; CHECK-NEXT: [[TMP75PHI:%.*]] = phi i8* [ [[LSR_IV_NEXT1]], [[BBA_BB89_CRIT_EDGE]] ], [ [[LSR_IV_NEXT1]], [[BBB_BB89_CRIT_EDGE]] ]
|
||||
; CHECK-NEXT: br label [[EXIT:%.*]]
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: ret i8* [[TMP75PHI]]
|
||||
@ -80,8 +81,9 @@ define i8* @test2() {
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br label [[LOOP:%.*]]
|
||||
; CHECK: loop:
|
||||
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i8* [ [[SCEVGEP:%.*]], [[LOOP]] ], [ null, [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, i8* [[LSR_IV]], i64 1
|
||||
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], 1
|
||||
; CHECK-NEXT: [[LSR_IV_NEXT1:%.*]] = inttoptr i64 [[LSR_IV_NEXT]] to i8*
|
||||
; CHECK-NEXT: br i1 false, label [[LOOP]], label [[LOOPEXIT:%.*]]
|
||||
; CHECK: loopexit:
|
||||
; CHECK-NEXT: br i1 false, label [[BBA:%.*]], label [[BBB:%.*]]
|
||||
@ -98,10 +100,10 @@ define i8* @test2() {
|
||||
; CHECK: bbB.exit_crit_edge:
|
||||
; CHECK-NEXT: br label [[EXIT:%.*]]
|
||||
; CHECK: bb89:
|
||||
; CHECK-NEXT: [[TMP75PHI:%.*]] = phi i8* [ [[SCEVGEP]], [[BBA]] ], [ [[SCEVGEP]], [[BBA]] ], [ [[SCEVGEP]], [[BBA]] ]
|
||||
; CHECK-NEXT: [[TMP75PHI:%.*]] = phi i8* [ [[LSR_IV_NEXT1]], [[BBA]] ], [ [[LSR_IV_NEXT1]], [[BBA]] ], [ [[LSR_IV_NEXT1]], [[BBA]] ]
|
||||
; CHECK-NEXT: br label [[EXIT]]
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: [[RESULT:%.*]] = phi i8* [ [[TMP75PHI]], [[BB89]] ], [ [[SCEVGEP]], [[BBB_EXIT_CRIT_EDGE]] ]
|
||||
; CHECK-NEXT: [[RESULT:%.*]] = phi i8* [ [[TMP75PHI]], [[BB89]] ], [ [[LSR_IV_NEXT1]], [[BBB_EXIT_CRIT_EDGE]] ]
|
||||
; CHECK-NEXT: ret i8* [[RESULT]]
|
||||
;
|
||||
entry:
|
||||
|
@ -1,177 +1,131 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -march=amdgcn -mcpu=bonaire -loop-reduce -S < %s | FileCheck %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -print-lsr-output < %s 2>&1 | FileCheck %s
|
||||
|
||||
; Test various conditions where OptimizeLoopTermCond doesn't look at a
|
||||
; memory instruction use and fails to find the address space.
|
||||
|
||||
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
|
||||
|
||||
define amdgpu_kernel void @local_cmp_user(i32 %arg0) nounwind {
|
||||
; CHECK-LABEL: @local_cmp_user(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[ARG0:%.*]], 1
|
||||
; CHECK-NEXT: br label [[BB11:%.*]]
|
||||
; CHECK: bb11:
|
||||
; CHECK-NEXT: [[LSR_IV2:%.*]] = phi i32 [ [[LSR_IV_NEXT3:%.*]], [[BB:%.*]] ], [ -2, [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[BB]] ], [ [[TMP0]], [[ENTRY]] ]
|
||||
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], -1
|
||||
; CHECK-NEXT: [[LSR_IV_NEXT3]] = add i32 [[LSR_IV2]], 2
|
||||
; CHECK-NEXT: [[C0:%.*]] = icmp eq i32 [[LSR_IV_NEXT]], 0
|
||||
; CHECK-NEXT: br i1 [[C0]], label [[BB13:%.*]], label [[BB]]
|
||||
; CHECK: bb:
|
||||
; CHECK-NEXT: [[T:%.*]] = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* undef, align 4
|
||||
; CHECK-NEXT: [[T1:%.*]] = ptrtoint i8 addrspace(3)* [[T]] to i32
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 0, [[T1]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to i8 addrspace(3)*
|
||||
; CHECK-NEXT: [[TMP:%.*]] = inttoptr i32 [[LSR_IV_NEXT3]] to i8 addrspace(3)*
|
||||
; CHECK-NEXT: [[C1:%.*]] = icmp ne i8 addrspace(3)* [[TMP2]], [[TMP]]
|
||||
; CHECK-NEXT: br i1 [[C1]], label [[BB11]], label [[BB13]]
|
||||
; CHECK: bb13:
|
||||
; CHECK-NEXT: unreachable
|
||||
;
|
||||
; CHECK: bb11:
|
||||
; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %bb ], [ 2, %entry ]
|
||||
; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %bb ], [ %{{[0-9]+}}, %entry ]
|
||||
; CHECK: %lsr.iv.next = add i32 %lsr.iv, -1
|
||||
; CHECK: %lsr.iv.next2 = add i32 %lsr.iv1, -2
|
||||
; CHECK: br i1
|
||||
|
||||
; CHECK: bb:
|
||||
; CHECK: inttoptr i32 %lsr.iv.next2 to i8 addrspace(3)*
|
||||
; CHECK: %c1 = icmp ne i8 addrspace(3)*
|
||||
define amdgpu_kernel void @local_cmp_user(i32 %arg0) nounwind {
|
||||
entry:
|
||||
br label %bb11
|
||||
|
||||
bb11: ; preds = %bb, %entry
|
||||
bb11:
|
||||
%i = phi i32 [ 0, %entry ], [ %i.next, %bb ]
|
||||
%ii = shl i32 %i, 1
|
||||
%c0 = icmp eq i32 %i, %arg0
|
||||
br i1 %c0, label %bb13, label %bb
|
||||
|
||||
bb: ; preds = %bb11
|
||||
%t = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* undef, align 4
|
||||
bb:
|
||||
%t = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* undef
|
||||
%p = getelementptr i8, i8 addrspace(3)* %t, i32 %ii
|
||||
%c1 = icmp ne i8 addrspace(3)* %p, null
|
||||
%i.next = add i32 %i, 1
|
||||
br i1 %c1, label %bb11, label %bb13
|
||||
|
||||
bb13: ; preds = %bb, %bb11
|
||||
bb13:
|
||||
unreachable
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @global_cmp_user(i64 %arg0) nounwind {
|
||||
; CHECK-LABEL: @global_cmp_user(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[ARG0:%.*]], 1
|
||||
; CHECK-NEXT: br label [[BB11:%.*]]
|
||||
; CHECK: bb11:
|
||||
; CHECK-NEXT: [[LSR_IV2:%.*]] = phi i64 [ [[LSR_IV_NEXT3:%.*]], [[BB:%.*]] ], [ -2, [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[BB]] ], [ [[TMP0]], [[ENTRY]] ]
|
||||
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1
|
||||
; CHECK-NEXT: [[LSR_IV_NEXT3]] = add i64 [[LSR_IV2]], 2
|
||||
; CHECK-NEXT: [[C0:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
|
||||
; CHECK-NEXT: br i1 [[C0]], label [[BB13:%.*]], label [[BB]]
|
||||
; CHECK: bb:
|
||||
; CHECK-NEXT: [[T:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef, align 8
|
||||
; CHECK-NEXT: [[T1:%.*]] = ptrtoint i8 addrspace(1)* [[T]] to i64
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 0, [[T1]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to i8 addrspace(1)*
|
||||
; CHECK-NEXT: [[TMP:%.*]] = inttoptr i64 [[LSR_IV_NEXT3]] to i8 addrspace(1)*
|
||||
; CHECK-NEXT: [[C1:%.*]] = icmp ne i8 addrspace(1)* [[TMP2]], [[TMP]]
|
||||
; CHECK-NEXT: br i1 [[C1]], label [[BB11]], label [[BB13]]
|
||||
; CHECK: bb13:
|
||||
; CHECK-NEXT: unreachable
|
||||
;
|
||||
; CHECK: %lsr.iv1 = phi i64
|
||||
; CHECK: %lsr.iv = phi i64
|
||||
; CHECK: %lsr.iv.next = add i64 %lsr.iv, -1
|
||||
; CHECK: %lsr.iv.next2 = add i64 %lsr.iv1, -2
|
||||
; CHECK: br i1
|
||||
|
||||
; CHECK: bb:
|
||||
; CHECK: inttoptr i64 %lsr.iv.next2 to i8 addrspace(1)*
|
||||
; CHECK: icmp ne i8 addrspace(1)* %t
|
||||
define amdgpu_kernel void @global_cmp_user(i64 %arg0) nounwind {
|
||||
entry:
|
||||
br label %bb11
|
||||
|
||||
bb11: ; preds = %bb, %entry
|
||||
bb11:
|
||||
%i = phi i64 [ 0, %entry ], [ %i.next, %bb ]
|
||||
%ii = shl i64 %i, 1
|
||||
%c0 = icmp eq i64 %i, %arg0
|
||||
br i1 %c0, label %bb13, label %bb
|
||||
|
||||
bb: ; preds = %bb11
|
||||
%t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef, align 8
|
||||
bb:
|
||||
%t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef
|
||||
%p = getelementptr i8, i8 addrspace(1)* %t, i64 %ii
|
||||
%c1 = icmp ne i8 addrspace(1)* %p, null
|
||||
%i.next = add i64 %i, 1
|
||||
br i1 %c1, label %bb11, label %bb13
|
||||
|
||||
bb13: ; preds = %bb, %bb11
|
||||
bb13:
|
||||
unreachable
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @global_gep_user(i32 %arg0) nounwind {
|
||||
; CHECK-LABEL: @global_gep_user(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[ARG0:%.*]], 1
|
||||
; CHECK-NEXT: br label [[BB11:%.*]]
|
||||
; CHECK: bb11:
|
||||
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi i32 [ [[LSR_IV_NEXT2:%.*]], [[BB:%.*]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[BB]] ], [ [[TMP0]], [[ENTRY]] ]
|
||||
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], -1
|
||||
; CHECK-NEXT: [[LSR_IV_NEXT2]] = add i32 [[LSR_IV1]], 2
|
||||
; CHECK-NEXT: [[C0:%.*]] = icmp eq i32 [[LSR_IV_NEXT]], 0
|
||||
; CHECK-NEXT: br i1 [[C0]], label [[BB13:%.*]], label [[BB]]
|
||||
; CHECK: bb:
|
||||
; CHECK-NEXT: [[T:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef, align 8
|
||||
; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[LSR_IV1]] to i64
|
||||
; CHECK-NEXT: [[P:%.*]] = getelementptr i8, i8 addrspace(1)* [[T]], i64 [[IDXPROM]]
|
||||
; CHECK-NEXT: [[C1:%.*]] = icmp ne i8 addrspace(1)* [[P]], null
|
||||
; CHECK-NEXT: br i1 [[C1]], label [[BB11]], label [[BB13]]
|
||||
; CHECK: bb13:
|
||||
; CHECK-NEXT: unreachable
|
||||
;
|
||||
; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %bb ], [ 0, %entry ]
|
||||
; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %bb ], [ %{{[0-9]+}}, %entry ]
|
||||
; CHECK: %lsr.iv.next = add i32 %lsr.iv, -1
|
||||
; CHECK: %lsr.iv.next2 = add i32 %lsr.iv1, 2
|
||||
; CHECK: br i1
|
||||
|
||||
; CHECK: bb:
|
||||
; CHECK: %idxprom = sext i32 %lsr.iv1 to i64
|
||||
; CHECK: getelementptr i8, i8 addrspace(1)* %t, i64 %idxprom
|
||||
define amdgpu_kernel void @global_gep_user(i32 %arg0) nounwind {
|
||||
entry:
|
||||
br label %bb11
|
||||
|
||||
bb11: ; preds = %bb, %entry
|
||||
bb11:
|
||||
%i = phi i32 [ 0, %entry ], [ %i.next, %bb ]
|
||||
%ii = shl i32 %i, 1
|
||||
%c0 = icmp eq i32 %i, %arg0
|
||||
br i1 %c0, label %bb13, label %bb
|
||||
|
||||
bb: ; preds = %bb11
|
||||
%t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef, align 8
|
||||
%idxprom = sext i32 %ii to i64
|
||||
%p = getelementptr i8, i8 addrspace(1)* %t, i64 %idxprom
|
||||
bb:
|
||||
%t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef
|
||||
%p = getelementptr i8, i8 addrspace(1)* %t, i32 %ii
|
||||
%c1 = icmp ne i8 addrspace(1)* %p, null
|
||||
%i.next = add i32 %i, 1
|
||||
br i1 %c1, label %bb11, label %bb13
|
||||
|
||||
bb13: ; preds = %bb, %bb11
|
||||
bb13:
|
||||
unreachable
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @global_sext_scale_user(i32 %arg0) nounwind {
|
||||
; CHECK-LABEL: @global_sext_scale_user(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[ARG0:%.*]], 1
|
||||
; CHECK-NEXT: br label [[BB11:%.*]]
|
||||
; CHECK: bb11:
|
||||
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi i32 [ [[LSR_IV_NEXT2:%.*]], [[BB:%.*]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[BB]] ], [ [[TMP0]], [[ENTRY]] ]
|
||||
; CHECK-NEXT: [[II_EXT:%.*]] = sext i32 [[LSR_IV1]] to i64
|
||||
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], -1
|
||||
; CHECK-NEXT: [[LSR_IV_NEXT2]] = add i32 [[LSR_IV1]], 2
|
||||
; CHECK-NEXT: [[C0:%.*]] = icmp eq i32 [[LSR_IV_NEXT]], 0
|
||||
; CHECK-NEXT: br i1 [[C0]], label [[BB13:%.*]], label [[BB]]
|
||||
; CHECK: bb:
|
||||
; CHECK-NEXT: [[T:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef, align 8
|
||||
; CHECK-NEXT: [[P:%.*]] = getelementptr i8, i8 addrspace(1)* [[T]], i64 [[II_EXT]]
|
||||
; CHECK-NEXT: [[C1:%.*]] = icmp ne i8 addrspace(1)* [[P]], null
|
||||
; CHECK-NEXT: br i1 [[C1]], label [[BB11]], label [[BB13]]
|
||||
; CHECK: bb13:
|
||||
; CHECK-NEXT: unreachable
|
||||
;
|
||||
; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %bb ], [ 0, %entry ]
|
||||
; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %bb ], [ %{{[0-9]+}}, %entry ]
|
||||
; CHECK: %lsr.iv.next = add i32 %lsr.iv, -1
|
||||
; CHECK: %lsr.iv.next2 = add i32 %lsr.iv1, 2
|
||||
; CHECK: br i1
|
||||
|
||||
; CHECK: bb
|
||||
; CHECK: %p = getelementptr i8, i8 addrspace(1)* %t, i64 %ii.ext
|
||||
define amdgpu_kernel void @global_sext_scale_user(i32 %arg0) nounwind {
|
||||
entry:
|
||||
br label %bb11
|
||||
|
||||
bb11: ; preds = %bb, %entry
|
||||
bb11:
|
||||
%i = phi i32 [ 0, %entry ], [ %i.next, %bb ]
|
||||
%ii = shl i32 %i, 1
|
||||
%ii.ext = sext i32 %ii to i64
|
||||
%c0 = icmp eq i32 %i, %arg0
|
||||
br i1 %c0, label %bb13, label %bb
|
||||
|
||||
bb: ; preds = %bb11
|
||||
%t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef, align 8
|
||||
bb:
|
||||
%t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef
|
||||
%p = getelementptr i8, i8 addrspace(1)* %t, i64 %ii.ext
|
||||
%c1 = icmp ne i8 addrspace(1)* %p, null
|
||||
%i.next = add i32 %i, 1
|
||||
br i1 %c1, label %bb11, label %bb13
|
||||
|
||||
bb13: ; preds = %bb, %bb11
|
||||
bb13:
|
||||
unreachable
|
||||
}
|
||||
|
@ -16,8 +16,17 @@ define void @cff_index_load_offsets(i1 %cond, i8 %x, i8* %p) #0 {
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[EXIT:%.*]]
|
||||
; CHECK: if.then:
|
||||
; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 undef, i64 4)
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[UMAX]], -1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
|
||||
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8
|
||||
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8
|
||||
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 4
|
||||
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, i8* null, i64 [[TMP3]]
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[X:%.*]], i32 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i8> poison, i8 [[X]], i32 0
|
||||
@ -25,68 +34,68 @@ define void @cff_index_load_offsets(i1 %cond, i8 %x, i8* %p) #0 {
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
|
||||
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* null, i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 4
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
|
||||
; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, i8* null, i64 [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i32>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT3]] to <4 x i32>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw <4 x i32> [[TMP4]], <i32 24, i32 24, i32 24, i32 24>
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = shl nuw <4 x i32> [[TMP5]], <i32 24, i32 24, i32 24, i32 24>
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = load i8, i8* [[P:%.*]], align 1, [[TBAA1:!tbaa !.*]]
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i8> poison, i8 [[TMP8]], i32 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
|
||||
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* null, i64 [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 4
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
|
||||
; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, i8* null, i64 [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i32>
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT3]] to <4 x i32>
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = shl nuw <4 x i32> [[TMP8]], <i32 24, i32 24, i32 24, i32 24>
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = shl nuw <4 x i32> [[TMP9]], <i32 24, i32 24, i32 24, i32 24>
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = load i8, i8* [[P:%.*]], align 1, [[TBAA1:!tbaa !.*]]
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i8> poison, i8 [[TMP12]], i32 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT4]], <4 x i8> poison, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = load i8, i8* [[P]], align 1, [[TBAA1]]
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i8> poison, i8 [[TMP9]], i32 0
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[P]], align 1, [[TBAA1]]
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i8> poison, i8 [[TMP13]], i32 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT6]], <4 x i8> poison, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT5]] to <4 x i32>
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT7]] to <4 x i32>
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw <4 x i32> [[TMP10]], <i32 16, i32 16, i32 16, i32 16>
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = shl nuw nsw <4 x i32> [[TMP11]], <i32 16, i32 16, i32 16, i32 16>
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i32> [[TMP12]], [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = or <4 x i32> [[TMP13]], [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = load i8, i8* undef, align 1, [[TBAA1]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = load i8, i8* undef, align 1, [[TBAA1]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = or <4 x i32> [[TMP14]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = or <4 x i32> [[TMP15]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = or <4 x i32> [[TMP18]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = or <4 x i32> [[TMP19]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP20]], i32 0
|
||||
; CHECK-NEXT: store i32 [[TMP22]], i32* undef, align 4, [[TBAA4:!tbaa !.*]]
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i32> [[TMP20]], i32 1
|
||||
; CHECK-NEXT: store i32 [[TMP23]], i32* undef, align 4, [[TBAA4]]
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[TMP20]], i32 2
|
||||
; CHECK-NEXT: store i32 [[TMP24]], i32* undef, align 4, [[TBAA4]]
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[TMP20]], i32 3
|
||||
; CHECK-NEXT: store i32 [[TMP25]], i32* undef, align 4, [[TBAA4]]
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i32> [[TMP21]], i32 0
|
||||
; CHECK-NEXT: store i32 [[TMP26]], i32* undef, align 4, [[TBAA4]]
|
||||
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP21]], i32 1
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT5]] to <4 x i32>
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT7]] to <4 x i32>
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw <4 x i32> [[TMP14]], <i32 16, i32 16, i32 16, i32 16>
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw <4 x i32> [[TMP15]], <i32 16, i32 16, i32 16, i32 16>
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = or <4 x i32> [[TMP16]], [[TMP10]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = or <4 x i32> [[TMP17]], [[TMP11]]
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = load i8, i8* undef, align 1, [[TBAA1]]
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = load i8, i8* undef, align 1, [[TBAA1]]
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = or <4 x i32> [[TMP18]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = or <4 x i32> [[TMP19]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = or <4 x i32> [[TMP22]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = or <4 x i32> [[TMP23]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i32> [[TMP24]], i32 0
|
||||
; CHECK-NEXT: store i32 [[TMP26]], i32* undef, align 4, [[TBAA4:!tbaa !.*]]
|
||||
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP24]], i32 1
|
||||
; CHECK-NEXT: store i32 [[TMP27]], i32* undef, align 4, [[TBAA4]]
|
||||
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> [[TMP21]], i32 2
|
||||
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> [[TMP24]], i32 2
|
||||
; CHECK-NEXT: store i32 [[TMP28]], i32* undef, align 4, [[TBAA4]]
|
||||
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i32> [[TMP21]], i32 3
|
||||
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i32> [[TMP24]], i32 3
|
||||
; CHECK-NEXT: store i32 [[TMP29]], i32* undef, align 4, [[TBAA4]]
|
||||
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i32> [[TMP25]], i32 0
|
||||
; CHECK-NEXT: store i32 [[TMP30]], i32* undef, align 4, [[TBAA4]]
|
||||
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i32> [[TMP25]], i32 1
|
||||
; CHECK-NEXT: store i32 [[TMP31]], i32* undef, align 4, [[TBAA4]]
|
||||
; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x i32> [[TMP25]], i32 2
|
||||
; CHECK-NEXT: store i32 [[TMP32]], i32* undef, align 4, [[TBAA4]]
|
||||
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <4 x i32> [[TMP25]], i32 3
|
||||
; CHECK-NEXT: store i32 [[TMP33]], i32* undef, align 4, [[TBAA4]]
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
|
||||
; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
|
||||
; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]]
|
||||
; CHECK-NEXT: [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[TMP34]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1, 0
|
||||
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[CMP_N]], label [[SW_EPILOG:%.*]], label [[SCALAR_PH]]
|
||||
; CHECK: scalar.ph:
|
||||
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8* [ null, [[MIDDLE_BLOCK]] ], [ null, [[IF_THEN]] ]
|
||||
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ null, [[IF_THEN]] ]
|
||||
; CHECK-NEXT: br label [[FOR_BODY68:%.*]]
|
||||
; CHECK: for.body68:
|
||||
; CHECK-NEXT: [[P_359:%.*]] = phi i8* [ [[ADD_PTR86:%.*]], [[FOR_BODY68]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
|
||||
; CHECK-NEXT: [[CONV70:%.*]] = zext i8 [[X]] to i32
|
||||
; CHECK-NEXT: [[SHL71:%.*]] = shl nuw i32 [[CONV70]], 24
|
||||
; CHECK-NEXT: [[TMP31:%.*]] = load i8, i8* [[P]], align 1, [[TBAA1]]
|
||||
; CHECK-NEXT: [[CONV73:%.*]] = zext i8 [[TMP31]] to i32
|
||||
; CHECK-NEXT: [[TMP35:%.*]] = load i8, i8* [[P]], align 1, [[TBAA1]]
|
||||
; CHECK-NEXT: [[CONV73:%.*]] = zext i8 [[TMP35]] to i32
|
||||
; CHECK-NEXT: [[SHL74:%.*]] = shl nuw nsw i32 [[CONV73]], 16
|
||||
; CHECK-NEXT: [[OR75:%.*]] = or i32 [[SHL74]], [[SHL71]]
|
||||
; CHECK-NEXT: [[TMP32:%.*]] = load i8, i8* undef, align 1, [[TBAA1]]
|
||||
; CHECK-NEXT: [[TMP36:%.*]] = load i8, i8* undef, align 1, [[TBAA1]]
|
||||
; CHECK-NEXT: [[SHL78:%.*]] = shl nuw nsw i32 undef, 8
|
||||
; CHECK-NEXT: [[OR79:%.*]] = or i32 [[OR75]], [[SHL78]]
|
||||
; CHECK-NEXT: [[CONV81:%.*]] = zext i8 undef to i32
|
||||
|
@ -12,13 +12,11 @@ define void @a(i8* readnone %b) {
|
||||
; CHECK-NEXT: br i1 [[CMP_NOT4]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
|
||||
; CHECK: for.body.preheader:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 0, [[B1]]
|
||||
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* null, i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[EXITCOUNT_PTRCNT_TO_INT:%.*]] = ptrtoint i8* [[SCEVGEP]] to i64
|
||||
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[EXITCOUNT_PTRCNT_TO_INT]], 4
|
||||
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
|
||||
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[EXITCOUNT_PTRCNT_TO_INT]], 4
|
||||
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[EXITCOUNT_PTRCNT_TO_INT]], [[N_MOD_VF]]
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
|
||||
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[N_VEC]], -1
|
||||
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, i8* null, i64 [[TMP1]]
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
@ -68,7 +66,7 @@ define void @a(i8* readnone %b) {
|
||||
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, i8* [[POINTER_PHI]], i64 -4
|
||||
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[EXITCOUNT_PTRCNT_TO_INT]], [[N_VEC]]
|
||||
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
|
||||
; CHECK: scalar.ph:
|
||||
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ null, [[FOR_BODY_PREHEADER]] ]
|
||||
|
@ -947,31 +947,24 @@ TEST_F(ScalarEvolutionExpanderTest, ExpandNonIntegralPtrWithNullBase) {
|
||||
Value *V = Exp.expandCodeFor(PtrPlus1, I.getType(), &I);
|
||||
I.replaceAllUsesWith(V);
|
||||
|
||||
// Check that the expander created:
|
||||
// define float addrspace(1)* @test(i64 %off) {
|
||||
// %scevgep = getelementptr float, float addrspace(1)* null, i64 %off
|
||||
// %scevgep1 = bitcast float addrspace(1)* %scevgep to i8 addrspace(1)*
|
||||
// %uglygep = getelementptr i8, i8 addrspace(1)* %scevgep1, i64 1
|
||||
// %uglygep2 = bitcast i8 addrspace(1)* %uglygep to float addrspace(1)*
|
||||
// %ptr = getelementptr inbounds float, float addrspace(1)* null, i64 %off
|
||||
// ret float addrspace(1)* %uglygep2
|
||||
// }
|
||||
|
||||
// Check the expander created bitcast (gep i8* null, %offset).
|
||||
auto *Cast = dyn_cast<BitCastInst>(V);
|
||||
EXPECT_TRUE(Cast);
|
||||
EXPECT_EQ(Cast->getType(), I.getType());
|
||||
auto *GEP = dyn_cast<GetElementPtrInst>(Cast->getOperand(0));
|
||||
EXPECT_TRUE(GEP);
|
||||
EXPECT_TRUE(match(GEP->getOperand(1), m_SpecificInt(1)));
|
||||
auto *Cast1 = dyn_cast<BitCastInst>(GEP->getPointerOperand());
|
||||
EXPECT_TRUE(Cast1);
|
||||
auto *GEP1 = dyn_cast<GetElementPtrInst>(Cast1->getOperand(0));
|
||||
EXPECT_TRUE(GEP1);
|
||||
EXPECT_TRUE(cast<Constant>(GEP1->getPointerOperand())->isNullValue());
|
||||
EXPECT_EQ(GEP1->getOperand(1), &*F.arg_begin());
|
||||
EXPECT_EQ(cast<PointerType>(GEP1->getPointerOperand()->getType())
|
||||
EXPECT_TRUE(cast<Constant>(GEP->getPointerOperand())->isNullValue());
|
||||
EXPECT_EQ(cast<PointerType>(GEP->getPointerOperand()->getType())
|
||||
->getAddressSpace(),
|
||||
cast<PointerType>(I.getType())->getAddressSpace());
|
||||
|
||||
// Check the expander created the expected index computation: add (shl
|
||||
// %offset, 2), 1.
|
||||
Value *Arg;
|
||||
EXPECT_TRUE(
|
||||
match(GEP->getOperand(1),
|
||||
m_Add(m_Shl(m_Value(Arg), m_SpecificInt(2)), m_SpecificInt(1))));
|
||||
EXPECT_EQ(Arg, &*F.arg_begin());
|
||||
EXPECT_FALSE(verifyFunction(F, &errs()));
|
||||
});
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user