Temporarily revert "[SCEV] Improve modelling for (null) pointer constants"

This appears to have broken the ubsan bot: https://lab.llvm.org/buildbot/#/builders/85/builds/3062 https://reviews.llvm.org/D98147#2623549 It looks like LSR needs some kind of change around insertion point handling. Reverting until I have a fix. This reverts commit 61f006ac655431bd44b9e089f74c73bec0c1a48c.
2024-11-22 02:33:06 +01:00 · 2021-03-13 09:10:28 +03:00 · 2021-03-13 09:10:28 +03:00 · 7c83c3a8e7
commit 7c83c3a8e7
parent dab4c85276
16 changed files with 190 additions and 233 deletions
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@ -1065,23 +1065,15 @@ const SCEV *ScalarEvolution::getPtrToIntExpr(const SCEV *Op, Type *Ty,
    return getTruncateOrZeroExtend(S, Ty);

  // If not, is this expression something we can't reduce any further?
-  if (auto *U = dyn_cast<SCEVUnknown>(Op)) {
+  if (isa<SCEVUnknown>(Op)) {
+    // Create an explicit cast node.
+    // We can reuse the existing insert position since if we get here,
+    // we won't have made any changes which would invalidate it.
    Type *IntPtrTy = getDataLayout().getIntPtrType(Op->getType());
    assert(getDataLayout().getTypeSizeInBits(getEffectiveSCEVType(
               Op->getType())) == getDataLayout().getTypeSizeInBits(IntPtrTy) &&
           "We can only model ptrtoint if SCEV's effective (integer) type is "
           "sufficiently wide to represent all possible pointer values.");
-
-    // Perform some basic constant folding. If the operand of the ptr2int cast
-    // is a null pointer, don't create a ptr2int SCEV expression (that will be
-    // left as-is), but produce a zero constant.
-    // NOTE: We could handle a more general case, but lack motivational cases.
-    if (isa<ConstantPointerNull>(U->getValue()))
-      return getZero(Ty);
-
-    // Create an explicit cast node.
-    // We can reuse the existing insert position since if we get here,
-    // we won't have made any changes which would invalidate it.
    SCEV *S = new (SCEVAllocator)
        SCEVPtrToIntExpr(ID.Intern(SCEVAllocator), Op, IntPtrTy);
    UniqueSCEVs.InsertNode(S, IP);
@ -6374,6 +6366,9 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
      return getUnknown(UndefValue::get(V->getType()));
  } else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
    return getConstant(CI);
+  else if (isa<ConstantPointerNull>(V))
+    // FIXME: we shouldn't special-case null pointer constant.
+    return getZero(V->getType());
  else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
    return GA->isInterposable() ? getUnknown(V) : getSCEV(GA->getAliasee());
  else if (!isa<ConstantExpr>(V))
@ -6713,6 +6708,11 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
    Value *Ptr = U->getOperand(0);
    const SCEV *Op = getSCEV(Ptr);
    Type *DstIntTy = U->getType();
+    // SCEV doesn't have constant pointer expression type, but it supports
+    // nullptr constant (and only that one), which is modelled in SCEV as a
+    // zero integer constant. So just skip the ptrtoint cast for constants.
+    if (isa<SCEVConstant>(Op))
+      return getTruncateOrZeroExtend(Op, DstIntTy);
    Type *PtrTy = Ptr->getType();
    Type *IntPtrTy = getDataLayout().getIntPtrType(PtrTy);
    // But only if effective SCEV (integer) type is wide enough to represent
--- a/test/Analysis/ScalarEvolution/load.ll
+++ b/test/Analysis/ScalarEvolution/load.ll
@ -82,7 +82,7 @@ define i32 @test2() nounwind uwtable readonly {
 ; CHECK-NEXT:    %next = getelementptr inbounds %struct.ListNode, %struct.ListNode* %n.01, i64 0, i32 0
 ; CHECK-NEXT:    --> %n.01 U: full-set S: full-set Exits: @node1 LoopDispositions: { %for.body: Variant }
 ; CHECK-NEXT:    %1 = load %struct.ListNode*, %struct.ListNode** %next, align 8
-; CHECK-NEXT:    --> %1 U: full-set S: full-set Exits: null LoopDispositions: { %for.body: Variant }
+; CHECK-NEXT:    --> %1 U: full-set S: full-set Exits: 0 LoopDispositions: { %for.body: Variant }
 ; CHECK-NEXT:  Determining loop execution counts for: @test2
 ; CHECK-NEXT:  Loop %for.body: backedge-taken count is 4
 ; CHECK-NEXT:  Loop %for.body: max backedge-taken count is 4
--- a/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
+++ b/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
@ -531,17 +531,17 @@ define void @crash(i8* %ptr) {
 ; CHECK-LABEL: 'crash'
 ; CHECK-NEXT:  Classifying expressions for: @crash
 ; CHECK-NEXT:    %text.addr.5 = phi i8* [ %incdec.ptr112, %while.cond111 ], [ null, %while.body ]
-; CHECK-NEXT:    --> {null,+,-1}<nw><%while.cond111> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %while.cond111: Computable, %while.body: Variant }
+; CHECK-NEXT:    --> {0,+,-1}<nw><%while.cond111> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %while.cond111: Computable, %while.body: Variant }
 ; CHECK-NEXT:    %incdec.ptr112 = getelementptr inbounds i8, i8* %text.addr.5, i64 -1
-; CHECK-NEXT:    --> {(-1 + null)<nuw><nsw>,+,-1}<nw><%while.cond111> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %while.cond111: Computable, %while.body: Variant }
+; CHECK-NEXT:    --> {-1,+,-1}<nw><%while.cond111> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %while.cond111: Computable, %while.body: Variant }
 ; CHECK-NEXT:    %lastout.2271 = phi i8* [ %incdec.ptr126, %while.body125 ], [ %ptr, %while.end117 ]
-; CHECK-NEXT:    --> {%ptr,+,1}<nuw><%while.body125> U: full-set S: full-set Exits: {(-2 + null)<nuw><nsw>,+,-1}<nw><%while.cond111> LoopDispositions: { %while.body125: Computable }
+; CHECK-NEXT:    --> {%ptr,+,1}<nuw><%while.body125> U: full-set S: full-set Exits: {-2,+,-1}<nw><%while.cond111> LoopDispositions: { %while.body125: Computable }
 ; CHECK-NEXT:    %incdec.ptr126 = getelementptr inbounds i8, i8* %lastout.2271, i64 1
-; CHECK-NEXT:    --> {(1 + %ptr)<nuw>,+,1}<nuw><%while.body125> U: [1,0) S: [1,0) Exits: {(-1 + null)<nuw><nsw>,+,-1}<nw><%while.cond111> LoopDispositions: { %while.body125: Computable }
+; CHECK-NEXT:    --> {(1 + %ptr)<nuw>,+,1}<nuw><%while.body125> U: [1,0) S: [1,0) Exits: {-1,+,-1}<nw><%while.cond111> LoopDispositions: { %while.body125: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @crash
-; CHECK-NEXT:  Loop %while.body125: backedge-taken count is {(-2 + (-1 * %ptr) + null),+,-1}<nw><%while.cond111>
+; CHECK-NEXT:  Loop %while.body125: backedge-taken count is {(-2 + (-1 * %ptr)),+,-1}<nw><%while.cond111>
 ; CHECK-NEXT:  Loop %while.body125: max backedge-taken count is -1
-; CHECK-NEXT:  Loop %while.body125: Predicated backedge-taken count is {(-2 + (-1 * %ptr) + null),+,-1}<nw><%while.cond111>
+; CHECK-NEXT:  Loop %while.body125: Predicated backedge-taken count is {(-2 + (-1 * %ptr)),+,-1}<nw><%while.cond111>
 ; CHECK-NEXT:   Predicates:
 ; CHECK:       Loop %while.body125: Trip multiple is 1
 ; CHECK-NEXT:  Loop %while.cond111: Unpredictable backedge-taken count.
--- a/test/Analysis/ScalarEvolution/scalable-vector.ll
+++ b/test/Analysis/ScalarEvolution/scalable-vector.ll
@ -6,7 +6,7 @@ define void @a(<vscale x 1 x i64> *%p) {
 ; CHECK-LABEL: 'a'
 ; CHECK-NEXT:  Classifying expressions for: @a
 ; CHECK-NEXT:    %1 = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* null, i32 3
-; CHECK-NEXT:    --> ((3 * sizeof(<vscale x 4 x i32>)) + null) U: [0,-15) S: [-9223372036854775808,9223372036854775793)
+; CHECK-NEXT:    --> (3 * sizeof(<vscale x 4 x i32>)) U: [0,-15) S: [-9223372036854775808,9223372036854775793)
 ; CHECK-NEXT:    %2 = getelementptr <vscale x 1 x i64>, <vscale x 1 x i64>* %p, i32 1
 ; CHECK-NEXT:    --> (sizeof(<vscale x 1 x i64>) + %p) U: full-set S: full-set
 ; CHECK-NEXT:  Determining loop execution counts for: @a
--- a/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll
+++ b/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll
@ -185,7 +185,7 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
  ; CHECK:   [[S_ADD_I32_15:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM4]], -467, implicit-def dead $scc
  ; CHECK:   undef %453.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_6]], implicit-def $scc
  ; CHECK:   %453.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_6]], implicit-def dead $scc, implicit $scc
-  ; CHECK:   %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %453, 0, 0, 0 :: (load 8 from %ir.308, addrspace 4)
+  ; CHECK:   %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %453, 0, 0, 0 :: (load 8 from %ir.304, addrspace 4)
  ; CHECK:   [[BUFFER_LOAD_DWORD_OFFSET3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM18]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
  ; CHECK:   [[S_LOAD_DWORDX4_IMM19:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %327, 0, 0, 0 :: (load 16 from %ir.223, addrspace 4)
  ; CHECK:   [[S_LOAD_DWORDX4_IMM20:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %335, 0, 0, 0 :: (load 16 from %ir.230, addrspace 4)
@ -202,16 +202,16 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
  ; CHECK:   [[S_ADD_I32_16:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM5]], -468, implicit-def dead $scc
  ; CHECK:   undef %468.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_7]], implicit-def $scc
  ; CHECK:   %468.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_7]], implicit-def dead $scc, implicit $scc
-  ; CHECK:   %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %468, 0, 0, 0 :: (load 8 from %ir.320, addrspace 4)
+  ; CHECK:   %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %468, 0, 0, 0 :: (load 8 from %ir.316, addrspace 4)
  ; CHECK:   %71.sub1:sgpr_128 = S_AND_B32 %71.sub1, [[S_MOV_B32_]], implicit-def dead $scc
  ; CHECK:   [[S_BUFFER_LOAD_DWORD_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 0, 0, 0 :: (dereferenceable invariant load 4)
-  ; CHECK:   [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %411, 0, 0, 0 :: (load 16 from %ir.282, addrspace 4)
+  ; CHECK:   [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %411, 0, 0, 0 :: (load 16 from %ir.278, addrspace 4)
  ; CHECK:   [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %488:sreg_64, 0, 0, 0 :: (load 4 from `i32 addrspace(4)* undef`, addrspace 4)
  ; CHECK:   KILL %411.sub0, %411.sub1
  ; CHECK:   KILL undef %488:sreg_64
  ; CHECK:   KILL %71.sub0_sub1
  ; CHECK:   [[S_LSHL_B32_8:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY11]], 3, implicit-def dead $scc
-  ; CHECK:   [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %425, 0, 0, 0 :: (load 16 from %ir.291, addrspace 4)
+  ; CHECK:   [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %425, 0, 0, 0 :: (load 16 from %ir.287, addrspace 4)
  ; CHECK:   [[S_ASHR_I32_8:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_8]], 31, implicit-def dead $scc
  ; CHECK:   [[S_ADD_I32_17:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM6]], -469, implicit-def dead $scc
  ; CHECK:   undef %485.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_8]], implicit-def $scc
@ -234,13 +234,13 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
  ; CHECK:   [[S_ADDC_U32_5:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %33:sreg_32, 0, implicit-def dead $scc, implicit $scc
  ; CHECK:   undef %514.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_]], implicit-def $scc
  ; CHECK:   %514.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
-  ; CHECK:   [[S_LOAD_DWORDX4_IMM25:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %514, 0, 0, 0 :: (load 16 from %ir.351, addrspace 4)
+  ; CHECK:   [[S_LOAD_DWORDX4_IMM25:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %514, 0, 0, 0 :: (load 16 from %ir.347, addrspace 4)
  ; CHECK:   undef %522.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_1]], implicit-def $scc
  ; CHECK:   %522.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
-  ; CHECK:   [[S_LOAD_DWORDX4_IMM26:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %522, 0, 0, 0 :: (load 16 from %ir.357, addrspace 4)
+  ; CHECK:   [[S_LOAD_DWORDX4_IMM26:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %522, 0, 0, 0 :: (load 16 from %ir.353, addrspace 4)
  ; CHECK:   undef %530.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_2]], implicit-def $scc
  ; CHECK:   %530.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
-  ; CHECK:   [[S_LOAD_DWORDX4_IMM27:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %530, 0, 0, 0 :: (load 16 from %ir.363, addrspace 4)
+  ; CHECK:   [[S_LOAD_DWORDX4_IMM27:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %530, 0, 0, 0 :: (load 16 from %ir.359, addrspace 4)
  ; CHECK:   [[BUFFER_LOAD_FORMAT_X_IDXEN23:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM25]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
  ; CHECK:   [[BUFFER_LOAD_FORMAT_X_IDXEN24:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM26]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
  ; CHECK:   [[BUFFER_LOAD_FORMAT_X_IDXEN25:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM27]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
--- a/test/CodeGen/PowerPC/pr43527.ll
+++ b/test/CodeGen/PowerPC/pr43527.ll
@ -19,14 +19,15 @@ define dso_local void @test(i64 %arg, i64 %arg1) {
 ; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -64(r1)
 ; CHECK-NEXT:    sub r30, r4, r3
-; CHECK-NEXT:    li r29, -4
+; CHECK-NEXT:    li r29, 0
 ; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB0_3: # %bb5
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    lfsu f1, 4(r29)
+; CHECK-NEXT:    lfsx f1, 0, r29
 ; CHECK-NEXT:    bl lrint
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    addi r30, r30, -1
+; CHECK-NEXT:    addi r29, r29, 4
 ; CHECK-NEXT:    cmpldi r30, 0
 ; CHECK-NEXT:    bne cr0, .LBB0_3
 ; CHECK-NEXT:  # %bb.4: # %bb15
--- a/test/CodeGen/PowerPC/pr48519.ll
+++ b/test/CodeGen/PowerPC/pr48519.ll
@ -265,17 +265,16 @@ define void @func_48785(half %arg) #0 {
 ; CHECK-NEXT:    stdu r1, -64(r1)
 ; CHECK-NEXT:    fmr f31, f1
 ; CHECK-NEXT:    li r30, 0
-; CHECK-NEXT:    li r29, 0
 ; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB3_1: # %bb1
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    fmr f1, f31
+; CHECK-NEXT:    sldi r29, r30, 1
 ; CHECK-NEXT:    bl __gnu_f2h_ieee
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    addi r29, r29, -12
-; CHECK-NEXT:    sth r3, 0(r30)
-; CHECK-NEXT:    addi r30, r30, 24
-; CHECK-NEXT:    cmpldi r29, 0
+; CHECK-NEXT:    addi r30, r30, 12
+; CHECK-NEXT:    sth r3, 0(r29)
+; CHECK-NEXT:    cmpldi r30, 0
 ; CHECK-NEXT:    bne+ cr0, .LBB3_1
 ; CHECK-NEXT:  # %bb.2: # %bb5
 ;
--- a/test/CodeGen/PowerPC/sms-phi.ll
+++ b/test/CodeGen/PowerPC/sms-phi.ll
@ -4,11 +4,11 @@
 ; RUN:       >/dev/null | FileCheck %s
 define dso_local void @sha512() #0 {
 ;CHECK: prolog:
-;CHECK:        %18:g8rc = ADD8 %24:g8rc, %23:g8rc
+;CHECK:        %16:g8rc = ADD8 %21:g8rc, %20:g8rc
 ;CHECK: epilog:
-;CHECK:        %28:g8rc_and_g8rc_nox0 = PHI %6:g8rc_and_g8rc_nox0, %bb.3, %22:g8rc_and_g8rc_nox0, %bb.4
-;CHECK-NEXT:   %29:g8rc = PHI %12:g8rc, %bb.3, %16:g8rc, %bb.4
-;CHECK-NEXT:   %30:g8rc = PHI %15:g8rc, %bb.3, %19:g8rc, %bb.4
+;CHECK:        %23:g8rc_and_g8rc_nox0 = PHI %5:g8rc_and_g8rc_nox0, %bb.3, %18:g8rc_and_g8rc_nox0, %bb.4
+;CHECK-NEXT:   %24:g8rc = PHI %6:g8rc, %bb.3, %16:g8rc, %bb.4
+;CHECK-NEXT:   %25:g8rc = PHI %6:g8rc, %bb.3, %19:g8rc, %bb.4
  br label %1

 1:                                                ; preds = %1, %0
--- a/test/Other/constant-fold-gep.ll
+++ b/test/Other/constant-fold-gep.ll
@ -192,9 +192,9 @@
 ; SCEV:   %t = bitcast i1* getelementptr (i1, i1* inttoptr (i32 1 to i1*), i32 -2) to i1*
 ; SCEV:   -->  (-2 + inttoptr (i32 1 to i1*))
 ; SCEV: Classifying expressions for: @hoo8
-; SCEV:   -->  (-1 + null)<nuw><nsw> U: [-1,0) S: [-1,0)
+; SCEV:   -->  -1
 ; SCEV: Classifying expressions for: @hoo1
-; SCEV:   -->  (-1 + null)<nuw><nsw> U: [-1,0) S: [-1,0)
+; SCEV:   -->  -1

 define i8* @goo8() nounwind {
  %t = bitcast i8* getelementptr (i8, i8* inttoptr (i32 1 to i8*), i32 -1) to i8*
@ -408,13 +408,13 @@ define i64 @fi() nounwind {
 ; TO: }
 ; SCEV: Classifying expressions for: @fM
 ; SCEV:   %t = bitcast i64* getelementptr (i64, i64* null, i32 1) to i64*
-; SCEV:    --> (8 + null)<nuw><nsw> U: [8,9) S: [8,9)
+; SCEV:    --> 8
 ; SCEV: Classifying expressions for: @fN
 ; SCEV:   %t = bitcast i64* getelementptr ({ i64, i64 }, { i64, i64 }* null, i32 0, i32 1) to i64*
-; SCEV:   --> (8 + null)<nuw><nsw> U: [8,9) S: [8,9)
+; SCEV:   --> 8
 ; SCEV: Classifying expressions for: @fO
 ; SCEV:   %t = bitcast i64* getelementptr ([2 x i64], [2 x i64]* null, i32 0, i32 1) to i64*
-; SCEV:   --> (8 + null)<nuw><nsw> U: [8,9) S: [8,9)
+; SCEV:   --> 8

 define i64* @fM() nounwind {
  %t = bitcast i64* getelementptr (i64, i64* null, i32 1) to i64*
--- a/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll
+++ b/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll
@ -150,13 +150,13 @@ define i8 @testnullptrint(i8* %buf, i8* %end) nounwind {
 ; PTR64-NEXT:    [[TMP2:%.*]] = sub i32 [[TMP1]], [[BI]]
 ; PTR64-NEXT:    [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
 ; PTR64-NEXT:    [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1
-; PTR64-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, i8* null, i64 [[TMP4]]
+; PTR64-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to i8*
 ; PTR64-NEXT:    br label [[LOOP:%.*]]
 ; PTR64:       loop:
 ; PTR64-NEXT:    [[P_01_US_US:%.*]] = phi i8* [ null, [[PREHEADER]] ], [ [[GEP:%.*]], [[LOOP]] ]
 ; PTR64-NEXT:    [[GEP]] = getelementptr inbounds i8, i8* [[P_01_US_US]], i64 1
 ; PTR64-NEXT:    [[SNEXT:%.*]] = load i8, i8* [[GEP]], align 1
-; PTR64-NEXT:    [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[SCEVGEP]]
+; PTR64-NEXT:    [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[TMP5]]
 ; PTR64-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
 ; PTR64:       exit.loopexit:
 ; PTR64-NEXT:    [[SNEXT_LCSSA:%.*]] = phi i8 [ [[SNEXT]], [[LOOP]] ]
@ -171,16 +171,16 @@ define i8 @testnullptrint(i8* %buf, i8* %end) nounwind {
 ; PTR32-NEXT:    [[BI:%.*]] = ptrtoint i8* [[BUF:%.*]] to i32
 ; PTR32-NEXT:    [[EI:%.*]] = ptrtoint i8* [[END:%.*]] to i32
 ; PTR32-NEXT:    [[CNT:%.*]] = sub i32 [[EI]], [[BI]]
+; PTR32-NEXT:    [[CNT1:%.*]] = inttoptr i32 [[CNT]] to i8*
 ; PTR32-NEXT:    [[GUARD:%.*]] = icmp ult i32 0, [[CNT]]
 ; PTR32-NEXT:    br i1 [[GUARD]], label [[PREHEADER:%.*]], label [[EXIT:%.*]]
 ; PTR32:       preheader:
-; PTR32-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, i8* null, i32 [[CNT]]
 ; PTR32-NEXT:    br label [[LOOP:%.*]]
 ; PTR32:       loop:
 ; PTR32-NEXT:    [[P_01_US_US:%.*]] = phi i8* [ null, [[PREHEADER]] ], [ [[GEP:%.*]], [[LOOP]] ]
 ; PTR32-NEXT:    [[GEP]] = getelementptr inbounds i8, i8* [[P_01_US_US]], i64 1
 ; PTR32-NEXT:    [[SNEXT:%.*]] = load i8, i8* [[GEP]], align 1
-; PTR32-NEXT:    [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[SCEVGEP]]
+; PTR32-NEXT:    [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[CNT1]]
 ; PTR32-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
 ; PTR32:       exit.loopexit:
 ; PTR32-NEXT:    [[SNEXT_LCSSA:%.*]] = phi i8 [ [[SNEXT]], [[LOOP]] ]
--- a/test/Transforms/IndVarSimplify/widen-i32-i8ptr.ll
+++ b/test/Transforms/IndVarSimplify/widen-i32-i8ptr.ll
@ -11,12 +11,13 @@ define dso_local void @Widen_i32_i8ptr() local_unnamed_addr {
 ; CHECK-NEXT:    store i8** [[ARRAYDECAY2032]], i8*** inttoptr (i64 8 to i8***), align 8
 ; CHECK-NEXT:    br label [[FOR_COND2106:%.*]]
 ; CHECK:       for.cond2106:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_COND2106]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[GID_0:%.*]] = phi i8* [ null, [[ENTRY]] ], [ [[INCDEC_PTR:%.*]], [[FOR_COND2106]] ]
+; CHECK-NEXT:    [[GID_0:%.*]] = phi i8* [ null, [[ENTRY:%.*]] ], [ [[INCDEC_PTR:%.*]], [[FOR_COND2106]] ]
+; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC2117:%.*]], [[FOR_COND2106]] ]
 ; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[GID_0]], i64 1
-; CHECK-NEXT:    [[ARRAYIDX2115:%.*]] = getelementptr inbounds [15 x i8*], [15 x i8*]* [[PTRIDS]], i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[IDXPROM2114:%.*]] = zext i32 [[I_0]] to i64
+; CHECK-NEXT:    [[ARRAYIDX2115:%.*]] = getelementptr inbounds [15 x i8*], [15 x i8*]* [[PTRIDS]], i64 0, i64 [[IDXPROM2114]]
 ; CHECK-NEXT:    store i8* [[GID_0]], i8** [[ARRAYIDX2115]], align 8
-; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[INC2117]] = add nuw nsw i32 [[I_0]], 1
 ; CHECK-NEXT:    br label [[FOR_COND2106]]
 ;
 entry:
--- a/test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll
+++ b/test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll
@ -16,8 +16,9 @@ define i8* @test1() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i8* [ [[SCEVGEP:%.*]], [[LOOP]] ], [ null, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[SCEVGEP]] = getelementptr i8, i8* [[LSR_IV]], i64 1
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], 1
+; CHECK-NEXT:    [[LSR_IV_NEXT1:%.*]] = inttoptr i64 [[LSR_IV_NEXT]] to i8*
 ; CHECK-NEXT:    br i1 false, label [[LOOP]], label [[LOOPEXIT:%.*]]
 ; CHECK:       loopexit:
 ; CHECK-NEXT:    br i1 false, label [[BBA:%.*]], label [[BBB:%.*]]
@ -36,7 +37,7 @@ define i8* @test1() {
 ; CHECK:       bbB.bb89_crit_edge:
 ; CHECK-NEXT:    br label [[BB89]]
 ; CHECK:       bb89:
-; CHECK-NEXT:    [[TMP75PHI:%.*]] = phi i8* [ [[SCEVGEP]], [[BBA_BB89_CRIT_EDGE]] ], [ [[SCEVGEP]], [[BBB_BB89_CRIT_EDGE]] ]
+; CHECK-NEXT:    [[TMP75PHI:%.*]] = phi i8* [ [[LSR_IV_NEXT1]], [[BBA_BB89_CRIT_EDGE]] ], [ [[LSR_IV_NEXT1]], [[BBB_BB89_CRIT_EDGE]] ]
 ; CHECK-NEXT:    br label [[EXIT:%.*]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret i8* [[TMP75PHI]]
@ -80,8 +81,9 @@ define i8* @test2() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i8* [ [[SCEVGEP:%.*]], [[LOOP]] ], [ null, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[SCEVGEP]] = getelementptr i8, i8* [[LSR_IV]], i64 1
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], 1
+; CHECK-NEXT:    [[LSR_IV_NEXT1:%.*]] = inttoptr i64 [[LSR_IV_NEXT]] to i8*
 ; CHECK-NEXT:    br i1 false, label [[LOOP]], label [[LOOPEXIT:%.*]]
 ; CHECK:       loopexit:
 ; CHECK-NEXT:    br i1 false, label [[BBA:%.*]], label [[BBB:%.*]]
@ -98,10 +100,10 @@ define i8* @test2() {
 ; CHECK:       bbB.exit_crit_edge:
 ; CHECK-NEXT:    br label [[EXIT:%.*]]
 ; CHECK:       bb89:
-; CHECK-NEXT:    [[TMP75PHI:%.*]] = phi i8* [ [[SCEVGEP]], [[BBA]] ], [ [[SCEVGEP]], [[BBA]] ], [ [[SCEVGEP]], [[BBA]] ]
+; CHECK-NEXT:    [[TMP75PHI:%.*]] = phi i8* [ [[LSR_IV_NEXT1]], [[BBA]] ], [ [[LSR_IV_NEXT1]], [[BBA]] ], [ [[LSR_IV_NEXT1]], [[BBA]] ]
 ; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[RESULT:%.*]] = phi i8* [ [[TMP75PHI]], [[BB89]] ], [ [[SCEVGEP]], [[BBB_EXIT_CRIT_EDGE]] ]
+; CHECK-NEXT:    [[RESULT:%.*]] = phi i8* [ [[TMP75PHI]], [[BB89]] ], [ [[LSR_IV_NEXT1]], [[BBB_EXIT_CRIT_EDGE]] ]
 ; CHECK-NEXT:    ret i8* [[RESULT]]
 ;
 entry:
--- a/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-postinc-pos-addrspace.ll
+++ b/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-postinc-pos-addrspace.ll
@ -1,177 +1,131 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -march=amdgcn -mcpu=bonaire -loop-reduce -S < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -print-lsr-output < %s 2>&1 | FileCheck %s

 ; Test various conditions where OptimizeLoopTermCond doesn't look at a
 ; memory instruction use and fails to find the address space.

 target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"

-define amdgpu_kernel void @local_cmp_user(i32 %arg0) nounwind {
 ; CHECK-LABEL: @local_cmp_user(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[ARG0:%.*]], 1
-; CHECK-NEXT:    br label [[BB11:%.*]]
-; CHECK:       bb11:
-; CHECK-NEXT:    [[LSR_IV2:%.*]] = phi i32 [ [[LSR_IV_NEXT3:%.*]], [[BB:%.*]] ], [ -2, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[BB]] ], [ [[TMP0]], [[ENTRY]] ]
-; CHECK-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], -1
-; CHECK-NEXT:    [[LSR_IV_NEXT3]] = add i32 [[LSR_IV2]], 2
-; CHECK-NEXT:    [[C0:%.*]] = icmp eq i32 [[LSR_IV_NEXT]], 0
-; CHECK-NEXT:    br i1 [[C0]], label [[BB13:%.*]], label [[BB]]
-; CHECK:       bb:
-; CHECK-NEXT:    [[T:%.*]] = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* undef, align 4
-; CHECK-NEXT:    [[T1:%.*]] = ptrtoint i8 addrspace(3)* [[T]] to i32
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 0, [[T1]]
-; CHECK-NEXT:    [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to i8 addrspace(3)*
-; CHECK-NEXT:    [[TMP:%.*]] = inttoptr i32 [[LSR_IV_NEXT3]] to i8 addrspace(3)*
-; CHECK-NEXT:    [[C1:%.*]] = icmp ne i8 addrspace(3)* [[TMP2]], [[TMP]]
-; CHECK-NEXT:    br i1 [[C1]], label [[BB11]], label [[BB13]]
-; CHECK:       bb13:
-; CHECK-NEXT:    unreachable
-;
+; CHECK: bb11:
+; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %bb ], [ 2, %entry ]
+; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %bb ], [ %{{[0-9]+}}, %entry ]
+; CHECK: %lsr.iv.next = add i32 %lsr.iv, -1
+; CHECK: %lsr.iv.next2 = add i32 %lsr.iv1, -2
+; CHECK: br i1
+
+; CHECK: bb:
+; CHECK: inttoptr i32 %lsr.iv.next2 to i8 addrspace(3)*
+; CHECK: %c1 = icmp ne i8 addrspace(3)*
+define amdgpu_kernel void @local_cmp_user(i32 %arg0) nounwind {
 entry:
  br label %bb11

-bb11:                                             ; preds = %bb, %entry
+bb11:
  %i = phi i32 [ 0, %entry ], [ %i.next, %bb ]
  %ii = shl i32 %i, 1
  %c0 = icmp eq i32 %i, %arg0
  br i1 %c0, label %bb13, label %bb

-bb:                                               ; preds = %bb11
-  %t = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* undef, align 4
+bb:
+  %t = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* undef
  %p = getelementptr i8, i8 addrspace(3)* %t, i32 %ii
  %c1 = icmp ne i8 addrspace(3)* %p, null
  %i.next = add i32 %i, 1
  br i1 %c1, label %bb11, label %bb13

-bb13:                                             ; preds = %bb, %bb11
+bb13:
  unreachable
 }

-define amdgpu_kernel void @global_cmp_user(i64 %arg0) nounwind {
 ; CHECK-LABEL: @global_cmp_user(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[ARG0:%.*]], 1
-; CHECK-NEXT:    br label [[BB11:%.*]]
-; CHECK:       bb11:
-; CHECK-NEXT:    [[LSR_IV2:%.*]] = phi i64 [ [[LSR_IV_NEXT3:%.*]], [[BB:%.*]] ], [ -2, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[BB]] ], [ [[TMP0]], [[ENTRY]] ]
-; CHECK-NEXT:    [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1
-; CHECK-NEXT:    [[LSR_IV_NEXT3]] = add i64 [[LSR_IV2]], 2
-; CHECK-NEXT:    [[C0:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
-; CHECK-NEXT:    br i1 [[C0]], label [[BB13:%.*]], label [[BB]]
-; CHECK:       bb:
-; CHECK-NEXT:    [[T:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef, align 8
-; CHECK-NEXT:    [[T1:%.*]] = ptrtoint i8 addrspace(1)* [[T]] to i64
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 0, [[T1]]
-; CHECK-NEXT:    [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to i8 addrspace(1)*
-; CHECK-NEXT:    [[TMP:%.*]] = inttoptr i64 [[LSR_IV_NEXT3]] to i8 addrspace(1)*
-; CHECK-NEXT:    [[C1:%.*]] = icmp ne i8 addrspace(1)* [[TMP2]], [[TMP]]
-; CHECK-NEXT:    br i1 [[C1]], label [[BB11]], label [[BB13]]
-; CHECK:       bb13:
-; CHECK-NEXT:    unreachable
-;
+; CHECK: %lsr.iv1 = phi i64
+; CHECK: %lsr.iv = phi i64
+; CHECK: %lsr.iv.next = add i64 %lsr.iv, -1
+; CHECK: %lsr.iv.next2 = add i64 %lsr.iv1, -2
+; CHECK: br i1
+
+; CHECK: bb:
+; CHECK: inttoptr i64 %lsr.iv.next2 to i8 addrspace(1)*
+; CHECK: icmp ne i8 addrspace(1)* %t
+define amdgpu_kernel void @global_cmp_user(i64 %arg0) nounwind {
 entry:
  br label %bb11

-bb11:                                             ; preds = %bb, %entry
+bb11:
  %i = phi i64 [ 0, %entry ], [ %i.next, %bb ]
  %ii = shl i64 %i, 1
  %c0 = icmp eq i64 %i, %arg0
  br i1 %c0, label %bb13, label %bb

-bb:                                               ; preds = %bb11
-  %t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef, align 8
+bb:
+  %t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef
  %p = getelementptr i8, i8 addrspace(1)* %t, i64 %ii
  %c1 = icmp ne i8 addrspace(1)* %p, null
  %i.next = add i64 %i, 1
  br i1 %c1, label %bb11, label %bb13

-bb13:                                             ; preds = %bb, %bb11
+bb13:
  unreachable
 }

-define amdgpu_kernel void @global_gep_user(i32 %arg0) nounwind {
 ; CHECK-LABEL: @global_gep_user(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[ARG0:%.*]], 1
-; CHECK-NEXT:    br label [[BB11:%.*]]
-; CHECK:       bb11:
-; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi i32 [ [[LSR_IV_NEXT2:%.*]], [[BB:%.*]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[BB]] ], [ [[TMP0]], [[ENTRY]] ]
-; CHECK-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], -1
-; CHECK-NEXT:    [[LSR_IV_NEXT2]] = add i32 [[LSR_IV1]], 2
-; CHECK-NEXT:    [[C0:%.*]] = icmp eq i32 [[LSR_IV_NEXT]], 0
-; CHECK-NEXT:    br i1 [[C0]], label [[BB13:%.*]], label [[BB]]
-; CHECK:       bb:
-; CHECK-NEXT:    [[T:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef, align 8
-; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[LSR_IV1]] to i64
-; CHECK-NEXT:    [[P:%.*]] = getelementptr i8, i8 addrspace(1)* [[T]], i64 [[IDXPROM]]
-; CHECK-NEXT:    [[C1:%.*]] = icmp ne i8 addrspace(1)* [[P]], null
-; CHECK-NEXT:    br i1 [[C1]], label [[BB11]], label [[BB13]]
-; CHECK:       bb13:
-; CHECK-NEXT:    unreachable
-;
+; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %bb ], [ 0, %entry ]
+; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %bb ], [ %{{[0-9]+}}, %entry ]
+; CHECK: %lsr.iv.next = add i32 %lsr.iv, -1
+; CHECK: %lsr.iv.next2 = add i32 %lsr.iv1, 2
+; CHECK: br i1
+
+; CHECK: bb:
+; CHECK: %idxprom = sext i32 %lsr.iv1 to i64
+; CHECK: getelementptr i8, i8 addrspace(1)* %t, i64 %idxprom
+define amdgpu_kernel void @global_gep_user(i32 %arg0) nounwind {
 entry:
  br label %bb11

-bb11:                                             ; preds = %bb, %entry
+bb11:
  %i = phi i32 [ 0, %entry ], [ %i.next, %bb ]
  %ii = shl i32 %i, 1
  %c0 = icmp eq i32 %i, %arg0
  br i1 %c0, label %bb13, label %bb

-bb:                                               ; preds = %bb11
-  %t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef, align 8
-  %idxprom = sext i32 %ii to i64
-  %p = getelementptr i8, i8 addrspace(1)* %t, i64 %idxprom
+bb:
+  %t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef
+  %p = getelementptr i8, i8 addrspace(1)* %t, i32 %ii
  %c1 = icmp ne i8 addrspace(1)* %p, null
  %i.next = add i32 %i, 1
  br i1 %c1, label %bb11, label %bb13

-bb13:                                             ; preds = %bb, %bb11
+bb13:
  unreachable
 }

-define amdgpu_kernel void @global_sext_scale_user(i32 %arg0) nounwind {
 ; CHECK-LABEL: @global_sext_scale_user(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[ARG0:%.*]], 1
-; CHECK-NEXT:    br label [[BB11:%.*]]
-; CHECK:       bb11:
-; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi i32 [ [[LSR_IV_NEXT2:%.*]], [[BB:%.*]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[BB]] ], [ [[TMP0]], [[ENTRY]] ]
-; CHECK-NEXT:    [[II_EXT:%.*]] = sext i32 [[LSR_IV1]] to i64
-; CHECK-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], -1
-; CHECK-NEXT:    [[LSR_IV_NEXT2]] = add i32 [[LSR_IV1]], 2
-; CHECK-NEXT:    [[C0:%.*]] = icmp eq i32 [[LSR_IV_NEXT]], 0
-; CHECK-NEXT:    br i1 [[C0]], label [[BB13:%.*]], label [[BB]]
-; CHECK:       bb:
-; CHECK-NEXT:    [[T:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef, align 8
-; CHECK-NEXT:    [[P:%.*]] = getelementptr i8, i8 addrspace(1)* [[T]], i64 [[II_EXT]]
-; CHECK-NEXT:    [[C1:%.*]] = icmp ne i8 addrspace(1)* [[P]], null
-; CHECK-NEXT:    br i1 [[C1]], label [[BB11]], label [[BB13]]
-; CHECK:       bb13:
-; CHECK-NEXT:    unreachable
-;
+; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %bb ], [ 0, %entry ]
+; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %bb ], [ %{{[0-9]+}}, %entry ]
+; CHECK: %lsr.iv.next = add i32 %lsr.iv, -1
+; CHECK: %lsr.iv.next2 = add i32 %lsr.iv1, 2
+; CHECK: br i1
+
+; CHECK: bb
+; CHECK: %p = getelementptr i8, i8 addrspace(1)* %t, i64 %ii.ext
+define amdgpu_kernel void @global_sext_scale_user(i32 %arg0) nounwind {
 entry:
  br label %bb11

-bb11:                                             ; preds = %bb, %entry
+bb11:
  %i = phi i32 [ 0, %entry ], [ %i.next, %bb ]
  %ii = shl i32 %i, 1
  %ii.ext = sext i32 %ii to i64
  %c0 = icmp eq i32 %i, %arg0
  br i1 %c0, label %bb13, label %bb

-bb:                                               ; preds = %bb11
-  %t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef, align 8
+bb:
+  %t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef
  %p = getelementptr i8, i8 addrspace(1)* %t, i64 %ii.ext
  %c1 = icmp ne i8 addrspace(1)* %p, null
  %i.next = add i32 %i, 1
  br i1 %c1, label %bb11, label %bb13

-bb13:                                             ; preds = %bb, %bb11
+bb13:
  unreachable
 }
--- a/test/Transforms/LoopVectorize/X86/cost-model-assert.ll
+++ b/test/Transforms/LoopVectorize/X86/cost-model-assert.ll
@ -16,8 +16,17 @@ define void @cff_index_load_offsets(i1 %cond, i8 %x, i8* %p) #0 {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[EXIT:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT:    [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 undef, i64 4)
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[UMAX]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
+; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[N_VEC]], 4
+; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, i8* null, i64 [[TMP3]]
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[X:%.*]], i32 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i8> poison, i8 [[X]], i32 0
@ -25,68 +34,68 @@ define void @cff_index_load_offsets(i1 %cond, i8 %x, i8* %p) #0 {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, i8* null, i64 [[TMP1]]
-; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
-; CHECK-NEXT:    [[NEXT_GEP1:%.*]] = getelementptr i8, i8* null, i64 [[TMP3]]
-; CHECK-NEXT:    [[TMP4:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i32>
-; CHECK-NEXT:    [[TMP5:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT3]] to <4 x i32>
-; CHECK-NEXT:    [[TMP6:%.*]] = shl nuw <4 x i32> [[TMP4]], <i32 24, i32 24, i32 24, i32 24>
-; CHECK-NEXT:    [[TMP7:%.*]] = shl nuw <4 x i32> [[TMP5]], <i32 24, i32 24, i32 24, i32 24>
-; CHECK-NEXT:    [[TMP8:%.*]] = load i8, i8* [[P:%.*]], align 1, [[TBAA1:!tbaa !.*]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i8> poison, i8 [[TMP8]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, i8* null, i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 4
+; CHECK-NEXT:    [[NEXT_GEP1:%.*]] = getelementptr i8, i8* null, i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i32>
+; CHECK-NEXT:    [[TMP9:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT3]] to <4 x i32>
+; CHECK-NEXT:    [[TMP10:%.*]] = shl nuw <4 x i32> [[TMP8]], <i32 24, i32 24, i32 24, i32 24>
+; CHECK-NEXT:    [[TMP11:%.*]] = shl nuw <4 x i32> [[TMP9]], <i32 24, i32 24, i32 24, i32 24>
+; CHECK-NEXT:    [[TMP12:%.*]] = load i8, i8* [[P:%.*]], align 1, [[TBAA1:!tbaa !.*]]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i8> poison, i8 [[TMP12]], i32 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT4]], <4 x i8> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP9:%.*]] = load i8, i8* [[P]], align 1, [[TBAA1]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i8> poison, i8 [[TMP9]], i32 0
+; CHECK-NEXT:    [[TMP13:%.*]] = load i8, i8* [[P]], align 1, [[TBAA1]]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i8> poison, i8 [[TMP13]], i32 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT6]], <4 x i8> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP10:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT5]] to <4 x i32>
-; CHECK-NEXT:    [[TMP11:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT7]] to <4 x i32>
-; CHECK-NEXT:    [[TMP12:%.*]] = shl nuw nsw <4 x i32> [[TMP10]], <i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:    [[TMP13:%.*]] = shl nuw nsw <4 x i32> [[TMP11]], <i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:    [[TMP14:%.*]] = or <4 x i32> [[TMP12]], [[TMP6]]
-; CHECK-NEXT:    [[TMP15:%.*]] = or <4 x i32> [[TMP13]], [[TMP7]]
-; CHECK-NEXT:    [[TMP16:%.*]] = load i8, i8* undef, align 1, [[TBAA1]]
-; CHECK-NEXT:    [[TMP17:%.*]] = load i8, i8* undef, align 1, [[TBAA1]]
-; CHECK-NEXT:    [[TMP18:%.*]] = or <4 x i32> [[TMP14]], zeroinitializer
-; CHECK-NEXT:    [[TMP19:%.*]] = or <4 x i32> [[TMP15]], zeroinitializer
-; CHECK-NEXT:    [[TMP20:%.*]] = or <4 x i32> [[TMP18]], zeroinitializer
-; CHECK-NEXT:    [[TMP21:%.*]] = or <4 x i32> [[TMP19]], zeroinitializer
-; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <4 x i32> [[TMP20]], i32 0
-; CHECK-NEXT:    store i32 [[TMP22]], i32* undef, align 4, [[TBAA4:!tbaa !.*]]
-; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <4 x i32> [[TMP20]], i32 1
-; CHECK-NEXT:    store i32 [[TMP23]], i32* undef, align 4, [[TBAA4]]
-; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x i32> [[TMP20]], i32 2
-; CHECK-NEXT:    store i32 [[TMP24]], i32* undef, align 4, [[TBAA4]]
-; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <4 x i32> [[TMP20]], i32 3
-; CHECK-NEXT:    store i32 [[TMP25]], i32* undef, align 4, [[TBAA4]]
-; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <4 x i32> [[TMP21]], i32 0
-; CHECK-NEXT:    store i32 [[TMP26]], i32* undef, align 4, [[TBAA4]]
-; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <4 x i32> [[TMP21]], i32 1
+; CHECK-NEXT:    [[TMP14:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT5]] to <4 x i32>
+; CHECK-NEXT:    [[TMP15:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT7]] to <4 x i32>
+; CHECK-NEXT:    [[TMP16:%.*]] = shl nuw nsw <4 x i32> [[TMP14]], <i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT:    [[TMP17:%.*]] = shl nuw nsw <4 x i32> [[TMP15]], <i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT:    [[TMP18:%.*]] = or <4 x i32> [[TMP16]], [[TMP10]]
+; CHECK-NEXT:    [[TMP19:%.*]] = or <4 x i32> [[TMP17]], [[TMP11]]
+; CHECK-NEXT:    [[TMP20:%.*]] = load i8, i8* undef, align 1, [[TBAA1]]
+; CHECK-NEXT:    [[TMP21:%.*]] = load i8, i8* undef, align 1, [[TBAA1]]
+; CHECK-NEXT:    [[TMP22:%.*]] = or <4 x i32> [[TMP18]], zeroinitializer
+; CHECK-NEXT:    [[TMP23:%.*]] = or <4 x i32> [[TMP19]], zeroinitializer
+; CHECK-NEXT:    [[TMP24:%.*]] = or <4 x i32> [[TMP22]], zeroinitializer
+; CHECK-NEXT:    [[TMP25:%.*]] = or <4 x i32> [[TMP23]], zeroinitializer
+; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <4 x i32> [[TMP24]], i32 0
+; CHECK-NEXT:    store i32 [[TMP26]], i32* undef, align 4, [[TBAA4:!tbaa !.*]]
+; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <4 x i32> [[TMP24]], i32 1
 ; CHECK-NEXT:    store i32 [[TMP27]], i32* undef, align 4, [[TBAA4]]
-; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <4 x i32> [[TMP21]], i32 2
+; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <4 x i32> [[TMP24]], i32 2
 ; CHECK-NEXT:    store i32 [[TMP28]], i32* undef, align 4, [[TBAA4]]
-; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <4 x i32> [[TMP21]], i32 3
+; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <4 x i32> [[TMP24]], i32 3
 ; CHECK-NEXT:    store i32 [[TMP29]], i32* undef, align 4, [[TBAA4]]
+; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <4 x i32> [[TMP25]], i32 0
+; CHECK-NEXT:    store i32 [[TMP30]], i32* undef, align 4, [[TBAA4]]
+; CHECK-NEXT:    [[TMP31:%.*]] = extractelement <4 x i32> [[TMP25]], i32 1
+; CHECK-NEXT:    store i32 [[TMP31]], i32* undef, align 4, [[TBAA4]]
+; CHECK-NEXT:    [[TMP32:%.*]] = extractelement <4 x i32> [[TMP25]], i32 2
+; CHECK-NEXT:    store i32 [[TMP32]], i32* undef, align 4, [[TBAA4]]
+; CHECK-NEXT:    [[TMP33:%.*]] = extractelement <4 x i32> [[TMP25]], i32 3
+; CHECK-NEXT:    store i32 [[TMP33]], i32* undef, align 4, [[TBAA4]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 8
-; CHECK-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
-; CHECK-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]]
+; CHECK-NEXT:    [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP34]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1, 0
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[SW_EPILOG:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8* [ null, [[MIDDLE_BLOCK]] ], [ null, [[IF_THEN]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ null, [[IF_THEN]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY68:%.*]]
 ; CHECK:       for.body68:
 ; CHECK-NEXT:    [[P_359:%.*]] = phi i8* [ [[ADD_PTR86:%.*]], [[FOR_BODY68]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[CONV70:%.*]] = zext i8 [[X]] to i32
 ; CHECK-NEXT:    [[SHL71:%.*]] = shl nuw i32 [[CONV70]], 24
-; CHECK-NEXT:    [[TMP31:%.*]] = load i8, i8* [[P]], align 1, [[TBAA1]]
-; CHECK-NEXT:    [[CONV73:%.*]] = zext i8 [[TMP31]] to i32
+; CHECK-NEXT:    [[TMP35:%.*]] = load i8, i8* [[P]], align 1, [[TBAA1]]
+; CHECK-NEXT:    [[CONV73:%.*]] = zext i8 [[TMP35]] to i32
 ; CHECK-NEXT:    [[SHL74:%.*]] = shl nuw nsw i32 [[CONV73]], 16
 ; CHECK-NEXT:    [[OR75:%.*]] = or i32 [[SHL74]], [[SHL71]]
-; CHECK-NEXT:    [[TMP32:%.*]] = load i8, i8* undef, align 1, [[TBAA1]]
+; CHECK-NEXT:    [[TMP36:%.*]] = load i8, i8* undef, align 1, [[TBAA1]]
 ; CHECK-NEXT:    [[SHL78:%.*]] = shl nuw nsw i32 undef, 8
 ; CHECK-NEXT:    [[OR79:%.*]] = or i32 [[OR75]], [[SHL78]]
 ; CHECK-NEXT:    [[CONV81:%.*]] = zext i8 undef to i32
--- a/test/Transforms/LoopVectorize/pointer-induction.ll
+++ b/test/Transforms/LoopVectorize/pointer-induction.ll
@ -12,13 +12,11 @@ define void @a(i8* readnone %b) {
 ; CHECK-NEXT:    br i1 [[CMP_NOT4]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
 ; CHECK:       for.body.preheader:
 ; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 0, [[B1]]
-; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, i8* null, i64 [[TMP0]]
-; CHECK-NEXT:    [[EXITCOUNT_PTRCNT_TO_INT:%.*]] = ptrtoint i8* [[SCEVGEP]] to i64
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[EXITCOUNT_PTRCNT_TO_INT]], 4
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[EXITCOUNT_PTRCNT_TO_INT]], 4
-; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[EXITCOUNT_PTRCNT_TO_INT]], [[N_MOD_VF]]
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[N_VEC]], -1
 ; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, i8* null, i64 [[TMP1]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
@ -68,7 +66,7 @@ define void @a(i8* readnone %b) {
 ; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, i8* [[POINTER_PHI]], i64 -4
 ; CHECK-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[EXITCOUNT_PTRCNT_TO_INT]], [[N_VEC]]
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ null, [[FOR_BODY_PREHEADER]] ]
--- a/unittests/Transforms/Utils/ScalarEvolutionExpanderTest.cpp
+++ b/unittests/Transforms/Utils/ScalarEvolutionExpanderTest.cpp
@ -947,31 +947,24 @@ TEST_F(ScalarEvolutionExpanderTest, ExpandNonIntegralPtrWithNullBase) {
    Value *V = Exp.expandCodeFor(PtrPlus1, I.getType(), &I);
    I.replaceAllUsesWith(V);

-    // Check that the expander created:
-    // define float addrspace(1)* @test(i64 %off) {
-    //   %scevgep = getelementptr float, float addrspace(1)* null, i64 %off
-    //   %scevgep1 = bitcast float addrspace(1)* %scevgep to i8 addrspace(1)*
-    //   %uglygep = getelementptr i8, i8 addrspace(1)* %scevgep1, i64 1
-    //   %uglygep2 = bitcast i8 addrspace(1)* %uglygep to float addrspace(1)*
-    //   %ptr = getelementptr inbounds float, float addrspace(1)* null, i64 %off
-    //   ret float addrspace(1)* %uglygep2
-    // }
-
+    // Check the expander created bitcast (gep i8* null, %offset).
    auto *Cast = dyn_cast<BitCastInst>(V);
    EXPECT_TRUE(Cast);
    EXPECT_EQ(Cast->getType(), I.getType());
    auto *GEP = dyn_cast<GetElementPtrInst>(Cast->getOperand(0));
    EXPECT_TRUE(GEP);
-    EXPECT_TRUE(match(GEP->getOperand(1), m_SpecificInt(1)));
-    auto *Cast1 = dyn_cast<BitCastInst>(GEP->getPointerOperand());
-    EXPECT_TRUE(Cast1);
-    auto *GEP1 = dyn_cast<GetElementPtrInst>(Cast1->getOperand(0));
-    EXPECT_TRUE(GEP1);
-    EXPECT_TRUE(cast<Constant>(GEP1->getPointerOperand())->isNullValue());
-    EXPECT_EQ(GEP1->getOperand(1), &*F.arg_begin());
-    EXPECT_EQ(cast<PointerType>(GEP1->getPointerOperand()->getType())
+    EXPECT_TRUE(cast<Constant>(GEP->getPointerOperand())->isNullValue());
+    EXPECT_EQ(cast<PointerType>(GEP->getPointerOperand()->getType())
                  ->getAddressSpace(),
              cast<PointerType>(I.getType())->getAddressSpace());
+
+    // Check the expander created the expected index computation: add (shl
+    // %offset, 2), 1.
+    Value *Arg;
+    EXPECT_TRUE(
+        match(GEP->getOperand(1),
+              m_Add(m_Shl(m_Value(Arg), m_SpecificInt(2)), m_SpecificInt(1))));
+    EXPECT_EQ(Arg, &*F.arg_begin());
    EXPECT_FALSE(verifyFunction(F, &errs()));
  });
 }