From 56d0e7bedd5a5fef7220cc7a12e6703124bf4626 Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 24 May 2021 11:26:45 +0100 Subject: [PATCH] [ARM] Ensure WLS preheader blocks have branches during memcpy lowering This makes sure that the blocks created for lowering memcpy to loops end up with branches, even if they fall through to the successor. Otherwise IfCvt is getting confused with unanalyzable branches and creating invalid block layouts. The extra branches should be removed as the tail predicated loop is finalized in almost all cases. --- lib/Target/ARM/ARMISelLowering.cpp | 4 + test/CodeGen/Thumb2/mve-memtp-branch.ll | 374 ++++++++++++++++++++++++ test/CodeGen/Thumb2/mve-memtp-loop.ll | 1 + 3 files changed, 379 insertions(+) create mode 100644 test/CodeGen/Thumb2/mve-memtp-branch.ll diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 0a37ec38831..38c6c32a160 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -11133,6 +11133,10 @@ static Register genTPEntry(MachineBasicBlock *TpEntry, .addUse(TotalIterationsReg) .addMBB(TpExit); + BuildMI(TpEntry, Dl, TII->get(ARM::t2B)) + .addMBB(TpLoopBody) + .add(predOps(ARMCC::AL)); + return TotalIterationsReg; } diff --git a/test/CodeGen/Thumb2/mve-memtp-branch.ll b/test/CodeGen/Thumb2/mve-memtp-branch.ll new file mode 100644 index 00000000000..d0929ab3b20 --- /dev/null +++ b/test/CodeGen/Thumb2/mve-memtp-branch.ll @@ -0,0 +1,374 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -arm-memtransfer-tploop=force-enabled %s -o - | FileCheck %s + +; In this test, the successors of various blocks were becoming invalid after +; ifcvt as the blocks did not properly fall through to the successor after a +; WhileLoopStart + +@arr_183 = external dso_local local_unnamed_addr global [20 x [23 x [19 x i8]]], align 1 +define i32 @a(i8 zeroext %b, [3 x i8]* nocapture readonly %c, [3 x i32]* nocapture readonly %d) { +; CHECK-LABEL: a: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: cmp r0, #1 +; CHECK-NEXT: bls.w .LBB0_11 +; CHECK-NEXT: @ %bb.1: @ %for.body.us.preheader +; CHECK-NEXT: movw r5, :lower16:arr_183 +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: movt r5, :upper16:arr_183 +; CHECK-NEXT: mov.w r12, #19 +; CHECK-NEXT: vmov.i32 q0, #0x0 +; CHECK-NEXT: vmov.i32 q1, #0x0 +; CHECK-NEXT: vmov.i32 q2, #0x0 +; CHECK-NEXT: vmov.i32 q3, #0x0 +; CHECK-NEXT: b .LBB0_3 +; CHECK-NEXT: .LBB0_2: @ %land.end.us.3 +; CHECK-NEXT: @ in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: movs r3, #1 +; CHECK-NEXT: .LBB0_3: @ %for.body.us +; CHECK-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-NEXT: @ Child Loop BB0_4 Depth 2 +; CHECK-NEXT: @ Child Loop BB0_6 Depth 2 +; CHECK-NEXT: @ Child Loop BB0_8 Depth 2 +; CHECK-NEXT: @ Child Loop BB0_10 Depth 2 +; CHECK-NEXT: ldr.w r0, [r2, r3, lsl #2] +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: ite ne +; CHECK-NEXT: ldrbne r0, [r1, r3] +; CHECK-NEXT: moveq r0, #0 +; CHECK-NEXT: mla r3, r3, r12, r5 +; CHECK-NEXT: add r3, r0 +; CHECK-NEXT: rsb.w r0, r0, #108 +; CHECK-NEXT: wlstp.8 lr, r0, .LBB0_5 +; CHECK-NEXT: .LBB0_4: @ Parent Loop BB0_3 Depth=1 +; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-NEXT: vstrb.8 q0, [r3], #16 +; CHECK-NEXT: letp lr, .LBB0_4 +; CHECK-NEXT: .LBB0_5: @ %land.end.us +; CHECK-NEXT: @ in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: ldr r0, [r2, #4] +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: ite ne +; CHECK-NEXT: ldrbne r0, [r1, #1] +; CHECK-NEXT: moveq r0, #0 +; CHECK-NEXT: adds r3, r5, r0 +; CHECK-NEXT: rsb.w r0, r0, #108 +; CHECK-NEXT: adds r3, #19 +; CHECK-NEXT: wlstp.8 lr, r0, .LBB0_7 +; CHECK-NEXT: .LBB0_6: @ Parent Loop BB0_3 Depth=1 +; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-NEXT: vstrb.8 q1, [r3], #16 +; CHECK-NEXT: letp lr, .LBB0_6 +; CHECK-NEXT: .LBB0_7: @ %land.end.us.1 +; CHECK-NEXT: @ in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: ldr r0, [r2, #4] +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: ite ne +; CHECK-NEXT: ldrbne r0, [r1, #1] +; CHECK-NEXT: moveq r0, #0 +; CHECK-NEXT: adds r3, r5, r0 +; CHECK-NEXT: rsb.w r0, r0, #108 +; CHECK-NEXT: adds r3, #19 +; CHECK-NEXT: wlstp.8 lr, r0, .LBB0_9 +; CHECK-NEXT: .LBB0_8: @ Parent Loop BB0_3 Depth=1 +; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-NEXT: vstrb.8 q2, [r3], #16 +; CHECK-NEXT: letp lr, .LBB0_8 +; CHECK-NEXT: .LBB0_9: @ %land.end.us.2 +; CHECK-NEXT: @ in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: ldr r0, [r2, #4] +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: ite ne +; CHECK-NEXT: ldrbne r0, [r1, #1] +; CHECK-NEXT: moveq r0, #0 +; CHECK-NEXT: adds r3, r5, r0 +; CHECK-NEXT: rsb.w r0, r0, #108 +; CHECK-NEXT: add.w r4, r0, #15 +; CHECK-NEXT: adds r3, #19 +; CHECK-NEXT: lsrs r4, r4, #4 +; CHECK-NEXT: subs.w lr, r4, #0 +; CHECK-NEXT: beq .LBB0_2 +; CHECK-NEXT: b .LBB0_10 +; CHECK-NEXT: .LBB0_10: @ Parent Loop BB0_3 Depth=1 +; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-NEXT: vctp.8 r0 +; CHECK-NEXT: subs r0, #16 +; CHECK-NEXT: vpst +; CHECK-NEXT: vstrbt.8 q3, [r3], #16 +; CHECK-NEXT: subs.w lr, lr, #1 +; CHECK-NEXT: bne .LBB0_10 +; CHECK-NEXT: b .LBB0_2 +; CHECK-NEXT: .LBB0_11: +; CHECK-NEXT: movw r12, :lower16:arr_183 +; CHECK-NEXT: vmov.i32 q0, #0x0 +; CHECK-NEXT: movt r12, :upper16:arr_183 +; CHECK-NEXT: vmov.i32 q1, #0x0 +; CHECK-NEXT: vmov.i32 q2, #0x0 +; CHECK-NEXT: vmov.i32 q3, #0x0 +; CHECK-NEXT: b .LBB0_13 +; CHECK-NEXT: .LBB0_12: @ %for.body.lr.ph.3 +; CHECK-NEXT: @ in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: ldr r3, [r2, #4] +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: ite ne +; CHECK-NEXT: ldrbne r3, [r1, #1] +; CHECK-NEXT: moveq r3, #0 +; CHECK-NEXT: add.w r5, r12, r3 +; CHECK-NEXT: rsb.w r3, r3, #108 +; CHECK-NEXT: add.w r4, r5, #19 +; CHECK-NEXT: add.w r5, r3, #15 +; CHECK-NEXT: lsrs r5, r5, #4 +; CHECK-NEXT: subs.w lr, r5, #0 +; CHECK-NEXT: beq .LBB0_13 +; CHECK-NEXT: b .LBB0_23 +; CHECK-NEXT: .LBB0_13: @ %for.cond +; CHECK-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-NEXT: @ Child Loop BB0_15 Depth 2 +; CHECK-NEXT: @ Child Loop BB0_18 Depth 2 +; CHECK-NEXT: @ Child Loop BB0_21 Depth 2 +; CHECK-NEXT: @ Child Loop BB0_23 Depth 2 +; CHECK-NEXT: cmp r0, #2 +; CHECK-NEXT: blo .LBB0_16 +; CHECK-NEXT: @ %bb.14: @ %for.body.lr.ph +; CHECK-NEXT: @ in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: ldr r3, [r2, #4] +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: ite ne +; CHECK-NEXT: ldrbne r3, [r1, #1] +; CHECK-NEXT: moveq r3, #0 +; CHECK-NEXT: add.w r5, r12, r3 +; CHECK-NEXT: rsb.w r3, r3, #108 +; CHECK-NEXT: add.w r4, r5, #19 +; CHECK-NEXT: wlstp.8 lr, r3, .LBB0_16 +; CHECK-NEXT: .LBB0_15: @ Parent Loop BB0_13 Depth=1 +; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-NEXT: vstrb.8 q0, [r4], #16 +; CHECK-NEXT: letp lr, .LBB0_15 +; CHECK-NEXT: .LBB0_16: @ %for.cond.backedge +; CHECK-NEXT: @ in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: cmp r0, #2 +; CHECK-NEXT: blo .LBB0_19 +; CHECK-NEXT: @ %bb.17: @ %for.body.lr.ph.1 +; CHECK-NEXT: @ in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: ldr r3, [r2, #4] +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: ite ne +; CHECK-NEXT: ldrbne r3, [r1, #1] +; CHECK-NEXT: moveq r3, #0 +; CHECK-NEXT: add.w r5, r12, r3 +; CHECK-NEXT: rsb.w r3, r3, #108 +; CHECK-NEXT: add.w r4, r5, #19 +; CHECK-NEXT: wlstp.8 lr, r3, .LBB0_19 +; CHECK-NEXT: .LBB0_18: @ Parent Loop BB0_13 Depth=1 +; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-NEXT: vstrb.8 q1, [r4], #16 +; CHECK-NEXT: letp lr, .LBB0_18 +; CHECK-NEXT: .LBB0_19: @ %for.cond.backedge.1 +; CHECK-NEXT: @ in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: cmp r0, #2 +; CHECK-NEXT: blo .LBB0_22 +; CHECK-NEXT: @ %bb.20: @ %for.body.lr.ph.2 +; CHECK-NEXT: @ in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: ldr r3, [r2, #4] +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: ite ne +; CHECK-NEXT: ldrbne r3, [r1, #1] +; CHECK-NEXT: moveq r3, #0 +; CHECK-NEXT: add.w r5, r12, r3 +; CHECK-NEXT: rsb.w r3, r3, #108 +; CHECK-NEXT: add.w r4, r5, #19 +; CHECK-NEXT: wlstp.8 lr, r3, .LBB0_22 +; CHECK-NEXT: .LBB0_21: @ Parent Loop BB0_13 Depth=1 +; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-NEXT: vstrb.8 q2, [r4], #16 +; CHECK-NEXT: letp lr, .LBB0_21 +; CHECK-NEXT: .LBB0_22: @ %for.cond.backedge.2 +; CHECK-NEXT: @ in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: cmp r0, #2 +; CHECK-NEXT: blo .LBB0_13 +; CHECK-NEXT: b .LBB0_12 +; CHECK-NEXT: .LBB0_23: @ Parent Loop BB0_13 Depth=1 +; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-NEXT: vctp.8 r3 +; CHECK-NEXT: subs r3, #16 +; CHECK-NEXT: vpst +; CHECK-NEXT: vstrbt.8 q3, [r4], #16 +; CHECK-NEXT: subs.w lr, lr, #1 +; CHECK-NEXT: bne .LBB0_23 +; CHECK-NEXT: b .LBB0_13 +entry: + %cmp = icmp ugt i8 %b, 1 + br i1 %cmp, label %for.body.us.preheader, label %for.cond.preheader + +for.cond.preheader: ; preds = %entry + %cmp43 = icmp ugt i8 %b, 1 + %arrayidx6 = getelementptr inbounds [3 x i32], [3 x i32]* %d, i32 0, i32 1 + %arrayidx12 = getelementptr inbounds [3 x i8], [3 x i8]* %c, i32 0, i32 1 + %cmp43.1 = icmp ugt i8 %b, 1 + %arrayidx6.1 = getelementptr inbounds [3 x i32], [3 x i32]* %d, i32 0, i32 1 + %arrayidx12.1 = getelementptr inbounds [3 x i8], [3 x i8]* %c, i32 0, i32 1 + %cmp43.2 = icmp ugt i8 %b, 1 + %arrayidx6.2 = getelementptr inbounds [3 x i32], [3 x i32]* %d, i32 0, i32 1 + %arrayidx12.2 = getelementptr inbounds [3 x i8], [3 x i8]* %c, i32 0, i32 1 + %cmp43.3 = icmp ugt i8 %b, 1 + %arrayidx6.3 = getelementptr inbounds [3 x i32], [3 x i32]* %d, i32 0, i32 1 + %arrayidx12.3 = getelementptr inbounds [3 x i8], [3 x i8]* %c, i32 0, i32 1 + br label %for.cond + +for.body.us.preheader: ; preds = %entry + %arrayidx6.us.1 = getelementptr inbounds [3 x i32], [3 x i32]* %d, i32 0, i32 1 + %arrayidx12.us.1 = getelementptr inbounds [3 x i8], [3 x i8]* %c, i32 0, i32 1 + %arrayidx6.us.2 = getelementptr inbounds [3 x i32], [3 x i32]* %d, i32 0, i32 1 + %arrayidx12.us.2 = getelementptr inbounds [3 x i8], [3 x i8]* %c, i32 0, i32 1 + %arrayidx6.us.3 = getelementptr inbounds [3 x i32], [3 x i32]* %d, i32 0, i32 1 + %arrayidx12.us.3 = getelementptr inbounds [3 x i8], [3 x i8]* %c, i32 0, i32 1 + br label %for.body.us + +for.cond: ; preds = %for.cond.backedge.3, %for.cond.preheader + br i1 %cmp43, label %for.body.lr.ph, label %for.cond.backedge + +for.body.lr.ph: ; preds = %for.cond + %0 = load i32, i32* %arrayidx6, align 4 + %tobool7.not = icmp eq i32 %0, 0 + br i1 %tobool7.not, label %land.end, label %land.rhs + +for.body.us: ; preds = %land.end.us.3, %for.body.us.preheader + %conv44.us = phi i32 [ 0, %for.body.us.preheader ], [ 1, %land.end.us.3 ] + %arrayidx6.us = getelementptr inbounds [3 x i32], [3 x i32]* %d, i32 0, i32 %conv44.us + %1 = load i32, i32* %arrayidx6.us, align 4 + %tobool7.not.us = icmp eq i32 %1, 0 + br i1 %tobool7.not.us, label %land.end.us, label %land.rhs.us + +land.rhs.us: ; preds = %for.body.us + %arrayidx12.us = getelementptr inbounds [3 x i8], [3 x i8]* %c, i32 0, i32 %conv44.us + %2 = load i8, i8* %arrayidx12.us, align 1 + %tobool13.us = zext i8 %2 to i32 + br label %land.end.us + +land.end.us: ; preds = %land.rhs.us, %for.body.us + %3 = phi i32 [ 0, %for.body.us ], [ %tobool13.us, %land.rhs.us ] + %scevgep45 = getelementptr [20 x [23 x [19 x i8]]], [20 x [23 x [19 x i8]]]* @arr_183, i32 0, i32 0, i32 %conv44.us, i32 %3 + %4 = sub nuw nsw i32 108, %3 + call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep45, i8 0, i32 %4, i1 false) + %5 = load i32, i32* %arrayidx6.us.1, align 4 + %tobool7.not.us.1 = icmp eq i32 %5, 0 + br i1 %tobool7.not.us.1, label %land.end.us.1, label %land.rhs.us.1 + +land.rhs: ; preds = %for.body.lr.ph + %6 = load i8, i8* %arrayidx12, align 1 + %tobool13 = zext i8 %6 to i32 + br label %land.end + +land.end: ; preds = %land.rhs, %for.body.lr.ph + %7 = phi i32 [ 0, %for.body.lr.ph ], [ %tobool13, %land.rhs ] + %scevgep = getelementptr [20 x [23 x [19 x i8]]], [20 x [23 x [19 x i8]]]* @arr_183, i32 0, i32 0, i32 1, i32 %7 + %8 = sub nuw nsw i32 108, %7 + call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep, i8 0, i32 %8, i1 false) + br label %for.cond.backedge + +for.cond.backedge: ; preds = %land.end, %for.cond + br i1 %cmp43.1, label %for.body.lr.ph.1, label %for.cond.backedge.1 + +for.body.lr.ph.1: ; preds = %for.cond.backedge + %9 = load i32, i32* %arrayidx6.1, align 4 + %tobool7.not.1 = icmp eq i32 %9, 0 + br i1 %tobool7.not.1, label %land.end.1, label %land.rhs.1 + +land.rhs.1: ; preds = %for.body.lr.ph.1 + %10 = load i8, i8* %arrayidx12.1, align 1 + %tobool13.1 = zext i8 %10 to i32 + br label %land.end.1 + +land.end.1: ; preds = %land.rhs.1, %for.body.lr.ph.1 + %11 = phi i32 [ 0, %for.body.lr.ph.1 ], [ %tobool13.1, %land.rhs.1 ] + %scevgep.1 = getelementptr [20 x [23 x [19 x i8]]], [20 x [23 x [19 x i8]]]* @arr_183, i32 0, i32 0, i32 1, i32 %11 + %12 = sub nuw nsw i32 108, %11 + call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep.1, i8 0, i32 %12, i1 false) + br label %for.cond.backedge.1 + +for.cond.backedge.1: ; preds = %land.end.1, %for.cond.backedge + br i1 %cmp43.2, label %for.body.lr.ph.2, label %for.cond.backedge.2 + +for.body.lr.ph.2: ; preds = %for.cond.backedge.1 + %13 = load i32, i32* %arrayidx6.2, align 4 + %tobool7.not.2 = icmp eq i32 %13, 0 + br i1 %tobool7.not.2, label %land.end.2, label %land.rhs.2 + +land.rhs.2: ; preds = %for.body.lr.ph.2 + %14 = load i8, i8* %arrayidx12.2, align 1 + %tobool13.2 = zext i8 %14 to i32 + br label %land.end.2 + +land.end.2: ; preds = %land.rhs.2, %for.body.lr.ph.2 + %15 = phi i32 [ 0, %for.body.lr.ph.2 ], [ %tobool13.2, %land.rhs.2 ] + %scevgep.2 = getelementptr [20 x [23 x [19 x i8]]], [20 x [23 x [19 x i8]]]* @arr_183, i32 0, i32 0, i32 1, i32 %15 + %16 = sub nuw nsw i32 108, %15 + call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep.2, i8 0, i32 %16, i1 false) + br label %for.cond.backedge.2 + +for.cond.backedge.2: ; preds = %land.end.2, %for.cond.backedge.1 + br i1 %cmp43.3, label %for.body.lr.ph.3, label %for.cond.backedge.3 + +for.body.lr.ph.3: ; preds = %for.cond.backedge.2 + %17 = load i32, i32* %arrayidx6.3, align 4 + %tobool7.not.3 = icmp eq i32 %17, 0 + br i1 %tobool7.not.3, label %land.end.3, label %land.rhs.3 + +land.rhs.3: ; preds = %for.body.lr.ph.3 + %18 = load i8, i8* %arrayidx12.3, align 1 + %tobool13.3 = zext i8 %18 to i32 + br label %land.end.3 + +land.end.3: ; preds = %land.rhs.3, %for.body.lr.ph.3 + %19 = phi i32 [ 0, %for.body.lr.ph.3 ], [ %tobool13.3, %land.rhs.3 ] + %scevgep.3 = getelementptr [20 x [23 x [19 x i8]]], [20 x [23 x [19 x i8]]]* @arr_183, i32 0, i32 0, i32 1, i32 %19 + %20 = sub nuw nsw i32 108, %19 + call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep.3, i8 0, i32 %20, i1 false) + br label %for.cond.backedge.3 + +for.cond.backedge.3: ; preds = %land.end.3, %for.cond.backedge.2 + br label %for.cond + +land.rhs.us.1: ; preds = %land.end.us + %21 = load i8, i8* %arrayidx12.us.1, align 1 + %tobool13.us.1 = zext i8 %21 to i32 + br label %land.end.us.1 + +land.end.us.1: ; preds = %land.rhs.us.1, %land.end.us + %22 = phi i32 [ 0, %land.end.us ], [ %tobool13.us.1, %land.rhs.us.1 ] + %scevgep45.1 = getelementptr [20 x [23 x [19 x i8]]], [20 x [23 x [19 x i8]]]* @arr_183, i32 0, i32 0, i32 1, i32 %22 + %23 = sub nuw nsw i32 108, %22 + call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep45.1, i8 0, i32 %23, i1 false) + %24 = load i32, i32* %arrayidx6.us.2, align 4 + %tobool7.not.us.2 = icmp eq i32 %24, 0 + br i1 %tobool7.not.us.2, label %land.end.us.2, label %land.rhs.us.2 + +land.rhs.us.2: ; preds = %land.end.us.1 + %25 = load i8, i8* %arrayidx12.us.2, align 1 + %tobool13.us.2 = zext i8 %25 to i32 + br label %land.end.us.2 + +land.end.us.2: ; preds = %land.rhs.us.2, %land.end.us.1 + %26 = phi i32 [ 0, %land.end.us.1 ], [ %tobool13.us.2, %land.rhs.us.2 ] + %scevgep45.2 = getelementptr [20 x [23 x [19 x i8]]], [20 x [23 x [19 x i8]]]* @arr_183, i32 0, i32 0, i32 1, i32 %26 + %27 = sub nuw nsw i32 108, %26 + call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep45.2, i8 0, i32 %27, i1 false) + %28 = load i32, i32* %arrayidx6.us.3, align 4 + %tobool7.not.us.3 = icmp eq i32 %28, 0 + br i1 %tobool7.not.us.3, label %land.end.us.3, label %land.rhs.us.3 + +land.rhs.us.3: ; preds = %land.end.us.2 + %29 = load i8, i8* %arrayidx12.us.3, align 1 + %tobool13.us.3 = zext i8 %29 to i32 + br label %land.end.us.3 + +land.end.us.3: ; preds = %land.rhs.us.3, %land.end.us.2 + %30 = phi i32 [ 0, %land.end.us.2 ], [ %tobool13.us.3, %land.rhs.us.3 ] + %scevgep45.3 = getelementptr [20 x [23 x [19 x i8]]], [20 x [23 x [19 x i8]]]* @arr_183, i32 0, i32 0, i32 1, i32 %30 + %31 = sub nuw nsw i32 108, %30 + call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep45.3, i8 0, i32 %31, i1 false) + br label %for.body.us +} + +declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i1) diff --git a/test/CodeGen/Thumb2/mve-memtp-loop.ll b/test/CodeGen/Thumb2/mve-memtp-loop.ll index 493f2e683de..c4ba54ea680 100644 --- a/test/CodeGen/Thumb2/mve-memtp-loop.ll +++ b/test/CodeGen/Thumb2/mve-memtp-loop.ll @@ -240,6 +240,7 @@ define void @test11(i8* nocapture %x, i8* nocapture %y, i32 %n) { ; CHECK-NEXT: mov r3, r2 ; CHECK-NEXT: subs.w lr, lr, #0 ; CHECK-NEXT: beq .LBB10_3 +; CHECK-NEXT: b .LBB10_2 ; CHECK-NEXT: .LBB10_2: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.8 r3 ; CHECK-NEXT: subs r3, #16