1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00
llvm-mirror/test/CodeGen/Thumb2/mve-memtp-branch.ll
David Green 35e013cb3d [ARM] Allow findLoopPreheader to return headers with multiple loop successors
The findLoopPreheader function will currently not find a preheader if it
branches to multiple different loop headers. This patch adds an option
to relax that, allowing ARMLowOverheadLoops to process more loops
successfully. This helps with WhileLoopStart setup instructions that can
either branch or fall through to the low-overhead loop and can also branch
to a separate loop from the same preheader (though I don't believe it is
possible for both loops to be low-overhead loops).

Differential Revision: https://reviews.llvm.org/D102747
2021-05-24 12:22:15 +01:00

368 lines
16 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -arm-memtransfer-tploop=force-enabled %s -o - | FileCheck %s
; In this test, the successors of various blocks were becoming invalid after
; if-conversion (ifcvt) because the blocks did not properly fall through to
; their successor after a WhileLoopStart.
; External 20 x 23 x 19 byte array; every llvm.memset call in @a writes into it.
@arr_183 = external dso_local local_unnamed_addr global [20 x [23 x [19 x i8]]], align 1
; Reduced reproducer. @a contains no `ret`: %entry selects one of two loops
; and neither loop has an exit edge. Each loop consists of four near-identical
; steps: load an i32 through %d, optionally load an i8 through %c, then zero
; (108 - n) bytes of @arr_183 with llvm.memset, where n is 0 or the loaded byte.
; The assertions below are autogenerated and tied to the exact shape of this
; IR, so regenerate them with the update script rather than editing by hand.
define i32 @a(i8 zeroext %b, [3 x i8]* nocapture readonly %c, [3 x i32]* nocapture readonly %d) {
; CHECK-LABEL: a:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: cmp r0, #1
; CHECK-NEXT: bls.w .LBB0_11
; CHECK-NEXT: @ %bb.1: @ %for.body.us.preheader
; CHECK-NEXT: movw r5, :lower16:arr_183
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: movt r5, :upper16:arr_183
; CHECK-NEXT: mov.w r12, #19
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: vmov.i32 q1, #0x0
; CHECK-NEXT: vmov.i32 q2, #0x0
; CHECK-NEXT: vmov.i32 q3, #0x0
; CHECK-NEXT: b .LBB0_3
; CHECK-NEXT: .LBB0_2: @ %land.end.us.3
; CHECK-NEXT: @ in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: .LBB0_3: @ %for.body.us
; CHECK-NEXT: @ =>This Loop Header: Depth=1
; CHECK-NEXT: @ Child Loop BB0_4 Depth 2
; CHECK-NEXT: @ Child Loop BB0_6 Depth 2
; CHECK-NEXT: @ Child Loop BB0_8 Depth 2
; CHECK-NEXT: @ Child Loop BB0_10 Depth 2
; CHECK-NEXT: ldr.w r0, [r2, r3, lsl #2]
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: ite ne
; CHECK-NEXT: ldrbne r0, [r1, r3]
; CHECK-NEXT: moveq r0, #0
; CHECK-NEXT: mla r3, r3, r12, r5
; CHECK-NEXT: add r3, r0
; CHECK-NEXT: rsb.w r0, r0, #108
; CHECK-NEXT: wlstp.8 lr, r0, .LBB0_5
; CHECK-NEXT: .LBB0_4: @ Parent Loop BB0_3 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
; CHECK-NEXT: vstrb.8 q0, [r3], #16
; CHECK-NEXT: letp lr, .LBB0_4
; CHECK-NEXT: .LBB0_5: @ %land.end.us
; CHECK-NEXT: @ in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: ldr r0, [r2, #4]
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: ite ne
; CHECK-NEXT: ldrbne r0, [r1, #1]
; CHECK-NEXT: moveq r0, #0
; CHECK-NEXT: adds r3, r5, r0
; CHECK-NEXT: rsb.w r0, r0, #108
; CHECK-NEXT: adds r3, #19
; CHECK-NEXT: wlstp.8 lr, r0, .LBB0_7
; CHECK-NEXT: .LBB0_6: @ Parent Loop BB0_3 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
; CHECK-NEXT: vstrb.8 q1, [r3], #16
; CHECK-NEXT: letp lr, .LBB0_6
; CHECK-NEXT: .LBB0_7: @ %land.end.us.1
; CHECK-NEXT: @ in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: ldr r0, [r2, #4]
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: ite ne
; CHECK-NEXT: ldrbne r0, [r1, #1]
; CHECK-NEXT: moveq r0, #0
; CHECK-NEXT: adds r3, r5, r0
; CHECK-NEXT: rsb.w r0, r0, #108
; CHECK-NEXT: adds r3, #19
; CHECK-NEXT: wlstp.8 lr, r0, .LBB0_9
; CHECK-NEXT: .LBB0_8: @ Parent Loop BB0_3 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
; CHECK-NEXT: vstrb.8 q2, [r3], #16
; CHECK-NEXT: letp lr, .LBB0_8
; CHECK-NEXT: .LBB0_9: @ %land.end.us.2
; CHECK-NEXT: @ in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: ldr r0, [r2, #4]
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: ite ne
; CHECK-NEXT: ldrbne r0, [r1, #1]
; CHECK-NEXT: moveq r0, #0
; CHECK-NEXT: adds r3, r5, r0
; CHECK-NEXT: rsb.w r0, r0, #108
; CHECK-NEXT: add.w r4, r0, #15
; CHECK-NEXT: adds r3, #19
; CHECK-NEXT: lsrs r4, r4, #4
; CHECK-NEXT: subs.w lr, r4, #0
; CHECK-NEXT: beq .LBB0_2
; CHECK-NEXT: b .LBB0_10
; CHECK-NEXT: .LBB0_10: @ Parent Loop BB0_3 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
; CHECK-NEXT: vctp.8 r0
; CHECK-NEXT: subs r0, #16
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrbt.8 q3, [r3], #16
; CHECK-NEXT: subs.w lr, lr, #1
; CHECK-NEXT: bne .LBB0_10
; CHECK-NEXT: b .LBB0_2
; CHECK-NEXT: .LBB0_11:
; CHECK-NEXT: movw r12, :lower16:arr_183
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: movt r12, :upper16:arr_183
; CHECK-NEXT: vmov.i32 q1, #0x0
; CHECK-NEXT: vmov.i32 q2, #0x0
; CHECK-NEXT: vmov.i32 q3, #0x0
; CHECK-NEXT: b .LBB0_13
; CHECK-NEXT: .LBB0_12: @ %for.body.lr.ph.3
; CHECK-NEXT: @ in Loop: Header=BB0_13 Depth=1
; CHECK-NEXT: ldr r3, [r2, #4]
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: ite ne
; CHECK-NEXT: ldrbne r3, [r1, #1]
; CHECK-NEXT: moveq r3, #0
; CHECK-NEXT: add.w r5, r12, r3
; CHECK-NEXT: rsb.w r3, r3, #108
; CHECK-NEXT: add.w r4, r5, #19
; CHECK-NEXT: wlstp.8 lr, r3, .LBB0_13
; CHECK-NEXT: b .LBB0_23
; CHECK-NEXT: .LBB0_13: @ %for.cond
; CHECK-NEXT: @ =>This Loop Header: Depth=1
; CHECK-NEXT: @ Child Loop BB0_15 Depth 2
; CHECK-NEXT: @ Child Loop BB0_18 Depth 2
; CHECK-NEXT: @ Child Loop BB0_21 Depth 2
; CHECK-NEXT: @ Child Loop BB0_23 Depth 2
; CHECK-NEXT: cmp r0, #2
; CHECK-NEXT: blo .LBB0_16
; CHECK-NEXT: @ %bb.14: @ %for.body.lr.ph
; CHECK-NEXT: @ in Loop: Header=BB0_13 Depth=1
; CHECK-NEXT: ldr r3, [r2, #4]
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: ite ne
; CHECK-NEXT: ldrbne r3, [r1, #1]
; CHECK-NEXT: moveq r3, #0
; CHECK-NEXT: add.w r5, r12, r3
; CHECK-NEXT: rsb.w r3, r3, #108
; CHECK-NEXT: add.w r4, r5, #19
; CHECK-NEXT: wlstp.8 lr, r3, .LBB0_16
; CHECK-NEXT: .LBB0_15: @ Parent Loop BB0_13 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
; CHECK-NEXT: vstrb.8 q0, [r4], #16
; CHECK-NEXT: letp lr, .LBB0_15
; CHECK-NEXT: .LBB0_16: @ %for.cond.backedge
; CHECK-NEXT: @ in Loop: Header=BB0_13 Depth=1
; CHECK-NEXT: cmp r0, #2
; CHECK-NEXT: blo .LBB0_19
; CHECK-NEXT: @ %bb.17: @ %for.body.lr.ph.1
; CHECK-NEXT: @ in Loop: Header=BB0_13 Depth=1
; CHECK-NEXT: ldr r3, [r2, #4]
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: ite ne
; CHECK-NEXT: ldrbne r3, [r1, #1]
; CHECK-NEXT: moveq r3, #0
; CHECK-NEXT: add.w r5, r12, r3
; CHECK-NEXT: rsb.w r3, r3, #108
; CHECK-NEXT: add.w r4, r5, #19
; CHECK-NEXT: wlstp.8 lr, r3, .LBB0_19
; CHECK-NEXT: .LBB0_18: @ Parent Loop BB0_13 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
; CHECK-NEXT: vstrb.8 q1, [r4], #16
; CHECK-NEXT: letp lr, .LBB0_18
; CHECK-NEXT: .LBB0_19: @ %for.cond.backedge.1
; CHECK-NEXT: @ in Loop: Header=BB0_13 Depth=1
; CHECK-NEXT: cmp r0, #2
; CHECK-NEXT: blo .LBB0_22
; CHECK-NEXT: @ %bb.20: @ %for.body.lr.ph.2
; CHECK-NEXT: @ in Loop: Header=BB0_13 Depth=1
; CHECK-NEXT: ldr r3, [r2, #4]
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: ite ne
; CHECK-NEXT: ldrbne r3, [r1, #1]
; CHECK-NEXT: moveq r3, #0
; CHECK-NEXT: add.w r5, r12, r3
; CHECK-NEXT: rsb.w r3, r3, #108
; CHECK-NEXT: add.w r4, r5, #19
; CHECK-NEXT: wlstp.8 lr, r3, .LBB0_22
; CHECK-NEXT: .LBB0_21: @ Parent Loop BB0_13 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
; CHECK-NEXT: vstrb.8 q2, [r4], #16
; CHECK-NEXT: letp lr, .LBB0_21
; CHECK-NEXT: .LBB0_22: @ %for.cond.backedge.2
; CHECK-NEXT: @ in Loop: Header=BB0_13 Depth=1
; CHECK-NEXT: cmp r0, #2
; CHECK-NEXT: blo .LBB0_13
; CHECK-NEXT: b .LBB0_12
; CHECK-NEXT: .LBB0_23: @ Parent Loop BB0_13 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
; CHECK-NEXT: vstrb.8 q3, [r4], #16
; CHECK-NEXT: letp lr, .LBB0_23
; CHECK-NEXT: b .LBB0_13
; Dispatch on %b > 1 (unsigned): true enters the %for.body.us loop, false
; enters the %for.cond loop.
entry:
%cmp = icmp ugt i8 %b, 1
br i1 %cmp, label %for.body.us.preheader, label %for.cond.preheader
; The four %cmp43* values repeat the %entry comparison, and every
; %arrayidx6* / %arrayidx12* value is the same index-1 element pointer into
; %d / %c respectively. The duplication is part of the reduced test input.
for.cond.preheader: ; preds = %entry
%cmp43 = icmp ugt i8 %b, 1
%arrayidx6 = getelementptr inbounds [3 x i32], [3 x i32]* %d, i32 0, i32 1
%arrayidx12 = getelementptr inbounds [3 x i8], [3 x i8]* %c, i32 0, i32 1
%cmp43.1 = icmp ugt i8 %b, 1
%arrayidx6.1 = getelementptr inbounds [3 x i32], [3 x i32]* %d, i32 0, i32 1
%arrayidx12.1 = getelementptr inbounds [3 x i8], [3 x i8]* %c, i32 0, i32 1
%cmp43.2 = icmp ugt i8 %b, 1
%arrayidx6.2 = getelementptr inbounds [3 x i32], [3 x i32]* %d, i32 0, i32 1
%arrayidx12.2 = getelementptr inbounds [3 x i8], [3 x i8]* %c, i32 0, i32 1
%cmp43.3 = icmp ugt i8 %b, 1
%arrayidx6.3 = getelementptr inbounds [3 x i32], [3 x i32]* %d, i32 0, i32 1
%arrayidx12.3 = getelementptr inbounds [3 x i8], [3 x i8]* %c, i32 0, i32 1
br label %for.cond
; Same index-1 element pointers as above, used by steps .1 to .3 of the
; %for.body.us loop.
for.body.us.preheader: ; preds = %entry
%arrayidx6.us.1 = getelementptr inbounds [3 x i32], [3 x i32]* %d, i32 0, i32 1
%arrayidx12.us.1 = getelementptr inbounds [3 x i8], [3 x i8]* %c, i32 0, i32 1
%arrayidx6.us.2 = getelementptr inbounds [3 x i32], [3 x i32]* %d, i32 0, i32 1
%arrayidx12.us.2 = getelementptr inbounds [3 x i8], [3 x i8]* %c, i32 0, i32 1
%arrayidx6.us.3 = getelementptr inbounds [3 x i32], [3 x i32]* %d, i32 0, i32 1
%arrayidx12.us.3 = getelementptr inbounds [3 x i8], [3 x i8]* %c, i32 0, i32 1
br label %for.body.us
; Header of the %for.cond loop. Each of its four steps is guarded by the
; matching %cmp43* flag and is skipped when that flag is false.
for.cond: ; preds = %for.cond.backedge.3, %for.cond.preheader
br i1 %cmp43, label %for.body.lr.ph, label %for.cond.backedge
for.body.lr.ph: ; preds = %for.cond
%0 = load i32, i32* %arrayidx6, align 4
%tobool7.not = icmp eq i32 %0, 0
br i1 %tobool7.not, label %land.end, label %land.rhs
; Header of the %for.body.us loop. %conv44.us is 0 on entry from the
; preheader and 1 on every iteration that arrives via %land.end.us.3.
for.body.us: ; preds = %land.end.us.3, %for.body.us.preheader
%conv44.us = phi i32 [ 0, %for.body.us.preheader ], [ 1, %land.end.us.3 ]
%arrayidx6.us = getelementptr inbounds [3 x i32], [3 x i32]* %d, i32 0, i32 %conv44.us
%1 = load i32, i32* %arrayidx6.us, align 4
%tobool7.not.us = icmp eq i32 %1, 0
br i1 %tobool7.not.us, label %land.end.us, label %land.rhs.us
land.rhs.us: ; preds = %for.body.us
%arrayidx12.us = getelementptr inbounds [3 x i8], [3 x i8]* %c, i32 0, i32 %conv44.us
%2 = load i8, i8* %arrayidx12.us, align 1
%tobool13.us = zext i8 %2 to i32
br label %land.end.us
; %3 is 0 when the i32 loaded through %d was zero, otherwise the zero-extended
; byte loaded through %c. The memset clears (108 - %3) bytes starting at
; arr_183[0][0][%conv44.us][%3]; the later steps use a fixed row index of 1.
land.end.us: ; preds = %land.rhs.us, %for.body.us
%3 = phi i32 [ 0, %for.body.us ], [ %tobool13.us, %land.rhs.us ]
%scevgep45 = getelementptr [20 x [23 x [19 x i8]]], [20 x [23 x [19 x i8]]]* @arr_183, i32 0, i32 0, i32 %conv44.us, i32 %3
%4 = sub nuw nsw i32 108, %3
call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep45, i8 0, i32 %4, i1 false)
%5 = load i32, i32* %arrayidx6.us.1, align 4
%tobool7.not.us.1 = icmp eq i32 %5, 0
br i1 %tobool7.not.us.1, label %land.end.us.1, label %land.rhs.us.1
land.rhs: ; preds = %for.body.lr.ph
%6 = load i8, i8* %arrayidx12, align 1
%tobool13 = zext i8 %6 to i32
br label %land.end
; Step 0 of the %for.cond loop: clear (108 - %7) bytes at arr_183[0][0][1][%7].
; Steps .1, .2 and .3 below repeat this shape with their own value names.
land.end: ; preds = %land.rhs, %for.body.lr.ph
%7 = phi i32 [ 0, %for.body.lr.ph ], [ %tobool13, %land.rhs ]
%scevgep = getelementptr [20 x [23 x [19 x i8]]], [20 x [23 x [19 x i8]]]* @arr_183, i32 0, i32 0, i32 1, i32 %7
%8 = sub nuw nsw i32 108, %7
call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep, i8 0, i32 %8, i1 false)
br label %for.cond.backedge
for.cond.backedge: ; preds = %land.end, %for.cond
br i1 %cmp43.1, label %for.body.lr.ph.1, label %for.cond.backedge.1
for.body.lr.ph.1: ; preds = %for.cond.backedge
%9 = load i32, i32* %arrayidx6.1, align 4
%tobool7.not.1 = icmp eq i32 %9, 0
br i1 %tobool7.not.1, label %land.end.1, label %land.rhs.1
land.rhs.1: ; preds = %for.body.lr.ph.1
%10 = load i8, i8* %arrayidx12.1, align 1
%tobool13.1 = zext i8 %10 to i32
br label %land.end.1
land.end.1: ; preds = %land.rhs.1, %for.body.lr.ph.1
%11 = phi i32 [ 0, %for.body.lr.ph.1 ], [ %tobool13.1, %land.rhs.1 ]
%scevgep.1 = getelementptr [20 x [23 x [19 x i8]]], [20 x [23 x [19 x i8]]]* @arr_183, i32 0, i32 0, i32 1, i32 %11
%12 = sub nuw nsw i32 108, %11
call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep.1, i8 0, i32 %12, i1 false)
br label %for.cond.backedge.1
for.cond.backedge.1: ; preds = %land.end.1, %for.cond.backedge
br i1 %cmp43.2, label %for.body.lr.ph.2, label %for.cond.backedge.2
for.body.lr.ph.2: ; preds = %for.cond.backedge.1
%13 = load i32, i32* %arrayidx6.2, align 4
%tobool7.not.2 = icmp eq i32 %13, 0
br i1 %tobool7.not.2, label %land.end.2, label %land.rhs.2
land.rhs.2: ; preds = %for.body.lr.ph.2
%14 = load i8, i8* %arrayidx12.2, align 1
%tobool13.2 = zext i8 %14 to i32
br label %land.end.2
land.end.2: ; preds = %land.rhs.2, %for.body.lr.ph.2
%15 = phi i32 [ 0, %for.body.lr.ph.2 ], [ %tobool13.2, %land.rhs.2 ]
%scevgep.2 = getelementptr [20 x [23 x [19 x i8]]], [20 x [23 x [19 x i8]]]* @arr_183, i32 0, i32 0, i32 1, i32 %15
%16 = sub nuw nsw i32 108, %15
call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep.2, i8 0, i32 %16, i1 false)
br label %for.cond.backedge.2
for.cond.backedge.2: ; preds = %land.end.2, %for.cond.backedge.1
br i1 %cmp43.3, label %for.body.lr.ph.3, label %for.cond.backedge.3
for.body.lr.ph.3: ; preds = %for.cond.backedge.2
%17 = load i32, i32* %arrayidx6.3, align 4
%tobool7.not.3 = icmp eq i32 %17, 0
br i1 %tobool7.not.3, label %land.end.3, label %land.rhs.3
land.rhs.3: ; preds = %for.body.lr.ph.3
%18 = load i8, i8* %arrayidx12.3, align 1
%tobool13.3 = zext i8 %18 to i32
br label %land.end.3
land.end.3: ; preds = %land.rhs.3, %for.body.lr.ph.3
%19 = phi i32 [ 0, %for.body.lr.ph.3 ], [ %tobool13.3, %land.rhs.3 ]
%scevgep.3 = getelementptr [20 x [23 x [19 x i8]]], [20 x [23 x [19 x i8]]]* @arr_183, i32 0, i32 0, i32 1, i32 %19
%20 = sub nuw nsw i32 108, %19
call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep.3, i8 0, i32 %20, i1 false)
br label %for.cond.backedge.3
; Unconditional back edge to %for.cond; this loop has no exit.
for.cond.backedge.3: ; preds = %land.end.3, %for.cond.backedge.2
br label %for.cond
land.rhs.us.1: ; preds = %land.end.us
%21 = load i8, i8* %arrayidx12.us.1, align 1
%tobool13.us.1 = zext i8 %21 to i32
br label %land.end.us.1
land.end.us.1: ; preds = %land.rhs.us.1, %land.end.us
%22 = phi i32 [ 0, %land.end.us ], [ %tobool13.us.1, %land.rhs.us.1 ]
%scevgep45.1 = getelementptr [20 x [23 x [19 x i8]]], [20 x [23 x [19 x i8]]]* @arr_183, i32 0, i32 0, i32 1, i32 %22
%23 = sub nuw nsw i32 108, %22
call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep45.1, i8 0, i32 %23, i1 false)
%24 = load i32, i32* %arrayidx6.us.2, align 4
%tobool7.not.us.2 = icmp eq i32 %24, 0
br i1 %tobool7.not.us.2, label %land.end.us.2, label %land.rhs.us.2
land.rhs.us.2: ; preds = %land.end.us.1
%25 = load i8, i8* %arrayidx12.us.2, align 1
%tobool13.us.2 = zext i8 %25 to i32
br label %land.end.us.2
land.end.us.2: ; preds = %land.rhs.us.2, %land.end.us.1
%26 = phi i32 [ 0, %land.end.us.1 ], [ %tobool13.us.2, %land.rhs.us.2 ]
%scevgep45.2 = getelementptr [20 x [23 x [19 x i8]]], [20 x [23 x [19 x i8]]]* @arr_183, i32 0, i32 0, i32 1, i32 %26
%27 = sub nuw nsw i32 108, %26
call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep45.2, i8 0, i32 %27, i1 false)
%28 = load i32, i32* %arrayidx6.us.3, align 4
%tobool7.not.us.3 = icmp eq i32 %28, 0
br i1 %tobool7.not.us.3, label %land.end.us.3, label %land.rhs.us.3
land.rhs.us.3: ; preds = %land.end.us.2
%29 = load i8, i8* %arrayidx12.us.3, align 1
%tobool13.us.3 = zext i8 %29 to i32
br label %land.end.us.3
; Last step of the %for.body.us loop; the final branch is an unconditional
; back edge to the header, so this loop has no exit either.
land.end.us.3: ; preds = %land.rhs.us.3, %land.end.us.2
%30 = phi i32 [ 0, %land.end.us.2 ], [ %tobool13.us.3, %land.rhs.us.3 ]
%scevgep45.3 = getelementptr [20 x [23 x [19 x i8]]], [20 x [23 x [19 x i8]]]* @arr_183, i32 0, i32 0, i32 1, i32 %30
%31 = sub nuw nsw i32 108, %30
call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep45.3, i8 0, i32 %31, i1 false)
br label %for.body.us
}
; memset intrinsic: (destination pointer, fill byte, i32 byte count, is-volatile).
; Every call in @a passes a fill byte of 0 and is-volatile = false.
declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i1)