mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
Update LSR's logic that identifies a post-increment SCEV value.
One of the checks has been removed as it seems invalid. The LoopStep size is almost always 32 bits. Differential Revision: https://reviews.llvm.org/D75079
This commit is contained in:
parent
d3dc81a22a
commit
77cbec7e52
@ -3530,9 +3530,6 @@ static bool mayUsePostIncMode(const TargetTransformInfo &TTI,
|
||||
const SCEV *LoopStep = AR->getStepRecurrence(SE);
|
||||
if (!isa<SCEVConstant>(LoopStep))
|
||||
return false;
|
||||
if (LU.AccessTy.getType()->getScalarSizeInBits() !=
|
||||
LoopStep->getType()->getScalarSizeInBits())
|
||||
return false;
|
||||
// Check if a post-indexed load/store can be used.
|
||||
if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, AR->getType()) ||
|
||||
TTI.isIndexedStoreLegal(TTI.MIM_PostInc, AR->getType())) {
|
||||
|
@ -1,7 +1,7 @@
|
||||
; RUN: llc -march=hexagon < %s | FileCheck %s
|
||||
|
||||
; CHECK: [[REG0:(r[0-9]+)]] = add(r29
|
||||
; CHECK: [[REG1:(r[0-9]+)]] = add([[REG0]],#4)
|
||||
; CHECK: [[REG1:(r[0-9]+)]] = add([[REG0]],#8)
|
||||
; CHECK-DAG: memd([[REG1]]+#8) =
|
||||
; CHECK-DAG: memd([[REG1]]+#0) =
|
||||
|
||||
|
50
test/CodeGen/Hexagon/lsr-postinc-nested-loop.ll
Normal file
50
test/CodeGen/Hexagon/lsr-postinc-nested-loop.ll
Normal file
@ -0,0 +1,50 @@
|
||||
; RUN: llc -O3 -march=hexagon < %s | FileCheck %s
|
||||
; Test to ensure LSR does not optimize out the addrec of the outer loop.
|
||||
; This will help to generate post-increment instructions, otherwise
|
||||
; it ends up as an extra reg+reg add inside the loop.
|
||||
; CHECK: loop0(.LBB0_[[LOOP:.]],
|
||||
; CHECK: .LBB0_[[LOOP]]:
|
||||
; CHECK: memuh{{.*}}++
|
||||
; CHECK: endloop
|
||||
|
||||
|
||||
define dso_local signext i16 @foo(i16* nocapture readonly %filt, i16* nocapture readonly %inp, i32 %c1, i32 %c2) local_unnamed_addr {
|
||||
entry:
|
||||
%cmp28 = icmp sgt i32 %c1, 0
|
||||
%cmp221 = icmp sgt i32 %c2, 0
|
||||
%or.cond = and i1 %cmp28, %cmp221
|
||||
br i1 %or.cond, label %for.cond1.preheader.us, label %for.cond.cleanup
|
||||
|
||||
for.cond1.preheader.us: ; preds = %entry, %for.cond1.for.cond.cleanup3_crit_edge.us
|
||||
%filt.addr.032.us = phi i16* [ %scevgep, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %filt, %entry ]
|
||||
%inp.addr.031.us = phi i16* [ %scevgep35, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %inp, %entry ]
|
||||
%l.030.us = phi i32 [ %inc11.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ 0, %entry ]
|
||||
%sum0.029.us = phi i16 [ %add8.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ 0, %entry ]
|
||||
%scevgep = getelementptr i16, i16* %filt.addr.032.us, i32 %c2
|
||||
br label %for.body4.us
|
||||
|
||||
for.body4.us: ; preds = %for.body4.us, %for.cond1.preheader.us
|
||||
%z.025.us = phi i32 [ 0, %for.cond1.preheader.us ], [ %inc.us, %for.body4.us ]
|
||||
%filt.addr.124.us = phi i16* [ %filt.addr.032.us, %for.cond1.preheader.us ], [ %incdec.ptr.us, %for.body4.us ]
|
||||
%inp.addr.123.us = phi i16* [ %inp.addr.031.us, %for.cond1.preheader.us ], [ %incdec.ptr5.us, %for.body4.us ]
|
||||
%sum0.122.us = phi i16 [ %sum0.029.us, %for.cond1.preheader.us ], [ %add8.us, %for.body4.us ]
|
||||
%incdec.ptr.us = getelementptr inbounds i16, i16* %filt.addr.124.us, i32 1
|
||||
%0 = load i16, i16* %filt.addr.124.us, align 2
|
||||
%incdec.ptr5.us = getelementptr inbounds i16, i16* %inp.addr.123.us, i32 1
|
||||
%1 = load i16, i16* %inp.addr.123.us, align 2
|
||||
%add.us = add i16 %0, %sum0.122.us
|
||||
%add8.us = add i16 %add.us, %1
|
||||
%inc.us = add nuw nsw i32 %z.025.us, 1
|
||||
%exitcond = icmp eq i32 %inc.us, %c2
|
||||
br i1 %exitcond, label %for.cond1.for.cond.cleanup3_crit_edge.us, label %for.body4.us
|
||||
|
||||
for.cond1.for.cond.cleanup3_crit_edge.us: ; preds = %for.body4.us
|
||||
%scevgep35 = getelementptr i16, i16* %inp.addr.031.us, i32 %c2
|
||||
%inc11.us = add nuw nsw i32 %l.030.us, 1
|
||||
%exitcond36 = icmp eq i32 %inc11.us, %c1
|
||||
br i1 %exitcond36, label %for.cond.cleanup, label %for.cond1.preheader.us
|
||||
|
||||
for.cond.cleanup: ; preds = %for.cond1.for.cond.cleanup3_crit_edge.us, %entry
|
||||
%sum0.0.lcssa = phi i16 [ 0, %entry ], [ %add8.us, %for.cond1.for.cond.cleanup3_crit_edge.us ]
|
||||
ret i16 %sum0.0.lcssa
|
||||
}
|
@ -1778,11 +1778,11 @@ for.body: ; preds = %for.body, %for.body
|
||||
define arm_aapcs_vfpcc float @half_short_mac(half* nocapture readonly %a, i16* nocapture readonly %b, i32 %N) {
|
||||
; CHECK-LABEL: half_short_mac:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NEXT: cbz r2, .LBB11_3
|
||||
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
|
||||
; CHECK-NEXT: subs r3, r2, #1
|
||||
; CHECK-NEXT: and r7, r2, #3
|
||||
; CHECK-NEXT: and r6, r2, #3
|
||||
; CHECK-NEXT: cmp r3, #3
|
||||
; CHECK-NEXT: bhs .LBB11_4
|
||||
; CHECK-NEXT: @ %bb.2:
|
||||
@ -1799,33 +1799,33 @@ define arm_aapcs_vfpcc float @half_short_mac(half* nocapture readonly %a, i16* n
|
||||
; CHECK-NEXT: vldr s0, .LCPI11_0
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: add.w lr, r3, r2, lsr #2
|
||||
; CHECK-NEXT: movs r3, #0
|
||||
; CHECK-NEXT: adds r3, r1, #4
|
||||
; CHECK-NEXT: adds r2, r0, #4
|
||||
; CHECK-NEXT: dls lr, lr
|
||||
; CHECK-NEXT: .LBB11_5: @ %for.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: adds r2, r1, r3
|
||||
; CHECK-NEXT: adds r6, r0, r3
|
||||
; CHECK-NEXT: vldr.16 s2, [r6, #6]
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: ldrsh.w r4, [r3, #2]
|
||||
; CHECK-NEXT: vldr.16 s2, [r2, #2]
|
||||
; CHECK-NEXT: add.w r12, r12, #4
|
||||
; CHECK-NEXT: ldrsh.w r4, [r2, #2]
|
||||
; CHECK-NEXT: ldrsh.w r5, [r2, #4]
|
||||
; CHECK-NEXT: ldrsh.w r2, [r2, #6]
|
||||
; CHECK-NEXT: vmov s8, r4
|
||||
; CHECK-NEXT: vmov s6, r5
|
||||
; CHECK-NEXT: vmov s4, r2
|
||||
; CHECK-NEXT: vmov s4, r4
|
||||
; CHECK-NEXT: vcvt.f16.s32 s4, s4
|
||||
; CHECK-NEXT: ldrsh.w r4, [r3]
|
||||
; CHECK-NEXT: vmul.f16 s2, s2, s4
|
||||
; CHECK-NEXT: vldr.16 s4, [r6, #4]
|
||||
; CHECK-NEXT: vldr.16 s4, [r2]
|
||||
; CHECK-NEXT: vmov s6, r4
|
||||
; CHECK-NEXT: vcvt.f16.s32 s6, s6
|
||||
; CHECK-NEXT: ldrsh r5, [r3, #-2]
|
||||
; CHECK-NEXT: ldrsh r4, [r3, #-4]
|
||||
; CHECK-NEXT: vmul.f16 s4, s4, s6
|
||||
; CHECK-NEXT: vldr.16 s6, [r6, #2]
|
||||
; CHECK-NEXT: vcvt.f16.s32 s8, s8
|
||||
; CHECK-NEXT: ldrsh r2, [r1, r3]
|
||||
; CHECK-NEXT: vmul.f16 s6, s6, s8
|
||||
; CHECK-NEXT: vldr.16 s8, [r6]
|
||||
; CHECK-NEXT: vldr.16 s6, [r2, #-2]
|
||||
; CHECK-NEXT: adds r3, #8
|
||||
; CHECK-NEXT: vmov s10, r2
|
||||
; CHECK-NEXT: vmov s8, r5
|
||||
; CHECK-NEXT: vcvt.f16.s32 s8, s8
|
||||
; CHECK-NEXT: vmov s10, r4
|
||||
; CHECK-NEXT: vmul.f16 s6, s6, s8
|
||||
; CHECK-NEXT: vldr.16 s8, [r2, #-4]
|
||||
; CHECK-NEXT: vcvt.f16.s32 s10, s10
|
||||
; CHECK-NEXT: adds r2, #8
|
||||
; CHECK-NEXT: vmul.f16 s8, s8, s10
|
||||
; CHECK-NEXT: vcvtb.f32.f16 s8, s8
|
||||
; CHECK-NEXT: vcvtb.f32.f16 s6, s6
|
||||
@ -1837,11 +1837,11 @@ define arm_aapcs_vfpcc float @half_short_mac(half* nocapture readonly %a, i16* n
|
||||
; CHECK-NEXT: vadd.f32 s0, s0, s2
|
||||
; CHECK-NEXT: le lr, .LBB11_5
|
||||
; CHECK-NEXT: .LBB11_6: @ %for.cond.cleanup.loopexit.unr-lcssa
|
||||
; CHECK-NEXT: wls lr, r7, .LBB11_9
|
||||
; CHECK-NEXT: wls lr, r6, .LBB11_9
|
||||
; CHECK-NEXT: @ %bb.7: @ %for.body.epil.preheader
|
||||
; CHECK-NEXT: add.w r0, r0, r12, lsl #1
|
||||
; CHECK-NEXT: add.w r1, r1, r12, lsl #1
|
||||
; CHECK-NEXT: mov lr, r7
|
||||
; CHECK-NEXT: mov lr, r6
|
||||
; CHECK-NEXT: .LBB11_8: @ %for.body.epil
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: ldrsh r2, [r1], #2
|
||||
@ -1854,7 +1854,7 @@ define arm_aapcs_vfpcc float @half_short_mac(half* nocapture readonly %a, i16* n
|
||||
; CHECK-NEXT: vadd.f32 s0, s0, s2
|
||||
; CHECK-NEXT: le lr, .LBB11_8
|
||||
; CHECK-NEXT: .LBB11_9: @ %for.cond.cleanup
|
||||
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
; CHECK-NEXT: .p2align 2
|
||||
; CHECK-NEXT: @ %bb.10:
|
||||
; CHECK-NEXT: .LCPI11_0:
|
||||
|
@ -372,29 +372,29 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_char(i8* nocapture readonly
|
||||
; CHECK-NEXT: cmp.w r12, #0
|
||||
; CHECK-NEXT: beq.w .LBB5_11
|
||||
; CHECK-NEXT: @ %bb.1: @ %for.body.lr.ph
|
||||
; CHECK-NEXT: add.w r4, r3, r12, lsl #2
|
||||
; CHECK-NEXT: add.w r5, r1, r12
|
||||
; CHECK-NEXT: cmp r4, r1
|
||||
; CHECK-NEXT: add.w r6, r0, r12
|
||||
; CHECK-NEXT: cset r7, hi
|
||||
; CHECK-NEXT: cmp r5, r3
|
||||
; CHECK-NEXT: cset r5, hi
|
||||
; CHECK-NEXT: cmp r4, r0
|
||||
; CHECK-NEXT: add.w r6, r3, r12, lsl #2
|
||||
; CHECK-NEXT: add.w r4, r1, r12
|
||||
; CHECK-NEXT: cmp r6, r1
|
||||
; CHECK-NEXT: add.w r5, r0, r12
|
||||
; CHECK-NEXT: cset lr, hi
|
||||
; CHECK-NEXT: cmp r4, r3
|
||||
; CHECK-NEXT: cset r4, hi
|
||||
; CHECK-NEXT: cmp r6, r3
|
||||
; CHECK-NEXT: cmp r6, r0
|
||||
; CHECK-NEXT: cset r6, hi
|
||||
; CHECK-NEXT: mov.w lr, #1
|
||||
; CHECK-NEXT: ands r6, r4
|
||||
; CHECK-NEXT: lsls r6, r6, #31
|
||||
; CHECK-NEXT: itt eq
|
||||
; CHECK-NEXT: andeq.w r4, r5, r7
|
||||
; CHECK-NEXT: lslseq.w r4, r4, #31
|
||||
; CHECK-NEXT: beq .LBB5_4
|
||||
; CHECK-NEXT: cmp r5, r3
|
||||
; CHECK-NEXT: cset r5, hi
|
||||
; CHECK-NEXT: ands r5, r6
|
||||
; CHECK-NEXT: movs r6, #1
|
||||
; CHECK-NEXT: lsls r5, r5, #31
|
||||
; CHECK-NEXT: itt eq
|
||||
; CHECK-NEXT: andeq.w r5, r4, lr
|
||||
; CHECK-NEXT: lslseq.w r5, r5, #31
|
||||
; CHECK-NEXT: beq .LBB5_4
|
||||
; CHECK-NEXT: @ %bb.2: @ %for.body.preheader
|
||||
; CHECK-NEXT: sub.w r4, r12, #1
|
||||
; CHECK-NEXT: and r9, r12, #3
|
||||
; CHECK-NEXT: cmp r4, #3
|
||||
; CHECK-NEXT: bhs .LBB5_6
|
||||
; CHECK-NEXT: sub.w r5, r12, #1
|
||||
; CHECK-NEXT: and r9, r12, #3
|
||||
; CHECK-NEXT: cmp r5, #3
|
||||
; CHECK-NEXT: bhs .LBB5_6
|
||||
; CHECK-NEXT: @ %bb.3:
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: b .LBB5_8
|
||||
@ -409,35 +409,37 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_char(i8* nocapture readonly
|
||||
; CHECK-NEXT: letp lr, .LBB5_5
|
||||
; CHECK-NEXT: b .LBB5_11
|
||||
; CHECK-NEXT: .LBB5_6: @ %for.body.preheader.new
|
||||
; CHECK-NEXT: bic r7, r12, #3
|
||||
; CHECK-NEXT: bic r5, r12, #3
|
||||
; CHECK-NEXT: add.w r4, r3, #8
|
||||
; CHECK-NEXT: subs r7, #4
|
||||
; CHECK-NEXT: subs r5, #4
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: add.w lr, lr, r7, lsr #2
|
||||
; CHECK-NEXT: add.w lr, r6, r5, lsr #2
|
||||
; CHECK-NEXT: adds r5, r0, #3
|
||||
; CHECK-NEXT: adds r6, r1, #1
|
||||
; CHECK-NEXT: dls lr, lr
|
||||
; CHECK-NEXT: .LBB5_7: @ %for.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: ldrb.w r5, [r0, r12]
|
||||
; CHECK-NEXT: add.w r7, r1, r12
|
||||
; CHECK-NEXT: ldrb.w r6, [r1, r12]
|
||||
; CHECK-NEXT: smlabb r5, r6, r5, r2
|
||||
; CHECK-NEXT: str r5, [r4, #-8]
|
||||
; CHECK-NEXT: add.w r5, r0, r12
|
||||
; CHECK-NEXT: ldrb r6, [r7, #1]
|
||||
; CHECK-NEXT: add.w r12, r12, #4
|
||||
; CHECK-NEXT: ldrb.w r8, [r5, #1]
|
||||
; CHECK-NEXT: smlabb r6, r6, r8, r2
|
||||
; CHECK-NEXT: str r6, [r4, #-4]
|
||||
; CHECK-NEXT: ldrb.w r8, [r5, #2]
|
||||
; CHECK-NEXT: ldrb r6, [r7, #2]
|
||||
; CHECK-NEXT: smlabb r6, r6, r8, r2
|
||||
; CHECK-NEXT: str r6, [r4]
|
||||
; CHECK-NEXT: ldrb r5, [r5, #3]
|
||||
; CHECK-NEXT: ldrb r6, [r7, #3]
|
||||
; CHECK-NEXT: smlabb r5, r6, r5, r2
|
||||
; CHECK-NEXT: str r5, [r4, #4]
|
||||
; CHECK-NEXT: adds r4, #16
|
||||
; CHECK-NEXT: le lr, .LBB5_7
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: ldrb r8, [r5, #-3]
|
||||
; CHECK-NEXT: add.w r12, r12, #4
|
||||
; CHECK-NEXT: ldrb r7, [r6, #-1]
|
||||
; CHECK-NEXT: smlabb r7, r7, r8, r2
|
||||
; CHECK-NEXT: str r7, [r4, #-8]
|
||||
; CHECK-NEXT: ldrb r8, [r5, #-2]
|
||||
; CHECK-NEXT: ldrb r7, [r6]
|
||||
; CHECK-NEXT: smlabb r7, r7, r8, r2
|
||||
; CHECK-NEXT: str r7, [r4, #-4]
|
||||
; CHECK-NEXT: ldrb r8, [r5, #-1]
|
||||
; CHECK-NEXT: ldrb r7, [r6, #1]
|
||||
; CHECK-NEXT: smlabb r7, r7, r8, r2
|
||||
; CHECK-NEXT: str r7, [r4]
|
||||
; CHECK-NEXT: ldrb.w r8, [r5]
|
||||
; CHECK-NEXT: adds r5, #4
|
||||
; CHECK-NEXT: ldrb r7, [r6, #2]
|
||||
; CHECK-NEXT: adds r6, #4
|
||||
; CHECK-NEXT: smlabb r7, r7, r8, r2
|
||||
; CHECK-NEXT: str r7, [r4, #4]
|
||||
; CHECK-NEXT: adds r4, #16
|
||||
; CHECK-NEXT: le lr, .LBB5_7
|
||||
; CHECK-NEXT: .LBB5_8: @ %for.cond.cleanup.loopexit.unr-lcssa
|
||||
; CHECK-NEXT: wls lr, r9, .LBB5_11
|
||||
; CHECK-NEXT: @ %bb.9: @ %for.body.epil.preheader
|
||||
@ -447,10 +449,10 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_char(i8* nocapture readonly
|
||||
; CHECK-NEXT: mov lr, r9
|
||||
; CHECK-NEXT: .LBB5_10: @ %for.body.epil
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: ldrb r7, [r0], #1
|
||||
; CHECK-NEXT: ldrb r6, [r1], #1
|
||||
; CHECK-NEXT: smlabb r7, r6, r7, r2
|
||||
; CHECK-NEXT: str r7, [r3], #4
|
||||
; CHECK-NEXT: ldrb r6, [r0], #1
|
||||
; CHECK-NEXT: ldrb r5, [r1], #1
|
||||
; CHECK-NEXT: smlabb r6, r5, r6, r2
|
||||
; CHECK-NEXT: str r6, [r3], #4
|
||||
; CHECK-NEXT: le lr, .LBB5_10
|
||||
; CHECK-NEXT: .LBB5_11: @ %for.cond.cleanup
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
|
||||
@ -663,28 +665,28 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_uchar(i8* nocapture readonl
|
||||
; CHECK-NEXT: cmp.w r12, #0
|
||||
; CHECK-NEXT: beq.w .LBB7_11
|
||||
; CHECK-NEXT: @ %bb.1: @ %for.body.lr.ph
|
||||
; CHECK-NEXT: add.w r4, r3, r12, lsl #2
|
||||
; CHECK-NEXT: add.w r5, r1, r12
|
||||
; CHECK-NEXT: cmp r4, r1
|
||||
; CHECK-NEXT: add.w r6, r0, r12
|
||||
; CHECK-NEXT: cset r7, hi
|
||||
; CHECK-NEXT: add.w r6, r3, r12, lsl #2
|
||||
; CHECK-NEXT: add.w r4, r1, r12
|
||||
; CHECK-NEXT: cmp r6, r1
|
||||
; CHECK-NEXT: add.w r5, r0, r12
|
||||
; CHECK-NEXT: cset lr, hi
|
||||
; CHECK-NEXT: cmp r4, r3
|
||||
; CHECK-NEXT: cset r4, hi
|
||||
; CHECK-NEXT: cmp r6, r0
|
||||
; CHECK-NEXT: cset r6, hi
|
||||
; CHECK-NEXT: cmp r5, r3
|
||||
; CHECK-NEXT: cset r5, hi
|
||||
; CHECK-NEXT: cmp r4, r0
|
||||
; CHECK-NEXT: cset r4, hi
|
||||
; CHECK-NEXT: cmp r6, r3
|
||||
; CHECK-NEXT: cset r6, hi
|
||||
; CHECK-NEXT: mov.w lr, #1
|
||||
; CHECK-NEXT: ands r6, r4
|
||||
; CHECK-NEXT: lsls r6, r6, #31
|
||||
; CHECK-NEXT: ands r5, r6
|
||||
; CHECK-NEXT: movs r6, #1
|
||||
; CHECK-NEXT: lsls r5, r5, #31
|
||||
; CHECK-NEXT: itt eq
|
||||
; CHECK-NEXT: andeq.w r4, r5, r7
|
||||
; CHECK-NEXT: lslseq.w r4, r4, #31
|
||||
; CHECK-NEXT: andeq.w r5, r4, lr
|
||||
; CHECK-NEXT: lslseq.w r5, r5, #31
|
||||
; CHECK-NEXT: beq .LBB7_4
|
||||
; CHECK-NEXT: @ %bb.2: @ %for.body.preheader
|
||||
; CHECK-NEXT: sub.w r4, r12, #1
|
||||
; CHECK-NEXT: sub.w r5, r12, #1
|
||||
; CHECK-NEXT: and r9, r12, #3
|
||||
; CHECK-NEXT: cmp r4, #3
|
||||
; CHECK-NEXT: cmp r5, #3
|
||||
; CHECK-NEXT: bhs .LBB7_6
|
||||
; CHECK-NEXT: @ %bb.3:
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
@ -700,33 +702,35 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_uchar(i8* nocapture readonl
|
||||
; CHECK-NEXT: letp lr, .LBB7_5
|
||||
; CHECK-NEXT: b .LBB7_11
|
||||
; CHECK-NEXT: .LBB7_6: @ %for.body.preheader.new
|
||||
; CHECK-NEXT: bic r7, r12, #3
|
||||
; CHECK-NEXT: bic r5, r12, #3
|
||||
; CHECK-NEXT: add.w r4, r3, #8
|
||||
; CHECK-NEXT: subs r7, #4
|
||||
; CHECK-NEXT: subs r5, #4
|
||||
; CHECK-NEXT: mov.w r12, #0
|
||||
; CHECK-NEXT: add.w lr, lr, r7, lsr #2
|
||||
; CHECK-NEXT: add.w lr, r6, r5, lsr #2
|
||||
; CHECK-NEXT: adds r5, r0, #3
|
||||
; CHECK-NEXT: adds r6, r1, #1
|
||||
; CHECK-NEXT: dls lr, lr
|
||||
; CHECK-NEXT: .LBB7_7: @ %for.body
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: ldrb.w r5, [r0, r12]
|
||||
; CHECK-NEXT: add.w r7, r1, r12
|
||||
; CHECK-NEXT: ldrb.w r6, [r1, r12]
|
||||
; CHECK-NEXT: smlabb r5, r6, r5, r2
|
||||
; CHECK-NEXT: str r5, [r4, #-8]
|
||||
; CHECK-NEXT: add.w r5, r0, r12
|
||||
; CHECK-NEXT: ldrb r6, [r7, #1]
|
||||
; CHECK-NEXT: ldrb r8, [r5, #-3]
|
||||
; CHECK-NEXT: add.w r12, r12, #4
|
||||
; CHECK-NEXT: ldrb.w r8, [r5, #1]
|
||||
; CHECK-NEXT: smlabb r6, r6, r8, r2
|
||||
; CHECK-NEXT: str r6, [r4, #-4]
|
||||
; CHECK-NEXT: ldrb.w r8, [r5, #2]
|
||||
; CHECK-NEXT: ldrb r6, [r7, #2]
|
||||
; CHECK-NEXT: smlabb r6, r6, r8, r2
|
||||
; CHECK-NEXT: str r6, [r4]
|
||||
; CHECK-NEXT: ldrb r5, [r5, #3]
|
||||
; CHECK-NEXT: ldrb r6, [r7, #3]
|
||||
; CHECK-NEXT: smlabb r5, r6, r5, r2
|
||||
; CHECK-NEXT: str r5, [r4, #4]
|
||||
; CHECK-NEXT: ldrb r7, [r6, #-1]
|
||||
; CHECK-NEXT: smlabb r7, r7, r8, r2
|
||||
; CHECK-NEXT: str r7, [r4, #-8]
|
||||
; CHECK-NEXT: ldrb r8, [r5, #-2]
|
||||
; CHECK-NEXT: ldrb r7, [r6]
|
||||
; CHECK-NEXT: smlabb r7, r7, r8, r2
|
||||
; CHECK-NEXT: str r7, [r4, #-4]
|
||||
; CHECK-NEXT: ldrb r8, [r5, #-1]
|
||||
; CHECK-NEXT: ldrb r7, [r6, #1]
|
||||
; CHECK-NEXT: smlabb r7, r7, r8, r2
|
||||
; CHECK-NEXT: str r7, [r4]
|
||||
; CHECK-NEXT: ldrb.w r8, [r5]
|
||||
; CHECK-NEXT: adds r5, #4
|
||||
; CHECK-NEXT: ldrb r7, [r6, #2]
|
||||
; CHECK-NEXT: adds r6, #4
|
||||
; CHECK-NEXT: smlabb r7, r7, r8, r2
|
||||
; CHECK-NEXT: str r7, [r4, #4]
|
||||
; CHECK-NEXT: adds r4, #16
|
||||
; CHECK-NEXT: le lr, .LBB7_7
|
||||
; CHECK-NEXT: .LBB7_8: @ %for.cond.cleanup.loopexit.unr-lcssa
|
||||
@ -738,10 +742,10 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_uchar(i8* nocapture readonl
|
||||
; CHECK-NEXT: mov lr, r9
|
||||
; CHECK-NEXT: .LBB7_10: @ %for.body.epil
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: ldrb r7, [r0], #1
|
||||
; CHECK-NEXT: ldrb r6, [r1], #1
|
||||
; CHECK-NEXT: smlabb r7, r6, r7, r2
|
||||
; CHECK-NEXT: str r7, [r3], #4
|
||||
; CHECK-NEXT: ldrb r6, [r0], #1
|
||||
; CHECK-NEXT: ldrb r5, [r1], #1
|
||||
; CHECK-NEXT: smlabb r6, r5, r6, r2
|
||||
; CHECK-NEXT: str r6, [r3], #4
|
||||
; CHECK-NEXT: le lr, .LBB7_10
|
||||
; CHECK-NEXT: .LBB7_11: @ %for.cond.cleanup
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
|
||||
|
Loading…
x
Reference in New Issue
Block a user