1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 04:02:41 +01:00
llvm-mirror/test/CodeGen/ARM/loop-align-cortex-m.ll
Tim Northover 9530f416eb ARM: align loops to 4 bytes on Cortex-M3 and Cortex-M4.
The Technical Reference Manuals for these two CPUs state that branching
to an unaligned 32-bit instruction incurs an extra pipeline reload
penalty. That's bad.

This also enables the optimization at -Os since it costs on average one
byte per loop in return for 1 cycle per iteration, which is pretty good
going.

llvm-svn: 342127
2018-09-13 10:28:05 +00:00

50 lines
1.4 KiB
LLVM

; RUN: llc -mtriple=thumbv7m-none-eabi %s -mcpu=cortex-m3 -o - | FileCheck %s
; RUN: llc -mtriple=thumbv7m-none-eabi %s -mcpu=cortex-m4 -o - | FileCheck %s
; RUN: llc -mtriple=thumbv7m-none-eabi %s -mcpu=cortex-m33 -o - | FileCheck %s
define void @test_loop_alignment(i32* %in, i32* %out) optsize {
; CHECK-LABEL: test_loop_alignment:
; CHECK: movs {{r[0-9]+}}, #0
; CHECK: .p2align 2
entry:
br label %loop
loop:
%i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
%in.addr = getelementptr inbounds i32, i32* %in, i32 %i
%lhs = load i32, i32* %in.addr, align 4
%res = mul nsw i32 %lhs, 5
%out.addr = getelementptr inbounds i32, i32* %out, i32 %i
store i32 %res, i32* %out.addr, align 4
%i.next = add i32 %i, 1
%done = icmp eq i32 %i.next, 1024
br i1 %done, label %end, label %loop
end:
ret void
}
define void @test_loop_alignment_minsize(i32* %in, i32* %out) minsize {
; CHECK-LABEL: test_loop_alignment_minsize:
; CHECK: movs {{r[0-9]+}}, #0
; CHECK-NOT: .p2align
entry:
br label %loop
loop:
%i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
%in.addr = getelementptr inbounds i32, i32* %in, i32 %i
%lhs = load i32, i32* %in.addr, align 4
%res = mul nsw i32 %lhs, 5
%out.addr = getelementptr inbounds i32, i32* %out, i32 %i
store i32 %res, i32* %out.addr, align 4
%i.next = add i32 %i, 1
%done = icmp eq i32 %i.next, 1024
br i1 %done, label %end, label %loop
end:
ret void
}