mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 10:42:39 +01:00
c3db369888
This code starts from the high end of the sorted vector of offsets, and works backwards: it tries to find contiguous offsets, process them, then pops them from the end of the vector. Most of the code agrees with this order of processing, but one loop doesn't: it instead processes elements from the low end of the vector (which are nodes with unrelated offsets). Fix that loop to process the correct elements. This has a few implications. One, we don't incorrectly return early when processing multiple groups of offsets in the same block (which allows rescheduling prera-ldst-insertpt.mir). Two, we pick the correct insert point for loads, so they're correctly sorted (which affects the scheduling of vldm-liveness.ll). I think it might also impact some of the heuristics slightly. Differential Revision: https://reviews.llvm.org/D30368 llvm-svn: 296701
28 lines
1010 B
LLVM
28 lines
1010 B
LLVM
; RUN: llc -mtriple thumbv7-apple-ios -verify-machineinstrs -o - %s | FileCheck %s
|
|
|
|
; Make sure we emit the loads in ascending order, and form a vldmia.
|
|
;
|
|
; See vldm-liveness.mir for the bug this file was originally testing.
|
|
|
|
; Test function: loads four floats relative to %ptr and returns them packed
; into a <4 x float>. The loads appear in the IR at element indices 0, 1, 4, 2,
; which is not ascending address order.
; The FileCheck directives inside the body expect the loads at indices 0 and 1
; to be combined into a single vldmia, followed by individual vldr instructions
; at byte offsets 8 and then 16 (ascending addresses).
define arm_aapcs_vfpcc <4 x float> @foo(float* %ptr) {
|
|
; CHECK-LABEL: foo:
|
|
; CHECK: vldmia r0, {s0, s1}
|
|
; CHECK: vldr s3, [r0, #8]
|
|
; CHECK: vldr s2, [r0, #16]
|
|
%off0 = getelementptr float, float* %ptr, i32 0
|
|
%val0 = load float, float* %off0
|
|
%off1 = getelementptr float, float* %ptr, i32 1
|
|
%val1 = load float, float* %off1
|
|
; Element index 4 is loaded before element index 2, so the IR load order
; differs from the address order the expected output asks for.
%off4 = getelementptr float, float* %ptr, i32 4
|
|
%val4 = load float, float* %off4
|
|
%off2 = getelementptr float, float* %ptr, i32 2
|
|
%val2 = load float, float* %off2
|
|
|
|
; Build the result vector lane by lane: lanes 0 and 1 take the index-0 and
; index-1 values, lane 2 takes the index-4 value and lane 3 the index-2 value.
; NOTE(review): this lane order presumably explains why the expected vldr for
; offset 16 targets s2 and the one for offset 8 targets s3 -- confirm against
; the arm_aapcs_vfpcc return-register assignment.
%vec1 = insertelement <4 x float> undef, float %val0, i32 0
|
|
%vec2 = insertelement <4 x float> %vec1, float %val1, i32 1
|
|
%vec3 = insertelement <4 x float> %vec2, float %val4, i32 2
|
|
%vec4 = insertelement <4 x float> %vec3, float %val2, i32 3
|
|
|
|
ret <4 x float> %vec4
|
|
}
|