1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00
llvm-mirror/test/CodeGen/Thumb2/mve-intrinsics/vldr.ll
David Green c55d376d9d [MVE] Fixup order of gather writeback intrinsic outputs
The MVE_VLDRWU32_qi_pre gather loads, like the other _pre/_post MVE
loads, return the writeback as result 0 and the value as result 1. The LLVM
IR intrinsic seems to have these the other way around, though, so when
lowering from one to the other we need to switch the first two outputs.

I've also fixed up the types of _pre/_post on normal MVE loads. There we
were already getting the values the right way around, just not for the
types. I don't believe this was causing anything to go wrong, but it was
very confusing to read in the debug output.

Differential Revision: https://reviews.llvm.org/D73370
2020-01-27 14:08:06 +00:00

63 lines
2.6 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
; Gather load with base-vector writeback, <4 x i32> result, no predicate.
; The base vector is read from %addr and passed to the intrinsic with an
; immediate offset of 80. Field 1 of the returned struct is stored back to
; %addr and field 0 is returned, matching the expected assembly below where
; q1 (the written-back base) is stored and q0 is the return register.
define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_s32(<4 x i32>* %addr) {
; CHECK-LABEL: test_vldrwq_gather_base_wb_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vldrw.u32 q0, [q1, #80]!
; CHECK-NEXT: vstrw.32 q1, [r0]
; CHECK-NEXT: bx lr
entry:
  ; Fetch the vector that serves as the gather base.
  %bases = load <4 x i32>, <4 x i32>* %addr, align 8
  %pair = tail call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32> %bases, i32 80)
  ; Field 1 goes back to memory over the original base vector.
  %bases.wb = extractvalue { <4 x i32>, <4 x i32> } %pair, 1
  store <4 x i32> %bases.wb, <4 x i32>* %addr, align 8
  ; Field 0 is the function result.
  %loaded = extractvalue { <4 x i32>, <4 x i32> } %pair, 0
  ret <4 x i32> %loaded
}
declare { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32>, i32)
; Gather load with base-vector writeback, <4 x float> result, no predicate.
; Same shape as the integer variant, but field 0 of the intrinsic's result
; struct is <4 x float> while field 1 stays <4 x i32>. The immediate offset
; passed to the intrinsic is 64. Field 1 is stored back to %addr and field 0
; is returned.
define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_wb_f32(<4 x i32>* %addr) {
; CHECK-LABEL: test_vldrwq_gather_base_wb_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vldrw.u32 q0, [q1, #64]!
; CHECK-NEXT: vstrw.32 q1, [r0]
; CHECK-NEXT: bx lr
entry:
  ; Fetch the vector that serves as the gather base.
  %bases = load <4 x i32>, <4 x i32>* %addr, align 8
  %pair = tail call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4f32.v4i32(<4 x i32> %bases, i32 64)
  ; Field 1 (integer vector) goes back to memory over the original base.
  %bases.wb = extractvalue { <4 x float>, <4 x i32> } %pair, 1
  store <4 x i32> %bases.wb, <4 x i32>* %addr, align 8
  ; Field 0 (float vector) is the function result.
  %loaded = extractvalue { <4 x float>, <4 x i32> } %pair, 0
  ret <4 x float> %loaded
}
declare { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4f32.v4i32(<4 x i32>, i32)
; Predicated gather load with base-vector writeback, <2 x i64> result.
; The i16 predicate %p is zero-extended to i32 and converted to <4 x i1> via
; pred.i2v, then handed to the predicated intrinsic together with the base
; vector read from %addr and an immediate offset of 656. Field 1 of the
; returned struct is stored back to %addr and field 0 is returned.
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_z_u64(<2 x i64>* %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_base_wb_z_u64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vpst
; CHECK-NEXT: vldrdt.u64 q0, [q1, #656]!
; CHECK-NEXT: vstrw.32 q1, [r0]
; CHECK-NEXT: bx lr
entry:
  ; Fetch the vector that serves as the gather base.
  %bases = load <2 x i64>, <2 x i64>* %addr, align 8
  ; Widen the predicate bits and turn them into a vector mask.
  %p.wide = zext i16 %p to i32
  %mask = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %pair = tail call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %bases, i32 656, <4 x i1> %mask)
  ; Field 1 goes back to memory over the original base vector.
  %bases.wb = extractvalue { <2 x i64>, <2 x i64> } %pair, 1
  store <2 x i64> %bases.wb, <2 x i64>* %addr, align 8
  ; Field 0 is the function result.
  %loaded = extractvalue { <2 x i64>, <2 x i64> } %pair, 0
  ret <2 x i64> %loaded
}
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
declare { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <4 x i1>)