commit c978d9ace7

We were using isShiftedInt<7, Shift>(RHSC) to detect the range of offsets that
can be folded into MVE loads/stores. The instructions actually take a 7-bit
unsigned integer which is either added or subtracted, so the check should be
something more like isShiftedUInt<7, Shift>(abs(RHSC)). Instead I've changed
this to use the isScaledConstantInRange method, the same as in
SelectT2AddrModeImm7Offset used by pre/post-increment addressing, which already
seemed to be getting this correct.

Differential Revision: https://reviews.llvm.org/D66997

llvm-svn: 370731
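
To make the range difference concrete, here is a minimal standalone C++ sketch
of the two checks for Shift = 2 (the vldrw/vstrw case, whose 7-bit immediate
counts words). The helper names are hypothetical stand-ins, not the actual LLVM
code: the old check behaves like isShiftedInt<7, 2> and accepts only
[-256, 252], while the instruction really encodes a 7-bit unsigned offset that
is added or subtracted, i.e. [-508, 508] in steps of 4.

#include <cassert>
#include <cstdint>

// Old, too-narrow check: behaves like isShiftedInt<7, Shift>, a *signed* 7-bit
// value shifted left by Shift, i.e. offsets in [-256, 252] in steps of 4.
static bool oldFoldableOffset(int64_t C, unsigned Shift) {
  return C % (1 << Shift) == 0 && C >= -(int64_t(64) << Shift) &&
         C <= (int64_t(63) << Shift);
}

// Corrected semantics, matching what isScaledConstantInRange enforces for
// SelectT2AddrModeImm7Offset: a 7-bit *unsigned* value, scaled, that the
// instruction either adds or subtracts, i.e. offsets in [-508, 508].
static bool newFoldableOffset(int64_t C, unsigned Shift) {
  int64_t Scaled = C / (1 << Shift);
  return C % (1 << Shift) == 0 && Scaled >= -127 && Scaled <= 127;
}

int main() {
  // 508 = 127 * 4 is encodable but was rejected by the old check; the
  // load_4xi32_a4_offset_pos/neg tests below pin down exactly this boundary.
  assert(!oldFoldableOffset(508, 2) && newFoldableOffset(508, 2));
  assert(!oldFoldableOffset(-508, 2) && newFoldableOffset(-508, 2));
  assert(oldFoldableOffset(252, 2) && newFoldableOffset(252, 2));
  // Not a multiple of 4, or out of range: still rejected.
  assert(!newFoldableOffset(510, 2) && !newFoldableOffset(512, 2));
}
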
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE

define arm_aapcs_vfpcc <4 x i32> @load_4xi32_a4(<4 x i32>* %vp) {
; CHECK-LE-LABEL: load_4xi32_a4:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrw.u32 q0, [r0]
; CHECK-LE-NEXT:    vshr.u32 q0, q0, #1
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: load_4xi32_a4:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrw.u32 q0, [r0]
; CHECK-BE-NEXT:    vshr.u32 q1, q0, #1
; CHECK-BE-NEXT:    vrev64.32 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, <4 x i32>* %vp, align 4
  %1 = lshr <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %1
}
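
; With alignment below 4 the i32 vector load cannot use vldrw.u32: little-endian
; falls back to vldrh.u16 (align 2) or vldrb.u8 (align 1), and big-endian uses
; vldrb.u8 plus a vrev32.8 to restore the lane order.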
define arm_aapcs_vfpcc <4 x i32> @load_4xi32_a2(<4 x i32>* %vp) {
; CHECK-LE-LABEL: load_4xi32_a2:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrh.u16 q0, [r0]
; CHECK-LE-NEXT:    vshr.u32 q0, q0, #1
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: load_4xi32_a2:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrb.u8 q0, [r0]
; CHECK-BE-NEXT:    vrev32.8 q0, q0
; CHECK-BE-NEXT:    vshr.u32 q1, q0, #1
; CHECK-BE-NEXT:    vrev64.32 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, <4 x i32>* %vp, align 2
  %1 = lshr <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %1
}

define arm_aapcs_vfpcc <4 x i32> @load_4xi32_a1(<4 x i32>* %vp) {
; CHECK-LE-LABEL: load_4xi32_a1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrb.u8 q0, [r0]
; CHECK-LE-NEXT:    vshr.u32 q0, q0, #1
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: load_4xi32_a1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrb.u8 q0, [r0]
; CHECK-BE-NEXT:    vrev32.8 q0, q0
; CHECK-BE-NEXT:    vshr.u32 q1, q0, #1
; CHECK-BE-NEXT:    vrev64.32 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, <4 x i32>* %vp, align 1
  %1 = lshr <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %1
}
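
; Stores mirror the loads: alignment below 4 forces vstrh.16 or vstrb.8 instead
; of vstrw.32, with extra vrev instructions on big-endian.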
define arm_aapcs_vfpcc void @store_4xi32_a4(<4 x i32>* %vp, <4 x i32> %val) {
; CHECK-LE-LABEL: store_4xi32_a4:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vshr.u32 q0, q0, #1
; CHECK-LE-NEXT:    vstrw.32 q0, [r0]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: store_4xi32_a4:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vshr.u32 q0, q1, #1
; CHECK-BE-NEXT:    vstrw.32 q0, [r0]
; CHECK-BE-NEXT:    bx lr
entry:
  %0 = lshr <4 x i32> %val, <i32 1, i32 1, i32 1, i32 1>
  store <4 x i32> %0, <4 x i32>* %vp, align 4
  ret void
}

define arm_aapcs_vfpcc void @store_4xi32_a2(<4 x i32>* %vp, <4 x i32> %val) {
; CHECK-LE-LABEL: store_4xi32_a2:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vshr.u32 q0, q0, #1
; CHECK-LE-NEXT:    vstrh.16 q0, [r0]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: store_4xi32_a2:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vshr.u32 q0, q1, #1
; CHECK-BE-NEXT:    vrev32.8 q0, q0
; CHECK-BE-NEXT:    vstrb.8 q0, [r0]
; CHECK-BE-NEXT:    bx lr
entry:
  %0 = lshr <4 x i32> %val, <i32 1, i32 1, i32 1, i32 1>
  store <4 x i32> %0, <4 x i32>* %vp, align 2
  ret void
}

define arm_aapcs_vfpcc void @store_4xi32_a1(<4 x i32>* %vp, <4 x i32> %val) {
; CHECK-LE-LABEL: store_4xi32_a1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vshr.u32 q0, q0, #1
; CHECK-LE-NEXT:    vstrb.8 q0, [r0]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: store_4xi32_a1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vshr.u32 q0, q1, #1
; CHECK-BE-NEXT:    vrev32.8 q0, q0
; CHECK-BE-NEXT:    vstrb.8 q0, [r0]
; CHECK-BE-NEXT:    bx lr
entry:
  %0 = lshr <4 x i32> %val, <i32 1, i32 1, i32 1, i32 1>
  store <4 x i32> %0, <4 x i32>* %vp, align 1
  ret void
}
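
; 127 * 4 = 508 is the boundary of the 7-bit scaled immediate range, so on
; little-endian the offset is folded straight into the vldrw.u32. Big-endian
; loads with vldrb.u8 here, which cannot encode the offset, so the address is
; materialised first.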
define arm_aapcs_vfpcc <4 x i32> @load_4xi32_a4_offset_pos(i32* %ip) {
; CHECK-LE-LABEL: load_4xi32_a4_offset_pos:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrw.u32 q0, [r0, #508]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: load_4xi32_a4_offset_pos:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    add.w r0, r0, #508
; CHECK-BE-NEXT:    vldrb.u8 q1, [r0]
; CHECK-BE-NEXT:    vrev64.8 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %ipoffset = getelementptr inbounds i32, i32* %ip, i32 127
  %vp = bitcast i32* %ipoffset to <4 x i32>*
  %0 = load <4 x i32>, <4 x i32>* %vp, align 4
  ret <4 x i32> %0
}
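
; The same boundary on the negative side: -127 * 4 = -508 exercises the
; subtracted form of the 7-bit offset.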
define arm_aapcs_vfpcc <4 x i32> @load_4xi32_a4_offset_neg(i32* %ip) {
; CHECK-LE-LABEL: load_4xi32_a4_offset_neg:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrw.u32 q0, [r0, #-508]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: load_4xi32_a4_offset_neg:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    sub.w r0, r0, #508
; CHECK-BE-NEXT:    vldrb.u8 q1, [r0]
; CHECK-BE-NEXT:    vrev64.8 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %ipoffset = getelementptr inbounds i32, i32* %ip, i32 -127
  %vp = bitcast i32* %ipoffset to <4 x i32>*
  %0 = load <4 x i32>, <4 x i32>* %vp, align 4
  ret <4 x i32> %0
}
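
; sp-relative accesses use the same addressing mode: the #16 offset folds into
; the vstr/vldr instructions for each element size.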
define arm_aapcs_vfpcc <4 x i32> @loadstore_4xi32_stack_off16() {
; CHECK-LE-LABEL: loadstore_4xi32_stack_off16:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    .pad #40
; CHECK-LE-NEXT:    sub sp, #40
; CHECK-LE-NEXT:    vmov.i32 q0, #0x1
; CHECK-LE-NEXT:    mov r0, sp
; CHECK-LE-NEXT:    vstrw.32 q0, [r0]
; CHECK-LE-NEXT:    movs r0, #3
; CHECK-LE-NEXT:    vstrw.32 q0, [sp, #16]
; CHECK-LE-NEXT:    str r0, [sp, #16]
; CHECK-LE-NEXT:    vldrw.u32 q0, [sp, #16]
; CHECK-LE-NEXT:    add sp, #40
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: loadstore_4xi32_stack_off16:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .pad #40
; CHECK-BE-NEXT:    sub sp, #40
; CHECK-BE-NEXT:    vmov.i32 q0, #0x1
; CHECK-BE-NEXT:    mov r0, sp
; CHECK-BE-NEXT:    vstrw.32 q0, [r0]
; CHECK-BE-NEXT:    movs r0, #3
; CHECK-BE-NEXT:    vstrw.32 q0, [sp, #16]
; CHECK-BE-NEXT:    str r0, [sp, #16]
; CHECK-BE-NEXT:    vldrb.u8 q1, [sp, #16]
; CHECK-BE-NEXT:    vrev64.8 q0, q1
; CHECK-BE-NEXT:    add sp, #40
; CHECK-BE-NEXT:    bx lr
entry:
  %c = alloca [1 x [5 x [2 x i32]]], align 4
  %0 = bitcast [1 x [5 x [2 x i32]]]* %c to i8*
  %arrayidx5 = getelementptr inbounds [1 x [5 x [2 x i32]]], [1 x [5 x [2 x i32]]]* %c, i32 0, i32 0, i32 0, i32 0
  %1 = bitcast [1 x [5 x [2 x i32]]]* %c to <4 x i32>*
  store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32>* %1, align 4
  %arrayidx5.2 = getelementptr inbounds [1 x [5 x [2 x i32]]], [1 x [5 x [2 x i32]]]* %c, i32 0, i32 0, i32 2, i32 0
  %2 = bitcast i32* %arrayidx5.2 to <4 x i32>*
  store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32>* %2, align 4
  store i32 3, i32* %arrayidx5.2, align 4
  %3 = load <4 x i32>, <4 x i32>* %2, align 4
  ret <4 x i32> %3
}

define arm_aapcs_vfpcc <8 x i16> @loadstore_8xi16_stack_off16() {
; CHECK-LE-LABEL: loadstore_8xi16_stack_off16:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    .pad #40
; CHECK-LE-NEXT:    sub sp, #40
; CHECK-LE-NEXT:    vmov.i16 q0, #0x1
; CHECK-LE-NEXT:    mov r0, sp
; CHECK-LE-NEXT:    vstrh.16 q0, [r0]
; CHECK-LE-NEXT:    movs r0, #3
; CHECK-LE-NEXT:    vstrh.16 q0, [sp, #16]
; CHECK-LE-NEXT:    strh.w r0, [sp, #16]
; CHECK-LE-NEXT:    vldrh.u16 q0, [sp, #16]
; CHECK-LE-NEXT:    add sp, #40
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: loadstore_8xi16_stack_off16:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .pad #40
; CHECK-BE-NEXT:    sub sp, #40
; CHECK-BE-NEXT:    vmov.i16 q0, #0x1
; CHECK-BE-NEXT:    mov r0, sp
; CHECK-BE-NEXT:    vstrh.16 q0, [r0]
; CHECK-BE-NEXT:    movs r0, #3
; CHECK-BE-NEXT:    vstrh.16 q0, [sp, #16]
; CHECK-BE-NEXT:    strh.w r0, [sp, #16]
; CHECK-BE-NEXT:    vldrb.u8 q1, [sp, #16]
; CHECK-BE-NEXT:    vrev64.8 q0, q1
; CHECK-BE-NEXT:    add sp, #40
; CHECK-BE-NEXT:    bx lr
entry:
  %c = alloca [1 x [10 x [2 x i16]]], align 2
  %0 = bitcast [1 x [10 x [2 x i16]]]* %c to i8*
  %arrayidx5 = getelementptr inbounds [1 x [10 x [2 x i16]]], [1 x [10 x [2 x i16]]]* %c, i32 0, i32 0, i32 0, i32 0
  %1 = bitcast [1 x [10 x [2 x i16]]]* %c to <8 x i16>*
  store <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16>* %1, align 2
  %arrayidx5.2 = getelementptr inbounds [1 x [10 x [2 x i16]]], [1 x [10 x [2 x i16]]]* %c, i32 0, i32 0, i32 4, i32 0
  %2 = bitcast i16* %arrayidx5.2 to <8 x i16>*
  store <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16>* %2, align 2
  store i16 3, i16* %arrayidx5.2, align 2
  %3 = load <8 x i16>, <8 x i16>* %2, align 2
  ret <8 x i16> %3
}

define arm_aapcs_vfpcc <16 x i8> @loadstore_16xi8_stack_off16() {
; CHECK-LE-LABEL: loadstore_16xi8_stack_off16:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    .pad #40
; CHECK-LE-NEXT:    sub sp, #40
; CHECK-LE-NEXT:    vmov.i8 q0, #0x1
; CHECK-LE-NEXT:    mov r0, sp
; CHECK-LE-NEXT:    vstrb.8 q0, [r0]
; CHECK-LE-NEXT:    movs r0, #3
; CHECK-LE-NEXT:    vstrb.8 q0, [sp, #16]
; CHECK-LE-NEXT:    strb.w r0, [sp, #16]
; CHECK-LE-NEXT:    vldrb.u8 q0, [sp, #16]
; CHECK-LE-NEXT:    add sp, #40
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: loadstore_16xi8_stack_off16:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .pad #40
; CHECK-BE-NEXT:    sub sp, #40
; CHECK-BE-NEXT:    vmov.i8 q0, #0x1
; CHECK-BE-NEXT:    mov r0, sp
; CHECK-BE-NEXT:    vstrb.8 q0, [r0]
; CHECK-BE-NEXT:    movs r0, #3
; CHECK-BE-NEXT:    vstrb.8 q0, [sp, #16]
; CHECK-BE-NEXT:    strb.w r0, [sp, #16]
; CHECK-BE-NEXT:    vldrb.u8 q1, [sp, #16]
; CHECK-BE-NEXT:    vrev64.8 q0, q1
; CHECK-BE-NEXT:    add sp, #40
; CHECK-BE-NEXT:    bx lr
entry:
  %c = alloca [1 x [20 x [2 x i8]]], align 1
  %0 = bitcast [1 x [20 x [2 x i8]]]* %c to i8*
  %arrayidx5 = getelementptr inbounds [1 x [20 x [2 x i8]]], [1 x [20 x [2 x i8]]]* %c, i32 0, i32 0, i32 0, i32 0
  %1 = bitcast [1 x [20 x [2 x i8]]]* %c to <16 x i8>*
  store <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <16 x i8>* %1, align 1
  %arrayidx5.2 = getelementptr inbounds [1 x [20 x [2 x i8]]], [1 x [20 x [2 x i8]]]* %c, i32 0, i32 0, i32 8, i32 0
  %2 = bitcast i8* %arrayidx5.2 to <16 x i8>*
  store <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <16 x i8>* %2, align 1
  store i8 3, i8* %arrayidx5.2, align 1
  %3 = load <16 x i8>, <16 x i8>* %2, align 1
  ret <16 x i8> %3
}