1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00
llvm-mirror/test/CodeGen/Thumb2/mve-ldst-offset.ll
David Green a82cfd92f0 [ARM] Widening loads and narrowing stores
MVE has instructions to widen as it loads, and narrow as it stores. This adds
the required patterns and legalisation to make them work including specifying
that they are legal, patterns to select them and test changes.

Patch by David Sherwood.

Differential Revision: https://reviews.llvm.org/D63839

llvm-svn: 364636
2019-06-28 09:47:55 +00:00

1223 lines
35 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve %s -o - | FileCheck %s
define i8* @post_ldrwu32_4(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrwu32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r0, #4]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 4
%0 = bitcast i8* %z to <4 x i32>*
%1 = load <4 x i32>, <4 x i32>* %0, align 8
%2 = bitcast i8* %y to <4 x i32>*
store <4 x i32> %1, <4 x i32>* %2, align 8
ret i8* %x
}
define i8* @post_ldrwu32_3(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrwu32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r2, r0, #3
; CHECK-NEXT: vldrw.u32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %z to <4 x i32>*
%1 = load <4 x i32>, <4 x i32>* %0, align 8
%2 = bitcast i8* %y to <4 x i32>*
store <4 x i32> %1, <4 x i32>* %2, align 8
ret i8* %x
}
define i8* @post_ldrwu32_m4(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrwu32_m4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r0, #-4]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 -4
%0 = bitcast i8* %z to <4 x i32>*
%1 = load <4 x i32>, <4 x i32>* %0, align 8
%2 = bitcast i8* %y to <4 x i32>*
store <4 x i32> %1, <4 x i32>* %2, align 8
ret i8* %x
}
define i8* @post_ldrwu32_508(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrwu32_508:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: add.w r2, r0, #508
; CHECK-NEXT: vldrw.u32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 508
%0 = bitcast i8* %z to <4 x i32>*
%1 = load <4 x i32>, <4 x i32>* %0, align 8
%2 = bitcast i8* %y to <4 x i32>*
store <4 x i32> %1, <4 x i32>* %2, align 8
ret i8* %x
}
define i8* @post_ldrwu32_512(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrwu32_512:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: add.w r2, r0, #512
; CHECK-NEXT: vldrw.u32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 512
%0 = bitcast i8* %z to <4 x i32>*
%1 = load <4 x i32>, <4 x i32>* %0, align 8
%2 = bitcast i8* %y to <4 x i32>*
store <4 x i32> %1, <4 x i32>* %2, align 8
ret i8* %x
}
define i8* @post_ldrwu32_m508(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrwu32_m508:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: sub.w r2, r0, #508
; CHECK-NEXT: vldrw.u32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 -508
%0 = bitcast i8* %z to <4 x i32>*
%1 = load <4 x i32>, <4 x i32>* %0, align 8
%2 = bitcast i8* %y to <4 x i32>*
store <4 x i32> %1, <4 x i32>* %2, align 8
ret i8* %x
}
define i8* @post_ldrwu32_m512(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrwu32_m512:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: sub.w r2, r0, #512
; CHECK-NEXT: vldrw.u32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 -512
%0 = bitcast i8* %z to <4 x i32>*
%1 = load <4 x i32>, <4 x i32>* %0, align 8
%2 = bitcast i8* %y to <4 x i32>*
store <4 x i32> %1, <4 x i32>* %2, align 8
ret i8* %x
}
define i8* @post_ldrhu32_4(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhu32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u32 q0, [r0, #4]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 4
%0 = bitcast i8* %z to <4 x i16>*
%1 = load <4 x i16>, <4 x i16>* %0, align 8
%2 = zext <4 x i16> %1 to <4 x i32>
%3 = bitcast i8* %y to <4 x i32>*
store <4 x i32> %2, <4 x i32>* %3, align 8
ret i8* %x
}
define i8* @post_ldrhu32_3(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhu32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r2, r0, #3
; CHECK-NEXT: vldrh.u32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %z to <4 x i16>*
%1 = load <4 x i16>, <4 x i16>* %0, align 8
%2 = zext <4 x i16> %1 to <4 x i32>
%3 = bitcast i8* %y to <4 x i32>*
store <4 x i32> %2, <4 x i32>* %3, align 8
ret i8* %x
}
define i8* @post_ldrhu32_2(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhu32_2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r2, r0, #2
; CHECK-NEXT: vldrh.u32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 2
%0 = bitcast i8* %z to <4 x i16>*
%1 = load <4 x i16>, <4 x i16>* %0, align 8
%2 = zext <4 x i16> %1 to <4 x i32>
%3 = bitcast i8* %y to <4 x i32>*
store <4 x i32> %2, <4 x i32>* %3, align 8
ret i8* %x
}
define i8* @post_ldrhu32_254(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhu32_254:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: add.w r2, r0, #254
; CHECK-NEXT: vldrh.u32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 254
%0 = bitcast i8* %z to <4 x i16>*
%1 = load <4 x i16>, <4 x i16>* %0, align 8
%2 = zext <4 x i16> %1 to <4 x i32>
%3 = bitcast i8* %y to <4 x i32>*
store <4 x i32> %2, <4 x i32>* %3, align 8
ret i8* %x
}
define i8* @post_ldrhu32_256(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhu32_256:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: add.w r2, r0, #256
; CHECK-NEXT: vldrh.u32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 256
%0 = bitcast i8* %z to <4 x i16>*
%1 = load <4 x i16>, <4 x i16>* %0, align 8
%2 = zext <4 x i16> %1 to <4 x i32>
%3 = bitcast i8* %y to <4 x i32>*
store <4 x i32> %2, <4 x i32>* %3, align 8
ret i8* %x
}
define i8* @post_ldrhs32_4(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhs32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.s32 q0, [r0, #4]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 4
%0 = bitcast i8* %z to <4 x i16>*
%1 = load <4 x i16>, <4 x i16>* %0, align 8
%2 = sext <4 x i16> %1 to <4 x i32>
%3 = bitcast i8* %y to <4 x i32>*
store <4 x i32> %2, <4 x i32>* %3, align 8
ret i8* %x
}
define i8* @post_ldrhs32_3(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhs32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r2, r0, #3
; CHECK-NEXT: vldrh.s32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %z to <4 x i16>*
%1 = load <4 x i16>, <4 x i16>* %0, align 8
%2 = sext <4 x i16> %1 to <4 x i32>
%3 = bitcast i8* %y to <4 x i32>*
store <4 x i32> %2, <4 x i32>* %3, align 8
ret i8* %x
}
define i8* @post_ldrhs32_2(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhs32_2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r2, r0, #2
; CHECK-NEXT: vldrh.s32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 2
%0 = bitcast i8* %z to <4 x i16>*
%1 = load <4 x i16>, <4 x i16>* %0, align 8
%2 = sext <4 x i16> %1 to <4 x i32>
%3 = bitcast i8* %y to <4 x i32>*
store <4 x i32> %2, <4 x i32>* %3, align 8
ret i8* %x
}
define i8* @post_ldrhs32_254(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhs32_254:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: add.w r2, r0, #254
; CHECK-NEXT: vldrh.s32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 254
%0 = bitcast i8* %z to <4 x i16>*
%1 = load <4 x i16>, <4 x i16>* %0, align 8
%2 = sext <4 x i16> %1 to <4 x i32>
%3 = bitcast i8* %y to <4 x i32>*
store <4 x i32> %2, <4 x i32>* %3, align 8
ret i8* %x
}
define i8* @post_ldrhs32_256(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhs32_256:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: add.w r2, r0, #256
; CHECK-NEXT: vldrh.s32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 256
%0 = bitcast i8* %z to <4 x i16>*
%1 = load <4 x i16>, <4 x i16>* %0, align 8
%2 = sext <4 x i16> %1 to <4 x i32>
%3 = bitcast i8* %y to <4 x i32>*
store <4 x i32> %2, <4 x i32>* %3, align 8
ret i8* %x
}
define i8* @post_ldrhu16_4(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhu16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r0, #4]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 4
%0 = bitcast i8* %z to <8 x i16>*
%1 = load <8 x i16>, <8 x i16>* %0, align 8
%2 = bitcast i8* %y to <8 x i16>*
store <8 x i16> %1, <8 x i16>* %2, align 8
ret i8* %x
}
define i8* @post_ldrhu16_3(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhu16_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r2, r0, #3
; CHECK-NEXT: vldrw.u32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %z to <8 x i16>*
%1 = load <8 x i16>, <8 x i16>* %0, align 8
%2 = bitcast i8* %y to <8 x i16>*
store <8 x i16> %1, <8 x i16>* %2, align 8
ret i8* %x
}
define i8* @post_ldrhu16_2(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhu16_2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r2, r0, #2
; CHECK-NEXT: vldrw.u32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 2
%0 = bitcast i8* %z to <8 x i16>*
%1 = load <8 x i16>, <8 x i16>* %0, align 8
%2 = bitcast i8* %y to <8 x i16>*
store <8 x i16> %1, <8 x i16>* %2, align 8
ret i8* %x
}
define i8* @post_ldrhu16_254(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhu16_254:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: add.w r2, r0, #254
; CHECK-NEXT: vldrw.u32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 254
%0 = bitcast i8* %z to <8 x i16>*
%1 = load <8 x i16>, <8 x i16>* %0, align 8
%2 = bitcast i8* %y to <8 x i16>*
store <8 x i16> %1, <8 x i16>* %2, align 8
ret i8* %x
}
define i8* @post_ldrhu16_256(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrhu16_256:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: add.w r2, r0, #256
; CHECK-NEXT: vldrw.u32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 256
%0 = bitcast i8* %z to <8 x i16>*
%1 = load <8 x i16>, <8 x i16>* %0, align 8
%2 = bitcast i8* %y to <8 x i16>*
store <8 x i16> %1, <8 x i16>* %2, align 8
ret i8* %x
}
define i8* @post_ldrbu32_4(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u32 q0, [r0, #4]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 4
%0 = bitcast i8* %z to <4 x i8>*
%1 = load <4 x i8>, <4 x i8>* %0, align 8
%2 = zext <4 x i8> %1 to <4 x i32>
%3 = bitcast i8* %y to <4 x i32>*
store <4 x i32> %2, <4 x i32>* %3, align 8
ret i8* %x
}
define i8* @post_ldrbu32_3(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r2, r0, #3
; CHECK-NEXT: vldrb.u32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %z to <4 x i8>*
%1 = load <4 x i8>, <4 x i8>* %0, align 8
%2 = zext <4 x i8> %1 to <4 x i32>
%3 = bitcast i8* %y to <4 x i32>*
store <4 x i32> %2, <4 x i32>* %3, align 8
ret i8* %x
}
define i8* @post_ldrbu32_127(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu32_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: add.w r2, r0, #127
; CHECK-NEXT: vldrb.u32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 127
%0 = bitcast i8* %z to <4 x i8>*
%1 = load <4 x i8>, <4 x i8>* %0, align 8
%2 = zext <4 x i8> %1 to <4 x i32>
%3 = bitcast i8* %y to <4 x i32>*
store <4 x i32> %2, <4 x i32>* %3, align 8
ret i8* %x
}
define i8* @post_ldrbu32_128(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu32_128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: add.w r2, r0, #128
; CHECK-NEXT: vldrb.u32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 128
%0 = bitcast i8* %z to <4 x i8>*
%1 = load <4 x i8>, <4 x i8>* %0, align 8
%2 = zext <4 x i8> %1 to <4 x i32>
%3 = bitcast i8* %y to <4 x i32>*
store <4 x i32> %2, <4 x i32>* %3, align 8
ret i8* %x
}
define i8* @post_ldrbs32_4(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbs32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.s32 q0, [r0, #4]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 4
%0 = bitcast i8* %z to <4 x i8>*
%1 = load <4 x i8>, <4 x i8>* %0, align 8
%2 = sext <4 x i8> %1 to <4 x i32>
%3 = bitcast i8* %y to <4 x i32>*
store <4 x i32> %2, <4 x i32>* %3, align 8
ret i8* %x
}
define i8* @post_ldrbs32_3(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbs32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r2, r0, #3
; CHECK-NEXT: vldrb.s32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %z to <4 x i8>*
%1 = load <4 x i8>, <4 x i8>* %0, align 8
%2 = sext <4 x i8> %1 to <4 x i32>
%3 = bitcast i8* %y to <4 x i32>*
store <4 x i32> %2, <4 x i32>* %3, align 8
ret i8* %x
}
define i8* @post_ldrbs32_127(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbs32_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: add.w r2, r0, #127
; CHECK-NEXT: vldrb.s32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 127
%0 = bitcast i8* %z to <4 x i8>*
%1 = load <4 x i8>, <4 x i8>* %0, align 8
%2 = sext <4 x i8> %1 to <4 x i32>
%3 = bitcast i8* %y to <4 x i32>*
store <4 x i32> %2, <4 x i32>* %3, align 8
ret i8* %x
}
define i8* @post_ldrbs32_128(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbs32_128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: add.w r2, r0, #128
; CHECK-NEXT: vldrb.s32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 128
%0 = bitcast i8* %z to <4 x i8>*
%1 = load <4 x i8>, <4 x i8>* %0, align 8
%2 = sext <4 x i8> %1 to <4 x i32>
%3 = bitcast i8* %y to <4 x i32>*
store <4 x i32> %2, <4 x i32>* %3, align 8
ret i8* %x
}
define i8* @post_ldrbu16_4(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u16 q0, [r0, #4]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 4
%0 = bitcast i8* %z to <8 x i8>*
%1 = load <8 x i8>, <8 x i8>* %0, align 8
%2 = zext <8 x i8> %1 to <8 x i16>
%3 = bitcast i8* %y to <8 x i16>*
store <8 x i16> %2, <8 x i16>* %3, align 8
ret i8* %x
}
define i8* @post_ldrbu16_3(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu16_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r2, r0, #3
; CHECK-NEXT: vldrb.u16 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %z to <8 x i8>*
%1 = load <8 x i8>, <8 x i8>* %0, align 8
%2 = zext <8 x i8> %1 to <8 x i16>
%3 = bitcast i8* %y to <8 x i16>*
store <8 x i16> %2, <8 x i16>* %3, align 8
ret i8* %x
}
define i8* @post_ldrbu16_127(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu16_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: add.w r2, r0, #127
; CHECK-NEXT: vldrb.u16 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 127
%0 = bitcast i8* %z to <8 x i8>*
%1 = load <8 x i8>, <8 x i8>* %0, align 8
%2 = zext <8 x i8> %1 to <8 x i16>
%3 = bitcast i8* %y to <8 x i16>*
store <8 x i16> %2, <8 x i16>* %3, align 8
ret i8* %x
}
define i8* @post_ldrbu16_128(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu16_128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: add.w r2, r0, #128
; CHECK-NEXT: vldrb.u16 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 128
%0 = bitcast i8* %z to <8 x i8>*
%1 = load <8 x i8>, <8 x i8>* %0, align 8
%2 = zext <8 x i8> %1 to <8 x i16>
%3 = bitcast i8* %y to <8 x i16>*
store <8 x i16> %2, <8 x i16>* %3, align 8
ret i8* %x
}
define i8* @post_ldrbs16_4(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbs16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.s16 q0, [r0, #4]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 4
%0 = bitcast i8* %z to <8 x i8>*
%1 = load <8 x i8>, <8 x i8>* %0, align 8
%2 = sext <8 x i8> %1 to <8 x i16>
%3 = bitcast i8* %y to <8 x i16>*
store <8 x i16> %2, <8 x i16>* %3, align 8
ret i8* %x
}
define i8* @post_ldrbs16_3(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbs16_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r2, r0, #3
; CHECK-NEXT: vldrb.s16 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %z to <8 x i8>*
%1 = load <8 x i8>, <8 x i8>* %0, align 8
%2 = sext <8 x i8> %1 to <8 x i16>
%3 = bitcast i8* %y to <8 x i16>*
store <8 x i16> %2, <8 x i16>* %3, align 8
ret i8* %x
}
define i8* @post_ldrbs16_127(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbs16_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: add.w r2, r0, #127
; CHECK-NEXT: vldrb.s16 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 127
%0 = bitcast i8* %z to <8 x i8>*
%1 = load <8 x i8>, <8 x i8>* %0, align 8
%2 = sext <8 x i8> %1 to <8 x i16>
%3 = bitcast i8* %y to <8 x i16>*
store <8 x i16> %2, <8 x i16>* %3, align 8
ret i8* %x
}
define i8* @post_ldrbs16_128(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbs16_128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: add.w r2, r0, #128
; CHECK-NEXT: vldrb.s16 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 128
%0 = bitcast i8* %z to <8 x i8>*
%1 = load <8 x i8>, <8 x i8>* %0, align 8
%2 = sext <8 x i8> %1 to <8 x i16>
%3 = bitcast i8* %y to <8 x i16>*
store <8 x i16> %2, <8 x i16>* %3, align 8
ret i8* %x
}
define i8* @post_ldrbu8_4(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu8_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r0, #4]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 4
%0 = bitcast i8* %z to <16 x i8>*
%1 = load <16 x i8>, <16 x i8>* %0, align 8
%2 = bitcast i8* %y to <16 x i8>*
store <16 x i8> %1, <16 x i8>* %2, align 8
ret i8* %x
}
define i8* @post_ldrbu8_3(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu8_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r2, r0, #3
; CHECK-NEXT: vldrw.u32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %z to <16 x i8>*
%1 = load <16 x i8>, <16 x i8>* %0, align 8
%2 = bitcast i8* %y to <16 x i8>*
store <16 x i8> %1, <16 x i8>* %2, align 8
ret i8* %x
}
define i8* @post_ldrbu8_127(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu8_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: add.w r2, r0, #127
; CHECK-NEXT: vldrw.u32 q0, [r2]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 127
%0 = bitcast i8* %z to <16 x i8>*
%1 = load <16 x i8>, <16 x i8>* %0, align 8
%2 = bitcast i8* %y to <16 x i8>*
store <16 x i8> %1, <16 x i8>* %2, align 8
ret i8* %x
}
define i8* @post_ldrbu8_128(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrbu8_128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r0, #128]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 128
%0 = bitcast i8* %z to <16 x i8>*
%1 = load <16 x i8>, <16 x i8>* %0, align 8
%2 = bitcast i8* %y to <16 x i8>*
store <16 x i8> %1, <16 x i8>* %2, align 8
ret i8* %x
}
define i8* @post_ldrwf32_4(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrwf32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r0, #4]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 4
%0 = bitcast i8* %z to <4 x float>*
%1 = load <4 x float>, <4 x float>* %0, align 8
%2 = bitcast i8* %y to <4 x float>*
store <4 x float> %1, <4 x float>* %2, align 8
ret i8* %x
}
define i8* @post_ldrwf16_4(i8* %x, i8* %y) {
; CHECK-LABEL: post_ldrwf16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r0, #4]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 4
%0 = bitcast i8* %z to <8 x half>*
%1 = load <8 x half>, <8 x half>* %0, align 8
%2 = bitcast i8* %y to <8 x half>*
store <8 x half> %1, <8 x half>* %2, align 8
ret i8* %x
}
define i8* @post_strw32_4(i8* %y, i8* %x) {
; CHECK-LABEL: post_strw32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vstrw.32 q0, [r0, #4]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
%0 = bitcast i8* %x to <4 x i32>*
%1 = load <4 x i32>, <4 x i32>* %0, align 8
%2 = bitcast i8* %z to <4 x i32>*
store <4 x i32> %1, <4 x i32>* %2, align 8
ret i8* %y
}
define i8* @post_strw32_3(i8* %y, i8* %x) {
; CHECK-LABEL: post_strw32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: adds r1, r0, #3
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <4 x i32>*
%1 = load <4 x i32>, <4 x i32>* %0, align 8
%2 = bitcast i8* %z to <4 x i32>*
store <4 x i32> %1, <4 x i32>* %2, align 8
ret i8* %y
}
define i8* @post_strw32_m4(i8* %y, i8* %x) {
; CHECK-LABEL: post_strw32_m4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vstrw.32 q0, [r0, #-4]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 -4
%0 = bitcast i8* %x to <4 x i32>*
%1 = load <4 x i32>, <4 x i32>* %0, align 8
%2 = bitcast i8* %z to <4 x i32>*
store <4 x i32> %1, <4 x i32>* %2, align 8
ret i8* %y
}
define i8* @post_strw32_508(i8* %y, i8* %x) {
; CHECK-LABEL: post_strw32_508:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: add.w r1, r0, #508
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 508
%0 = bitcast i8* %x to <4 x i32>*
%1 = load <4 x i32>, <4 x i32>* %0, align 8
%2 = bitcast i8* %z to <4 x i32>*
store <4 x i32> %1, <4 x i32>* %2, align 8
ret i8* %y
}
define i8* @post_strw32_512(i8* %y, i8* %x) {
; CHECK-LABEL: post_strw32_512:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: add.w r1, r0, #512
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 512
%0 = bitcast i8* %x to <4 x i32>*
%1 = load <4 x i32>, <4 x i32>* %0, align 8
%2 = bitcast i8* %z to <4 x i32>*
store <4 x i32> %1, <4 x i32>* %2, align 8
ret i8* %y
}
define i8* @post_strw32_m508(i8* %y, i8* %x) {
; CHECK-LABEL: post_strw32_m508:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: sub.w r1, r0, #508
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 -508
%0 = bitcast i8* %x to <4 x i32>*
%1 = load <4 x i32>, <4 x i32>* %0, align 8
%2 = bitcast i8* %z to <4 x i32>*
store <4 x i32> %1, <4 x i32>* %2, align 8
ret i8* %y
}
define i8* @post_strw32_m512(i8* %y, i8* %x) {
; CHECK-LABEL: post_strw32_m512:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: sub.w r1, r0, #512
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 -512
%0 = bitcast i8* %x to <4 x i32>*
%1 = load <4 x i32>, <4 x i32>* %0, align 8
%2 = bitcast i8* %z to <4 x i32>*
store <4 x i32> %1, <4 x i32>* %2, align 8
ret i8* %y
}
define i8* @post_strh32_4(i8* %y, i8* %x) {
; CHECK-LABEL: post_strh32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u32 q0, [r1]
; CHECK-NEXT: vstrh.32 q0, [r0, #4]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
%0 = bitcast i8* %x to <4 x i16>*
%1 = load <4 x i16>, <4 x i16>* %0, align 8
%2 = bitcast i8* %z to <4 x i16>*
store <4 x i16> %1, <4 x i16>* %2, align 8
ret i8* %y
}
define i8* @post_strh32_3(i8* %y, i8* %x) {
; CHECK-LABEL: post_strh32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u32 q0, [r1]
; CHECK-NEXT: adds r1, r0, #3
; CHECK-NEXT: vstrh.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <4 x i16>*
%1 = load <4 x i16>, <4 x i16>* %0, align 8
%2 = bitcast i8* %z to <4 x i16>*
store <4 x i16> %1, <4 x i16>* %2, align 8
ret i8* %y
}
define i8* @post_strh32_2(i8* %y, i8* %x) {
; CHECK-LABEL: post_strh32_2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u32 q0, [r1]
; CHECK-NEXT: adds r1, r0, #2
; CHECK-NEXT: vstrh.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 2
%0 = bitcast i8* %x to <4 x i16>*
%1 = load <4 x i16>, <4 x i16>* %0, align 8
%2 = bitcast i8* %z to <4 x i16>*
store <4 x i16> %1, <4 x i16>* %2, align 8
ret i8* %y
}
define i8* @post_strh32_254(i8* %y, i8* %x) {
; CHECK-LABEL: post_strh32_254:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u32 q0, [r1]
; CHECK-NEXT: add.w r1, r0, #254
; CHECK-NEXT: vstrh.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 254
%0 = bitcast i8* %x to <4 x i16>*
%1 = load <4 x i16>, <4 x i16>* %0, align 8
%2 = bitcast i8* %z to <4 x i16>*
store <4 x i16> %1, <4 x i16>* %2, align 8
ret i8* %y
}
define i8* @post_strh32_256(i8* %y, i8* %x) {
; CHECK-LABEL: post_strh32_256:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u32 q0, [r1]
; CHECK-NEXT: add.w r1, r0, #256
; CHECK-NEXT: vstrh.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 256
%0 = bitcast i8* %x to <4 x i16>*
%1 = load <4 x i16>, <4 x i16>* %0, align 8
%2 = bitcast i8* %z to <4 x i16>*
store <4 x i16> %1, <4 x i16>* %2, align 8
ret i8* %y
}
define i8* @post_strh16_4(i8* %y, i8* %x) {
; CHECK-LABEL: post_strh16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vstrw.32 q0, [r0, #4]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
%0 = bitcast i8* %x to <8 x i16>*
%1 = load <8 x i16>, <8 x i16>* %0, align 8
%2 = bitcast i8* %z to <8 x i16>*
store <8 x i16> %1, <8 x i16>* %2, align 8
ret i8* %y
}
define i8* @post_strh16_3(i8* %y, i8* %x) {
; CHECK-LABEL: post_strh16_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: adds r1, r0, #3
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <8 x i16>*
%1 = load <8 x i16>, <8 x i16>* %0, align 8
%2 = bitcast i8* %z to <8 x i16>*
store <8 x i16> %1, <8 x i16>* %2, align 8
ret i8* %y
}
define i8* @post_strh16_2(i8* %y, i8* %x) {
; CHECK-LABEL: post_strh16_2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: adds r1, r0, #2
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 2
%0 = bitcast i8* %x to <8 x i16>*
%1 = load <8 x i16>, <8 x i16>* %0, align 8
%2 = bitcast i8* %z to <8 x i16>*
store <8 x i16> %1, <8 x i16>* %2, align 8
ret i8* %y
}
define i8* @post_strh16_254(i8* %y, i8* %x) {
; CHECK-LABEL: post_strh16_254:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: add.w r1, r0, #254
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 254
%0 = bitcast i8* %x to <8 x i16>*
%1 = load <8 x i16>, <8 x i16>* %0, align 8
%2 = bitcast i8* %z to <8 x i16>*
store <8 x i16> %1, <8 x i16>* %2, align 8
ret i8* %y
}
define i8* @post_strh16_256(i8* %y, i8* %x) {
; CHECK-LABEL: post_strh16_256:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: add.w r1, r0, #256
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 256
%0 = bitcast i8* %x to <8 x i16>*
%1 = load <8 x i16>, <8 x i16>* %0, align 8
%2 = bitcast i8* %z to <8 x i16>*
store <8 x i16> %1, <8 x i16>* %2, align 8
ret i8* %y
}
define i8* @post_strb32_4(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u32 q0, [r1]
; CHECK-NEXT: vstrb.32 q0, [r0, #4]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
%0 = bitcast i8* %x to <4 x i8>*
%1 = load <4 x i8>, <4 x i8>* %0, align 8
%2 = bitcast i8* %z to <4 x i8>*
store <4 x i8> %1, <4 x i8>* %2, align 8
ret i8* %y
}
define i8* @post_strb32_3(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u32 q0, [r1]
; CHECK-NEXT: adds r1, r0, #3
; CHECK-NEXT: vstrb.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <4 x i8>*
%1 = load <4 x i8>, <4 x i8>* %0, align 8
%2 = bitcast i8* %z to <4 x i8>*
store <4 x i8> %1, <4 x i8>* %2, align 8
ret i8* %y
}
define i8* @post_strb32_127(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb32_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u32 q0, [r1]
; CHECK-NEXT: add.w r1, r0, #127
; CHECK-NEXT: vstrb.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 127
%0 = bitcast i8* %x to <4 x i8>*
%1 = load <4 x i8>, <4 x i8>* %0, align 8
%2 = bitcast i8* %z to <4 x i8>*
store <4 x i8> %1, <4 x i8>* %2, align 8
ret i8* %y
}
define i8* @post_strb32_128(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb32_128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u32 q0, [r1]
; CHECK-NEXT: add.w r1, r0, #128
; CHECK-NEXT: vstrb.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 128
%0 = bitcast i8* %x to <4 x i8>*
%1 = load <4 x i8>, <4 x i8>* %0, align 8
%2 = bitcast i8* %z to <4 x i8>*
store <4 x i8> %1, <4 x i8>* %2, align 8
ret i8* %y
}
define i8* @post_strb16_4(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u16 q0, [r1]
; CHECK-NEXT: vstrb.16 q0, [r0, #4]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
%0 = bitcast i8* %x to <8 x i8>*
%1 = load <8 x i8>, <8 x i8>* %0, align 8
%2 = bitcast i8* %z to <8 x i8>*
store <8 x i8> %1, <8 x i8>* %2, align 8
ret i8* %y
}
define i8* @post_strb16_3(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb16_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u16 q0, [r1]
; CHECK-NEXT: adds r1, r0, #3
; CHECK-NEXT: vstrb.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <8 x i8>*
%1 = load <8 x i8>, <8 x i8>* %0, align 8
%2 = bitcast i8* %z to <8 x i8>*
store <8 x i8> %1, <8 x i8>* %2, align 8
ret i8* %y
}
define i8* @post_strb16_127(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb16_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u16 q0, [r1]
; CHECK-NEXT: add.w r1, r0, #127
; CHECK-NEXT: vstrb.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 127
%0 = bitcast i8* %x to <8 x i8>*
%1 = load <8 x i8>, <8 x i8>* %0, align 8
%2 = bitcast i8* %z to <8 x i8>*
store <8 x i8> %1, <8 x i8>* %2, align 8
ret i8* %y
}
define i8* @post_strb16_128(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb16_128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u16 q0, [r1]
; CHECK-NEXT: add.w r1, r0, #128
; CHECK-NEXT: vstrb.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 128
%0 = bitcast i8* %x to <8 x i8>*
%1 = load <8 x i8>, <8 x i8>* %0, align 8
%2 = bitcast i8* %z to <8 x i8>*
store <8 x i8> %1, <8 x i8>* %2, align 8
ret i8* %y
}
define i8* @post_strb8_4(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb8_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vstrw.32 q0, [r0, #4]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
%0 = bitcast i8* %x to <16 x i8>*
%1 = load <16 x i8>, <16 x i8>* %0, align 8
%2 = bitcast i8* %z to <16 x i8>*
store <16 x i8> %1, <16 x i8>* %2, align 8
ret i8* %y
}
define i8* @post_strb8_3(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb8_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: adds r1, r0, #3
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <16 x i8>*
%1 = load <16 x i8>, <16 x i8>* %0, align 8
%2 = bitcast i8* %z to <16 x i8>*
store <16 x i8> %1, <16 x i8>* %2, align 8
ret i8* %y
}
define i8* @post_strb8_127(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb8_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: add.w r1, r0, #127
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 127
%0 = bitcast i8* %x to <16 x i8>*
%1 = load <16 x i8>, <16 x i8>* %0, align 8
%2 = bitcast i8* %z to <16 x i8>*
store <16 x i8> %1, <16 x i8>* %2, align 8
ret i8* %y
}
define i8* @post_strb8_128(i8* %y, i8* %x) {
; CHECK-LABEL: post_strb8_128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vstrw.32 q0, [r0, #128]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 128
%0 = bitcast i8* %x to <16 x i8>*
%1 = load <16 x i8>, <16 x i8>* %0, align 8
%2 = bitcast i8* %z to <16 x i8>*
store <16 x i8> %1, <16 x i8>* %2, align 8
ret i8* %y
}
define i8* @post_strf32_4(i8* %y, i8* %x) {
; CHECK-LABEL: post_strf32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vstrw.32 q0, [r0, #4]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
%0 = bitcast i8* %x to <4 x float>*
%1 = load <4 x float>, <4 x float>* %0, align 8
%2 = bitcast i8* %z to <4 x float>*
store <4 x float> %1, <4 x float>* %2, align 8
ret i8* %y
}
define i8* @post_strf16_4(i8* %y, i8* %x) {
; CHECK-LABEL: post_strf16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vstrw.32 q0, [r0, #4]
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
%0 = bitcast i8* %x to <8 x half>*
%1 = load <8 x half>, <8 x half>* %0, align 8
%2 = bitcast i8* %z to <8 x half>*
store <8 x half> %1, <8 x half>* %2, align 8
ret i8* %y
}