1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00
llvm-mirror/test/CodeGen/Thumb2/mve-masked-store.ll
David Green 684e62e531 [ARM] Remove hasSideEffects from FP converts
Whether an instruction is deemed to have side effects is determined by
whether it has a tblgen pattern that emits a single instruction.
Because of the way a lot of the vcvt instructions are specified
either in dagtodag code or with patterns that emit multiple
instructions, they don't get marked as not having side effects.

This just marks them as not having side effects manually. It can help
especially with instruction scheduling, to not create artificial
barriers, but one of these tests also managed to produce fewer
instructions.

Differential Revision: https://reviews.llvm.org/D81639
2020-07-05 16:23:24 +01:00

1764 lines
61 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
; RUN: llc -mtriple=thumbebv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
; Masked store of <4 x i32> with natural (4-byte) alignment. Expected lowering
; is a single predicated MVE store (vstrwt.32) inside a VPT block; big-endian
; additionally needs a vrev64.32 to fix up the argument-passing lane order.
define arm_aapcs_vfpcc void @masked_v4i32(<4 x i32> *%dest, <4 x i32> %a) {
; CHECK-LE-LABEL: masked_v4i32:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vpt.s32 gt, q0, zr
; CHECK-LE-NEXT: vstrwt.32 q0, [r0]
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: masked_v4i32:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vrev64.32 q1, q0
; CHECK-BE-NEXT: vpt.s32 gt, q1, zr
; CHECK-BE-NEXT: vstrwt.32 q1, [r0]
; CHECK-BE-NEXT: bx lr
entry:
; Per-lane mask is "a > 0"; the compared vector is also the stored value.
%c = icmp sgt <4 x i32> %a, zeroinitializer
call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %a, <4 x i32>* %dest, i32 4, <4 x i1> %c)
ret void
}
; Masked store of <4 x i32> with only 1-byte alignment. The expected output
; shows the store being scalarized (presumably because the predicated vstrw
; form needs natural alignment — confirm against ARMTargetLowering): the VPR
; predicate is moved to a GPR with vmrs, compressed to a 4-bit mask via
; ubfx/rsbs/bfi, and each lane is stored with an IT-predicated scalar str.
define arm_aapcs_vfpcc void @masked_v4i32_align1(<4 x i32> *%dest, <4 x i32> %a) {
; CHECK-LE-LABEL: masked_v4i32_align1:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: .pad #4
; CHECK-LE-NEXT: sub sp, #4
; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr
; CHECK-LE-NEXT: vmrs r2, p0
; CHECK-LE-NEXT: and r1, r2, #1
; CHECK-LE-NEXT: rsbs r3, r1, #0
; CHECK-LE-NEXT: movs r1, #0
; CHECK-LE-NEXT: bfi r1, r3, #0, #1
; CHECK-LE-NEXT: ubfx r3, r2, #4, #1
; CHECK-LE-NEXT: rsbs r3, r3, #0
; CHECK-LE-NEXT: bfi r1, r3, #1, #1
; CHECK-LE-NEXT: ubfx r3, r2, #8, #1
; CHECK-LE-NEXT: ubfx r2, r2, #12, #1
; CHECK-LE-NEXT: rsbs r3, r3, #0
; CHECK-LE-NEXT: bfi r1, r3, #2, #1
; CHECK-LE-NEXT: rsbs r2, r2, #0
; CHECK-LE-NEXT: bfi r1, r2, #3, #1
; CHECK-LE-NEXT: lsls r2, r1, #31
; CHECK-LE-NEXT: itt ne
; CHECK-LE-NEXT: vmovne r2, s0
; CHECK-LE-NEXT: strne r2, [r0]
; CHECK-LE-NEXT: lsls r2, r1, #30
; CHECK-LE-NEXT: itt mi
; CHECK-LE-NEXT: vmovmi r2, s1
; CHECK-LE-NEXT: strmi r2, [r0, #4]
; CHECK-LE-NEXT: lsls r2, r1, #29
; CHECK-LE-NEXT: itt mi
; CHECK-LE-NEXT: vmovmi r2, s2
; CHECK-LE-NEXT: strmi r2, [r0, #8]
; CHECK-LE-NEXT: lsls r1, r1, #28
; CHECK-LE-NEXT: itt mi
; CHECK-LE-NEXT: vmovmi r1, s3
; CHECK-LE-NEXT: strmi r1, [r0, #12]
; CHECK-LE-NEXT: add sp, #4
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: masked_v4i32_align1:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: .pad #4
; CHECK-BE-NEXT: sub sp, #4
; CHECK-BE-NEXT: vrev64.32 q1, q0
; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr
; CHECK-BE-NEXT: vmrs r2, p0
; CHECK-BE-NEXT: and r1, r2, #1
; CHECK-BE-NEXT: rsbs r3, r1, #0
; CHECK-BE-NEXT: movs r1, #0
; CHECK-BE-NEXT: bfi r1, r3, #0, #1
; CHECK-BE-NEXT: ubfx r3, r2, #4, #1
; CHECK-BE-NEXT: rsbs r3, r3, #0
; CHECK-BE-NEXT: bfi r1, r3, #1, #1
; CHECK-BE-NEXT: ubfx r3, r2, #8, #1
; CHECK-BE-NEXT: ubfx r2, r2, #12, #1
; CHECK-BE-NEXT: rsbs r3, r3, #0
; CHECK-BE-NEXT: bfi r1, r3, #2, #1
; CHECK-BE-NEXT: rsbs r2, r2, #0
; CHECK-BE-NEXT: bfi r1, r2, #3, #1
; CHECK-BE-NEXT: lsls r2, r1, #31
; CHECK-BE-NEXT: itt ne
; CHECK-BE-NEXT: vmovne r2, s4
; CHECK-BE-NEXT: strne r2, [r0]
; CHECK-BE-NEXT: lsls r2, r1, #30
; CHECK-BE-NEXT: itt mi
; CHECK-BE-NEXT: vmovmi r2, s5
; CHECK-BE-NEXT: strmi r2, [r0, #4]
; CHECK-BE-NEXT: lsls r2, r1, #29
; CHECK-BE-NEXT: itt mi
; CHECK-BE-NEXT: vmovmi r2, s6
; CHECK-BE-NEXT: strmi r2, [r0, #8]
; CHECK-BE-NEXT: lsls r1, r1, #28
; CHECK-BE-NEXT: itt mi
; CHECK-BE-NEXT: vmovmi r1, s7
; CHECK-BE-NEXT: strmi r1, [r0, #12]
; CHECK-BE-NEXT: add sp, #4
; CHECK-BE-NEXT: bx lr
entry:
%c = icmp sgt <4 x i32> %a, zeroinitializer
; Same store as @masked_v4i32 but with alignment 1 instead of 4.
call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %a, <4 x i32>* %dest, i32 1, <4 x i1> %c)
ret void
}
; Pre-incremented masked store: the GEP by 4 bytes is folded into the
; predicated store's pre-indexed writeback form, vstrwt.32 q1, [r0, #4]!,
; and the updated pointer is returned in r0.
define i8* @masked_v4i32_pre(i8* %y, i8* %x, <4 x i32> %a) {
; CHECK-LE-LABEL: masked_v4i32_pre:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vldr d1, [sp]
; CHECK-LE-NEXT: vldrw.u32 q1, [r1]
; CHECK-LE-NEXT: vmov d0, r2, r3
; CHECK-LE-NEXT: vpt.s32 gt, q0, zr
; CHECK-LE-NEXT: vstrwt.32 q1, [r0, #4]!
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: masked_v4i32_pre:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vldr d1, [sp]
; CHECK-BE-NEXT: vldrw.u32 q1, [r1]
; CHECK-BE-NEXT: vmov d0, r3, r2
; CHECK-BE-NEXT: vrev64.32 q2, q0
; CHECK-BE-NEXT: vpt.s32 gt, q2, zr
; CHECK-BE-NEXT: vstrwt.32 q1, [r0, #4]!
; CHECK-BE-NEXT: bx lr
entry:
; %z = %y + 4; the store goes to %z and %z is also the return value,
; which is what makes the pre-indexed writeback form profitable.
%z = getelementptr inbounds i8, i8* %y, i32 4
%0 = bitcast i8* %x to <4 x i32>*
%1 = load <4 x i32>, <4 x i32>* %0, align 4
%2 = bitcast i8* %z to <4 x i32>*
%c = icmp sgt <4 x i32> %a, zeroinitializer
call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c)
ret i8* %z
}
; Post-incremented masked store: the store is to the original pointer %y while
; %y + 4 is returned, matching the post-indexed form vstrwt.32 q1, [r0], #4.
define i8* @masked_v4i32_post(i8* %y, i8* %x, <4 x i32> %a) {
; CHECK-LE-LABEL: masked_v4i32_post:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vldr d1, [sp]
; CHECK-LE-NEXT: vldrw.u32 q1, [r1]
; CHECK-LE-NEXT: vmov d0, r2, r3
; CHECK-LE-NEXT: vpt.s32 gt, q0, zr
; CHECK-LE-NEXT: vstrwt.32 q1, [r0], #4
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: masked_v4i32_post:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vldr d1, [sp]
; CHECK-BE-NEXT: vldrw.u32 q1, [r1]
; CHECK-BE-NEXT: vmov d0, r3, r2
; CHECK-BE-NEXT: vrev64.32 q2, q0
; CHECK-BE-NEXT: vpt.s32 gt, q2, zr
; CHECK-BE-NEXT: vstrwt.32 q1, [r0], #4
; CHECK-BE-NEXT: bx lr
entry:
; Store target is %y itself; %z = %y + 4 is only the returned value.
%z = getelementptr inbounds i8, i8* %y, i32 4
%0 = bitcast i8* %x to <4 x i32>*
%1 = load <4 x i32>, <4 x i32>* %0, align 4
%2 = bitcast i8* %y to <4 x i32>*
%c = icmp sgt <4 x i32> %a, zeroinitializer
call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c)
ret i8* %z
}
; Masked store of <8 x i16> with natural (2-byte) alignment: a single
; predicated vstrht.16 under a VPT block (plus vrev64.16 on big-endian).
define arm_aapcs_vfpcc void @masked_v8i16(<8 x i16> *%dest, <8 x i16> %a) {
; CHECK-LE-LABEL: masked_v8i16:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vpt.s16 gt, q0, zr
; CHECK-LE-NEXT: vstrht.16 q0, [r0]
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: masked_v8i16:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vrev64.16 q1, q0
; CHECK-BE-NEXT: vpt.s16 gt, q1, zr
; CHECK-BE-NEXT: vstrht.16 q1, [r0]
; CHECK-BE-NEXT: bx lr
entry:
%c = icmp sgt <8 x i16> %a, zeroinitializer
call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %a, <8 x i16>* %dest, i32 2, <8 x i1> %c)
ret void
}
; Masked store of <8 x i16> with only 1-byte alignment, scalarized to eight
; IT-predicated strh stores. The 16-bit predicate is read with vmrs, each
; odd-even bit pair is collapsed to one mask bit (ubfx at even offsets 2,4,...
; then rsbs/bfi), and lanes are extracted with vmov.u16 before the
; conditional scalar stores.
define arm_aapcs_vfpcc void @masked_v8i16_align1(<8 x i16> *%dest, <8 x i16> %a) {
; CHECK-LE-LABEL: masked_v8i16_align1:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: .pad #8
; CHECK-LE-NEXT: sub sp, #8
; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr
; CHECK-LE-NEXT: vmrs r1, p0
; CHECK-LE-NEXT: and r2, r1, #1
; CHECK-LE-NEXT: rsbs r3, r2, #0
; CHECK-LE-NEXT: movs r2, #0
; CHECK-LE-NEXT: bfi r2, r3, #0, #1
; CHECK-LE-NEXT: ubfx r3, r1, #2, #1
; CHECK-LE-NEXT: rsbs r3, r3, #0
; CHECK-LE-NEXT: bfi r2, r3, #1, #1
; CHECK-LE-NEXT: ubfx r3, r1, #4, #1
; CHECK-LE-NEXT: rsbs r3, r3, #0
; CHECK-LE-NEXT: bfi r2, r3, #2, #1
; CHECK-LE-NEXT: ubfx r3, r1, #6, #1
; CHECK-LE-NEXT: rsbs r3, r3, #0
; CHECK-LE-NEXT: bfi r2, r3, #3, #1
; CHECK-LE-NEXT: ubfx r3, r1, #8, #1
; CHECK-LE-NEXT: rsbs r3, r3, #0
; CHECK-LE-NEXT: bfi r2, r3, #4, #1
; CHECK-LE-NEXT: ubfx r3, r1, #10, #1
; CHECK-LE-NEXT: rsbs r3, r3, #0
; CHECK-LE-NEXT: bfi r2, r3, #5, #1
; CHECK-LE-NEXT: ubfx r3, r1, #12, #1
; CHECK-LE-NEXT: ubfx r1, r1, #14, #1
; CHECK-LE-NEXT: rsbs r3, r3, #0
; CHECK-LE-NEXT: bfi r2, r3, #6, #1
; CHECK-LE-NEXT: rsbs r1, r1, #0
; CHECK-LE-NEXT: bfi r2, r1, #7, #1
; CHECK-LE-NEXT: uxtb r1, r2
; CHECK-LE-NEXT: lsls r2, r2, #31
; CHECK-LE-NEXT: itt ne
; CHECK-LE-NEXT: vmovne.u16 r2, q0[0]
; CHECK-LE-NEXT: strhne r2, [r0]
; CHECK-LE-NEXT: lsls r2, r1, #30
; CHECK-LE-NEXT: itt mi
; CHECK-LE-NEXT: vmovmi.u16 r2, q0[1]
; CHECK-LE-NEXT: strhmi r2, [r0, #2]
; CHECK-LE-NEXT: lsls r2, r1, #29
; CHECK-LE-NEXT: itt mi
; CHECK-LE-NEXT: vmovmi.u16 r2, q0[2]
; CHECK-LE-NEXT: strhmi r2, [r0, #4]
; CHECK-LE-NEXT: lsls r2, r1, #28
; CHECK-LE-NEXT: itt mi
; CHECK-LE-NEXT: vmovmi.u16 r2, q0[3]
; CHECK-LE-NEXT: strhmi r2, [r0, #6]
; CHECK-LE-NEXT: lsls r2, r1, #27
; CHECK-LE-NEXT: itt mi
; CHECK-LE-NEXT: vmovmi.u16 r2, q0[4]
; CHECK-LE-NEXT: strhmi r2, [r0, #8]
; CHECK-LE-NEXT: lsls r2, r1, #26
; CHECK-LE-NEXT: itt mi
; CHECK-LE-NEXT: vmovmi.u16 r2, q0[5]
; CHECK-LE-NEXT: strhmi r2, [r0, #10]
; CHECK-LE-NEXT: lsls r2, r1, #25
; CHECK-LE-NEXT: itt mi
; CHECK-LE-NEXT: vmovmi.u16 r2, q0[6]
; CHECK-LE-NEXT: strhmi r2, [r0, #12]
; CHECK-LE-NEXT: lsls r1, r1, #24
; CHECK-LE-NEXT: itt mi
; CHECK-LE-NEXT: vmovmi.u16 r1, q0[7]
; CHECK-LE-NEXT: strhmi r1, [r0, #14]
; CHECK-LE-NEXT: add sp, #8
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: masked_v8i16_align1:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: .pad #8
; CHECK-BE-NEXT: sub sp, #8
; CHECK-BE-NEXT: vrev64.16 q1, q0
; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr
; CHECK-BE-NEXT: vmrs r1, p0
; CHECK-BE-NEXT: and r2, r1, #1
; CHECK-BE-NEXT: rsbs r3, r2, #0
; CHECK-BE-NEXT: movs r2, #0
; CHECK-BE-NEXT: bfi r2, r3, #0, #1
; CHECK-BE-NEXT: ubfx r3, r1, #2, #1
; CHECK-BE-NEXT: rsbs r3, r3, #0
; CHECK-BE-NEXT: bfi r2, r3, #1, #1
; CHECK-BE-NEXT: ubfx r3, r1, #4, #1
; CHECK-BE-NEXT: rsbs r3, r3, #0
; CHECK-BE-NEXT: bfi r2, r3, #2, #1
; CHECK-BE-NEXT: ubfx r3, r1, #6, #1
; CHECK-BE-NEXT: rsbs r3, r3, #0
; CHECK-BE-NEXT: bfi r2, r3, #3, #1
; CHECK-BE-NEXT: ubfx r3, r1, #8, #1
; CHECK-BE-NEXT: rsbs r3, r3, #0
; CHECK-BE-NEXT: bfi r2, r3, #4, #1
; CHECK-BE-NEXT: ubfx r3, r1, #10, #1
; CHECK-BE-NEXT: rsbs r3, r3, #0
; CHECK-BE-NEXT: bfi r2, r3, #5, #1
; CHECK-BE-NEXT: ubfx r3, r1, #12, #1
; CHECK-BE-NEXT: ubfx r1, r1, #14, #1
; CHECK-BE-NEXT: rsbs r3, r3, #0
; CHECK-BE-NEXT: bfi r2, r3, #6, #1
; CHECK-BE-NEXT: rsbs r1, r1, #0
; CHECK-BE-NEXT: bfi r2, r1, #7, #1
; CHECK-BE-NEXT: uxtb r1, r2
; CHECK-BE-NEXT: lsls r2, r2, #31
; CHECK-BE-NEXT: itt ne
; CHECK-BE-NEXT: vmovne.u16 r2, q1[0]
; CHECK-BE-NEXT: strhne r2, [r0]
; CHECK-BE-NEXT: lsls r2, r1, #30
; CHECK-BE-NEXT: itt mi
; CHECK-BE-NEXT: vmovmi.u16 r2, q1[1]
; CHECK-BE-NEXT: strhmi r2, [r0, #2]
; CHECK-BE-NEXT: lsls r2, r1, #29
; CHECK-BE-NEXT: itt mi
; CHECK-BE-NEXT: vmovmi.u16 r2, q1[2]
; CHECK-BE-NEXT: strhmi r2, [r0, #4]
; CHECK-BE-NEXT: lsls r2, r1, #28
; CHECK-BE-NEXT: itt mi
; CHECK-BE-NEXT: vmovmi.u16 r2, q1[3]
; CHECK-BE-NEXT: strhmi r2, [r0, #6]
; CHECK-BE-NEXT: lsls r2, r1, #27
; CHECK-BE-NEXT: itt mi
; CHECK-BE-NEXT: vmovmi.u16 r2, q1[4]
; CHECK-BE-NEXT: strhmi r2, [r0, #8]
; CHECK-BE-NEXT: lsls r2, r1, #26
; CHECK-BE-NEXT: itt mi
; CHECK-BE-NEXT: vmovmi.u16 r2, q1[5]
; CHECK-BE-NEXT: strhmi r2, [r0, #10]
; CHECK-BE-NEXT: lsls r2, r1, #25
; CHECK-BE-NEXT: itt mi
; CHECK-BE-NEXT: vmovmi.u16 r2, q1[6]
; CHECK-BE-NEXT: strhmi r2, [r0, #12]
; CHECK-BE-NEXT: lsls r1, r1, #24
; CHECK-BE-NEXT: itt mi
; CHECK-BE-NEXT: vmovmi.u16 r1, q1[7]
; CHECK-BE-NEXT: strhmi r1, [r0, #14]
; CHECK-BE-NEXT: add sp, #8
; CHECK-BE-NEXT: bx lr
entry:
%c = icmp sgt <8 x i16> %a, zeroinitializer
; Same store as @masked_v8i16 but with alignment 1 instead of 2.
call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %a, <8 x i16>* %dest, i32 1, <8 x i1> %c)
ret void
}
; Pre-incremented masked i16 store: folds the +4 GEP into the writeback form
; vstrht.16 q1, [r0, #4]! and returns the updated pointer.
define i8* @masked_v8i16_pre(i8* %y, i8* %x, <8 x i16> %a) {
; CHECK-LE-LABEL: masked_v8i16_pre:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vldr d1, [sp]
; CHECK-LE-NEXT: vldrw.u32 q1, [r1]
; CHECK-LE-NEXT: vmov d0, r2, r3
; CHECK-LE-NEXT: vpt.s16 gt, q0, zr
; CHECK-LE-NEXT: vstrht.16 q1, [r0, #4]!
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: masked_v8i16_pre:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vldr d1, [sp]
; CHECK-BE-NEXT: vldrh.u16 q1, [r1]
; CHECK-BE-NEXT: vmov d0, r3, r2
; CHECK-BE-NEXT: vrev64.16 q2, q0
; CHECK-BE-NEXT: vpt.s16 gt, q2, zr
; CHECK-BE-NEXT: vstrht.16 q1, [r0, #4]!
; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
%0 = bitcast i8* %x to <8 x i16>*
%1 = load <8 x i16>, <8 x i16>* %0, align 4
%2 = bitcast i8* %z to <8 x i16>*
%c = icmp sgt <8 x i16> %a, zeroinitializer
call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c)
ret i8* %z
}
; Post-incremented masked i16 store: stores to %y and returns %y + 4, matching
; the post-indexed form vstrht.16 q1, [r0], #4.
define i8* @masked_v8i16_post(i8* %y, i8* %x, <8 x i16> %a) {
; CHECK-LE-LABEL: masked_v8i16_post:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vldr d1, [sp]
; CHECK-LE-NEXT: vldrw.u32 q1, [r1]
; CHECK-LE-NEXT: vmov d0, r2, r3
; CHECK-LE-NEXT: vpt.s16 gt, q0, zr
; CHECK-LE-NEXT: vstrht.16 q1, [r0], #4
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: masked_v8i16_post:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vldr d1, [sp]
; CHECK-BE-NEXT: vldrh.u16 q1, [r1]
; CHECK-BE-NEXT: vmov d0, r3, r2
; CHECK-BE-NEXT: vrev64.16 q2, q0
; CHECK-BE-NEXT: vpt.s16 gt, q2, zr
; CHECK-BE-NEXT: vstrht.16 q1, [r0], #4
; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
%0 = bitcast i8* %x to <8 x i16>*
%1 = load <8 x i16>, <8 x i16>* %0, align 4
%2 = bitcast i8* %y to <8 x i16>*
%c = icmp sgt <8 x i16> %a, zeroinitializer
call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c)
ret i8* %z
}
; Masked store of <16 x i8>: byte stores are never under-aligned, so this is
; always a single predicated vstrbt.8 (plus vrev64.8 on big-endian).
define arm_aapcs_vfpcc void @masked_v16i8(<16 x i8> *%dest, <16 x i8> %a) {
; CHECK-LE-LABEL: masked_v16i8:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vpt.s8 gt, q0, zr
; CHECK-LE-NEXT: vstrbt.8 q0, [r0]
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: masked_v16i8:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vrev64.8 q1, q0
; CHECK-BE-NEXT: vpt.s8 gt, q1, zr
; CHECK-BE-NEXT: vstrbt.8 q1, [r0]
; CHECK-BE-NEXT: bx lr
entry:
%c = icmp sgt <16 x i8> %a, zeroinitializer
call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %a, <16 x i8>* %dest, i32 1, <16 x i1> %c)
ret void
}
; Pre-incremented masked byte store: +4 GEP folded into the writeback form
; vstrbt.8 q1, [r0, #4]!; updated pointer returned in r0.
define i8* @masked_v16i8_pre(i8* %y, i8* %x, <16 x i8> %a) {
; CHECK-LE-LABEL: masked_v16i8_pre:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vldr d1, [sp]
; CHECK-LE-NEXT: vldrw.u32 q1, [r1]
; CHECK-LE-NEXT: vmov d0, r2, r3
; CHECK-LE-NEXT: vpt.s8 gt, q0, zr
; CHECK-LE-NEXT: vstrbt.8 q1, [r0, #4]!
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: masked_v16i8_pre:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vldr d1, [sp]
; CHECK-BE-NEXT: vldrb.u8 q1, [r1]
; CHECK-BE-NEXT: vmov d0, r3, r2
; CHECK-BE-NEXT: vrev64.8 q2, q0
; CHECK-BE-NEXT: vpt.s8 gt, q2, zr
; CHECK-BE-NEXT: vstrbt.8 q1, [r0, #4]!
; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
%0 = bitcast i8* %x to <16 x i8>*
%1 = load <16 x i8>, <16 x i8>* %0, align 4
%2 = bitcast i8* %z to <16 x i8>*
%c = icmp sgt <16 x i8> %a, zeroinitializer
call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c)
ret i8* %z
}
; Post-incremented masked byte store: stores to %y, returns %y + 4, matching
; the post-indexed form vstrbt.8 q1, [r0], #4.
define i8* @masked_v16i8_post(i8* %y, i8* %x, <16 x i8> %a) {
; CHECK-LE-LABEL: masked_v16i8_post:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vldr d1, [sp]
; CHECK-LE-NEXT: vldrw.u32 q1, [r1]
; CHECK-LE-NEXT: vmov d0, r2, r3
; CHECK-LE-NEXT: vpt.s8 gt, q0, zr
; CHECK-LE-NEXT: vstrbt.8 q1, [r0], #4
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: masked_v16i8_post:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vldr d1, [sp]
; CHECK-BE-NEXT: vldrb.u8 q1, [r1]
; CHECK-BE-NEXT: vmov d0, r3, r2
; CHECK-BE-NEXT: vrev64.8 q2, q0
; CHECK-BE-NEXT: vpt.s8 gt, q2, zr
; CHECK-BE-NEXT: vstrbt.8 q1, [r0], #4
; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
%0 = bitcast i8* %x to <16 x i8>*
%1 = load <16 x i8>, <16 x i8>* %0, align 4
%2 = bitcast i8* %y to <16 x i8>*
%c = icmp sgt <16 x i8> %a, zeroinitializer
call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c)
ret i8* %z
}
; Masked store of <4 x float> with a separately-computed integer mask
; (unsigned "b > 0", i.e. b != 0): still a single predicated vstrwt.32.
define arm_aapcs_vfpcc void @masked_v4f32(<4 x float> *%dest, <4 x float> %a, <4 x i32> %b) {
; CHECK-LE-LABEL: masked_v4f32:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vpt.i32 ne, q1, zr
; CHECK-LE-NEXT: vstrwt.32 q0, [r0]
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: masked_v4f32:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vrev64.32 q2, q1
; CHECK-BE-NEXT: vrev64.32 q1, q0
; CHECK-BE-NEXT: vpt.i32 ne, q2, zr
; CHECK-BE-NEXT: vstrwt.32 q1, [r0]
; CHECK-BE-NEXT: bx lr
entry:
; ugt with zero is equivalent to "ne zero", which is why vpt.i32 ne is selected.
%c = icmp ugt <4 x i32> %b, zeroinitializer
call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %a, <4 x float>* %dest, i32 4, <4 x i1> %c)
ret void
}
; Under-aligned (align 1) masked float store: scalarized like the i32 case —
; predicate to GPR via vmrs, 4-bit mask built with ubfx/rsbs/bfi, then
; IT-predicated vmov + str per lane (float bits moved through a GPR).
define arm_aapcs_vfpcc void @masked_v4f32_align1(<4 x float> *%dest, <4 x float> %a, <4 x i32> %b) {
; CHECK-LE-LABEL: masked_v4f32_align1:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: .pad #4
; CHECK-LE-NEXT: sub sp, #4
; CHECK-LE-NEXT: vcmp.i32 ne, q1, zr
; CHECK-LE-NEXT: movs r1, #0
; CHECK-LE-NEXT: vmrs r2, p0
; CHECK-LE-NEXT: and r3, r2, #1
; CHECK-LE-NEXT: rsbs r3, r3, #0
; CHECK-LE-NEXT: bfi r1, r3, #0, #1
; CHECK-LE-NEXT: ubfx r3, r2, #4, #1
; CHECK-LE-NEXT: rsbs r3, r3, #0
; CHECK-LE-NEXT: bfi r1, r3, #1, #1
; CHECK-LE-NEXT: ubfx r3, r2, #8, #1
; CHECK-LE-NEXT: ubfx r2, r2, #12, #1
; CHECK-LE-NEXT: rsbs r3, r3, #0
; CHECK-LE-NEXT: bfi r1, r3, #2, #1
; CHECK-LE-NEXT: rsbs r2, r2, #0
; CHECK-LE-NEXT: bfi r1, r2, #3, #1
; CHECK-LE-NEXT: lsls r2, r1, #31
; CHECK-LE-NEXT: itt ne
; CHECK-LE-NEXT: vmovne r2, s0
; CHECK-LE-NEXT: strne r2, [r0]
; CHECK-LE-NEXT: lsls r2, r1, #30
; CHECK-LE-NEXT: itt mi
; CHECK-LE-NEXT: vmovmi r2, s1
; CHECK-LE-NEXT: strmi r2, [r0, #4]
; CHECK-LE-NEXT: lsls r2, r1, #29
; CHECK-LE-NEXT: itt mi
; CHECK-LE-NEXT: vmovmi r2, s2
; CHECK-LE-NEXT: strmi r2, [r0, #8]
; CHECK-LE-NEXT: lsls r1, r1, #28
; CHECK-LE-NEXT: itt mi
; CHECK-LE-NEXT: vmovmi r1, s3
; CHECK-LE-NEXT: strmi r1, [r0, #12]
; CHECK-LE-NEXT: add sp, #4
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: masked_v4f32_align1:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: .pad #4
; CHECK-BE-NEXT: sub sp, #4
; CHECK-BE-NEXT: vrev64.32 q2, q1
; CHECK-BE-NEXT: movs r1, #0
; CHECK-BE-NEXT: vcmp.i32 ne, q2, zr
; CHECK-BE-NEXT: vrev64.32 q1, q0
; CHECK-BE-NEXT: vmrs r2, p0
; CHECK-BE-NEXT: and r3, r2, #1
; CHECK-BE-NEXT: rsbs r3, r3, #0
; CHECK-BE-NEXT: bfi r1, r3, #0, #1
; CHECK-BE-NEXT: ubfx r3, r2, #4, #1
; CHECK-BE-NEXT: rsbs r3, r3, #0
; CHECK-BE-NEXT: bfi r1, r3, #1, #1
; CHECK-BE-NEXT: ubfx r3, r2, #8, #1
; CHECK-BE-NEXT: ubfx r2, r2, #12, #1
; CHECK-BE-NEXT: rsbs r3, r3, #0
; CHECK-BE-NEXT: bfi r1, r3, #2, #1
; CHECK-BE-NEXT: rsbs r2, r2, #0
; CHECK-BE-NEXT: bfi r1, r2, #3, #1
; CHECK-BE-NEXT: lsls r2, r1, #31
; CHECK-BE-NEXT: itt ne
; CHECK-BE-NEXT: vmovne r2, s4
; CHECK-BE-NEXT: strne r2, [r0]
; CHECK-BE-NEXT: lsls r2, r1, #30
; CHECK-BE-NEXT: itt mi
; CHECK-BE-NEXT: vmovmi r2, s5
; CHECK-BE-NEXT: strmi r2, [r0, #4]
; CHECK-BE-NEXT: lsls r2, r1, #29
; CHECK-BE-NEXT: itt mi
; CHECK-BE-NEXT: vmovmi r2, s6
; CHECK-BE-NEXT: strmi r2, [r0, #8]
; CHECK-BE-NEXT: lsls r1, r1, #28
; CHECK-BE-NEXT: itt mi
; CHECK-BE-NEXT: vmovmi r1, s7
; CHECK-BE-NEXT: strmi r1, [r0, #12]
; CHECK-BE-NEXT: add sp, #4
; CHECK-BE-NEXT: bx lr
entry:
%c = icmp ugt <4 x i32> %b, zeroinitializer
; Same store as @masked_v4f32 but with alignment 1 instead of 4.
call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %a, <4 x float>* %dest, i32 1, <4 x i1> %c)
ret void
}
; Pre-incremented masked float store: same pre-indexed vstrwt.32 [r0, #4]!
; writeback pattern as the i32 variant (the loaded value is moved as words).
define i8* @masked_v4f32_pre(i8* %y, i8* %x, <4 x i32> %a) {
; CHECK-LE-LABEL: masked_v4f32_pre:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vldr d1, [sp]
; CHECK-LE-NEXT: vldrw.u32 q1, [r1]
; CHECK-LE-NEXT: vmov d0, r2, r3
; CHECK-LE-NEXT: vpt.s32 gt, q0, zr
; CHECK-LE-NEXT: vstrwt.32 q1, [r0, #4]!
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: masked_v4f32_pre:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vldr d1, [sp]
; CHECK-BE-NEXT: vldrw.u32 q1, [r1]
; CHECK-BE-NEXT: vmov d0, r3, r2
; CHECK-BE-NEXT: vrev64.32 q2, q0
; CHECK-BE-NEXT: vpt.s32 gt, q2, zr
; CHECK-BE-NEXT: vstrwt.32 q1, [r0, #4]!
; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
%0 = bitcast i8* %x to <4 x float>*
%1 = load <4 x float>, <4 x float>* %0, align 4
%2 = bitcast i8* %z to <4 x float>*
%c = icmp sgt <4 x i32> %a, zeroinitializer
call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c)
ret i8* %z
}
; Post-incremented masked float store: stores to %y, returns %y + 4, matching
; the post-indexed form vstrwt.32 q1, [r0], #4.
define i8* @masked_v4f32_post(i8* %y, i8* %x, <4 x i32> %a) {
; CHECK-LE-LABEL: masked_v4f32_post:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vldr d1, [sp]
; CHECK-LE-NEXT: vldrw.u32 q1, [r1]
; CHECK-LE-NEXT: vmov d0, r2, r3
; CHECK-LE-NEXT: vpt.s32 gt, q0, zr
; CHECK-LE-NEXT: vstrwt.32 q1, [r0], #4
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: masked_v4f32_post:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vldr d1, [sp]
; CHECK-BE-NEXT: vldrw.u32 q1, [r1]
; CHECK-BE-NEXT: vmov d0, r3, r2
; CHECK-BE-NEXT: vrev64.32 q2, q0
; CHECK-BE-NEXT: vpt.s32 gt, q2, zr
; CHECK-BE-NEXT: vstrwt.32 q1, [r0], #4
; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
%0 = bitcast i8* %x to <4 x float>*
%1 = load <4 x float>, <4 x float>* %0, align 4
%2 = bitcast i8* %y to <4 x float>*
%c = icmp sgt <4 x i32> %a, zeroinitializer
call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c)
ret i8* %z
}
; Masked store of <8 x half> with natural (2-byte) alignment and a separate
; integer mask (b != 0): a single predicated vstrht.16 under a VPT block.
define arm_aapcs_vfpcc void @masked_v8f16(<8 x half> *%dest, <8 x half> %a, <8 x i16> %b) {
; CHECK-LE-LABEL: masked_v8f16:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vpt.i16 ne, q1, zr
; CHECK-LE-NEXT: vstrht.16 q0, [r0]
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: masked_v8f16:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vrev64.16 q2, q1
; CHECK-BE-NEXT: vrev64.16 q1, q0
; CHECK-BE-NEXT: vpt.i16 ne, q2, zr
; CHECK-BE-NEXT: vstrht.16 q1, [r0]
; CHECK-BE-NEXT: bx lr
entry:
; ugt with zero is equivalent to "ne zero", hence the vpt.i16 ne selection.
%c = icmp ugt <8 x i16> %b, zeroinitializer
call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %a, <8 x half>* %dest, i32 2, <8 x i1> %c)
ret void
}
; Under-aligned (align 1) masked f16 store, scalarized into a branchy chain of
; per-lane blocks (cond.store / else) rather than IT blocks: each selected
; half lane is spilled with vstr.16 to a stack slot (odd lanes first extracted
; with vmovx.f16), reloaded into a GPR with ldrh.w, and stored with strh. The
; fall-through path skips over the per-lane stores via bmi/bpl branches.
define arm_aapcs_vfpcc void @masked_v8f16_align1(<8 x half> *%dest, <8 x half> %a, <8 x i16> %b) {
; CHECK-LE-LABEL: masked_v8f16_align1:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: .pad #40
; CHECK-LE-NEXT: sub sp, #40
; CHECK-LE-NEXT: vcmp.i16 ne, q1, zr
; CHECK-LE-NEXT: movs r2, #0
; CHECK-LE-NEXT: vmrs r1, p0
; CHECK-LE-NEXT: and r3, r1, #1
; CHECK-LE-NEXT: rsbs r3, r3, #0
; CHECK-LE-NEXT: bfi r2, r3, #0, #1
; CHECK-LE-NEXT: ubfx r3, r1, #2, #1
; CHECK-LE-NEXT: rsbs r3, r3, #0
; CHECK-LE-NEXT: bfi r2, r3, #1, #1
; CHECK-LE-NEXT: ubfx r3, r1, #4, #1
; CHECK-LE-NEXT: rsbs r3, r3, #0
; CHECK-LE-NEXT: bfi r2, r3, #2, #1
; CHECK-LE-NEXT: ubfx r3, r1, #6, #1
; CHECK-LE-NEXT: rsbs r3, r3, #0
; CHECK-LE-NEXT: bfi r2, r3, #3, #1
; CHECK-LE-NEXT: ubfx r3, r1, #8, #1
; CHECK-LE-NEXT: rsbs r3, r3, #0
; CHECK-LE-NEXT: bfi r2, r3, #4, #1
; CHECK-LE-NEXT: ubfx r3, r1, #10, #1
; CHECK-LE-NEXT: rsbs r3, r3, #0
; CHECK-LE-NEXT: bfi r2, r3, #5, #1
; CHECK-LE-NEXT: ubfx r3, r1, #12, #1
; CHECK-LE-NEXT: ubfx r1, r1, #14, #1
; CHECK-LE-NEXT: rsbs r3, r3, #0
; CHECK-LE-NEXT: bfi r2, r3, #6, #1
; CHECK-LE-NEXT: rsbs r1, r1, #0
; CHECK-LE-NEXT: bfi r2, r1, #7, #1
; CHECK-LE-NEXT: uxtb r1, r2
; CHECK-LE-NEXT: lsls r2, r2, #31
; CHECK-LE-NEXT: bne .LBB16_9
; CHECK-LE-NEXT: @ %bb.1: @ %else
; CHECK-LE-NEXT: lsls r2, r1, #30
; CHECK-LE-NEXT: bmi .LBB16_10
; CHECK-LE-NEXT: .LBB16_2: @ %else2
; CHECK-LE-NEXT: lsls r2, r1, #29
; CHECK-LE-NEXT: bmi .LBB16_11
; CHECK-LE-NEXT: .LBB16_3: @ %else4
; CHECK-LE-NEXT: lsls r2, r1, #28
; CHECK-LE-NEXT: bmi .LBB16_12
; CHECK-LE-NEXT: .LBB16_4: @ %else6
; CHECK-LE-NEXT: lsls r2, r1, #27
; CHECK-LE-NEXT: bmi .LBB16_13
; CHECK-LE-NEXT: .LBB16_5: @ %else8
; CHECK-LE-NEXT: lsls r2, r1, #26
; CHECK-LE-NEXT: bmi .LBB16_14
; CHECK-LE-NEXT: .LBB16_6: @ %else10
; CHECK-LE-NEXT: lsls r2, r1, #25
; CHECK-LE-NEXT: bmi .LBB16_15
; CHECK-LE-NEXT: .LBB16_7: @ %else12
; CHECK-LE-NEXT: lsls r1, r1, #24
; CHECK-LE-NEXT: bmi .LBB16_16
; CHECK-LE-NEXT: .LBB16_8: @ %else14
; CHECK-LE-NEXT: add sp, #40
; CHECK-LE-NEXT: bx lr
; CHECK-LE-NEXT: .LBB16_9: @ %cond.store
; CHECK-LE-NEXT: vstr.16 s0, [sp, #28]
; CHECK-LE-NEXT: ldrh.w r2, [sp, #28]
; CHECK-LE-NEXT: strh r2, [r0]
; CHECK-LE-NEXT: lsls r2, r1, #30
; CHECK-LE-NEXT: bpl .LBB16_2
; CHECK-LE-NEXT: .LBB16_10: @ %cond.store1
; CHECK-LE-NEXT: vmovx.f16 s4, s0
; CHECK-LE-NEXT: vstr.16 s4, [sp, #24]
; CHECK-LE-NEXT: ldrh.w r2, [sp, #24]
; CHECK-LE-NEXT: strh r2, [r0, #2]
; CHECK-LE-NEXT: lsls r2, r1, #29
; CHECK-LE-NEXT: bpl .LBB16_3
; CHECK-LE-NEXT: .LBB16_11: @ %cond.store3
; CHECK-LE-NEXT: vstr.16 s1, [sp, #20]
; CHECK-LE-NEXT: ldrh.w r2, [sp, #20]
; CHECK-LE-NEXT: strh r2, [r0, #4]
; CHECK-LE-NEXT: lsls r2, r1, #28
; CHECK-LE-NEXT: bpl .LBB16_4
; CHECK-LE-NEXT: .LBB16_12: @ %cond.store5
; CHECK-LE-NEXT: vmovx.f16 s4, s1
; CHECK-LE-NEXT: vstr.16 s4, [sp, #16]
; CHECK-LE-NEXT: ldrh.w r2, [sp, #16]
; CHECK-LE-NEXT: strh r2, [r0, #6]
; CHECK-LE-NEXT: lsls r2, r1, #27
; CHECK-LE-NEXT: bpl .LBB16_5
; CHECK-LE-NEXT: .LBB16_13: @ %cond.store7
; CHECK-LE-NEXT: vstr.16 s2, [sp, #12]
; CHECK-LE-NEXT: ldrh.w r2, [sp, #12]
; CHECK-LE-NEXT: strh r2, [r0, #8]
; CHECK-LE-NEXT: lsls r2, r1, #26
; CHECK-LE-NEXT: bpl .LBB16_6
; CHECK-LE-NEXT: .LBB16_14: @ %cond.store9
; CHECK-LE-NEXT: vmovx.f16 s4, s2
; CHECK-LE-NEXT: vstr.16 s4, [sp, #8]
; CHECK-LE-NEXT: ldrh.w r2, [sp, #8]
; CHECK-LE-NEXT: strh r2, [r0, #10]
; CHECK-LE-NEXT: lsls r2, r1, #25
; CHECK-LE-NEXT: bpl .LBB16_7
; CHECK-LE-NEXT: .LBB16_15: @ %cond.store11
; CHECK-LE-NEXT: vstr.16 s3, [sp, #4]
; CHECK-LE-NEXT: ldrh.w r2, [sp, #4]
; CHECK-LE-NEXT: strh r2, [r0, #12]
; CHECK-LE-NEXT: lsls r1, r1, #24
; CHECK-LE-NEXT: bpl .LBB16_8
; CHECK-LE-NEXT: .LBB16_16: @ %cond.store13
; CHECK-LE-NEXT: vmovx.f16 s0, s3
; CHECK-LE-NEXT: vstr.16 s0, [sp]
; CHECK-LE-NEXT: ldrh.w r1, [sp]
; CHECK-LE-NEXT: strh r1, [r0, #14]
; CHECK-LE-NEXT: add sp, #40
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: masked_v8f16_align1:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: .pad #40
; CHECK-BE-NEXT: sub sp, #40
; CHECK-BE-NEXT: vrev64.16 q2, q1
; CHECK-BE-NEXT: vrev64.16 q1, q0
; CHECK-BE-NEXT: vcmp.i16 ne, q2, zr
; CHECK-BE-NEXT: vmrs r1, p0
; CHECK-BE-NEXT: and r2, r1, #1
; CHECK-BE-NEXT: rsbs r3, r2, #0
; CHECK-BE-NEXT: movs r2, #0
; CHECK-BE-NEXT: bfi r2, r3, #0, #1
; CHECK-BE-NEXT: ubfx r3, r1, #2, #1
; CHECK-BE-NEXT: rsbs r3, r3, #0
; CHECK-BE-NEXT: bfi r2, r3, #1, #1
; CHECK-BE-NEXT: ubfx r3, r1, #4, #1
; CHECK-BE-NEXT: rsbs r3, r3, #0
; CHECK-BE-NEXT: bfi r2, r3, #2, #1
; CHECK-BE-NEXT: ubfx r3, r1, #6, #1
; CHECK-BE-NEXT: rsbs r3, r3, #0
; CHECK-BE-NEXT: bfi r2, r3, #3, #1
; CHECK-BE-NEXT: ubfx r3, r1, #8, #1
; CHECK-BE-NEXT: rsbs r3, r3, #0
; CHECK-BE-NEXT: bfi r2, r3, #4, #1
; CHECK-BE-NEXT: ubfx r3, r1, #10, #1
; CHECK-BE-NEXT: rsbs r3, r3, #0
; CHECK-BE-NEXT: bfi r2, r3, #5, #1
; CHECK-BE-NEXT: ubfx r3, r1, #12, #1
; CHECK-BE-NEXT: ubfx r1, r1, #14, #1
; CHECK-BE-NEXT: rsbs r3, r3, #0
; CHECK-BE-NEXT: bfi r2, r3, #6, #1
; CHECK-BE-NEXT: rsbs r1, r1, #0
; CHECK-BE-NEXT: bfi r2, r1, #7, #1
; CHECK-BE-NEXT: uxtb r1, r2
; CHECK-BE-NEXT: lsls r2, r2, #31
; CHECK-BE-NEXT: bne .LBB16_9
; CHECK-BE-NEXT: @ %bb.1: @ %else
; CHECK-BE-NEXT: lsls r2, r1, #30
; CHECK-BE-NEXT: bmi .LBB16_10
; CHECK-BE-NEXT: .LBB16_2: @ %else2
; CHECK-BE-NEXT: lsls r2, r1, #29
; CHECK-BE-NEXT: bmi .LBB16_11
; CHECK-BE-NEXT: .LBB16_3: @ %else4
; CHECK-BE-NEXT: lsls r2, r1, #28
; CHECK-BE-NEXT: bmi .LBB16_12
; CHECK-BE-NEXT: .LBB16_4: @ %else6
; CHECK-BE-NEXT: lsls r2, r1, #27
; CHECK-BE-NEXT: bmi .LBB16_13
; CHECK-BE-NEXT: .LBB16_5: @ %else8
; CHECK-BE-NEXT: lsls r2, r1, #26
; CHECK-BE-NEXT: bmi .LBB16_14
; CHECK-BE-NEXT: .LBB16_6: @ %else10
; CHECK-BE-NEXT: lsls r2, r1, #25
; CHECK-BE-NEXT: bmi .LBB16_15
; CHECK-BE-NEXT: .LBB16_7: @ %else12
; CHECK-BE-NEXT: lsls r1, r1, #24
; CHECK-BE-NEXT: bmi .LBB16_16
; CHECK-BE-NEXT: .LBB16_8: @ %else14
; CHECK-BE-NEXT: add sp, #40
; CHECK-BE-NEXT: bx lr
; CHECK-BE-NEXT: .LBB16_9: @ %cond.store
; CHECK-BE-NEXT: vstr.16 s4, [sp, #28]
; CHECK-BE-NEXT: ldrh.w r2, [sp, #28]
; CHECK-BE-NEXT: strh r2, [r0]
; CHECK-BE-NEXT: lsls r2, r1, #30
; CHECK-BE-NEXT: bpl .LBB16_2
; CHECK-BE-NEXT: .LBB16_10: @ %cond.store1
; CHECK-BE-NEXT: vmovx.f16 s0, s4
; CHECK-BE-NEXT: vstr.16 s0, [sp, #24]
; CHECK-BE-NEXT: ldrh.w r2, [sp, #24]
; CHECK-BE-NEXT: strh r2, [r0, #2]
; CHECK-BE-NEXT: lsls r2, r1, #29
; CHECK-BE-NEXT: bpl .LBB16_3
; CHECK-BE-NEXT: .LBB16_11: @ %cond.store3
; CHECK-BE-NEXT: vstr.16 s5, [sp, #20]
; CHECK-BE-NEXT: ldrh.w r2, [sp, #20]
; CHECK-BE-NEXT: strh r2, [r0, #4]
; CHECK-BE-NEXT: lsls r2, r1, #28
; CHECK-BE-NEXT: bpl .LBB16_4
; CHECK-BE-NEXT: .LBB16_12: @ %cond.store5
; CHECK-BE-NEXT: vmovx.f16 s0, s5
; CHECK-BE-NEXT: vstr.16 s0, [sp, #16]
; CHECK-BE-NEXT: ldrh.w r2, [sp, #16]
; CHECK-BE-NEXT: strh r2, [r0, #6]
; CHECK-BE-NEXT: lsls r2, r1, #27
; CHECK-BE-NEXT: bpl .LBB16_5
; CHECK-BE-NEXT: .LBB16_13: @ %cond.store7
; CHECK-BE-NEXT: vstr.16 s6, [sp, #12]
; CHECK-BE-NEXT: ldrh.w r2, [sp, #12]
; CHECK-BE-NEXT: strh r2, [r0, #8]
; CHECK-BE-NEXT: lsls r2, r1, #26
; CHECK-BE-NEXT: bpl .LBB16_6
; CHECK-BE-NEXT: .LBB16_14: @ %cond.store9
; CHECK-BE-NEXT: vmovx.f16 s0, s6
; CHECK-BE-NEXT: vstr.16 s0, [sp, #8]
; CHECK-BE-NEXT: ldrh.w r2, [sp, #8]
; CHECK-BE-NEXT: strh r2, [r0, #10]
; CHECK-BE-NEXT: lsls r2, r1, #25
; CHECK-BE-NEXT: bpl .LBB16_7
; CHECK-BE-NEXT: .LBB16_15: @ %cond.store11
; CHECK-BE-NEXT: vstr.16 s7, [sp, #4]
; CHECK-BE-NEXT: ldrh.w r2, [sp, #4]
; CHECK-BE-NEXT: strh r2, [r0, #12]
; CHECK-BE-NEXT: lsls r1, r1, #24
; CHECK-BE-NEXT: bpl .LBB16_8
; CHECK-BE-NEXT: .LBB16_16: @ %cond.store13
; CHECK-BE-NEXT: vmovx.f16 s0, s7
; CHECK-BE-NEXT: vstr.16 s0, [sp]
; CHECK-BE-NEXT: ldrh.w r1, [sp]
; CHECK-BE-NEXT: strh r1, [r0, #14]
; CHECK-BE-NEXT: add sp, #40
; CHECK-BE-NEXT: bx lr
entry:
%c = icmp ugt <8 x i16> %b, zeroinitializer
; Same store as @masked_v8f16 but with alignment 1 instead of 2.
call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %a, <8 x half>* %dest, i32 1, <8 x i1> %c)
ret void
}
; Pre-incremented masked f16 store: +4 GEP folded into the writeback form
; vstrht.16 q1, [r0, #4]!; updated pointer returned in r0.
define i8* @masked_v8f16_pre(i8* %y, i8* %x, <8 x i16> %a) {
; CHECK-LE-LABEL: masked_v8f16_pre:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vldr d1, [sp]
; CHECK-LE-NEXT: vldrw.u32 q1, [r1]
; CHECK-LE-NEXT: vmov d0, r2, r3
; CHECK-LE-NEXT: vpt.s16 gt, q0, zr
; CHECK-LE-NEXT: vstrht.16 q1, [r0, #4]!
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: masked_v8f16_pre:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vldr d1, [sp]
; CHECK-BE-NEXT: vldrh.u16 q1, [r1]
; CHECK-BE-NEXT: vmov d0, r3, r2
; CHECK-BE-NEXT: vrev64.16 q2, q0
; CHECK-BE-NEXT: vpt.s16 gt, q2, zr
; CHECK-BE-NEXT: vstrht.16 q1, [r0, #4]!
; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
%0 = bitcast i8* %x to <8 x half>*
%1 = load <8 x half>, <8 x half>* %0, align 4
%2 = bitcast i8* %z to <8 x half>*
%c = icmp sgt <8 x i16> %a, zeroinitializer
call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c)
ret i8* %z
}
; Post-incremented masked f16 store: stores to %y and returns %y + 4, matching
; the post-indexed form vstrht.16 q1, [r0], #4.
define i8* @masked_v8f16_post(i8* %y, i8* %x, <8 x i16> %a) {
; CHECK-LE-LABEL: masked_v8f16_post:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vldr d1, [sp]
; CHECK-LE-NEXT: vldrw.u32 q1, [r1]
; CHECK-LE-NEXT: vmov d0, r2, r3
; CHECK-LE-NEXT: vpt.s16 gt, q0, zr
; CHECK-LE-NEXT: vstrht.16 q1, [r0], #4
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: masked_v8f16_post:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vldr d1, [sp]
; CHECK-BE-NEXT: vldrh.u16 q1, [r1]
; CHECK-BE-NEXT: vmov d0, r3, r2
; CHECK-BE-NEXT: vrev64.16 q2, q0
; CHECK-BE-NEXT: vpt.s16 gt, q2, zr
; CHECK-BE-NEXT: vstrht.16 q1, [r0], #4
; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
%0 = bitcast i8* %x to <8 x half>*
%1 = load <8 x half>, <8 x half>* %0, align 4
%2 = bitcast i8* %y to <8 x half>*
%c = icmp sgt <8 x i16> %a, zeroinitializer
call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c)
ret i8* %z
}
; Masked store of <2 x i64>. MVE has no 64-bit predicated store, so the i64
; "> 0" compare is done in scalar code (rsbs/sbcs.w computing 0 - a to get the
; sign of the comparison per 64-bit lane), packed into a 2-bit mask with bfi,
; and each d-register half is stored with an IT-predicated vstr.
define arm_aapcs_vfpcc void @masked_v2i64(<2 x i64> *%dest, <2 x i64> %a) {
; CHECK-LE-LABEL: masked_v2i64:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: .pad #4
; CHECK-LE-NEXT: sub sp, #4
; CHECK-LE-NEXT: vmov r2, s0
; CHECK-LE-NEXT: movs r3, #0
; CHECK-LE-NEXT: vmov r1, s1
; CHECK-LE-NEXT: vmov r12, s3
; CHECK-LE-NEXT: rsbs r2, r2, #0
; CHECK-LE-NEXT: vmov r2, s2
; CHECK-LE-NEXT: sbcs.w r1, r3, r1
; CHECK-LE-NEXT: mov.w r1, #0
; CHECK-LE-NEXT: it lt
; CHECK-LE-NEXT: movlt r1, #1
; CHECK-LE-NEXT: rsbs r2, r2, #0
; CHECK-LE-NEXT: sbcs.w r2, r3, r12
; CHECK-LE-NEXT: it lt
; CHECK-LE-NEXT: movlt r3, #1
; CHECK-LE-NEXT: cmp r3, #0
; CHECK-LE-NEXT: it ne
; CHECK-LE-NEXT: mvnne r3, #1
; CHECK-LE-NEXT: bfi r3, r1, #0, #1
; CHECK-LE-NEXT: and r1, r3, #3
; CHECK-LE-NEXT: lsls r2, r3, #31
; CHECK-LE-NEXT: it ne
; CHECK-LE-NEXT: vstrne d0, [r0]
; CHECK-LE-NEXT: lsls r1, r1, #30
; CHECK-LE-NEXT: it mi
; CHECK-LE-NEXT: vstrmi d1, [r0, #8]
; CHECK-LE-NEXT: add sp, #4
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: masked_v2i64:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: .pad #4
; CHECK-BE-NEXT: sub sp, #4
; CHECK-BE-NEXT: vrev64.32 q1, q0
; CHECK-BE-NEXT: movs r3, #0
; CHECK-BE-NEXT: vmov r2, s7
; CHECK-BE-NEXT: vmov r1, s6
; CHECK-BE-NEXT: vmov r12, s4
; CHECK-BE-NEXT: rsbs r2, r2, #0
; CHECK-BE-NEXT: vmov r2, s5
; CHECK-BE-NEXT: sbcs.w r1, r3, r1
; CHECK-BE-NEXT: mov.w r1, #0
; CHECK-BE-NEXT: it lt
; CHECK-BE-NEXT: movlt r1, #1
; CHECK-BE-NEXT: rsbs r2, r2, #0
; CHECK-BE-NEXT: sbcs.w r2, r3, r12
; CHECK-BE-NEXT: it lt
; CHECK-BE-NEXT: movlt r3, #1
; CHECK-BE-NEXT: cmp r3, #0
; CHECK-BE-NEXT: it ne
; CHECK-BE-NEXT: mvnne r3, #1
; CHECK-BE-NEXT: bfi r3, r1, #0, #1
; CHECK-BE-NEXT: and r1, r3, #3
; CHECK-BE-NEXT: lsls r2, r3, #31
; CHECK-BE-NEXT: it ne
; CHECK-BE-NEXT: vstrne d0, [r0]
; CHECK-BE-NEXT: lsls r1, r1, #30
; CHECK-BE-NEXT: it mi
; CHECK-BE-NEXT: vstrmi d1, [r0, #8]
; CHECK-BE-NEXT: add sp, #4
; CHECK-BE-NEXT: bx lr
entry:
%c = icmp sgt <2 x i64> %a, zeroinitializer
call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> %a, <2 x i64>* %dest, i32 8, <2 x i1> %c)
ret void
}
; Masked <2 x double> store. Same scalarized expansion as masked_v2i64 (no
; predicated 64-bit store in MVE), but the condition comes from a separate
; <2 x i64> operand %b (in q1) while the stored data stays in q0/d0/d1.
define arm_aapcs_vfpcc void @masked_v2f64(<2 x double> *%dest, <2 x double> %a, <2 x i64> %b) {
; CHECK-LE-LABEL: masked_v2f64:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: .pad #4
; CHECK-LE-NEXT: sub sp, #4
; CHECK-LE-NEXT: vmov r2, s4
; CHECK-LE-NEXT: movs r3, #0
; CHECK-LE-NEXT: vmov r1, s5
; CHECK-LE-NEXT: vmov r12, s7
; CHECK-LE-NEXT: rsbs r2, r2, #0
; CHECK-LE-NEXT: vmov r2, s6
; CHECK-LE-NEXT: sbcs.w r1, r3, r1
; CHECK-LE-NEXT: mov.w r1, #0
; CHECK-LE-NEXT: it lt
; CHECK-LE-NEXT: movlt r1, #1
; CHECK-LE-NEXT: rsbs r2, r2, #0
; CHECK-LE-NEXT: sbcs.w r2, r3, r12
; CHECK-LE-NEXT: it lt
; CHECK-LE-NEXT: movlt r3, #1
; CHECK-LE-NEXT: cmp r3, #0
; CHECK-LE-NEXT: it ne
; CHECK-LE-NEXT: mvnne r3, #1
; CHECK-LE-NEXT: bfi r3, r1, #0, #1
; CHECK-LE-NEXT: and r1, r3, #3
; CHECK-LE-NEXT: lsls r2, r3, #31
; CHECK-LE-NEXT: it ne
; CHECK-LE-NEXT: vstrne d0, [r0]
; CHECK-LE-NEXT: lsls r1, r1, #30
; CHECK-LE-NEXT: it mi
; CHECK-LE-NEXT: vstrmi d1, [r0, #8]
; CHECK-LE-NEXT: add sp, #4
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: masked_v2f64:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: .pad #4
; CHECK-BE-NEXT: sub sp, #4
; CHECK-BE-NEXT: vrev64.32 q2, q1
; CHECK-BE-NEXT: movs r3, #0
; CHECK-BE-NEXT: vmov r2, s11
; CHECK-BE-NEXT: vmov r1, s10
; CHECK-BE-NEXT: vmov r12, s8
; CHECK-BE-NEXT: rsbs r2, r2, #0
; CHECK-BE-NEXT: vmov r2, s9
; CHECK-BE-NEXT: sbcs.w r1, r3, r1
; CHECK-BE-NEXT: mov.w r1, #0
; CHECK-BE-NEXT: it lt
; CHECK-BE-NEXT: movlt r1, #1
; CHECK-BE-NEXT: rsbs r2, r2, #0
; CHECK-BE-NEXT: sbcs.w r2, r3, r12
; CHECK-BE-NEXT: it lt
; CHECK-BE-NEXT: movlt r3, #1
; CHECK-BE-NEXT: cmp r3, #0
; CHECK-BE-NEXT: it ne
; CHECK-BE-NEXT: mvnne r3, #1
; CHECK-BE-NEXT: bfi r3, r1, #0, #1
; CHECK-BE-NEXT: and r1, r3, #3
; CHECK-BE-NEXT: lsls r2, r3, #31
; CHECK-BE-NEXT: it ne
; CHECK-BE-NEXT: vstrne d0, [r0]
; CHECK-BE-NEXT: lsls r1, r1, #30
; CHECK-BE-NEXT: it mi
; CHECK-BE-NEXT: vstrmi d1, [r0, #8]
; CHECK-BE-NEXT: add sp, #4
; CHECK-BE-NEXT: bx lr
entry:
%c = icmp sgt <2 x i64> %b, zeroinitializer
call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %a, <2 x double>* %dest, i32 8, <2 x i1> %c)
ret void
}
; Truncating masked store: <4 x i32> data narrowed to <4 x i16>. The trunc
; folds into the predicated store, selecting the narrowing form vstrht.32
; under a VPT block.
define arm_aapcs_vfpcc void @masked_v4i16(<4 x i16> *%dest, <4 x i32> %a) {
; CHECK-LE-LABEL: masked_v4i16:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vpt.s32 gt, q0, zr
; CHECK-LE-NEXT: vstrht.32 q0, [r0]
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: masked_v4i16:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vrev64.32 q1, q0
; CHECK-BE-NEXT: vpt.s32 gt, q1, zr
; CHECK-BE-NEXT: vstrht.32 q1, [r0]
; CHECK-BE-NEXT: bx lr
entry:
%c = icmp sgt <4 x i32> %a, zeroinitializer
%trunc = trunc <4 x i32> %a to <4 x i16>
call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %trunc, <4 x i16>* %dest, i32 2, <4 x i1> %c)
ret void
}
; Truncating masked store: <4 x i32> data narrowed to <4 x i8>, folded into
; the narrowing predicated store vstrbt.32.
define arm_aapcs_vfpcc void @masked_v4i8(<4 x i8> *%dest, <4 x i32> %a) {
; CHECK-LE-LABEL: masked_v4i8:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vpt.s32 gt, q0, zr
; CHECK-LE-NEXT: vstrbt.32 q0, [r0]
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: masked_v4i8:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vrev64.32 q1, q0
; CHECK-BE-NEXT: vpt.s32 gt, q1, zr
; CHECK-BE-NEXT: vstrbt.32 q1, [r0]
; CHECK-BE-NEXT: bx lr
entry:
%c = icmp sgt <4 x i32> %a, zeroinitializer
%trunc = trunc <4 x i32> %a to <4 x i8>
call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %trunc, <4 x i8>* %dest, i32 1, <4 x i1> %c)
ret void
}
; Truncating masked store: <8 x i16> data narrowed to <8 x i8>, folded into
; the narrowing predicated store vstrbt.16.
define arm_aapcs_vfpcc void @masked_v8i8(<8 x i8> *%dest, <8 x i16> %a) {
; CHECK-LE-LABEL: masked_v8i8:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vpt.s16 gt, q0, zr
; CHECK-LE-NEXT: vstrbt.16 q0, [r0]
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: masked_v8i8:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vrev64.16 q1, q0
; CHECK-BE-NEXT: vpt.s16 gt, q1, zr
; CHECK-BE-NEXT: vstrbt.16 q1, [r0]
; CHECK-BE-NEXT: bx lr
entry:
%c = icmp sgt <8 x i16> %a, zeroinitializer
%trunc = trunc <8 x i16> %a to <8 x i8>
call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %trunc, <8 x i8>* %dest, i32 1, <8 x i1> %c)
ret void
}
; Truncating masked store with only byte alignment. The predicated vstrht
; cannot be used here, so the mask is read back from p0 via vmrs, each lane
; bit is extracted (ubfx/bfi), and each element is conditionally stored with
; an IT-predicated vmov + strh pair.
define arm_aapcs_vfpcc void @masked_v4i16_align1(<4 x i16> *%dest, <4 x i32> %a) {
; CHECK-LE-LABEL: masked_v4i16_align1:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: .pad #4
; CHECK-LE-NEXT: sub sp, #4
; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr
; CHECK-LE-NEXT: vmrs r2, p0
; CHECK-LE-NEXT: and r1, r2, #1
; CHECK-LE-NEXT: rsbs r3, r1, #0
; CHECK-LE-NEXT: movs r1, #0
; CHECK-LE-NEXT: bfi r1, r3, #0, #1
; CHECK-LE-NEXT: ubfx r3, r2, #4, #1
; CHECK-LE-NEXT: rsbs r3, r3, #0
; CHECK-LE-NEXT: bfi r1, r3, #1, #1
; CHECK-LE-NEXT: ubfx r3, r2, #8, #1
; CHECK-LE-NEXT: ubfx r2, r2, #12, #1
; CHECK-LE-NEXT: rsbs r3, r3, #0
; CHECK-LE-NEXT: bfi r1, r3, #2, #1
; CHECK-LE-NEXT: rsbs r2, r2, #0
; CHECK-LE-NEXT: bfi r1, r2, #3, #1
; CHECK-LE-NEXT: lsls r2, r1, #31
; CHECK-LE-NEXT: itt ne
; CHECK-LE-NEXT: vmovne r2, s0
; CHECK-LE-NEXT: strhne r2, [r0]
; CHECK-LE-NEXT: lsls r2, r1, #30
; CHECK-LE-NEXT: itt mi
; CHECK-LE-NEXT: vmovmi r2, s1
; CHECK-LE-NEXT: strhmi r2, [r0, #2]
; CHECK-LE-NEXT: lsls r2, r1, #29
; CHECK-LE-NEXT: itt mi
; CHECK-LE-NEXT: vmovmi r2, s2
; CHECK-LE-NEXT: strhmi r2, [r0, #4]
; CHECK-LE-NEXT: lsls r1, r1, #28
; CHECK-LE-NEXT: itt mi
; CHECK-LE-NEXT: vmovmi r1, s3
; CHECK-LE-NEXT: strhmi r1, [r0, #6]
; CHECK-LE-NEXT: add sp, #4
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: masked_v4i16_align1:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: .pad #4
; CHECK-BE-NEXT: sub sp, #4
; CHECK-BE-NEXT: vrev64.32 q1, q0
; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr
; CHECK-BE-NEXT: vmrs r2, p0
; CHECK-BE-NEXT: and r1, r2, #1
; CHECK-BE-NEXT: rsbs r3, r1, #0
; CHECK-BE-NEXT: movs r1, #0
; CHECK-BE-NEXT: bfi r1, r3, #0, #1
; CHECK-BE-NEXT: ubfx r3, r2, #4, #1
; CHECK-BE-NEXT: rsbs r3, r3, #0
; CHECK-BE-NEXT: bfi r1, r3, #1, #1
; CHECK-BE-NEXT: ubfx r3, r2, #8, #1
; CHECK-BE-NEXT: ubfx r2, r2, #12, #1
; CHECK-BE-NEXT: rsbs r3, r3, #0
; CHECK-BE-NEXT: bfi r1, r3, #2, #1
; CHECK-BE-NEXT: rsbs r2, r2, #0
; CHECK-BE-NEXT: bfi r1, r2, #3, #1
; CHECK-BE-NEXT: lsls r2, r1, #31
; CHECK-BE-NEXT: itt ne
; CHECK-BE-NEXT: vmovne r2, s4
; CHECK-BE-NEXT: strhne r2, [r0]
; CHECK-BE-NEXT: lsls r2, r1, #30
; CHECK-BE-NEXT: itt mi
; CHECK-BE-NEXT: vmovmi r2, s5
; CHECK-BE-NEXT: strhmi r2, [r0, #2]
; CHECK-BE-NEXT: lsls r2, r1, #29
; CHECK-BE-NEXT: itt mi
; CHECK-BE-NEXT: vmovmi r2, s6
; CHECK-BE-NEXT: strhmi r2, [r0, #4]
; CHECK-BE-NEXT: lsls r1, r1, #28
; CHECK-BE-NEXT: itt mi
; CHECK-BE-NEXT: vmovmi r1, s7
; CHECK-BE-NEXT: strhmi r1, [r0, #6]
; CHECK-BE-NEXT: add sp, #4
; CHECK-BE-NEXT: bx lr
entry:
%c = icmp sgt <4 x i32> %a, zeroinitializer
%trunc = trunc <4 x i32> %a to <4 x i16>
call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %trunc, <4 x i16>* %dest, i32 1, <4 x i1> %c)
ret void
}
; Masked store of an fptrunc'd <4 x float> -> <4 x half>, align 4. There is
; no predicated store form for this, so each lane's predicate is built from
; scalar vcmp.f32/vmrs results into a 4-bit mask, the float lanes are
; narrowed pairwise with vcvtb/vcvtt.f16.f32, and each half is stored through
; a per-lane conditional branch (%cond.store blocks) using vstr.16 (with
; vmovx.f16 to extract the odd lanes).
define arm_aapcs_vfpcc void @masked_v4f16_align4(<4 x half> *%dest, <4 x float> %a) {
; CHECK-LE-LABEL: masked_v4f16_align4:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: .pad #4
; CHECK-LE-NEXT: sub sp, #4
; CHECK-LE-NEXT: vcmp.f32 s0, #0
; CHECK-LE-NEXT: movs r1, #0
; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-LE-NEXT: it gt
; CHECK-LE-NEXT: movgt r1, #1
; CHECK-LE-NEXT: cmp r1, #0
; CHECK-LE-NEXT: vcmp.f32 s1, #0
; CHECK-LE-NEXT: cset r1, ne
; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-LE-NEXT: and r1, r1, #1
; CHECK-LE-NEXT: vcmp.f32 s2, #0
; CHECK-LE-NEXT: rsb.w r3, r1, #0
; CHECK-LE-NEXT: mov.w r1, #0
; CHECK-LE-NEXT: bfi r1, r3, #0, #1
; CHECK-LE-NEXT: mov.w r3, #0
; CHECK-LE-NEXT: it gt
; CHECK-LE-NEXT: movgt r3, #1
; CHECK-LE-NEXT: cmp r3, #0
; CHECK-LE-NEXT: cset r3, ne
; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-LE-NEXT: and r3, r3, #1
; CHECK-LE-NEXT: vcmp.f32 s3, #0
; CHECK-LE-NEXT: rsb.w r3, r3, #0
; CHECK-LE-NEXT: mov.w r2, #0
; CHECK-LE-NEXT: bfi r1, r3, #1, #1
; CHECK-LE-NEXT: mov.w r3, #0
; CHECK-LE-NEXT: it gt
; CHECK-LE-NEXT: movgt r3, #1
; CHECK-LE-NEXT: cmp r3, #0
; CHECK-LE-NEXT: cset r3, ne
; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-LE-NEXT: it gt
; CHECK-LE-NEXT: movgt r2, #1
; CHECK-LE-NEXT: cmp r2, #0
; CHECK-LE-NEXT: and r3, r3, #1
; CHECK-LE-NEXT: cset r2, ne
; CHECK-LE-NEXT: and r2, r2, #1
; CHECK-LE-NEXT: rsbs r3, r3, #0
; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s0
; CHECK-LE-NEXT: bfi r1, r3, #2, #1
; CHECK-LE-NEXT: rsbs r2, r2, #0
; CHECK-LE-NEXT: vcvtt.f16.f32 s4, s1
; CHECK-LE-NEXT: bfi r1, r2, #3, #1
; CHECK-LE-NEXT: vcvtb.f16.f32 s5, s2
; CHECK-LE-NEXT: vcvtt.f16.f32 s5, s3
; CHECK-LE-NEXT: lsls r2, r1, #31
; CHECK-LE-NEXT: bne .LBB25_5
; CHECK-LE-NEXT: @ %bb.1: @ %else
; CHECK-LE-NEXT: lsls r2, r1, #30
; CHECK-LE-NEXT: bmi .LBB25_6
; CHECK-LE-NEXT: .LBB25_2: @ %else2
; CHECK-LE-NEXT: lsls r2, r1, #29
; CHECK-LE-NEXT: bmi .LBB25_7
; CHECK-LE-NEXT: .LBB25_3: @ %else4
; CHECK-LE-NEXT: lsls r1, r1, #28
; CHECK-LE-NEXT: bmi .LBB25_8
; CHECK-LE-NEXT: .LBB25_4: @ %else6
; CHECK-LE-NEXT: add sp, #4
; CHECK-LE-NEXT: bx lr
; CHECK-LE-NEXT: .LBB25_5: @ %cond.store
; CHECK-LE-NEXT: vstr.16 s4, [r0]
; CHECK-LE-NEXT: lsls r2, r1, #30
; CHECK-LE-NEXT: bpl .LBB25_2
; CHECK-LE-NEXT: .LBB25_6: @ %cond.store1
; CHECK-LE-NEXT: vmovx.f16 s0, s4
; CHECK-LE-NEXT: vstr.16 s0, [r0, #2]
; CHECK-LE-NEXT: lsls r2, r1, #29
; CHECK-LE-NEXT: bpl .LBB25_3
; CHECK-LE-NEXT: .LBB25_7: @ %cond.store3
; CHECK-LE-NEXT: vstr.16 s5, [r0, #4]
; CHECK-LE-NEXT: lsls r1, r1, #28
; CHECK-LE-NEXT: bpl .LBB25_4
; CHECK-LE-NEXT: .LBB25_8: @ %cond.store5
; CHECK-LE-NEXT: vmovx.f16 s0, s5
; CHECK-LE-NEXT: vstr.16 s0, [r0, #6]
; CHECK-LE-NEXT: add sp, #4
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: masked_v4f16_align4:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: .pad #4
; CHECK-BE-NEXT: sub sp, #4
; CHECK-BE-NEXT: vrev64.32 q1, q0
; CHECK-BE-NEXT: movs r1, #0
; CHECK-BE-NEXT: vcmp.f32 s4, #0
; CHECK-BE-NEXT: movs r2, #0
; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-BE-NEXT: it gt
; CHECK-BE-NEXT: movgt r1, #1
; CHECK-BE-NEXT: cmp r1, #0
; CHECK-BE-NEXT: vcmp.f32 s5, #0
; CHECK-BE-NEXT: cset r1, ne
; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-BE-NEXT: and r1, r1, #1
; CHECK-BE-NEXT: vcmp.f32 s6, #0
; CHECK-BE-NEXT: rsb.w r3, r1, #0
; CHECK-BE-NEXT: mov.w r1, #0
; CHECK-BE-NEXT: bfi r1, r3, #0, #1
; CHECK-BE-NEXT: mov.w r3, #0
; CHECK-BE-NEXT: it gt
; CHECK-BE-NEXT: movgt r3, #1
; CHECK-BE-NEXT: cmp r3, #0
; CHECK-BE-NEXT: cset r3, ne
; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-BE-NEXT: and r3, r3, #1
; CHECK-BE-NEXT: vcmp.f32 s7, #0
; CHECK-BE-NEXT: rsb.w r3, r3, #0
; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s4
; CHECK-BE-NEXT: bfi r1, r3, #1, #1
; CHECK-BE-NEXT: mov.w r3, #0
; CHECK-BE-NEXT: it gt
; CHECK-BE-NEXT: movgt r3, #1
; CHECK-BE-NEXT: cmp r3, #0
; CHECK-BE-NEXT: cset r3, ne
; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-BE-NEXT: it gt
; CHECK-BE-NEXT: movgt r2, #1
; CHECK-BE-NEXT: cmp r2, #0
; CHECK-BE-NEXT: and r3, r3, #1
; CHECK-BE-NEXT: cset r2, ne
; CHECK-BE-NEXT: and r2, r2, #1
; CHECK-BE-NEXT: rsbs r3, r3, #0
; CHECK-BE-NEXT: bfi r1, r3, #2, #1
; CHECK-BE-NEXT: rsbs r2, r2, #0
; CHECK-BE-NEXT: vcvtt.f16.f32 s0, s5
; CHECK-BE-NEXT: bfi r1, r2, #3, #1
; CHECK-BE-NEXT: vcvtb.f16.f32 s1, s6
; CHECK-BE-NEXT: vcvtt.f16.f32 s1, s7
; CHECK-BE-NEXT: lsls r2, r1, #31
; CHECK-BE-NEXT: bne .LBB25_5
; CHECK-BE-NEXT: @ %bb.1: @ %else
; CHECK-BE-NEXT: lsls r2, r1, #30
; CHECK-BE-NEXT: bmi .LBB25_6
; CHECK-BE-NEXT: .LBB25_2: @ %else2
; CHECK-BE-NEXT: lsls r2, r1, #29
; CHECK-BE-NEXT: bmi .LBB25_7
; CHECK-BE-NEXT: .LBB25_3: @ %else4
; CHECK-BE-NEXT: lsls r1, r1, #28
; CHECK-BE-NEXT: bmi .LBB25_8
; CHECK-BE-NEXT: .LBB25_4: @ %else6
; CHECK-BE-NEXT: add sp, #4
; CHECK-BE-NEXT: bx lr
; CHECK-BE-NEXT: .LBB25_5: @ %cond.store
; CHECK-BE-NEXT: vstr.16 s0, [r0]
; CHECK-BE-NEXT: lsls r2, r1, #30
; CHECK-BE-NEXT: bpl .LBB25_2
; CHECK-BE-NEXT: .LBB25_6: @ %cond.store1
; CHECK-BE-NEXT: vmovx.f16 s4, s0
; CHECK-BE-NEXT: vstr.16 s4, [r0, #2]
; CHECK-BE-NEXT: lsls r2, r1, #29
; CHECK-BE-NEXT: bpl .LBB25_3
; CHECK-BE-NEXT: .LBB25_7: @ %cond.store3
; CHECK-BE-NEXT: vstr.16 s1, [r0, #4]
; CHECK-BE-NEXT: lsls r1, r1, #28
; CHECK-BE-NEXT: bpl .LBB25_4
; CHECK-BE-NEXT: .LBB25_8: @ %cond.store5
; CHECK-BE-NEXT: vmovx.f16 s0, s1
; CHECK-BE-NEXT: vstr.16 s0, [r0, #6]
; CHECK-BE-NEXT: add sp, #4
; CHECK-BE-NEXT: bx lr
entry:
%c = fcmp ogt <4 x float> %a, zeroinitializer
%trunc = fptrunc <4 x float> %a to <4 x half>
call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %trunc, <4 x half>* %dest, i32 4, <4 x i1> %c)
ret void
}
; Same as masked_v4f16_align4 but with halfword (align 2) destination
; alignment. Codegen is identical: vstr.16 only needs 2-byte alignment, so
; the per-lane conditional vstr.16 expansion is reused unchanged.
define arm_aapcs_vfpcc void @masked_v4f16_align2(<4 x half> *%dest, <4 x float> %a) {
; CHECK-LE-LABEL: masked_v4f16_align2:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: .pad #4
; CHECK-LE-NEXT: sub sp, #4
; CHECK-LE-NEXT: vcmp.f32 s0, #0
; CHECK-LE-NEXT: movs r1, #0
; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-LE-NEXT: it gt
; CHECK-LE-NEXT: movgt r1, #1
; CHECK-LE-NEXT: cmp r1, #0
; CHECK-LE-NEXT: vcmp.f32 s1, #0
; CHECK-LE-NEXT: cset r1, ne
; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-LE-NEXT: and r1, r1, #1
; CHECK-LE-NEXT: vcmp.f32 s2, #0
; CHECK-LE-NEXT: rsb.w r3, r1, #0
; CHECK-LE-NEXT: mov.w r1, #0
; CHECK-LE-NEXT: bfi r1, r3, #0, #1
; CHECK-LE-NEXT: mov.w r3, #0
; CHECK-LE-NEXT: it gt
; CHECK-LE-NEXT: movgt r3, #1
; CHECK-LE-NEXT: cmp r3, #0
; CHECK-LE-NEXT: cset r3, ne
; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-LE-NEXT: and r3, r3, #1
; CHECK-LE-NEXT: vcmp.f32 s3, #0
; CHECK-LE-NEXT: rsb.w r3, r3, #0
; CHECK-LE-NEXT: mov.w r2, #0
; CHECK-LE-NEXT: bfi r1, r3, #1, #1
; CHECK-LE-NEXT: mov.w r3, #0
; CHECK-LE-NEXT: it gt
; CHECK-LE-NEXT: movgt r3, #1
; CHECK-LE-NEXT: cmp r3, #0
; CHECK-LE-NEXT: cset r3, ne
; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-LE-NEXT: it gt
; CHECK-LE-NEXT: movgt r2, #1
; CHECK-LE-NEXT: cmp r2, #0
; CHECK-LE-NEXT: and r3, r3, #1
; CHECK-LE-NEXT: cset r2, ne
; CHECK-LE-NEXT: and r2, r2, #1
; CHECK-LE-NEXT: rsbs r3, r3, #0
; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s0
; CHECK-LE-NEXT: bfi r1, r3, #2, #1
; CHECK-LE-NEXT: rsbs r2, r2, #0
; CHECK-LE-NEXT: vcvtt.f16.f32 s4, s1
; CHECK-LE-NEXT: bfi r1, r2, #3, #1
; CHECK-LE-NEXT: vcvtb.f16.f32 s5, s2
; CHECK-LE-NEXT: vcvtt.f16.f32 s5, s3
; CHECK-LE-NEXT: lsls r2, r1, #31
; CHECK-LE-NEXT: bne .LBB26_5
; CHECK-LE-NEXT: @ %bb.1: @ %else
; CHECK-LE-NEXT: lsls r2, r1, #30
; CHECK-LE-NEXT: bmi .LBB26_6
; CHECK-LE-NEXT: .LBB26_2: @ %else2
; CHECK-LE-NEXT: lsls r2, r1, #29
; CHECK-LE-NEXT: bmi .LBB26_7
; CHECK-LE-NEXT: .LBB26_3: @ %else4
; CHECK-LE-NEXT: lsls r1, r1, #28
; CHECK-LE-NEXT: bmi .LBB26_8
; CHECK-LE-NEXT: .LBB26_4: @ %else6
; CHECK-LE-NEXT: add sp, #4
; CHECK-LE-NEXT: bx lr
; CHECK-LE-NEXT: .LBB26_5: @ %cond.store
; CHECK-LE-NEXT: vstr.16 s4, [r0]
; CHECK-LE-NEXT: lsls r2, r1, #30
; CHECK-LE-NEXT: bpl .LBB26_2
; CHECK-LE-NEXT: .LBB26_6: @ %cond.store1
; CHECK-LE-NEXT: vmovx.f16 s0, s4
; CHECK-LE-NEXT: vstr.16 s0, [r0, #2]
; CHECK-LE-NEXT: lsls r2, r1, #29
; CHECK-LE-NEXT: bpl .LBB26_3
; CHECK-LE-NEXT: .LBB26_7: @ %cond.store3
; CHECK-LE-NEXT: vstr.16 s5, [r0, #4]
; CHECK-LE-NEXT: lsls r1, r1, #28
; CHECK-LE-NEXT: bpl .LBB26_4
; CHECK-LE-NEXT: .LBB26_8: @ %cond.store5
; CHECK-LE-NEXT: vmovx.f16 s0, s5
; CHECK-LE-NEXT: vstr.16 s0, [r0, #6]
; CHECK-LE-NEXT: add sp, #4
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: masked_v4f16_align2:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: .pad #4
; CHECK-BE-NEXT: sub sp, #4
; CHECK-BE-NEXT: vrev64.32 q1, q0
; CHECK-BE-NEXT: movs r1, #0
; CHECK-BE-NEXT: vcmp.f32 s4, #0
; CHECK-BE-NEXT: movs r2, #0
; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-BE-NEXT: it gt
; CHECK-BE-NEXT: movgt r1, #1
; CHECK-BE-NEXT: cmp r1, #0
; CHECK-BE-NEXT: vcmp.f32 s5, #0
; CHECK-BE-NEXT: cset r1, ne
; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-BE-NEXT: and r1, r1, #1
; CHECK-BE-NEXT: vcmp.f32 s6, #0
; CHECK-BE-NEXT: rsb.w r3, r1, #0
; CHECK-BE-NEXT: mov.w r1, #0
; CHECK-BE-NEXT: bfi r1, r3, #0, #1
; CHECK-BE-NEXT: mov.w r3, #0
; CHECK-BE-NEXT: it gt
; CHECK-BE-NEXT: movgt r3, #1
; CHECK-BE-NEXT: cmp r3, #0
; CHECK-BE-NEXT: cset r3, ne
; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-BE-NEXT: and r3, r3, #1
; CHECK-BE-NEXT: vcmp.f32 s7, #0
; CHECK-BE-NEXT: rsb.w r3, r3, #0
; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s4
; CHECK-BE-NEXT: bfi r1, r3, #1, #1
; CHECK-BE-NEXT: mov.w r3, #0
; CHECK-BE-NEXT: it gt
; CHECK-BE-NEXT: movgt r3, #1
; CHECK-BE-NEXT: cmp r3, #0
; CHECK-BE-NEXT: cset r3, ne
; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-BE-NEXT: it gt
; CHECK-BE-NEXT: movgt r2, #1
; CHECK-BE-NEXT: cmp r2, #0
; CHECK-BE-NEXT: and r3, r3, #1
; CHECK-BE-NEXT: cset r2, ne
; CHECK-BE-NEXT: and r2, r2, #1
; CHECK-BE-NEXT: rsbs r3, r3, #0
; CHECK-BE-NEXT: bfi r1, r3, #2, #1
; CHECK-BE-NEXT: rsbs r2, r2, #0
; CHECK-BE-NEXT: vcvtt.f16.f32 s0, s5
; CHECK-BE-NEXT: bfi r1, r2, #3, #1
; CHECK-BE-NEXT: vcvtb.f16.f32 s1, s6
; CHECK-BE-NEXT: vcvtt.f16.f32 s1, s7
; CHECK-BE-NEXT: lsls r2, r1, #31
; CHECK-BE-NEXT: bne .LBB26_5
; CHECK-BE-NEXT: @ %bb.1: @ %else
; CHECK-BE-NEXT: lsls r2, r1, #30
; CHECK-BE-NEXT: bmi .LBB26_6
; CHECK-BE-NEXT: .LBB26_2: @ %else2
; CHECK-BE-NEXT: lsls r2, r1, #29
; CHECK-BE-NEXT: bmi .LBB26_7
; CHECK-BE-NEXT: .LBB26_3: @ %else4
; CHECK-BE-NEXT: lsls r1, r1, #28
; CHECK-BE-NEXT: bmi .LBB26_8
; CHECK-BE-NEXT: .LBB26_4: @ %else6
; CHECK-BE-NEXT: add sp, #4
; CHECK-BE-NEXT: bx lr
; CHECK-BE-NEXT: .LBB26_5: @ %cond.store
; CHECK-BE-NEXT: vstr.16 s0, [r0]
; CHECK-BE-NEXT: lsls r2, r1, #30
; CHECK-BE-NEXT: bpl .LBB26_2
; CHECK-BE-NEXT: .LBB26_6: @ %cond.store1
; CHECK-BE-NEXT: vmovx.f16 s4, s0
; CHECK-BE-NEXT: vstr.16 s4, [r0, #2]
; CHECK-BE-NEXT: lsls r2, r1, #29
; CHECK-BE-NEXT: bpl .LBB26_3
; CHECK-BE-NEXT: .LBB26_7: @ %cond.store3
; CHECK-BE-NEXT: vstr.16 s1, [r0, #4]
; CHECK-BE-NEXT: lsls r1, r1, #28
; CHECK-BE-NEXT: bpl .LBB26_4
; CHECK-BE-NEXT: .LBB26_8: @ %cond.store5
; CHECK-BE-NEXT: vmovx.f16 s0, s1
; CHECK-BE-NEXT: vstr.16 s0, [r0, #6]
; CHECK-BE-NEXT: add sp, #4
; CHECK-BE-NEXT: bx lr
entry:
%c = fcmp ogt <4 x float> %a, zeroinitializer
%trunc = fptrunc <4 x float> %a to <4 x half>
call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %trunc, <4 x half>* %dest, i32 2, <4 x i1> %c)
ret void
}
; Same expansion as masked_v4f16_align4 but with a byte-aligned destination:
; vstr.16 cannot store to a potentially-unaligned address, so each selected
; lane is first spilled to an aligned stack slot with vstr.16 and then copied
; to the destination with ldrh/strh (hence the larger 20-byte stack frame).
define arm_aapcs_vfpcc void @masked_v4f16_align1(<4 x half> *%dest, <4 x float> %a) {
; CHECK-LE-LABEL: masked_v4f16_align1:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: .pad #20
; CHECK-LE-NEXT: sub sp, #20
; CHECK-LE-NEXT: vcmp.f32 s0, #0
; CHECK-LE-NEXT: movs r1, #0
; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-LE-NEXT: it gt
; CHECK-LE-NEXT: movgt r1, #1
; CHECK-LE-NEXT: cmp r1, #0
; CHECK-LE-NEXT: vcmp.f32 s1, #0
; CHECK-LE-NEXT: cset r1, ne
; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-LE-NEXT: and r1, r1, #1
; CHECK-LE-NEXT: vcmp.f32 s2, #0
; CHECK-LE-NEXT: rsb.w r3, r1, #0
; CHECK-LE-NEXT: mov.w r1, #0
; CHECK-LE-NEXT: bfi r1, r3, #0, #1
; CHECK-LE-NEXT: mov.w r3, #0
; CHECK-LE-NEXT: it gt
; CHECK-LE-NEXT: movgt r3, #1
; CHECK-LE-NEXT: cmp r3, #0
; CHECK-LE-NEXT: cset r3, ne
; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-LE-NEXT: and r3, r3, #1
; CHECK-LE-NEXT: vcmp.f32 s3, #0
; CHECK-LE-NEXT: rsb.w r3, r3, #0
; CHECK-LE-NEXT: mov.w r2, #0
; CHECK-LE-NEXT: bfi r1, r3, #1, #1
; CHECK-LE-NEXT: mov.w r3, #0
; CHECK-LE-NEXT: it gt
; CHECK-LE-NEXT: movgt r3, #1
; CHECK-LE-NEXT: cmp r3, #0
; CHECK-LE-NEXT: cset r3, ne
; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-LE-NEXT: it gt
; CHECK-LE-NEXT: movgt r2, #1
; CHECK-LE-NEXT: cmp r2, #0
; CHECK-LE-NEXT: and r3, r3, #1
; CHECK-LE-NEXT: cset r2, ne
; CHECK-LE-NEXT: and r2, r2, #1
; CHECK-LE-NEXT: rsbs r3, r3, #0
; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s0
; CHECK-LE-NEXT: bfi r1, r3, #2, #1
; CHECK-LE-NEXT: rsbs r2, r2, #0
; CHECK-LE-NEXT: vcvtt.f16.f32 s4, s1
; CHECK-LE-NEXT: bfi r1, r2, #3, #1
; CHECK-LE-NEXT: vcvtb.f16.f32 s5, s2
; CHECK-LE-NEXT: vcvtt.f16.f32 s5, s3
; CHECK-LE-NEXT: lsls r2, r1, #31
; CHECK-LE-NEXT: bne .LBB27_5
; CHECK-LE-NEXT: @ %bb.1: @ %else
; CHECK-LE-NEXT: lsls r2, r1, #30
; CHECK-LE-NEXT: bmi .LBB27_6
; CHECK-LE-NEXT: .LBB27_2: @ %else2
; CHECK-LE-NEXT: lsls r2, r1, #29
; CHECK-LE-NEXT: bmi .LBB27_7
; CHECK-LE-NEXT: .LBB27_3: @ %else4
; CHECK-LE-NEXT: lsls r1, r1, #28
; CHECK-LE-NEXT: bmi .LBB27_8
; CHECK-LE-NEXT: .LBB27_4: @ %else6
; CHECK-LE-NEXT: add sp, #20
; CHECK-LE-NEXT: bx lr
; CHECK-LE-NEXT: .LBB27_5: @ %cond.store
; CHECK-LE-NEXT: vstr.16 s4, [sp, #12]
; CHECK-LE-NEXT: ldrh.w r2, [sp, #12]
; CHECK-LE-NEXT: strh r2, [r0]
; CHECK-LE-NEXT: lsls r2, r1, #30
; CHECK-LE-NEXT: bpl .LBB27_2
; CHECK-LE-NEXT: .LBB27_6: @ %cond.store1
; CHECK-LE-NEXT: vmovx.f16 s0, s4
; CHECK-LE-NEXT: vstr.16 s0, [sp, #8]
; CHECK-LE-NEXT: ldrh.w r2, [sp, #8]
; CHECK-LE-NEXT: strh r2, [r0, #2]
; CHECK-LE-NEXT: lsls r2, r1, #29
; CHECK-LE-NEXT: bpl .LBB27_3
; CHECK-LE-NEXT: .LBB27_7: @ %cond.store3
; CHECK-LE-NEXT: vstr.16 s5, [sp, #4]
; CHECK-LE-NEXT: ldrh.w r2, [sp, #4]
; CHECK-LE-NEXT: strh r2, [r0, #4]
; CHECK-LE-NEXT: lsls r1, r1, #28
; CHECK-LE-NEXT: bpl .LBB27_4
; CHECK-LE-NEXT: .LBB27_8: @ %cond.store5
; CHECK-LE-NEXT: vmovx.f16 s0, s5
; CHECK-LE-NEXT: vstr.16 s0, [sp]
; CHECK-LE-NEXT: ldrh.w r1, [sp]
; CHECK-LE-NEXT: strh r1, [r0, #6]
; CHECK-LE-NEXT: add sp, #20
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: masked_v4f16_align1:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: .pad #20
; CHECK-BE-NEXT: sub sp, #20
; CHECK-BE-NEXT: vrev64.32 q1, q0
; CHECK-BE-NEXT: movs r1, #0
; CHECK-BE-NEXT: vcmp.f32 s4, #0
; CHECK-BE-NEXT: movs r2, #0
; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-BE-NEXT: it gt
; CHECK-BE-NEXT: movgt r1, #1
; CHECK-BE-NEXT: cmp r1, #0
; CHECK-BE-NEXT: vcmp.f32 s5, #0
; CHECK-BE-NEXT: cset r1, ne
; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-BE-NEXT: and r1, r1, #1
; CHECK-BE-NEXT: vcmp.f32 s6, #0
; CHECK-BE-NEXT: rsb.w r3, r1, #0
; CHECK-BE-NEXT: mov.w r1, #0
; CHECK-BE-NEXT: bfi r1, r3, #0, #1
; CHECK-BE-NEXT: mov.w r3, #0
; CHECK-BE-NEXT: it gt
; CHECK-BE-NEXT: movgt r3, #1
; CHECK-BE-NEXT: cmp r3, #0
; CHECK-BE-NEXT: cset r3, ne
; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-BE-NEXT: and r3, r3, #1
; CHECK-BE-NEXT: vcmp.f32 s7, #0
; CHECK-BE-NEXT: rsb.w r3, r3, #0
; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s4
; CHECK-BE-NEXT: bfi r1, r3, #1, #1
; CHECK-BE-NEXT: mov.w r3, #0
; CHECK-BE-NEXT: it gt
; CHECK-BE-NEXT: movgt r3, #1
; CHECK-BE-NEXT: cmp r3, #0
; CHECK-BE-NEXT: cset r3, ne
; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-BE-NEXT: it gt
; CHECK-BE-NEXT: movgt r2, #1
; CHECK-BE-NEXT: cmp r2, #0
; CHECK-BE-NEXT: and r3, r3, #1
; CHECK-BE-NEXT: cset r2, ne
; CHECK-BE-NEXT: and r2, r2, #1
; CHECK-BE-NEXT: rsbs r3, r3, #0
; CHECK-BE-NEXT: bfi r1, r3, #2, #1
; CHECK-BE-NEXT: rsbs r2, r2, #0
; CHECK-BE-NEXT: vcvtt.f16.f32 s0, s5
; CHECK-BE-NEXT: bfi r1, r2, #3, #1
; CHECK-BE-NEXT: vcvtb.f16.f32 s1, s6
; CHECK-BE-NEXT: vcvtt.f16.f32 s1, s7
; CHECK-BE-NEXT: lsls r2, r1, #31
; CHECK-BE-NEXT: bne .LBB27_5
; CHECK-BE-NEXT: @ %bb.1: @ %else
; CHECK-BE-NEXT: lsls r2, r1, #30
; CHECK-BE-NEXT: bmi .LBB27_6
; CHECK-BE-NEXT: .LBB27_2: @ %else2
; CHECK-BE-NEXT: lsls r2, r1, #29
; CHECK-BE-NEXT: bmi .LBB27_7
; CHECK-BE-NEXT: .LBB27_3: @ %else4
; CHECK-BE-NEXT: lsls r1, r1, #28
; CHECK-BE-NEXT: bmi .LBB27_8
; CHECK-BE-NEXT: .LBB27_4: @ %else6
; CHECK-BE-NEXT: add sp, #20
; CHECK-BE-NEXT: bx lr
; CHECK-BE-NEXT: .LBB27_5: @ %cond.store
; CHECK-BE-NEXT: vstr.16 s0, [sp, #12]
; CHECK-BE-NEXT: ldrh.w r2, [sp, #12]
; CHECK-BE-NEXT: strh r2, [r0]
; CHECK-BE-NEXT: lsls r2, r1, #30
; CHECK-BE-NEXT: bpl .LBB27_2
; CHECK-BE-NEXT: .LBB27_6: @ %cond.store1
; CHECK-BE-NEXT: vmovx.f16 s4, s0
; CHECK-BE-NEXT: vstr.16 s4, [sp, #8]
; CHECK-BE-NEXT: ldrh.w r2, [sp, #8]
; CHECK-BE-NEXT: strh r2, [r0, #2]
; CHECK-BE-NEXT: lsls r2, r1, #29
; CHECK-BE-NEXT: bpl .LBB27_3
; CHECK-BE-NEXT: .LBB27_7: @ %cond.store3
; CHECK-BE-NEXT: vstr.16 s1, [sp, #4]
; CHECK-BE-NEXT: ldrh.w r2, [sp, #4]
; CHECK-BE-NEXT: strh r2, [r0, #4]
; CHECK-BE-NEXT: lsls r1, r1, #28
; CHECK-BE-NEXT: bpl .LBB27_4
; CHECK-BE-NEXT: .LBB27_8: @ %cond.store5
; CHECK-BE-NEXT: vmovx.f16 s0, s1
; CHECK-BE-NEXT: vstr.16 s0, [sp]
; CHECK-BE-NEXT: ldrh.w r1, [sp]
; CHECK-BE-NEXT: strh r1, [r0, #6]
; CHECK-BE-NEXT: add sp, #20
; CHECK-BE-NEXT: bx lr
entry:
%c = fcmp ogt <4 x float> %a, zeroinitializer
%trunc = fptrunc <4 x float> %a to <4 x half>
call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %trunc, <4 x half>* %dest, i32 1, <4 x i1> %c)
ret void
}
; Declarations of the llvm.masked.store intrinsics exercised above, one per
; element type/count combination (signature: data, pointer, alignment, mask).
declare void @llvm.masked.store.v4i8.p0v4i8(<4 x i8>, <4 x i8>*, i32, <4 x i1>)
declare void @llvm.masked.store.v8i8.p0v8i8(<8 x i8>, <8 x i8>*, i32, <8 x i1>)
declare void @llvm.masked.store.v4i16.p0v4i16(<4 x i16>, <4 x i16>*, i32, <4 x i1>)
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>)
declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>)
declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>)
declare void @llvm.masked.store.v4f16.p0v4f16(<4 x half>, <4 x half>*, i32, <4 x i1>)
declare void @llvm.masked.store.v8f16.p0v8f16(<8 x half>, <8 x half>*, i32, <8 x i1>)
declare void @llvm.masked.store.v2i64.p0v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>)
declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>)