1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-24 03:33:20 +01:00
llvm-mirror/test/CodeGen/Thumb2/mve-vcmpfr.ll
David Green 2653cea642 [ARM] Fold VCMP into VPT
MVE has VPT instructions, which perform the duties of both a VCMP and a VPST in
a single instruction, performing the compare and starting the VPT block in one.
This teaches the MVEVPTBlockPass to fold them, searching back through the
basicblock for a valid VCMP and creating the VPT from its operands.

There are some changes to the VPT instructions to accommodate this, altering
the order of the operands to match the VCMP better, and changing P0 register
defs to be VPR defs, as is used in other places.

Differential Revision: https://reviews.llvm.org/D66577

llvm-svn: 371982
2019-09-16 13:02:41 +00:00

2628 lines
94 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP
define arm_aapcs_vfpcc <4 x float> @vcmp_oeq_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_oeq_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vcmp.f32 s0, s4
; CHECK-MVE-NEXT: movs r1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r1, #1
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vcmp.f32 s1, s4
; CHECK-MVE-NEXT: cset r1, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r2, #0
; CHECK-MVE-NEXT: vcmp.f32 s2, s4
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r2, #1
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: cset r2, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r3, #0
; CHECK-MVE-NEXT: vcmp.f32 s3, s4
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r3, #1
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: cset r3, ne
; CHECK-MVE-NEXT: movs r0, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: lsls r0, r3, #31
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
; CHECK-MVE-NEXT: lsls r0, r2, #31
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: lsls r0, r1, #31
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: vcmp_oeq_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vmov r0, s4
; CHECK-MVEFP-NEXT: vcmp.f32 eq, q0, r0
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: bx lr
entry:
%i = insertelement <4 x float> undef, float %src2, i32 0
%sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
%c = fcmp oeq <4 x float> %src, %sp
%s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
ret <4 x float> %s
}
define arm_aapcs_vfpcc <4 x float> @vcmp_one_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_one_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vcmp.f32 s0, s4
; CHECK-MVE-NEXT: movs r1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s0, s4
; CHECK-MVE-NEXT: it mi
; CHECK-MVE-NEXT: movmi r1, #1
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it gt
; CHECK-MVE-NEXT: movgt r1, #1
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vcmp.f32 s1, s4
; CHECK-MVE-NEXT: mov.w r2, #0
; CHECK-MVE-NEXT: cset r1, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it mi
; CHECK-MVE-NEXT: movmi r2, #1
; CHECK-MVE-NEXT: vcmp.f32 s2, s4
; CHECK-MVE-NEXT: it gt
; CHECK-MVE-NEXT: movgt r2, #1
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: mov.w r3, #0
; CHECK-MVE-NEXT: cset r2, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it mi
; CHECK-MVE-NEXT: movmi r3, #1
; CHECK-MVE-NEXT: it gt
; CHECK-MVE-NEXT: movgt r3, #1
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: vcmp.f32 s3, s4
; CHECK-MVE-NEXT: cset r3, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it mi
; CHECK-MVE-NEXT: movmi r0, #1
; CHECK-MVE-NEXT: it gt
; CHECK-MVE-NEXT: movgt r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: lsls r0, r3, #31
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
; CHECK-MVE-NEXT: lsls r0, r2, #31
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: lsls r0, r1, #31
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: vcmp_one_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vmov r0, s4
; CHECK-MVEFP-NEXT: vdup.32 q1, r0
; CHECK-MVEFP-NEXT: vpt.f32 le, q1, q0
; CHECK-MVEFP-NEXT: vcmpt.f32 le, q0, r0
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: bx lr
entry:
%i = insertelement <4 x float> undef, float %src2, i32 0
%sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
%c = fcmp one <4 x float> %src, %sp
%s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
ret <4 x float> %s
}
define arm_aapcs_vfpcc <4 x float> @vcmp_ogt_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_ogt_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vcmpe.f32 s0, s4
; CHECK-MVE-NEXT: movs r1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it gt
; CHECK-MVE-NEXT: movgt r1, #1
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vcmpe.f32 s1, s4
; CHECK-MVE-NEXT: cset r1, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r2, #0
; CHECK-MVE-NEXT: vcmpe.f32 s2, s4
; CHECK-MVE-NEXT: it gt
; CHECK-MVE-NEXT: movgt r2, #1
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: cset r2, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r3, #0
; CHECK-MVE-NEXT: vcmpe.f32 s3, s4
; CHECK-MVE-NEXT: it gt
; CHECK-MVE-NEXT: movgt r3, #1
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: cset r3, ne
; CHECK-MVE-NEXT: movs r0, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it gt
; CHECK-MVE-NEXT: movgt r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: lsls r0, r3, #31
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
; CHECK-MVE-NEXT: lsls r0, r2, #31
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: lsls r0, r1, #31
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: vcmp_ogt_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vmov r0, s4
; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, r0
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: bx lr
entry:
%i = insertelement <4 x float> undef, float %src2, i32 0
%sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
%c = fcmp ogt <4 x float> %src, %sp
%s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
ret <4 x float> %s
}
define arm_aapcs_vfpcc <4 x float> @vcmp_oge_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_oge_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vcmpe.f32 s0, s4
; CHECK-MVE-NEXT: movs r1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it ge
; CHECK-MVE-NEXT: movge r1, #1
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vcmpe.f32 s1, s4
; CHECK-MVE-NEXT: cset r1, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r2, #0
; CHECK-MVE-NEXT: vcmpe.f32 s2, s4
; CHECK-MVE-NEXT: it ge
; CHECK-MVE-NEXT: movge r2, #1
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: cset r2, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r3, #0
; CHECK-MVE-NEXT: vcmpe.f32 s3, s4
; CHECK-MVE-NEXT: it ge
; CHECK-MVE-NEXT: movge r3, #1
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: cset r3, ne
; CHECK-MVE-NEXT: movs r0, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it ge
; CHECK-MVE-NEXT: movge r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: lsls r0, r3, #31
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
; CHECK-MVE-NEXT: lsls r0, r2, #31
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: lsls r0, r1, #31
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: vcmp_oge_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vmov r0, s4
; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, r0
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: bx lr
entry:
%i = insertelement <4 x float> undef, float %src2, i32 0
%sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
%c = fcmp oge <4 x float> %src, %sp
%s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
ret <4 x float> %s
}
define arm_aapcs_vfpcc <4 x float> @vcmp_olt_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_olt_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vcmpe.f32 s0, s4
; CHECK-MVE-NEXT: movs r1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it mi
; CHECK-MVE-NEXT: movmi r1, #1
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vcmpe.f32 s1, s4
; CHECK-MVE-NEXT: cset r1, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r2, #0
; CHECK-MVE-NEXT: vcmpe.f32 s2, s4
; CHECK-MVE-NEXT: it mi
; CHECK-MVE-NEXT: movmi r2, #1
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: cset r2, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r3, #0
; CHECK-MVE-NEXT: vcmpe.f32 s3, s4
; CHECK-MVE-NEXT: it mi
; CHECK-MVE-NEXT: movmi r3, #1
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: cset r3, ne
; CHECK-MVE-NEXT: movs r0, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it mi
; CHECK-MVE-NEXT: movmi r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: lsls r0, r3, #31
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
; CHECK-MVE-NEXT: lsls r0, r2, #31
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: lsls r0, r1, #31
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: vcmp_olt_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vmov r0, s4
; CHECK-MVEFP-NEXT: vdup.32 q1, r0
; CHECK-MVEFP-NEXT: vcmp.f32 gt, q1, q0
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: bx lr
entry:
%i = insertelement <4 x float> undef, float %src2, i32 0
%sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
%c = fcmp olt <4 x float> %src, %sp
%s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
ret <4 x float> %s
}
define arm_aapcs_vfpcc <4 x float> @vcmp_ole_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_ole_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vcmpe.f32 s0, s4
; CHECK-MVE-NEXT: movs r1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it ls
; CHECK-MVE-NEXT: movls r1, #1
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vcmpe.f32 s1, s4
; CHECK-MVE-NEXT: cset r1, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r2, #0
; CHECK-MVE-NEXT: vcmpe.f32 s2, s4
; CHECK-MVE-NEXT: it ls
; CHECK-MVE-NEXT: movls r2, #1
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: cset r2, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r3, #0
; CHECK-MVE-NEXT: vcmpe.f32 s3, s4
; CHECK-MVE-NEXT: it ls
; CHECK-MVE-NEXT: movls r3, #1
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: cset r3, ne
; CHECK-MVE-NEXT: movs r0, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it ls
; CHECK-MVE-NEXT: movls r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: lsls r0, r3, #31
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
; CHECK-MVE-NEXT: lsls r0, r2, #31
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: lsls r0, r1, #31
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: vcmp_ole_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vmov r0, s4
; CHECK-MVEFP-NEXT: vdup.32 q1, r0
; CHECK-MVEFP-NEXT: vcmp.f32 ge, q1, q0
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: bx lr
entry:
%i = insertelement <4 x float> undef, float %src2, i32 0
%sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
%c = fcmp ole <4 x float> %src, %sp
%s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
ret <4 x float> %s
}
define arm_aapcs_vfpcc <4 x float> @vcmp_ueq_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_ueq_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vcmp.f32 s0, s4
; CHECK-MVE-NEXT: movs r1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcmp.f32 s0, s4
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r1, #1
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it vs
; CHECK-MVE-NEXT: movvs r1, #1
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vcmp.f32 s1, s4
; CHECK-MVE-NEXT: mov.w r2, #0
; CHECK-MVE-NEXT: cset r1, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r2, #1
; CHECK-MVE-NEXT: vcmp.f32 s2, s4
; CHECK-MVE-NEXT: it vs
; CHECK-MVE-NEXT: movvs r2, #1
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: mov.w r3, #0
; CHECK-MVE-NEXT: cset r2, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r3, #1
; CHECK-MVE-NEXT: it vs
; CHECK-MVE-NEXT: movvs r3, #1
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: vcmp.f32 s3, s4
; CHECK-MVE-NEXT: cset r3, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r0, #1
; CHECK-MVE-NEXT: it vs
; CHECK-MVE-NEXT: movvs r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: lsls r0, r3, #31
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
; CHECK-MVE-NEXT: lsls r0, r2, #31
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: lsls r0, r1, #31
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: vcmp_ueq_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vmov r0, s4
; CHECK-MVEFP-NEXT: vdup.32 q1, r0
; CHECK-MVEFP-NEXT: vpt.f32 le, q1, q0
; CHECK-MVEFP-NEXT: vcmpt.f32 le, q0, r0
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: bx lr
entry:
%i = insertelement <4 x float> undef, float %src2, i32 0
%sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
%c = fcmp ueq <4 x float> %src, %sp
%s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
ret <4 x float> %s
}
define arm_aapcs_vfpcc <4 x float> @vcmp_une_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_une_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vcmp.f32 s0, s4
; CHECK-MVE-NEXT: movs r1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it ne
; CHECK-MVE-NEXT: movne r1, #1
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vcmp.f32 s1, s4
; CHECK-MVE-NEXT: cset r1, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r2, #0
; CHECK-MVE-NEXT: vcmp.f32 s2, s4
; CHECK-MVE-NEXT: it ne
; CHECK-MVE-NEXT: movne r2, #1
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: cset r2, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r3, #0
; CHECK-MVE-NEXT: vcmp.f32 s3, s4
; CHECK-MVE-NEXT: it ne
; CHECK-MVE-NEXT: movne r3, #1
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: cset r3, ne
; CHECK-MVE-NEXT: movs r0, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it ne
; CHECK-MVE-NEXT: movne r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: lsls r0, r3, #31
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
; CHECK-MVE-NEXT: lsls r0, r2, #31
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: lsls r0, r1, #31
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: vcmp_une_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vmov r0, s4
; CHECK-MVEFP-NEXT: vcmp.f32 ne, q0, r0
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: bx lr
entry:
%i = insertelement <4 x float> undef, float %src2, i32 0
%sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
%c = fcmp une <4 x float> %src, %sp
%s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
ret <4 x float> %s
}
define arm_aapcs_vfpcc <4 x float> @vcmp_ugt_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_ugt_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vcmpe.f32 s0, s4
; CHECK-MVE-NEXT: movs r1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it hi
; CHECK-MVE-NEXT: movhi r1, #1
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vcmpe.f32 s1, s4
; CHECK-MVE-NEXT: cset r1, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r2, #0
; CHECK-MVE-NEXT: vcmpe.f32 s2, s4
; CHECK-MVE-NEXT: it hi
; CHECK-MVE-NEXT: movhi r2, #1
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: cset r2, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r3, #0
; CHECK-MVE-NEXT: vcmpe.f32 s3, s4
; CHECK-MVE-NEXT: it hi
; CHECK-MVE-NEXT: movhi r3, #1
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: cset r3, ne
; CHECK-MVE-NEXT: movs r0, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it hi
; CHECK-MVE-NEXT: movhi r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: lsls r0, r3, #31
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
; CHECK-MVE-NEXT: lsls r0, r2, #31
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: lsls r0, r1, #31
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: vcmp_ugt_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vmov r0, s4
; CHECK-MVEFP-NEXT: vdup.32 q1, r0
; CHECK-MVEFP-NEXT: vcmp.f32 ge, q1, q0
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: bx lr
entry:
%i = insertelement <4 x float> undef, float %src2, i32 0
%sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
%c = fcmp ugt <4 x float> %src, %sp
%s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
ret <4 x float> %s
}
define arm_aapcs_vfpcc <4 x float> @vcmp_uge_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_uge_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vcmpe.f32 s0, s4
; CHECK-MVE-NEXT: movs r1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it pl
; CHECK-MVE-NEXT: movpl r1, #1
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vcmpe.f32 s1, s4
; CHECK-MVE-NEXT: cset r1, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r2, #0
; CHECK-MVE-NEXT: vcmpe.f32 s2, s4
; CHECK-MVE-NEXT: it pl
; CHECK-MVE-NEXT: movpl r2, #1
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: cset r2, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r3, #0
; CHECK-MVE-NEXT: vcmpe.f32 s3, s4
; CHECK-MVE-NEXT: it pl
; CHECK-MVE-NEXT: movpl r3, #1
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: cset r3, ne
; CHECK-MVE-NEXT: movs r0, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it pl
; CHECK-MVE-NEXT: movpl r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: lsls r0, r3, #31
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
; CHECK-MVE-NEXT: lsls r0, r2, #31
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: lsls r0, r1, #31
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: vcmp_uge_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vmov r0, s4
; CHECK-MVEFP-NEXT: vdup.32 q1, r0
; CHECK-MVEFP-NEXT: vcmp.f32 gt, q1, q0
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: bx lr
entry:
%i = insertelement <4 x float> undef, float %src2, i32 0
%sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
%c = fcmp uge <4 x float> %src, %sp
%s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
ret <4 x float> %s
}
define arm_aapcs_vfpcc <4 x float> @vcmp_ult_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_ult_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vcmpe.f32 s0, s4
; CHECK-MVE-NEXT: movs r1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it lt
; CHECK-MVE-NEXT: movlt r1, #1
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vcmpe.f32 s1, s4
; CHECK-MVE-NEXT: cset r1, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r2, #0
; CHECK-MVE-NEXT: vcmpe.f32 s2, s4
; CHECK-MVE-NEXT: it lt
; CHECK-MVE-NEXT: movlt r2, #1
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: cset r2, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r3, #0
; CHECK-MVE-NEXT: vcmpe.f32 s3, s4
; CHECK-MVE-NEXT: it lt
; CHECK-MVE-NEXT: movlt r3, #1
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: cset r3, ne
; CHECK-MVE-NEXT: movs r0, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it lt
; CHECK-MVE-NEXT: movlt r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: lsls r0, r3, #31
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
; CHECK-MVE-NEXT: lsls r0, r2, #31
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: lsls r0, r1, #31
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: vcmp_ult_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vmov r0, s4
; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, r0
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: bx lr
entry:
%i = insertelement <4 x float> undef, float %src2, i32 0
%sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
%c = fcmp ult <4 x float> %src, %sp
%s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
ret <4 x float> %s
}
define arm_aapcs_vfpcc <4 x float> @vcmp_ule_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_ule_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vcmpe.f32 s0, s4
; CHECK-MVE-NEXT: movs r1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it le
; CHECK-MVE-NEXT: movle r1, #1
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vcmpe.f32 s1, s4
; CHECK-MVE-NEXT: cset r1, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r2, #0
; CHECK-MVE-NEXT: vcmpe.f32 s2, s4
; CHECK-MVE-NEXT: it le
; CHECK-MVE-NEXT: movle r2, #1
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: cset r2, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r3, #0
; CHECK-MVE-NEXT: vcmpe.f32 s3, s4
; CHECK-MVE-NEXT: it le
; CHECK-MVE-NEXT: movle r3, #1
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: cset r3, ne
; CHECK-MVE-NEXT: movs r0, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it le
; CHECK-MVE-NEXT: movle r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: lsls r0, r3, #31
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
; CHECK-MVE-NEXT: lsls r0, r2, #31
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: lsls r0, r1, #31
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: vcmp_ule_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vmov r0, s4
; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, r0
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: bx lr
entry:
%i = insertelement <4 x float> undef, float %src2, i32 0
%sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
%c = fcmp ule <4 x float> %src, %sp
%s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
ret <4 x float> %s
}
define arm_aapcs_vfpcc <4 x float> @vcmp_ord_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_ord_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vcmpe.f32 s0, s4
; CHECK-MVE-NEXT: movs r1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it vc
; CHECK-MVE-NEXT: movvc r1, #1
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vcmpe.f32 s1, s4
; CHECK-MVE-NEXT: cset r1, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r2, #0
; CHECK-MVE-NEXT: vcmpe.f32 s2, s4
; CHECK-MVE-NEXT: it vc
; CHECK-MVE-NEXT: movvc r2, #1
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: cset r2, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r3, #0
; CHECK-MVE-NEXT: vcmpe.f32 s3, s4
; CHECK-MVE-NEXT: it vc
; CHECK-MVE-NEXT: movvc r3, #1
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: cset r3, ne
; CHECK-MVE-NEXT: movs r0, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it vc
; CHECK-MVE-NEXT: movvc r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: lsls r0, r3, #31
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
; CHECK-MVE-NEXT: lsls r0, r2, #31
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: lsls r0, r1, #31
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: vcmp_ord_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vmov r0, s4
; CHECK-MVEFP-NEXT: vdup.32 q1, r0
; CHECK-MVEFP-NEXT: vpt.f32 le, q1, q0
; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, r0
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: bx lr
entry:
%i = insertelement <4 x float> undef, float %src2, i32 0
%sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
%c = fcmp ord <4 x float> %src, %sp
%s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
ret <4 x float> %s
}
define arm_aapcs_vfpcc <4 x float> @vcmp_uno_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
; CHECK-MVE-LABEL: vcmp_uno_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vcmpe.f32 s0, s4
; CHECK-MVE-NEXT: movs r1, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it vs
; CHECK-MVE-NEXT: movvs r1, #1
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vcmpe.f32 s1, s4
; CHECK-MVE-NEXT: cset r1, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r2, #0
; CHECK-MVE-NEXT: vcmpe.f32 s2, s4
; CHECK-MVE-NEXT: it vs
; CHECK-MVE-NEXT: movvs r2, #1
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: cset r2, ne
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r3, #0
; CHECK-MVE-NEXT: vcmpe.f32 s3, s4
; CHECK-MVE-NEXT: it vs
; CHECK-MVE-NEXT: movvs r3, #1
; CHECK-MVE-NEXT: cmp r3, #0
; CHECK-MVE-NEXT: cset r3, ne
; CHECK-MVE-NEXT: movs r0, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it vs
; CHECK-MVE-NEXT: movvs r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11
; CHECK-MVE-NEXT: lsls r0, r3, #31
; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10
; CHECK-MVE-NEXT: lsls r0, r2, #31
; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9
; CHECK-MVE-NEXT: lsls r0, r1, #31
; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: vcmp_uno_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vmov r0, s4
; CHECK-MVEFP-NEXT: vdup.32 q1, r0
; CHECK-MVEFP-NEXT: vpt.f32 le, q1, q0
; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, r0
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: bx lr
entry:
%i = insertelement <4 x float> undef, float %src2, i32 0
%sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
%c = fcmp uno <4 x float> %src, %sp
%s = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
ret <4 x float> %s
}
define arm_aapcs_vfpcc <8 x half> @vcmp_oeq_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
; CHECK-MVE-LABEL: vcmp_oeq_v8f16:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: .vsave {d8, d9, d10}
; CHECK-MVE-NEXT: vpush {d8, d9, d10}
; CHECK-MVE-NEXT: vldr.16 s16, [r0]
; CHECK-MVE-NEXT: vmovx.f16 s12, s0
; CHECK-MVE-NEXT: movs r0, #0
; CHECK-MVE-NEXT: vmovx.f16 s14, s8
; CHECK-MVE-NEXT: vcmp.f16 s12, s16
; CHECK-MVE-NEXT: vmovx.f16 s12, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vcmp.f16 s0, s16
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: movs r2, #0
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vmovx.f16 s20, s9
; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r2, #1
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: cset r2, ne
; CHECK-MVE-NEXT: vmov r0, s12
; CHECK-MVE-NEXT: lsls r2, r2, #31
; CHECK-MVE-NEXT: vcmp.f16 s1, s16
; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r2, s12
; CHECK-MVE-NEXT: vmovx.f16 s0, s3
; CHECK-MVE-NEXT: vmov.16 q3[0], r2
; CHECK-MVE-NEXT: mov.w r1, #0
; CHECK-MVE-NEXT: vmov.16 q3[1], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s1
; CHECK-MVE-NEXT: vcmp.f16 s18, s16
; CHECK-MVE-NEXT: vmov.16 q3[2], r0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s18, s5
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vcmp.f16 s2, s16
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s20, s10
; CHECK-MVE-NEXT: vmov.16 q3[3], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s2
; CHECK-MVE-NEXT: vcmp.f16 s18, s16
; CHECK-MVE-NEXT: vmov.16 q3[4], r0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s18, s6
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vcmp.f16 s3, s16
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vcmp.f16 s0, s16
; CHECK-MVE-NEXT: vmov.16 q3[5], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s0, s7
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vmovx.f16 s2, s11
; CHECK-MVE-NEXT: vseleq.f16 s18, s11, s7
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r1, #1
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vmov.16 q3[6], r0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0
; CHECK-MVE-NEXT: vmov r0, s0
; CHECK-MVE-NEXT: vmov.16 q3[7], r0
; CHECK-MVE-NEXT: vmov q0, q3
; CHECK-MVE-NEXT: vpop {d8, d9, d10}
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: vcmp_oeq_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vldr.16 s12, [r0]
; CHECK-MVEFP-NEXT: vmov r0, s12
; CHECK-MVEFP-NEXT: vcmp.f16 eq, q0, r0
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%src2 = load half, half* %src2p
%i = insertelement <8 x half> undef, half %src2, i32 0
%sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
%c = fcmp oeq <8 x half> %src, %sp
%s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b
ret <8 x half> %s
}
define arm_aapcs_vfpcc <8 x half> @vcmp_one_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
; CHECK-MVE-LABEL: vcmp_one_v8f16:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: .vsave {d8, d9, d10}
; CHECK-MVE-NEXT: vpush {d8, d9, d10}
; CHECK-MVE-NEXT: vldr.16 s16, [r0]
; CHECK-MVE-NEXT: vmovx.f16 s12, s0
; CHECK-MVE-NEXT: movs r0, #0
; CHECK-MVE-NEXT: vmovx.f16 s14, s8
; CHECK-MVE-NEXT: vcmp.f16 s12, s16
; CHECK-MVE-NEXT: vmovx.f16 s12, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it mi
; CHECK-MVE-NEXT: movmi r0, #1
; CHECK-MVE-NEXT: it gt
; CHECK-MVE-NEXT: movgt r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vcmp.f16 s0, s16
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: mov.w r2, #0
; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it mi
; CHECK-MVE-NEXT: movmi r2, #1
; CHECK-MVE-NEXT: vmov r0, s12
; CHECK-MVE-NEXT: it gt
; CHECK-MVE-NEXT: movgt r2, #1
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: cset r2, ne
; CHECK-MVE-NEXT: vcmp.f16 s1, s16
; CHECK-MVE-NEXT: lsls r2, r2, #31
; CHECK-MVE-NEXT: vmovx.f16 s20, s9
; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r2, s12
; CHECK-MVE-NEXT: vmovx.f16 s0, s3
; CHECK-MVE-NEXT: vmov.16 q3[0], r2
; CHECK-MVE-NEXT: mov.w r1, #0
; CHECK-MVE-NEXT: vmov.16 q3[1], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it mi
; CHECK-MVE-NEXT: movmi r0, #1
; CHECK-MVE-NEXT: it gt
; CHECK-MVE-NEXT: movgt r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s1
; CHECK-MVE-NEXT: vcmp.f16 s18, s16
; CHECK-MVE-NEXT: vmov.16 q3[2], r0
; CHECK-MVE-NEXT: movs r0, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it mi
; CHECK-MVE-NEXT: movmi r0, #1
; CHECK-MVE-NEXT: vmovx.f16 s18, s5
; CHECK-MVE-NEXT: it gt
; CHECK-MVE-NEXT: movgt r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vcmp.f16 s2, s16
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s20, s10
; CHECK-MVE-NEXT: vmov.16 q3[3], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it mi
; CHECK-MVE-NEXT: movmi r0, #1
; CHECK-MVE-NEXT: it gt
; CHECK-MVE-NEXT: movgt r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s2
; CHECK-MVE-NEXT: vcmp.f16 s18, s16
; CHECK-MVE-NEXT: vmov.16 q3[4], r0
; CHECK-MVE-NEXT: movs r0, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it mi
; CHECK-MVE-NEXT: movmi r0, #1
; CHECK-MVE-NEXT: vmovx.f16 s18, s6
; CHECK-MVE-NEXT: it gt
; CHECK-MVE-NEXT: movgt r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vcmp.f16 s3, s16
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vmovx.f16 s2, s11
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vcmp.f16 s0, s16
; CHECK-MVE-NEXT: vmov.16 q3[5], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it mi
; CHECK-MVE-NEXT: movmi r0, #1
; CHECK-MVE-NEXT: vmovx.f16 s0, s7
; CHECK-MVE-NEXT: it gt
; CHECK-MVE-NEXT: movgt r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s11, s7
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it mi
; CHECK-MVE-NEXT: movmi r1, #1
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: it gt
; CHECK-MVE-NEXT: movgt r1, #1
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vmov.16 q3[6], r0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0
; CHECK-MVE-NEXT: vmov r0, s0
; CHECK-MVE-NEXT: vmov.16 q3[7], r0
; CHECK-MVE-NEXT: vmov q0, q3
; CHECK-MVE-NEXT: vpop {d8, d9, d10}
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: vcmp_one_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vldr.16 s12, [r0]
; CHECK-MVEFP-NEXT: vmov r0, s12
; CHECK-MVEFP-NEXT: vdup.16 q3, r0
; CHECK-MVEFP-NEXT: vpt.f16 le, q3, q0
; CHECK-MVEFP-NEXT: vcmpt.f16 le, q0, r0
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%src2 = load half, half* %src2p
%i = insertelement <8 x half> undef, half %src2, i32 0
%sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
%c = fcmp one <8 x half> %src, %sp
%s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b
ret <8 x half> %s
}
define arm_aapcs_vfpcc <8 x half> @vcmp_ogt_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
; CHECK-MVE-LABEL: vcmp_ogt_v8f16:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: .vsave {d8, d9, d10}
; CHECK-MVE-NEXT: vpush {d8, d9, d10}
; CHECK-MVE-NEXT: vldr.16 s16, [r0]
; CHECK-MVE-NEXT: vmovx.f16 s12, s0
; CHECK-MVE-NEXT: movs r0, #0
; CHECK-MVE-NEXT: vmovx.f16 s14, s8
; CHECK-MVE-NEXT: vcmpe.f16 s12, s16
; CHECK-MVE-NEXT: vmovx.f16 s12, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it gt
; CHECK-MVE-NEXT: movgt r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vcmpe.f16 s0, s16
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: movs r2, #0
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vmovx.f16 s20, s9
; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it gt
; CHECK-MVE-NEXT: movgt r2, #1
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: cset r2, ne
; CHECK-MVE-NEXT: vmov r0, s12
; CHECK-MVE-NEXT: lsls r2, r2, #31
; CHECK-MVE-NEXT: vcmpe.f16 s1, s16
; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r2, s12
; CHECK-MVE-NEXT: vmovx.f16 s0, s3
; CHECK-MVE-NEXT: vmov.16 q3[0], r2
; CHECK-MVE-NEXT: mov.w r1, #0
; CHECK-MVE-NEXT: vmov.16 q3[1], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it gt
; CHECK-MVE-NEXT: movgt r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s1
; CHECK-MVE-NEXT: vcmpe.f16 s18, s16
; CHECK-MVE-NEXT: vmov.16 q3[2], r0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it gt
; CHECK-MVE-NEXT: movgt r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s18, s5
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vcmpe.f16 s2, s16
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s20, s10
; CHECK-MVE-NEXT: vmov.16 q3[3], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it gt
; CHECK-MVE-NEXT: movgt r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s2
; CHECK-MVE-NEXT: vcmpe.f16 s18, s16
; CHECK-MVE-NEXT: vmov.16 q3[4], r0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it gt
; CHECK-MVE-NEXT: movgt r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s18, s6
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vcmpe.f16 s3, s16
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vcmpe.f16 s0, s16
; CHECK-MVE-NEXT: vmov.16 q3[5], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it gt
; CHECK-MVE-NEXT: movgt r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s0, s7
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vmovx.f16 s2, s11
; CHECK-MVE-NEXT: vseleq.f16 s18, s11, s7
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it gt
; CHECK-MVE-NEXT: movgt r1, #1
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vmov.16 q3[6], r0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0
; CHECK-MVE-NEXT: vmov r0, s0
; CHECK-MVE-NEXT: vmov.16 q3[7], r0
; CHECK-MVE-NEXT: vmov q0, q3
; CHECK-MVE-NEXT: vpop {d8, d9, d10}
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: vcmp_ogt_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vldr.16 s12, [r0]
; CHECK-MVEFP-NEXT: vmov r0, s12
; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, r0
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%src2 = load half, half* %src2p
%i = insertelement <8 x half> undef, half %src2, i32 0
%sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
%c = fcmp ogt <8 x half> %src, %sp
%s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b
ret <8 x half> %s
}
define arm_aapcs_vfpcc <8 x half> @vcmp_oge_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
; CHECK-MVE-LABEL: vcmp_oge_v8f16:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: .vsave {d8, d9, d10}
; CHECK-MVE-NEXT: vpush {d8, d9, d10}
; CHECK-MVE-NEXT: vldr.16 s16, [r0]
; CHECK-MVE-NEXT: vmovx.f16 s12, s0
; CHECK-MVE-NEXT: movs r0, #0
; CHECK-MVE-NEXT: vmovx.f16 s14, s8
; CHECK-MVE-NEXT: vcmpe.f16 s12, s16
; CHECK-MVE-NEXT: vmovx.f16 s12, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it ge
; CHECK-MVE-NEXT: movge r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vcmpe.f16 s0, s16
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: movs r2, #0
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vmovx.f16 s20, s9
; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it ge
; CHECK-MVE-NEXT: movge r2, #1
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: cset r2, ne
; CHECK-MVE-NEXT: vmov r0, s12
; CHECK-MVE-NEXT: lsls r2, r2, #31
; CHECK-MVE-NEXT: vcmpe.f16 s1, s16
; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r2, s12
; CHECK-MVE-NEXT: vmovx.f16 s0, s3
; CHECK-MVE-NEXT: vmov.16 q3[0], r2
; CHECK-MVE-NEXT: mov.w r1, #0
; CHECK-MVE-NEXT: vmov.16 q3[1], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it ge
; CHECK-MVE-NEXT: movge r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s1
; CHECK-MVE-NEXT: vcmpe.f16 s18, s16
; CHECK-MVE-NEXT: vmov.16 q3[2], r0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it ge
; CHECK-MVE-NEXT: movge r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s18, s5
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vcmpe.f16 s2, s16
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s20, s10
; CHECK-MVE-NEXT: vmov.16 q3[3], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it ge
; CHECK-MVE-NEXT: movge r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s2
; CHECK-MVE-NEXT: vcmpe.f16 s18, s16
; CHECK-MVE-NEXT: vmov.16 q3[4], r0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it ge
; CHECK-MVE-NEXT: movge r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s18, s6
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vcmpe.f16 s3, s16
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vcmpe.f16 s0, s16
; CHECK-MVE-NEXT: vmov.16 q3[5], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it ge
; CHECK-MVE-NEXT: movge r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s0, s7
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vmovx.f16 s2, s11
; CHECK-MVE-NEXT: vseleq.f16 s18, s11, s7
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it ge
; CHECK-MVE-NEXT: movge r1, #1
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vmov.16 q3[6], r0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0
; CHECK-MVE-NEXT: vmov r0, s0
; CHECK-MVE-NEXT: vmov.16 q3[7], r0
; CHECK-MVE-NEXT: vmov q0, q3
; CHECK-MVE-NEXT: vpop {d8, d9, d10}
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: vcmp_oge_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vldr.16 s12, [r0]
; CHECK-MVEFP-NEXT: vmov r0, s12
; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, r0
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%src2 = load half, half* %src2p
%i = insertelement <8 x half> undef, half %src2, i32 0
%sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
%c = fcmp oge <8 x half> %src, %sp
%s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b
ret <8 x half> %s
}
define arm_aapcs_vfpcc <8 x half> @vcmp_olt_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
; CHECK-MVE-LABEL: vcmp_olt_v8f16:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: .vsave {d8, d9, d10}
; CHECK-MVE-NEXT: vpush {d8, d9, d10}
; CHECK-MVE-NEXT: vldr.16 s16, [r0]
; CHECK-MVE-NEXT: vmovx.f16 s12, s0
; CHECK-MVE-NEXT: movs r0, #0
; CHECK-MVE-NEXT: vmovx.f16 s14, s8
; CHECK-MVE-NEXT: vcmpe.f16 s12, s16
; CHECK-MVE-NEXT: vmovx.f16 s12, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it mi
; CHECK-MVE-NEXT: movmi r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vcmpe.f16 s0, s16
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: movs r2, #0
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vmovx.f16 s20, s9
; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it mi
; CHECK-MVE-NEXT: movmi r2, #1
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: cset r2, ne
; CHECK-MVE-NEXT: vmov r0, s12
; CHECK-MVE-NEXT: lsls r2, r2, #31
; CHECK-MVE-NEXT: vcmpe.f16 s1, s16
; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r2, s12
; CHECK-MVE-NEXT: vmovx.f16 s0, s3
; CHECK-MVE-NEXT: vmov.16 q3[0], r2
; CHECK-MVE-NEXT: mov.w r1, #0
; CHECK-MVE-NEXT: vmov.16 q3[1], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it mi
; CHECK-MVE-NEXT: movmi r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s1
; CHECK-MVE-NEXT: vcmpe.f16 s18, s16
; CHECK-MVE-NEXT: vmov.16 q3[2], r0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it mi
; CHECK-MVE-NEXT: movmi r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s18, s5
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vcmpe.f16 s2, s16
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s20, s10
; CHECK-MVE-NEXT: vmov.16 q3[3], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it mi
; CHECK-MVE-NEXT: movmi r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s2
; CHECK-MVE-NEXT: vcmpe.f16 s18, s16
; CHECK-MVE-NEXT: vmov.16 q3[4], r0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it mi
; CHECK-MVE-NEXT: movmi r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s18, s6
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vcmpe.f16 s3, s16
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vcmpe.f16 s0, s16
; CHECK-MVE-NEXT: vmov.16 q3[5], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it mi
; CHECK-MVE-NEXT: movmi r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s0, s7
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vmovx.f16 s2, s11
; CHECK-MVE-NEXT: vseleq.f16 s18, s11, s7
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it mi
; CHECK-MVE-NEXT: movmi r1, #1
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vmov.16 q3[6], r0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0
; CHECK-MVE-NEXT: vmov r0, s0
; CHECK-MVE-NEXT: vmov.16 q3[7], r0
; CHECK-MVE-NEXT: vmov q0, q3
; CHECK-MVE-NEXT: vpop {d8, d9, d10}
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: vcmp_olt_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vldr.16 s12, [r0]
; CHECK-MVEFP-NEXT: vmov r0, s12
; CHECK-MVEFP-NEXT: vdup.16 q3, r0
; CHECK-MVEFP-NEXT: vcmp.f16 gt, q3, q0
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%src2 = load half, half* %src2p
%i = insertelement <8 x half> undef, half %src2, i32 0
%sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
%c = fcmp olt <8 x half> %src, %sp
%s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b
ret <8 x half> %s
}
define arm_aapcs_vfpcc <8 x half> @vcmp_ole_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
; CHECK-MVE-LABEL: vcmp_ole_v8f16:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: .vsave {d8, d9, d10}
; CHECK-MVE-NEXT: vpush {d8, d9, d10}
; CHECK-MVE-NEXT: vldr.16 s16, [r0]
; CHECK-MVE-NEXT: vmovx.f16 s12, s0
; CHECK-MVE-NEXT: movs r0, #0
; CHECK-MVE-NEXT: vmovx.f16 s14, s8
; CHECK-MVE-NEXT: vcmpe.f16 s12, s16
; CHECK-MVE-NEXT: vmovx.f16 s12, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it ls
; CHECK-MVE-NEXT: movls r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vcmpe.f16 s0, s16
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: movs r2, #0
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vmovx.f16 s20, s9
; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it ls
; CHECK-MVE-NEXT: movls r2, #1
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: cset r2, ne
; CHECK-MVE-NEXT: vmov r0, s12
; CHECK-MVE-NEXT: lsls r2, r2, #31
; CHECK-MVE-NEXT: vcmpe.f16 s1, s16
; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r2, s12
; CHECK-MVE-NEXT: vmovx.f16 s0, s3
; CHECK-MVE-NEXT: vmov.16 q3[0], r2
; CHECK-MVE-NEXT: mov.w r1, #0
; CHECK-MVE-NEXT: vmov.16 q3[1], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it ls
; CHECK-MVE-NEXT: movls r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s1
; CHECK-MVE-NEXT: vcmpe.f16 s18, s16
; CHECK-MVE-NEXT: vmov.16 q3[2], r0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it ls
; CHECK-MVE-NEXT: movls r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s18, s5
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vcmpe.f16 s2, s16
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s20, s10
; CHECK-MVE-NEXT: vmov.16 q3[3], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it ls
; CHECK-MVE-NEXT: movls r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s2
; CHECK-MVE-NEXT: vcmpe.f16 s18, s16
; CHECK-MVE-NEXT: vmov.16 q3[4], r0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it ls
; CHECK-MVE-NEXT: movls r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s18, s6
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vcmpe.f16 s3, s16
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vcmpe.f16 s0, s16
; CHECK-MVE-NEXT: vmov.16 q3[5], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it ls
; CHECK-MVE-NEXT: movls r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s0, s7
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vmovx.f16 s2, s11
; CHECK-MVE-NEXT: vseleq.f16 s18, s11, s7
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it ls
; CHECK-MVE-NEXT: movls r1, #1
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vmov.16 q3[6], r0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0
; CHECK-MVE-NEXT: vmov r0, s0
; CHECK-MVE-NEXT: vmov.16 q3[7], r0
; CHECK-MVE-NEXT: vmov q0, q3
; CHECK-MVE-NEXT: vpop {d8, d9, d10}
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: vcmp_ole_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vldr.16 s12, [r0]
; CHECK-MVEFP-NEXT: vmov r0, s12
; CHECK-MVEFP-NEXT: vdup.16 q3, r0
; CHECK-MVEFP-NEXT: vcmp.f16 ge, q3, q0
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%src2 = load half, half* %src2p
%i = insertelement <8 x half> undef, half %src2, i32 0
%sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
%c = fcmp ole <8 x half> %src, %sp
%s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b
ret <8 x half> %s
}
define arm_aapcs_vfpcc <8 x half> @vcmp_ueq_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
; CHECK-MVE-LABEL: vcmp_ueq_v8f16:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: .vsave {d8, d9, d10}
; CHECK-MVE-NEXT: vpush {d8, d9, d10}
; CHECK-MVE-NEXT: vldr.16 s16, [r0]
; CHECK-MVE-NEXT: vmovx.f16 s12, s0
; CHECK-MVE-NEXT: movs r0, #0
; CHECK-MVE-NEXT: vmovx.f16 s14, s8
; CHECK-MVE-NEXT: vcmp.f16 s12, s16
; CHECK-MVE-NEXT: vmovx.f16 s12, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r0, #1
; CHECK-MVE-NEXT: it vs
; CHECK-MVE-NEXT: movvs r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vcmp.f16 s0, s16
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: mov.w r2, #0
; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r2, #1
; CHECK-MVE-NEXT: vmov r0, s12
; CHECK-MVE-NEXT: it vs
; CHECK-MVE-NEXT: movvs r2, #1
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: cset r2, ne
; CHECK-MVE-NEXT: vcmp.f16 s1, s16
; CHECK-MVE-NEXT: lsls r2, r2, #31
; CHECK-MVE-NEXT: vmovx.f16 s20, s9
; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r2, s12
; CHECK-MVE-NEXT: vmovx.f16 s0, s3
; CHECK-MVE-NEXT: vmov.16 q3[0], r2
; CHECK-MVE-NEXT: mov.w r1, #0
; CHECK-MVE-NEXT: vmov.16 q3[1], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r0, #1
; CHECK-MVE-NEXT: it vs
; CHECK-MVE-NEXT: movvs r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s1
; CHECK-MVE-NEXT: vcmp.f16 s18, s16
; CHECK-MVE-NEXT: vmov.16 q3[2], r0
; CHECK-MVE-NEXT: movs r0, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r0, #1
; CHECK-MVE-NEXT: vmovx.f16 s18, s5
; CHECK-MVE-NEXT: it vs
; CHECK-MVE-NEXT: movvs r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vcmp.f16 s2, s16
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s20, s10
; CHECK-MVE-NEXT: vmov.16 q3[3], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r0, #1
; CHECK-MVE-NEXT: it vs
; CHECK-MVE-NEXT: movvs r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s2
; CHECK-MVE-NEXT: vcmp.f16 s18, s16
; CHECK-MVE-NEXT: vmov.16 q3[4], r0
; CHECK-MVE-NEXT: movs r0, #0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r0, #1
; CHECK-MVE-NEXT: vmovx.f16 s18, s6
; CHECK-MVE-NEXT: it vs
; CHECK-MVE-NEXT: movvs r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vcmp.f16 s3, s16
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vmovx.f16 s2, s11
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vcmp.f16 s0, s16
; CHECK-MVE-NEXT: vmov.16 q3[5], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r0, #1
; CHECK-MVE-NEXT: vmovx.f16 s0, s7
; CHECK-MVE-NEXT: it vs
; CHECK-MVE-NEXT: movvs r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s11, s7
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r1, #1
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: it vs
; CHECK-MVE-NEXT: movvs r1, #1
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vmov.16 q3[6], r0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0
; CHECK-MVE-NEXT: vmov r0, s0
; CHECK-MVE-NEXT: vmov.16 q3[7], r0
; CHECK-MVE-NEXT: vmov q0, q3
; CHECK-MVE-NEXT: vpop {d8, d9, d10}
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: vcmp_ueq_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vldr.16 s12, [r0]
; CHECK-MVEFP-NEXT: vmov r0, s12
; CHECK-MVEFP-NEXT: vdup.16 q3, r0
; CHECK-MVEFP-NEXT: vpt.f16 le, q3, q0
; CHECK-MVEFP-NEXT: vcmpt.f16 le, q0, r0
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%src2 = load half, half* %src2p
%i = insertelement <8 x half> undef, half %src2, i32 0
%sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
%c = fcmp ueq <8 x half> %src, %sp
%s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b
ret <8 x half> %s
}
define arm_aapcs_vfpcc <8 x half> @vcmp_une_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
; CHECK-MVE-LABEL: vcmp_une_v8f16:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: .vsave {d8, d9, d10}
; CHECK-MVE-NEXT: vpush {d8, d9, d10}
; CHECK-MVE-NEXT: vldr.16 s16, [r0]
; CHECK-MVE-NEXT: vmovx.f16 s12, s0
; CHECK-MVE-NEXT: movs r0, #0
; CHECK-MVE-NEXT: vmovx.f16 s14, s8
; CHECK-MVE-NEXT: vcmp.f16 s12, s16
; CHECK-MVE-NEXT: vmovx.f16 s12, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it ne
; CHECK-MVE-NEXT: movne r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vcmp.f16 s0, s16
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: movs r2, #0
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vmovx.f16 s20, s9
; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it ne
; CHECK-MVE-NEXT: movne r2, #1
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: cset r2, ne
; CHECK-MVE-NEXT: vmov r0, s12
; CHECK-MVE-NEXT: lsls r2, r2, #31
; CHECK-MVE-NEXT: vcmp.f16 s1, s16
; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r2, s12
; CHECK-MVE-NEXT: vmovx.f16 s0, s3
; CHECK-MVE-NEXT: vmov.16 q3[0], r2
; CHECK-MVE-NEXT: mov.w r1, #0
; CHECK-MVE-NEXT: vmov.16 q3[1], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it ne
; CHECK-MVE-NEXT: movne r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s1
; CHECK-MVE-NEXT: vcmp.f16 s18, s16
; CHECK-MVE-NEXT: vmov.16 q3[2], r0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it ne
; CHECK-MVE-NEXT: movne r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s18, s5
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vcmp.f16 s2, s16
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s20, s10
; CHECK-MVE-NEXT: vmov.16 q3[3], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it ne
; CHECK-MVE-NEXT: movne r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s2
; CHECK-MVE-NEXT: vcmp.f16 s18, s16
; CHECK-MVE-NEXT: vmov.16 q3[4], r0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it ne
; CHECK-MVE-NEXT: movne r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s18, s6
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vcmp.f16 s3, s16
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vcmp.f16 s0, s16
; CHECK-MVE-NEXT: vmov.16 q3[5], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it ne
; CHECK-MVE-NEXT: movne r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s0, s7
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vmovx.f16 s2, s11
; CHECK-MVE-NEXT: vseleq.f16 s18, s11, s7
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it ne
; CHECK-MVE-NEXT: movne r1, #1
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vmov.16 q3[6], r0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0
; CHECK-MVE-NEXT: vmov r0, s0
; CHECK-MVE-NEXT: vmov.16 q3[7], r0
; CHECK-MVE-NEXT: vmov q0, q3
; CHECK-MVE-NEXT: vpop {d8, d9, d10}
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: vcmp_une_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vldr.16 s12, [r0]
; CHECK-MVEFP-NEXT: vmov r0, s12
; CHECK-MVEFP-NEXT: vcmp.f16 ne, q0, r0
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%src2 = load half, half* %src2p
%i = insertelement <8 x half> undef, half %src2, i32 0
%sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
%c = fcmp une <8 x half> %src, %sp
%s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b
ret <8 x half> %s
}
define arm_aapcs_vfpcc <8 x half> @vcmp_ugt_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
; CHECK-MVE-LABEL: vcmp_ugt_v8f16:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: .vsave {d8, d9, d10}
; CHECK-MVE-NEXT: vpush {d8, d9, d10}
; CHECK-MVE-NEXT: vldr.16 s16, [r0]
; CHECK-MVE-NEXT: vmovx.f16 s12, s0
; CHECK-MVE-NEXT: movs r0, #0
; CHECK-MVE-NEXT: vmovx.f16 s14, s8
; CHECK-MVE-NEXT: vcmpe.f16 s12, s16
; CHECK-MVE-NEXT: vmovx.f16 s12, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it hi
; CHECK-MVE-NEXT: movhi r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vcmpe.f16 s0, s16
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: movs r2, #0
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vmovx.f16 s20, s9
; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it hi
; CHECK-MVE-NEXT: movhi r2, #1
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: cset r2, ne
; CHECK-MVE-NEXT: vmov r0, s12
; CHECK-MVE-NEXT: lsls r2, r2, #31
; CHECK-MVE-NEXT: vcmpe.f16 s1, s16
; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r2, s12
; CHECK-MVE-NEXT: vmovx.f16 s0, s3
; CHECK-MVE-NEXT: vmov.16 q3[0], r2
; CHECK-MVE-NEXT: mov.w r1, #0
; CHECK-MVE-NEXT: vmov.16 q3[1], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it hi
; CHECK-MVE-NEXT: movhi r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s1
; CHECK-MVE-NEXT: vcmpe.f16 s18, s16
; CHECK-MVE-NEXT: vmov.16 q3[2], r0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it hi
; CHECK-MVE-NEXT: movhi r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s18, s5
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vcmpe.f16 s2, s16
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s20, s10
; CHECK-MVE-NEXT: vmov.16 q3[3], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it hi
; CHECK-MVE-NEXT: movhi r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s2
; CHECK-MVE-NEXT: vcmpe.f16 s18, s16
; CHECK-MVE-NEXT: vmov.16 q3[4], r0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it hi
; CHECK-MVE-NEXT: movhi r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s18, s6
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vcmpe.f16 s3, s16
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vcmpe.f16 s0, s16
; CHECK-MVE-NEXT: vmov.16 q3[5], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it hi
; CHECK-MVE-NEXT: movhi r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s0, s7
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vmovx.f16 s2, s11
; CHECK-MVE-NEXT: vseleq.f16 s18, s11, s7
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it hi
; CHECK-MVE-NEXT: movhi r1, #1
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vmov.16 q3[6], r0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0
; CHECK-MVE-NEXT: vmov r0, s0
; CHECK-MVE-NEXT: vmov.16 q3[7], r0
; CHECK-MVE-NEXT: vmov q0, q3
; CHECK-MVE-NEXT: vpop {d8, d9, d10}
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: vcmp_ugt_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vldr.16 s12, [r0]
; CHECK-MVEFP-NEXT: vmov r0, s12
; CHECK-MVEFP-NEXT: vdup.16 q3, r0
; CHECK-MVEFP-NEXT: vcmp.f16 ge, q3, q0
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%src2 = load half, half* %src2p
%i = insertelement <8 x half> undef, half %src2, i32 0
%sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
%c = fcmp ugt <8 x half> %src, %sp
%s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b
ret <8 x half> %s
}
define arm_aapcs_vfpcc <8 x half> @vcmp_uge_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
; CHECK-MVE-LABEL: vcmp_uge_v8f16:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: .vsave {d8, d9, d10}
; CHECK-MVE-NEXT: vpush {d8, d9, d10}
; CHECK-MVE-NEXT: vldr.16 s16, [r0]
; CHECK-MVE-NEXT: vmovx.f16 s12, s0
; CHECK-MVE-NEXT: movs r0, #0
; CHECK-MVE-NEXT: vmovx.f16 s14, s8
; CHECK-MVE-NEXT: vcmpe.f16 s12, s16
; CHECK-MVE-NEXT: vmovx.f16 s12, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it pl
; CHECK-MVE-NEXT: movpl r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vcmpe.f16 s0, s16
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: movs r2, #0
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vmovx.f16 s20, s9
; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it pl
; CHECK-MVE-NEXT: movpl r2, #1
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: cset r2, ne
; CHECK-MVE-NEXT: vmov r0, s12
; CHECK-MVE-NEXT: lsls r2, r2, #31
; CHECK-MVE-NEXT: vcmpe.f16 s1, s16
; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r2, s12
; CHECK-MVE-NEXT: vmovx.f16 s0, s3
; CHECK-MVE-NEXT: vmov.16 q3[0], r2
; CHECK-MVE-NEXT: mov.w r1, #0
; CHECK-MVE-NEXT: vmov.16 q3[1], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it pl
; CHECK-MVE-NEXT: movpl r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s1
; CHECK-MVE-NEXT: vcmpe.f16 s18, s16
; CHECK-MVE-NEXT: vmov.16 q3[2], r0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it pl
; CHECK-MVE-NEXT: movpl r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s18, s5
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vcmpe.f16 s2, s16
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s20, s10
; CHECK-MVE-NEXT: vmov.16 q3[3], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it pl
; CHECK-MVE-NEXT: movpl r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s2
; CHECK-MVE-NEXT: vcmpe.f16 s18, s16
; CHECK-MVE-NEXT: vmov.16 q3[4], r0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it pl
; CHECK-MVE-NEXT: movpl r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s18, s6
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vcmpe.f16 s3, s16
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vcmpe.f16 s0, s16
; CHECK-MVE-NEXT: vmov.16 q3[5], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it pl
; CHECK-MVE-NEXT: movpl r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s0, s7
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vmovx.f16 s2, s11
; CHECK-MVE-NEXT: vseleq.f16 s18, s11, s7
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it pl
; CHECK-MVE-NEXT: movpl r1, #1
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vmov.16 q3[6], r0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0
; CHECK-MVE-NEXT: vmov r0, s0
; CHECK-MVE-NEXT: vmov.16 q3[7], r0
; CHECK-MVE-NEXT: vmov q0, q3
; CHECK-MVE-NEXT: vpop {d8, d9, d10}
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: vcmp_uge_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vldr.16 s12, [r0]
; CHECK-MVEFP-NEXT: vmov r0, s12
; CHECK-MVEFP-NEXT: vdup.16 q3, r0
; CHECK-MVEFP-NEXT: vcmp.f16 gt, q3, q0
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%src2 = load half, half* %src2p
%i = insertelement <8 x half> undef, half %src2, i32 0
%sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
%c = fcmp uge <8 x half> %src, %sp
%s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b
ret <8 x half> %s
}
define arm_aapcs_vfpcc <8 x half> @vcmp_ult_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
; CHECK-MVE-LABEL: vcmp_ult_v8f16:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: .vsave {d8, d9, d10}
; CHECK-MVE-NEXT: vpush {d8, d9, d10}
; CHECK-MVE-NEXT: vldr.16 s16, [r0]
; CHECK-MVE-NEXT: vmovx.f16 s12, s0
; CHECK-MVE-NEXT: movs r0, #0
; CHECK-MVE-NEXT: vmovx.f16 s14, s8
; CHECK-MVE-NEXT: vcmpe.f16 s12, s16
; CHECK-MVE-NEXT: vmovx.f16 s12, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it lt
; CHECK-MVE-NEXT: movlt r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vcmpe.f16 s0, s16
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: movs r2, #0
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vmovx.f16 s20, s9
; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it lt
; CHECK-MVE-NEXT: movlt r2, #1
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: cset r2, ne
; CHECK-MVE-NEXT: vmov r0, s12
; CHECK-MVE-NEXT: lsls r2, r2, #31
; CHECK-MVE-NEXT: vcmpe.f16 s1, s16
; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r2, s12
; CHECK-MVE-NEXT: vmovx.f16 s0, s3
; CHECK-MVE-NEXT: vmov.16 q3[0], r2
; CHECK-MVE-NEXT: mov.w r1, #0
; CHECK-MVE-NEXT: vmov.16 q3[1], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it lt
; CHECK-MVE-NEXT: movlt r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s1
; CHECK-MVE-NEXT: vcmpe.f16 s18, s16
; CHECK-MVE-NEXT: vmov.16 q3[2], r0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it lt
; CHECK-MVE-NEXT: movlt r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s18, s5
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vcmpe.f16 s2, s16
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s20, s10
; CHECK-MVE-NEXT: vmov.16 q3[3], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it lt
; CHECK-MVE-NEXT: movlt r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s2
; CHECK-MVE-NEXT: vcmpe.f16 s18, s16
; CHECK-MVE-NEXT: vmov.16 q3[4], r0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it lt
; CHECK-MVE-NEXT: movlt r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s18, s6
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vcmpe.f16 s3, s16
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vcmpe.f16 s0, s16
; CHECK-MVE-NEXT: vmov.16 q3[5], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it lt
; CHECK-MVE-NEXT: movlt r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s0, s7
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vmovx.f16 s2, s11
; CHECK-MVE-NEXT: vseleq.f16 s18, s11, s7
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it lt
; CHECK-MVE-NEXT: movlt r1, #1
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vmov.16 q3[6], r0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0
; CHECK-MVE-NEXT: vmov r0, s0
; CHECK-MVE-NEXT: vmov.16 q3[7], r0
; CHECK-MVE-NEXT: vmov q0, q3
; CHECK-MVE-NEXT: vpop {d8, d9, d10}
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: vcmp_ult_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vldr.16 s12, [r0]
; CHECK-MVEFP-NEXT: vmov r0, s12
; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, r0
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%src2 = load half, half* %src2p
%i = insertelement <8 x half> undef, half %src2, i32 0
%sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
%c = fcmp ult <8 x half> %src, %sp
%s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b
ret <8 x half> %s
}
define arm_aapcs_vfpcc <8 x half> @vcmp_ule_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
; CHECK-MVE-LABEL: vcmp_ule_v8f16:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: .vsave {d8, d9, d10}
; CHECK-MVE-NEXT: vpush {d8, d9, d10}
; CHECK-MVE-NEXT: vldr.16 s16, [r0]
; CHECK-MVE-NEXT: vmovx.f16 s12, s0
; CHECK-MVE-NEXT: movs r0, #0
; CHECK-MVE-NEXT: vmovx.f16 s14, s8
; CHECK-MVE-NEXT: vcmpe.f16 s12, s16
; CHECK-MVE-NEXT: vmovx.f16 s12, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it le
; CHECK-MVE-NEXT: movle r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vcmpe.f16 s0, s16
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: movs r2, #0
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vmovx.f16 s20, s9
; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it le
; CHECK-MVE-NEXT: movle r2, #1
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: cset r2, ne
; CHECK-MVE-NEXT: vmov r0, s12
; CHECK-MVE-NEXT: lsls r2, r2, #31
; CHECK-MVE-NEXT: vcmpe.f16 s1, s16
; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r2, s12
; CHECK-MVE-NEXT: vmovx.f16 s0, s3
; CHECK-MVE-NEXT: vmov.16 q3[0], r2
; CHECK-MVE-NEXT: mov.w r1, #0
; CHECK-MVE-NEXT: vmov.16 q3[1], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it le
; CHECK-MVE-NEXT: movle r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s1
; CHECK-MVE-NEXT: vcmpe.f16 s18, s16
; CHECK-MVE-NEXT: vmov.16 q3[2], r0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it le
; CHECK-MVE-NEXT: movle r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s18, s5
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vcmpe.f16 s2, s16
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s20, s10
; CHECK-MVE-NEXT: vmov.16 q3[3], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it le
; CHECK-MVE-NEXT: movle r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s2
; CHECK-MVE-NEXT: vcmpe.f16 s18, s16
; CHECK-MVE-NEXT: vmov.16 q3[4], r0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it le
; CHECK-MVE-NEXT: movle r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s18, s6
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vcmpe.f16 s3, s16
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vcmpe.f16 s0, s16
; CHECK-MVE-NEXT: vmov.16 q3[5], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it le
; CHECK-MVE-NEXT: movle r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s0, s7
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vmovx.f16 s2, s11
; CHECK-MVE-NEXT: vseleq.f16 s18, s11, s7
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it le
; CHECK-MVE-NEXT: movle r1, #1
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vmov.16 q3[6], r0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0
; CHECK-MVE-NEXT: vmov r0, s0
; CHECK-MVE-NEXT: vmov.16 q3[7], r0
; CHECK-MVE-NEXT: vmov q0, q3
; CHECK-MVE-NEXT: vpop {d8, d9, d10}
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: vcmp_ule_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vldr.16 s12, [r0]
; CHECK-MVEFP-NEXT: vmov r0, s12
; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, r0
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%src2 = load half, half* %src2p
%i = insertelement <8 x half> undef, half %src2, i32 0
%sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
%c = fcmp ule <8 x half> %src, %sp
%s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b
ret <8 x half> %s
}
define arm_aapcs_vfpcc <8 x half> @vcmp_ord_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
; CHECK-MVE-LABEL: vcmp_ord_v8f16:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: .vsave {d8, d9, d10}
; CHECK-MVE-NEXT: vpush {d8, d9, d10}
; CHECK-MVE-NEXT: vldr.16 s16, [r0]
; CHECK-MVE-NEXT: vmovx.f16 s12, s0
; CHECK-MVE-NEXT: movs r0, #0
; CHECK-MVE-NEXT: vmovx.f16 s14, s8
; CHECK-MVE-NEXT: vcmpe.f16 s12, s16
; CHECK-MVE-NEXT: vmovx.f16 s12, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it vc
; CHECK-MVE-NEXT: movvc r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vcmpe.f16 s0, s16
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: movs r2, #0
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vmovx.f16 s20, s9
; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it vc
; CHECK-MVE-NEXT: movvc r2, #1
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: cset r2, ne
; CHECK-MVE-NEXT: vmov r0, s12
; CHECK-MVE-NEXT: lsls r2, r2, #31
; CHECK-MVE-NEXT: vcmpe.f16 s1, s16
; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r2, s12
; CHECK-MVE-NEXT: vmovx.f16 s0, s3
; CHECK-MVE-NEXT: vmov.16 q3[0], r2
; CHECK-MVE-NEXT: mov.w r1, #0
; CHECK-MVE-NEXT: vmov.16 q3[1], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it vc
; CHECK-MVE-NEXT: movvc r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s1
; CHECK-MVE-NEXT: vcmpe.f16 s18, s16
; CHECK-MVE-NEXT: vmov.16 q3[2], r0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it vc
; CHECK-MVE-NEXT: movvc r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s18, s5
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vcmpe.f16 s2, s16
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s20, s10
; CHECK-MVE-NEXT: vmov.16 q3[3], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it vc
; CHECK-MVE-NEXT: movvc r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s2
; CHECK-MVE-NEXT: vcmpe.f16 s18, s16
; CHECK-MVE-NEXT: vmov.16 q3[4], r0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it vc
; CHECK-MVE-NEXT: movvc r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s18, s6
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vcmpe.f16 s3, s16
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vcmpe.f16 s0, s16
; CHECK-MVE-NEXT: vmov.16 q3[5], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it vc
; CHECK-MVE-NEXT: movvc r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s0, s7
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vmovx.f16 s2, s11
; CHECK-MVE-NEXT: vseleq.f16 s18, s11, s7
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it vc
; CHECK-MVE-NEXT: movvc r1, #1
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vmov.16 q3[6], r0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0
; CHECK-MVE-NEXT: vmov r0, s0
; CHECK-MVE-NEXT: vmov.16 q3[7], r0
; CHECK-MVE-NEXT: vmov q0, q3
; CHECK-MVE-NEXT: vpop {d8, d9, d10}
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: vcmp_ord_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vldr.16 s12, [r0]
; CHECK-MVEFP-NEXT: vmov r0, s12
; CHECK-MVEFP-NEXT: vdup.16 q3, r0
; CHECK-MVEFP-NEXT: vpt.f16 le, q3, q0
; CHECK-MVEFP-NEXT: vcmpt.f16 lt, q0, r0
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%src2 = load half, half* %src2p
%i = insertelement <8 x half> undef, half %src2, i32 0
%sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
%c = fcmp ord <8 x half> %src, %sp
%s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b
ret <8 x half> %s
}
define arm_aapcs_vfpcc <8 x half> @vcmp_uno_v8f16(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
; CHECK-MVE-LABEL: vcmp_uno_v8f16:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: .vsave {d8, d9, d10}
; CHECK-MVE-NEXT: vpush {d8, d9, d10}
; CHECK-MVE-NEXT: vldr.16 s16, [r0]
; CHECK-MVE-NEXT: vmovx.f16 s12, s0
; CHECK-MVE-NEXT: movs r0, #0
; CHECK-MVE-NEXT: vmovx.f16 s14, s8
; CHECK-MVE-NEXT: vcmpe.f16 s12, s16
; CHECK-MVE-NEXT: vmovx.f16 s12, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it vs
; CHECK-MVE-NEXT: movvs r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: vcmpe.f16 s0, s16
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: movs r2, #0
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vmovx.f16 s20, s9
; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it vs
; CHECK-MVE-NEXT: movvs r2, #1
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: cset r2, ne
; CHECK-MVE-NEXT: vmov r0, s12
; CHECK-MVE-NEXT: lsls r2, r2, #31
; CHECK-MVE-NEXT: vcmpe.f16 s1, s16
; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r2, s12
; CHECK-MVE-NEXT: vmovx.f16 s0, s3
; CHECK-MVE-NEXT: vmov.16 q3[0], r2
; CHECK-MVE-NEXT: mov.w r1, #0
; CHECK-MVE-NEXT: vmov.16 q3[1], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it vs
; CHECK-MVE-NEXT: movvs r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s1
; CHECK-MVE-NEXT: vcmpe.f16 s18, s16
; CHECK-MVE-NEXT: vmov.16 q3[2], r0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it vs
; CHECK-MVE-NEXT: movvs r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s18, s5
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vcmpe.f16 s2, s16
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s20, s10
; CHECK-MVE-NEXT: vmov.16 q3[3], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it vs
; CHECK-MVE-NEXT: movvs r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s2
; CHECK-MVE-NEXT: vcmpe.f16 s18, s16
; CHECK-MVE-NEXT: vmov.16 q3[4], r0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it vs
; CHECK-MVE-NEXT: movvs r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s18, s6
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vcmpe.f16 s3, s16
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: vcmpe.f16 s0, s16
; CHECK-MVE-NEXT: vmov.16 q3[5], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it vs
; CHECK-MVE-NEXT: movvs r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s0, s7
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vmovx.f16 s2, s11
; CHECK-MVE-NEXT: vseleq.f16 s18, s11, s7
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it vs
; CHECK-MVE-NEXT: movvs r1, #1
; CHECK-MVE-NEXT: vmov r0, s18
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vmov.16 q3[6], r0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0
; CHECK-MVE-NEXT: vmov r0, s0
; CHECK-MVE-NEXT: vmov.16 q3[7], r0
; CHECK-MVE-NEXT: vmov q0, q3
; CHECK-MVE-NEXT: vpop {d8, d9, d10}
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: vcmp_uno_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vldr.16 s12, [r0]
; CHECK-MVEFP-NEXT: vmov r0, s12
; CHECK-MVEFP-NEXT: vdup.16 q3, r0
; CHECK-MVEFP-NEXT: vpt.f16 le, q3, q0
; CHECK-MVEFP-NEXT: vcmpt.f16 lt, q0, r0
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%src2 = load half, half* %src2p
%i = insertelement <8 x half> undef, half %src2, i32 0
%sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
%c = fcmp uno <8 x half> %src, %sp
%s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b
ret <8 x half> %s
}