1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

[ARM] Better patterns for fp <> predicate vectors

These are some better patterns for converting between predicate vectors and
floating-point vectors. Much like the extends, when converting from a predicate
we select "1"/"-1" or "0" in each lane depending on the predicate value. When
converting to a predicate, we perform a compare against 0.

Differential Revision: https://reviews.llvm.org/D65103

llvm-svn: 367191
This commit is contained in:
David Green 2019-07-28 13:53:39 +00:00
parent be00ed3113
commit d3183024b0
3 changed files with 93 additions and 68 deletions

View File

@ -349,10 +349,6 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
setOperationAction(ISD::FP_TO_SINT, VT, Expand);
setOperationAction(ISD::FP_TO_UINT, VT, Expand);
setOperationAction(ISD::SINT_TO_FP, VT, Expand);
setOperationAction(ISD::UINT_TO_FP, VT, Expand);
}
}

View File

@ -4561,6 +4561,7 @@ let Predicates = [HasMVEInt] in {
(v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0,
(MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), 1)))>;
// Pred <-> Int
def : Pat<(v16i8 (zext (v16i1 VCCR:$pred))),
(v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), 0, VCCR:$pred))>;
def : Pat<(v8i16 (zext (v8i1 VCCR:$pred))),
@ -4583,6 +4584,31 @@ let Predicates = [HasMVEInt] in {
(v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), 0, VCCR:$pred))>;
}
// Selection patterns for converting between MVE predicate vectors (v4i1/v8i1)
// and floating-point vectors (v4f32/v8f16). Only active under HasMVEFloat.
let Predicates = [HasMVEFloat] in {
// Pred <-> Float
//
// Pred -> Float: select, per lane, between a splat of +/-1.0 and a splat of
// zero using VPSEL under the predicate. The unsigned conversion yields 1.0
// for a set lane and the signed conversion yields -1.0.
//
// 112 is 1.0 in float
def : Pat<(v4f32 (uint_to_fp (v4i1 VCCR:$pred))),
(v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 112)), (v4f32 (MVE_VMOVimmi32 0)), 0, VCCR:$pred))>;
// 2620 is 1.0 in half (the accompanying test expects `vmov.i16 qN, #0x3c00`)
def : Pat<(v8f16 (uint_to_fp (v8i1 VCCR:$pred))),
(v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2620)), (v8f16 (MVE_VMOVimmi16 0)), 0, VCCR:$pred))>;
// 240 is -1.0 in float
def : Pat<(v4f32 (sint_to_fp (v4i1 VCCR:$pred))),
(v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 240)), (v4f32 (MVE_VMOVimmi32 0)), 0, VCCR:$pred))>;
// 2748 is -1.0 in half (the accompanying test expects `vmov.i16 qN, #0xbc00`)
def : Pat<(v8f16 (sint_to_fp (v8i1 VCCR:$pred))),
(v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2748)), (v8f16 (MVE_VMOVimmi16 0)), 0, VCCR:$pred))>;
// Float -> Pred: compare every lane against the zero register (ZR). The
// signed and unsigned conversions use the same compare. The accompanying
// tests expect `vcmp.f32 ne, qN, zr` / `vcmp.f16 ne, qN, zr`, so the trailing
// operand 1 is presumably the NE condition code -- TODO confirm against the
// condition-code encoding.
def : Pat<(v4i1 (fp_to_uint (v4f32 MQPR:$v1))),
(v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, 1))>;
def : Pat<(v8i1 (fp_to_uint (v8f16 MQPR:$v1))),
(v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, 1))>;
def : Pat<(v4i1 (fp_to_sint (v4f32 MQPR:$v1))),
(v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, 1))>;
def : Pat<(v8i1 (fp_to_sint (v8f16 MQPR:$v1))),
(v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, 1))>;
}
def MVE_VPNOT : MVE_p<(outs), (ins), NoItinerary,
"vpnot", "", "", vpred_n, "", []> {
let Inst{31-0} = 0b11111110001100010000111101001101;

View File

@ -166,20 +166,10 @@ entry:
define arm_aapcs_vfpcc <4 x float> @uitofp_v4i1_v4f32(<4 x i32> %src) {
; CHECK-LABEL: uitofp_v4i1_v4f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i32 q1, #0x0
; CHECK-NEXT: vmov.f32 q2, #1.000000e+00
; CHECK-NEXT: vcmp.s32 gt, q0, zr
; CHECK-NEXT: vmrs r0, p0
; CHECK-NEXT: ubfx r1, r0, #8, #1
; CHECK-NEXT: ubfx r2, r0, #12, #1
; CHECK-NEXT: vmov s0, r2
; CHECK-NEXT: vmov s4, r1
; CHECK-NEXT: vcvt.f32.u32 s3, s0
; CHECK-NEXT: ubfx r2, r0, #4, #1
; CHECK-NEXT: vcvt.f32.u32 s2, s4
; CHECK-NEXT: and r0, r0, #1
; CHECK-NEXT: vmov s4, r2
; CHECK-NEXT: vcvt.f32.u32 s1, s4
; CHECK-NEXT: vmov s4, r0
; CHECK-NEXT: vcvt.f32.u32 s0, s4
; CHECK-NEXT: vpsel q0, q2, q1
; CHECK-NEXT: bx lr
entry:
%c = icmp sgt <4 x i32> %src, zeroinitializer
@ -190,24 +180,10 @@ entry:
define arm_aapcs_vfpcc <4 x float> @sitofp_v4i1_v4f32(<4 x i32> %src) {
; CHECK-LABEL: sitofp_v4i1_v4f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i32 q1, #0x0
; CHECK-NEXT: vmov.f32 q2, #-1.000000e+00
; CHECK-NEXT: vcmp.s32 gt, q0, zr
; CHECK-NEXT: vmrs r0, p0
; CHECK-NEXT: and r1, r0, #1
; CHECK-NEXT: ubfx r2, r0, #8, #1
; CHECK-NEXT: ubfx r3, r0, #4, #1
; CHECK-NEXT: ubfx r0, r0, #12, #1
; CHECK-NEXT: rsbs r2, r2, #0
; CHECK-NEXT: rsbs r0, r0, #0
; CHECK-NEXT: vmov s4, r2
; CHECK-NEXT: vmov s0, r0
; CHECK-NEXT: rsbs r0, r3, #0
; CHECK-NEXT: vcvt.f32.s32 s3, s0
; CHECK-NEXT: vcvt.f32.s32 s2, s4
; CHECK-NEXT: vmov s4, r0
; CHECK-NEXT: rsbs r0, r1, #0
; CHECK-NEXT: vcvt.f32.s32 s1, s4
; CHECK-NEXT: vmov s4, r0
; CHECK-NEXT: vcvt.f32.s32 s0, s4
; CHECK-NEXT: vpsel q0, q2, q1
; CHECK-NEXT: bx lr
entry:
%c = icmp sgt <4 x i32> %src, zeroinitializer
@ -218,26 +194,9 @@ entry:
define arm_aapcs_vfpcc <4 x float> @fptoui_v4i1_v4f32(<4 x float> %src) {
; CHECK-LABEL: fptoui_v4i1_v4f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.s32.f32 s4, s0
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: vmov.f32 q2, #1.000000e+00
; CHECK-NEXT: vmov r1, s4
; CHECK-NEXT: vcvt.s32.f32 s4, s1
; CHECK-NEXT: rsbs r1, r1, #0
; CHECK-NEXT: bfi r0, r1, #0, #4
; CHECK-NEXT: vmov r1, s4
; CHECK-NEXT: vcvt.s32.f32 s4, s2
; CHECK-NEXT: vcvt.s32.f32 s0, s3
; CHECK-NEXT: rsbs r1, r1, #0
; CHECK-NEXT: bfi r0, r1, #4, #4
; CHECK-NEXT: vmov r1, s4
; CHECK-NEXT: vmov.i32 q1, #0x0
; CHECK-NEXT: rsbs r1, r1, #0
; CHECK-NEXT: bfi r0, r1, #8, #4
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: rsbs r1, r1, #0
; CHECK-NEXT: bfi r0, r1, #12, #4
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vmov.f32 q2, #1.000000e+00
; CHECK-NEXT: vcmp.f32 ne, q0, zr
; CHECK-NEXT: vpsel q0, q2, q1
; CHECK-NEXT: bx lr
entry:
@ -249,22 +208,9 @@ entry:
define arm_aapcs_vfpcc <4 x float> @fptosi_v4i1_v4f32(<4 x float> %src) {
; CHECK-LABEL: fptosi_v4i1_v4f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.s32.f32 s4, s0
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: vmov.f32 q2, #1.000000e+00
; CHECK-NEXT: vmov r1, s4
; CHECK-NEXT: vcvt.s32.f32 s4, s1
; CHECK-NEXT: bfi r0, r1, #0, #4
; CHECK-NEXT: vmov r1, s4
; CHECK-NEXT: vcvt.s32.f32 s4, s2
; CHECK-NEXT: bfi r0, r1, #4, #4
; CHECK-NEXT: vcvt.s32.f32 s0, s3
; CHECK-NEXT: vmov r1, s4
; CHECK-NEXT: vmov.i32 q1, #0x0
; CHECK-NEXT: bfi r0, r1, #8, #4
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: bfi r0, r1, #12, #4
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vmov.f32 q2, #1.000000e+00
; CHECK-NEXT: vcmp.f32 ne, q0, zr
; CHECK-NEXT: vpsel q0, q2, q1
; CHECK-NEXT: bx lr
entry:
@ -273,3 +219,60 @@ entry:
ret <4 x float> %s
}
; uitofp of an <8 x i1> compare result (%src > 0, signed) to <8 x half>.
; The checks expect a single vpsel, under the vcmp predicate, between a splat
; of 0x3c00 (half 1.0) and a splat of zero.
define arm_aapcs_vfpcc <8 x half> @uitofp_v8i1_v8f16(<8 x i16> %src) {
; CHECK-LABEL: uitofp_v8i1_v8f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i16 q1, #0x0
; CHECK-NEXT: vmov.i16 q2, #0x3c00
; CHECK-NEXT: vcmp.s16 gt, q0, zr
; CHECK-NEXT: vpsel q0, q2, q1
; CHECK-NEXT: bx lr
entry:
%c = icmp sgt <8 x i16> %src, zeroinitializer
%0 = uitofp <8 x i1> %c to <8 x half>
ret <8 x half> %0
}
; sitofp of an <8 x i1> compare result (%src > 0, signed) to <8 x half>.
; The checks expect a single vpsel, under the vcmp predicate, between a splat
; of 0xbc00 (half -1.0) and a splat of zero.
define arm_aapcs_vfpcc <8 x half> @sitofp_v8i1_v8f16(<8 x i16> %src) {
; CHECK-LABEL: sitofp_v8i1_v8f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i16 q1, #0x0
; CHECK-NEXT: vmov.i16 q2, #0xbc00
; CHECK-NEXT: vcmp.s16 gt, q0, zr
; CHECK-NEXT: vpsel q0, q2, q1
; CHECK-NEXT: bx lr
entry:
%c = icmp sgt <8 x i16> %src, zeroinitializer
%0 = sitofp <8 x i1> %c to <8 x half>
ret <8 x half> %0
}
; fptoui from <8 x half> to <8 x i1>, with the result used as the mask of a
; select between half 1.0 and zero. The checks expect the conversion to become
; a single `vcmp.f16 ne` against zr whose predicate feeds the vpsel.
define arm_aapcs_vfpcc <8 x half> @fptoui_v8i1_v8f16(<8 x half> %src) {
; CHECK-LABEL: fptoui_v8i1_v8f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i32 q1, #0x0
; CHECK-NEXT: vmov.i16 q2, #0x3c00
; CHECK-NEXT: vcmp.f16 ne, q0, zr
; CHECK-NEXT: vpsel q0, q2, q1
; CHECK-NEXT: bx lr
entry:
%0 = fptoui <8 x half> %src to <8 x i1>
%s = select <8 x i1> %0, <8 x half> <half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0>, <8 x half> zeroinitializer
ret <8 x half> %s
}
; fptosi from <8 x half> to <8 x i1>, with the result used as the mask of a
; select between half 1.0 and zero. The checks expect the conversion to become
; a single `vcmp.f16 ne` against zr whose predicate feeds the vpsel.
define arm_aapcs_vfpcc <8 x half> @fptosi_v8i1_v8f16(<8 x half> %src) {
; CHECK-LABEL: fptosi_v8i1_v8f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i32 q1, #0x0
; CHECK-NEXT: vmov.i16 q2, #0x3c00
; CHECK-NEXT: vcmp.f16 ne, q0, zr
; CHECK-NEXT: vpsel q0, q2, q1
; CHECK-NEXT: bx lr
entry:
%0 = fptosi <8 x half> %src to <8 x i1>
%s = select <8 x i1> %0, <8 x half> <half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0>, <8 x half> zeroinitializer
ret <8 x half> %s
}