mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[ARM] Better patterns for fp <> predicate vectors
These are some better patterns for converting between predicates and floating-point vectors. Much like the extends, when converting from a predicate we select "1"/"-1" or "0" depending on the predicate value; when converting to a predicate we perform a compare against 0. Differential Revision: https://reviews.llvm.org/D65103 llvm-svn: 367191
This commit is contained in:
parent
be00ed3113
commit
d3183024b0
@ -349,10 +349,6 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
|
||||
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
|
||||
setOperationAction(ISD::SETCC, VT, Custom);
|
||||
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
|
||||
setOperationAction(ISD::FP_TO_SINT, VT, Expand);
|
||||
setOperationAction(ISD::FP_TO_UINT, VT, Expand);
|
||||
setOperationAction(ISD::SINT_TO_FP, VT, Expand);
|
||||
setOperationAction(ISD::UINT_TO_FP, VT, Expand);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4561,6 +4561,7 @@ let Predicates = [HasMVEInt] in {
|
||||
(v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0,
|
||||
(MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), 1)))>;
|
||||
|
||||
// Pred <-> Int
|
||||
def : Pat<(v16i8 (zext (v16i1 VCCR:$pred))),
|
||||
(v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), 0, VCCR:$pred))>;
|
||||
def : Pat<(v8i16 (zext (v8i1 VCCR:$pred))),
|
||||
@ -4583,6 +4584,31 @@ let Predicates = [HasMVEInt] in {
|
||||
(v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), 0, VCCR:$pred))>;
|
||||
}
|
||||
|
||||
let Predicates = [HasMVEFloat] in {
|
||||
// Pred <-> Float
|
||||
// 112 is 1.0 in float
|
||||
def : Pat<(v4f32 (uint_to_fp (v4i1 VCCR:$pred))),
|
||||
(v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 112)), (v4f32 (MVE_VMOVimmi32 0)), 0, VCCR:$pred))>;
|
||||
// 2620 is 1.0 in half
|
||||
def : Pat<(v8f16 (uint_to_fp (v8i1 VCCR:$pred))),
|
||||
(v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2620)), (v8f16 (MVE_VMOVimmi16 0)), 0, VCCR:$pred))>;
|
||||
// 240 is -1.0 in float
|
||||
def : Pat<(v4f32 (sint_to_fp (v4i1 VCCR:$pred))),
|
||||
(v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 240)), (v4f32 (MVE_VMOVimmi32 0)), 0, VCCR:$pred))>;
|
||||
// 2748 is -1.0 in half
|
||||
def : Pat<(v8f16 (sint_to_fp (v8i1 VCCR:$pred))),
|
||||
(v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2748)), (v8f16 (MVE_VMOVimmi16 0)), 0, VCCR:$pred))>;
|
||||
|
||||
def : Pat<(v4i1 (fp_to_uint (v4f32 MQPR:$v1))),
|
||||
(v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, 1))>;
|
||||
def : Pat<(v8i1 (fp_to_uint (v8f16 MQPR:$v1))),
|
||||
(v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, 1))>;
|
||||
def : Pat<(v4i1 (fp_to_sint (v4f32 MQPR:$v1))),
|
||||
(v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, 1))>;
|
||||
def : Pat<(v8i1 (fp_to_sint (v8f16 MQPR:$v1))),
|
||||
(v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, 1))>;
|
||||
}
|
||||
|
||||
def MVE_VPNOT : MVE_p<(outs), (ins), NoItinerary,
|
||||
"vpnot", "", "", vpred_n, "", []> {
|
||||
let Inst{31-0} = 0b11111110001100010000111101001101;
|
||||
|
@ -166,20 +166,10 @@ entry:
|
||||
define arm_aapcs_vfpcc <4 x float> @uitofp_v4i1_v4f32(<4 x i32> %src) {
|
||||
; CHECK-LABEL: uitofp_v4i1_v4f32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.i32 q1, #0x0
|
||||
; CHECK-NEXT: vmov.f32 q2, #1.000000e+00
|
||||
; CHECK-NEXT: vcmp.s32 gt, q0, zr
|
||||
; CHECK-NEXT: vmrs r0, p0
|
||||
; CHECK-NEXT: ubfx r1, r0, #8, #1
|
||||
; CHECK-NEXT: ubfx r2, r0, #12, #1
|
||||
; CHECK-NEXT: vmov s0, r2
|
||||
; CHECK-NEXT: vmov s4, r1
|
||||
; CHECK-NEXT: vcvt.f32.u32 s3, s0
|
||||
; CHECK-NEXT: ubfx r2, r0, #4, #1
|
||||
; CHECK-NEXT: vcvt.f32.u32 s2, s4
|
||||
; CHECK-NEXT: and r0, r0, #1
|
||||
; CHECK-NEXT: vmov s4, r2
|
||||
; CHECK-NEXT: vcvt.f32.u32 s1, s4
|
||||
; CHECK-NEXT: vmov s4, r0
|
||||
; CHECK-NEXT: vcvt.f32.u32 s0, s4
|
||||
; CHECK-NEXT: vpsel q0, q2, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%c = icmp sgt <4 x i32> %src, zeroinitializer
|
||||
@ -190,24 +180,10 @@ entry:
|
||||
define arm_aapcs_vfpcc <4 x float> @sitofp_v4i1_v4f32(<4 x i32> %src) {
|
||||
; CHECK-LABEL: sitofp_v4i1_v4f32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.i32 q1, #0x0
|
||||
; CHECK-NEXT: vmov.f32 q2, #-1.000000e+00
|
||||
; CHECK-NEXT: vcmp.s32 gt, q0, zr
|
||||
; CHECK-NEXT: vmrs r0, p0
|
||||
; CHECK-NEXT: and r1, r0, #1
|
||||
; CHECK-NEXT: ubfx r2, r0, #8, #1
|
||||
; CHECK-NEXT: ubfx r3, r0, #4, #1
|
||||
; CHECK-NEXT: ubfx r0, r0, #12, #1
|
||||
; CHECK-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-NEXT: rsbs r0, r0, #0
|
||||
; CHECK-NEXT: vmov s4, r2
|
||||
; CHECK-NEXT: vmov s0, r0
|
||||
; CHECK-NEXT: rsbs r0, r3, #0
|
||||
; CHECK-NEXT: vcvt.f32.s32 s3, s0
|
||||
; CHECK-NEXT: vcvt.f32.s32 s2, s4
|
||||
; CHECK-NEXT: vmov s4, r0
|
||||
; CHECK-NEXT: rsbs r0, r1, #0
|
||||
; CHECK-NEXT: vcvt.f32.s32 s1, s4
|
||||
; CHECK-NEXT: vmov s4, r0
|
||||
; CHECK-NEXT: vcvt.f32.s32 s0, s4
|
||||
; CHECK-NEXT: vpsel q0, q2, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%c = icmp sgt <4 x i32> %src, zeroinitializer
|
||||
@ -218,26 +194,9 @@ entry:
|
||||
define arm_aapcs_vfpcc <4 x float> @fptoui_v4i1_v4f32(<4 x float> %src) {
|
||||
; CHECK-LABEL: fptoui_v4i1_v4f32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vcvt.s32.f32 s4, s0
|
||||
; CHECK-NEXT: movs r0, #0
|
||||
; CHECK-NEXT: vmov.f32 q2, #1.000000e+00
|
||||
; CHECK-NEXT: vmov r1, s4
|
||||
; CHECK-NEXT: vcvt.s32.f32 s4, s1
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r0, r1, #0, #4
|
||||
; CHECK-NEXT: vmov r1, s4
|
||||
; CHECK-NEXT: vcvt.s32.f32 s4, s2
|
||||
; CHECK-NEXT: vcvt.s32.f32 s0, s3
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r0, r1, #4, #4
|
||||
; CHECK-NEXT: vmov r1, s4
|
||||
; CHECK-NEXT: vmov.i32 q1, #0x0
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r0, r1, #8, #4
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: rsbs r1, r1, #0
|
||||
; CHECK-NEXT: bfi r0, r1, #12, #4
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vmov.f32 q2, #1.000000e+00
|
||||
; CHECK-NEXT: vcmp.f32 ne, q0, zr
|
||||
; CHECK-NEXT: vpsel q0, q2, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
@ -249,22 +208,9 @@ entry:
|
||||
define arm_aapcs_vfpcc <4 x float> @fptosi_v4i1_v4f32(<4 x float> %src) {
|
||||
; CHECK-LABEL: fptosi_v4i1_v4f32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vcvt.s32.f32 s4, s0
|
||||
; CHECK-NEXT: movs r0, #0
|
||||
; CHECK-NEXT: vmov.f32 q2, #1.000000e+00
|
||||
; CHECK-NEXT: vmov r1, s4
|
||||
; CHECK-NEXT: vcvt.s32.f32 s4, s1
|
||||
; CHECK-NEXT: bfi r0, r1, #0, #4
|
||||
; CHECK-NEXT: vmov r1, s4
|
||||
; CHECK-NEXT: vcvt.s32.f32 s4, s2
|
||||
; CHECK-NEXT: bfi r0, r1, #4, #4
|
||||
; CHECK-NEXT: vcvt.s32.f32 s0, s3
|
||||
; CHECK-NEXT: vmov r1, s4
|
||||
; CHECK-NEXT: vmov.i32 q1, #0x0
|
||||
; CHECK-NEXT: bfi r0, r1, #8, #4
|
||||
; CHECK-NEXT: vmov r1, s0
|
||||
; CHECK-NEXT: bfi r0, r1, #12, #4
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vmov.f32 q2, #1.000000e+00
|
||||
; CHECK-NEXT: vcmp.f32 ne, q0, zr
|
||||
; CHECK-NEXT: vpsel q0, q2, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
@ -273,3 +219,60 @@ entry:
|
||||
ret <4 x float> %s
|
||||
}
|
||||
|
||||
|
||||
|
||||
define arm_aapcs_vfpcc <8 x half> @uitofp_v8i1_v8f16(<8 x i16> %src) {
|
||||
; CHECK-LABEL: uitofp_v8i1_v8f16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.i16 q1, #0x0
|
||||
; CHECK-NEXT: vmov.i16 q2, #0x3c00
|
||||
; CHECK-NEXT: vcmp.s16 gt, q0, zr
|
||||
; CHECK-NEXT: vpsel q0, q2, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%c = icmp sgt <8 x i16> %src, zeroinitializer
|
||||
%0 = uitofp <8 x i1> %c to <8 x half>
|
||||
ret <8 x half> %0
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x half> @sitofp_v8i1_v8f16(<8 x i16> %src) {
|
||||
; CHECK-LABEL: sitofp_v8i1_v8f16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.i16 q1, #0x0
|
||||
; CHECK-NEXT: vmov.i16 q2, #0xbc00
|
||||
; CHECK-NEXT: vcmp.s16 gt, q0, zr
|
||||
; CHECK-NEXT: vpsel q0, q2, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%c = icmp sgt <8 x i16> %src, zeroinitializer
|
||||
%0 = sitofp <8 x i1> %c to <8 x half>
|
||||
ret <8 x half> %0
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x half> @fptoui_v8i1_v8f16(<8 x half> %src) {
|
||||
; CHECK-LABEL: fptoui_v8i1_v8f16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.i32 q1, #0x0
|
||||
; CHECK-NEXT: vmov.i16 q2, #0x3c00
|
||||
; CHECK-NEXT: vcmp.f16 ne, q0, zr
|
||||
; CHECK-NEXT: vpsel q0, q2, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = fptoui <8 x half> %src to <8 x i1>
|
||||
%s = select <8 x i1> %0, <8 x half> <half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0>, <8 x half> zeroinitializer
|
||||
ret <8 x half> %s
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x half> @fptosi_v8i1_v8f16(<8 x half> %src) {
|
||||
; CHECK-LABEL: fptosi_v8i1_v8f16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov.i32 q1, #0x0
|
||||
; CHECK-NEXT: vmov.i16 q2, #0x3c00
|
||||
; CHECK-NEXT: vcmp.f16 ne, q0, zr
|
||||
; CHECK-NEXT: vpsel q0, q2, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = fptosi <8 x half> %src to <8 x i1>
|
||||
%s = select <8 x i1> %0, <8 x half> <half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0>, <8 x half> zeroinitializer
|
||||
ret <8 x half> %s
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user