mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[ARM] MVE floating point compares and selects
Much like integers, this adds MVE floating point compares and selects. It requires a lot more buildvector/shuffle code because we may need to expand the compares without mve.fp, and requires support for and/or because of the way we lower llvm condition codes. Some original code by David Sherwood. Differential Revision: https://reviews.llvm.org/D65054 llvm-svn: 366909
This commit is contained in:
parent
07500b5f5e
commit
41233e3473
@ -288,6 +288,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
|
||||
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
|
||||
setOperationAction(ISD::BUILD_VECTOR, VT.getVectorElementType(), Custom);
|
||||
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
|
||||
setOperationAction(ISD::SETCC, VT, Custom);
|
||||
|
||||
if (HasMVEFP) {
|
||||
setOperationAction(ISD::FMINNUM, VT, Legal);
|
||||
@ -346,6 +347,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
|
||||
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
|
||||
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
|
||||
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
|
||||
setOperationAction(ISD::SETCC, VT, Custom);
|
||||
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
|
||||
}
|
||||
}
|
||||
@ -5895,6 +5897,11 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG,
|
||||
if (Op.getValueType().getVectorElementType() != MVT::i1)
|
||||
return SDValue();
|
||||
|
||||
// Make sure we expand floating point setcc to scalar if we do not have
|
||||
// mve.fp, so that we can handle them from there.
|
||||
if (Op0.getValueType().isFloatingPoint() && !ST->hasMVEFloatOps())
|
||||
return SDValue();
|
||||
|
||||
CmpVT = VT;
|
||||
}
|
||||
|
||||
@ -5925,7 +5932,12 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG,
|
||||
switch (SetCCOpcode) {
|
||||
default: llvm_unreachable("Illegal FP comparison");
|
||||
case ISD::SETUNE:
|
||||
case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH;
|
||||
case ISD::SETNE:
|
||||
if (ST->hasMVEFloatOps()) {
|
||||
Opc = ARMISD::VCNE; break;
|
||||
} else {
|
||||
Invert = true; LLVM_FALLTHROUGH;
|
||||
}
|
||||
case ISD::SETOEQ:
|
||||
case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
|
||||
case ISD::SETOLT:
|
||||
|
@ -3000,6 +3000,20 @@ multiclass unpred_vcmp_r<SDPatternOperator opnode, string suffix, int fc> {
|
||||
(v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc))>;
|
||||
}
|
||||
|
||||
// Selection patterns for unpredicated floating point vector compares that
// take a single vector operand. Two patterns are defined, f16 and f32:
//   - a unary `opnode` on a v8f16 MQPR value producing v8i1 is selected to
//     MVE_VCMPf16r;
//   - a unary `opnode` on a v4f32 MQPR value producing v4i1 is selected to
//     MVE_VCMPf32r.
// In both cases ZR is supplied as the second instruction operand and `fc`
// is passed through as the final operand.
// NOTE(review): ZR is presumably the zero register and `fc` the condition
// code encoding for the compare — confirm against the MVE_VCMPf*r
// instruction definitions.
multiclass unpred_vcmpf_z<SDPatternOperator opnode, int fc> {
|
||||
def f16 : Pat<(v8i1 (opnode (v8f16 MQPR:$v1))),
|
||||
(v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, fc))>;
|
||||
def f32 : Pat<(v4i1 (opnode (v4f32 MQPR:$v1))),
|
||||
(v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc))>;
|
||||
}
|
||||
|
||||
// Selection patterns for unpredicated floating point vector compares that
// take two vector operands. Two patterns are defined, f16 and f32:
//   - a binary `opnode` on two v8f16 MQPR values producing v8i1 is selected
//     to MVE_VCMPf16;
//   - a binary `opnode` on two v4f32 MQPR values producing v4i1 is selected
//     to MVE_VCMPf32.
// `fc` is passed through unchanged as the final instruction operand.
// NOTE(review): `fc` is presumably the condition code encoding for the
// compare — confirm against the MVE_VCMPf* instruction definitions.
multiclass unpred_vcmpf_r<SDPatternOperator opnode, int fc> {
|
||||
def f16 : Pat<(v8i1 (opnode (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))),
|
||||
(v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc))>;
|
||||
def f32 : Pat<(v4i1 (opnode (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))),
|
||||
(v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc))>;
|
||||
}
|
||||
|
||||
let Predicates = [HasMVEInt] in {
|
||||
defm MVE_VCEQZ : unpred_vcmp_z<ARMvceqz, "i", 0>;
|
||||
defm MVE_VCNEZ : unpred_vcmp_z<ARMvcnez, "i", 1>;
|
||||
@ -3016,6 +3030,20 @@ let Predicates = [HasMVEInt] in {
|
||||
defm MVE_VCGEU : unpred_vcmp_r<ARMvcgeu, "u", 2>;
|
||||
}
|
||||
|
||||
// Instantiate the floating point compare patterns; everything in this block
// is guarded by the HasMVEFloat predicate.
// The first group uses unpred_vcmpf_z with the single-operand compare nodes
// (ARMvceqz, ARMvcnez, ARMvclez, ARMvcgtz, ARMvcltz, ARMvcgez); the second
// group uses unpred_vcmpf_r with the two-operand nodes (ARMvcgt, ARMvcge,
// ARMvceq, ARMvcne).
// NOTE(review): the integer arguments (0, 1, 10, 11, 12, 13) look like ARM
// condition code encodings (EQ=0, NE=1, GE=10, LT=11, GT=12, LE=13) —
// confirm against the ARMCC condition code definitions.
let Predicates = [HasMVEFloat] in {
|
||||
defm MVE_VFCEQZ : unpred_vcmpf_z<ARMvceqz, 0>;
|
||||
defm MVE_VFCNEZ : unpred_vcmpf_z<ARMvcnez, 1>;
|
||||
defm MVE_VFCLEZ : unpred_vcmpf_z<ARMvclez, 13>;
|
||||
defm MVE_VFCGTZ : unpred_vcmpf_z<ARMvcgtz, 12>;
|
||||
defm MVE_VFCLTZ : unpred_vcmpf_z<ARMvcltz, 11>;
|
||||
defm MVE_VFCGEZ : unpred_vcmpf_z<ARMvcgez, 10>;
|
||||
|
||||
defm MVE_VFCGT : unpred_vcmpf_r<ARMvcgt, 12>;
|
||||
defm MVE_VFCGE : unpred_vcmpf_r<ARMvcge, 10>;
|
||||
defm MVE_VFCEQ : unpred_vcmpf_r<ARMvceq, 0>;
|
||||
defm MVE_VFCNE : unpred_vcmpf_r<ARMvcne, 1>;
|
||||
}
|
||||
|
||||
|
||||
// Extra "worst case" and/or/xor patterns, going into and out of GPR
|
||||
multiclass two_predops<SDPatternOperator opnode, Instruction insn> {
|
||||
@ -4457,6 +4485,11 @@ let Predicates = [HasMVEInt] in {
|
||||
def : Pat<(v4i32 (vselect (v4i1 VCCR:$pred), (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))),
|
||||
(v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>;
|
||||
|
||||
def : Pat<(v8f16 (vselect (v8i1 VCCR:$pred), (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))),
|
||||
(v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>;
|
||||
def : Pat<(v4f32 (vselect (v4i1 VCCR:$pred), (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))),
|
||||
(v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>;
|
||||
|
||||
def : Pat<(v16i8 (vselect (v16i8 MQPR:$pred), (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))),
|
||||
(v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0,
|
||||
(MVE_VCMPi8 (v16i8 MQPR:$pred), (MVE_VMOVimmi8 0), 1)))>;
|
||||
@ -4467,6 +4500,13 @@ let Predicates = [HasMVEInt] in {
|
||||
(v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0,
|
||||
(MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), 1)))>;
|
||||
|
||||
def : Pat<(v8f16 (vselect (v8i16 MQPR:$pred), (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))),
|
||||
(v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0,
|
||||
(MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), 1)))>;
|
||||
def : Pat<(v4f32 (vselect (v4i32 MQPR:$pred), (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))),
|
||||
(v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0,
|
||||
(MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), 1)))>;
|
||||
|
||||
def : Pat<(v16i8 (zext (v16i1 VCCR:$pred))),
|
||||
(v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), 0, VCCR:$pred))>;
|
||||
def : Pat<(v8i16 (zext (v8i1 VCCR:$pred))),
|
||||
|
3441
test/CodeGen/Thumb2/mve-vcmpf.ll
Normal file
3441
test/CodeGen/Thumb2/mve-vcmpf.ll
Normal file
File diff suppressed because it is too large
Load Diff
3225
test/CodeGen/Thumb2/mve-vcmpfz.ll
Normal file
3225
test/CodeGen/Thumb2/mve-vcmpfz.ll
Normal file
File diff suppressed because it is too large
Load Diff
@ -37,6 +37,30 @@ entry:
|
||||
ret <4 x i32> %1
|
||||
}
|
||||
|
||||
; Select between two <8 x half> vectors using an <8 x i1> mask loaded from
; %mask. The FileCheck lines below expect the mask to be loaded into p0 with
; vldr, followed by a single vpsel on q0/q1 and the return.
define arm_aapcs_vfpcc <8 x half> @vpsel_f16(<8 x i1> *%mask, <8 x half> %src1, <8 x half> %src2) {
|
||||
; CHECK-LABEL: vpsel_f16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldr p0, [r0]
|
||||
; CHECK-NEXT: vpsel q0, q0, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = load <8 x i1>, <8 x i1>* %mask, align 4
|
||||
%1 = select <8 x i1> %0, <8 x half> %src1, <8 x half> %src2
|
||||
ret <8 x half> %1
|
||||
}
|
||||
|
||||
; Select between two <4 x float> vectors using a <4 x i1> mask loaded from
; %mask. The FileCheck lines below expect the mask to be loaded into p0 with
; vldr, followed by a single vpsel on q0/q1 and the return.
define arm_aapcs_vfpcc <4 x float> @vpsel_f32(<4 x i1> *%mask, <4 x float> %src1, <4 x float> %src2) {
|
||||
; CHECK-LABEL: vpsel_f32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vldr p0, [r0]
|
||||
; CHECK-NEXT: vpsel q0, q0, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = load <4 x i1>, <4 x i1>* %mask, align 4
|
||||
%1 = select <4 x i1> %0, <4 x float> %src1, <4 x float> %src2
|
||||
ret <4 x float> %1
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @foo(<4 x i32> %vec.ind) {
|
||||
; CHECK-LABEL: foo:
|
||||
; CHECK: @ %bb.0:
|
||||
|
Loading…
x
Reference in New Issue
Block a user