From 59062585b047f2f6ff07fcc15d3064deba600b0f Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Sat, 19 Jan 2019 21:26:20 +0000
Subject: [PATCH] [X86] Add masked MCVTSI2P/MCVTUI2P ISD opcodes to model the
 cvtqq2ps cvtuqq2ps nodes that produce less than 128-bits of results.

These nodes zero the upper half of the result and can't be represented with
vselect.

llvm-svn: 351666
---
 lib/Target/X86/X86ISelLowering.cpp      |  5 +-
 lib/Target/X86/X86ISelLowering.h        |  1 +
 lib/Target/X86/X86InstrAVX512.td        | 71 ++++++++++++++++++++++---
 lib/Target/X86/X86InstrFragmentsSIMD.td | 10 ++++
 lib/Target/X86/X86IntrinsicsInfo.h      | 18 +++----
 5 files changed, 88 insertions(+), 17 deletions(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index efa4cdb8cf9..b4b86a91c85 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -22118,7 +22118,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
       return DAG.getMergeValues(Results, dl);
     }
     case CVTPD2PS_MASK:
-    case CVTPD2I_MASK:
+    case CVTPD2DQ_MASK:
+    case CVTQQ2PS_MASK:
     case TRUNCATE_TO_REG: {
       SDValue Src = Op.getOperand(1);
       SDValue PassThru = Op.getOperand(2);
@@ -27464,6 +27465,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::CVTTS2UI_RND: return "X86ISD::CVTTS2UI_RND";
   case X86ISD::CVTSI2P: return "X86ISD::CVTSI2P";
   case X86ISD::CVTUI2P: return "X86ISD::CVTUI2P";
+  case X86ISD::MCVTSI2P: return "X86ISD::MCVTSI2P";
+  case X86ISD::MCVTUI2P: return "X86ISD::MCVTUI2P";
   case X86ISD::VFPCLASS: return "X86ISD::VFPCLASS";
   case X86ISD::VFPCLASSS: return "X86ISD::VFPCLASSS";
   case X86ISD::MULTISHIFT: return "X86ISD::MULTISHIFT";
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 3789b74ca89..60bc2765e84 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -515,6 +515,7 @@ namespace llvm {
       // Masked versions of above. Used for v2f64->v4f32.
       // SRC, PASSTHRU, MASK
       MCVTP2SI, MCVTP2UI, MCVTTP2SI, MCVTTP2UI,
+      MCVTSI2P, MCVTUI2P,

       // Save xmm argument registers to the stack, according to %al. An operator
       // is needed so that this can be expanded with control flow.
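Note: the new CVTQQ2PS_MASK entries (here and in X86IntrinsicsInfo.h further down) feed the same
shared intrinsic-lowering path as the existing CVTPD2DQ_MASK case. A minimal sketch of that shape
follows for orientation only; the function name, locals, and the omitted scalar-mask-to-vXi1
conversion are assumptions for illustration, not code from this patch.

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Illustrative sketch. Opc0/Opc1 correspond to X86ISD::CVTSI2P / X86ISD::MCVTSI2P
// (or the unsigned pair), as wired up in the intrinsic table later in this patch.
static SDValue lowerMaskedQQ2PS(SDValue Op, unsigned Opc0, unsigned Opc1,
                                SelectionDAG &DAG) {
  SDLoc dl(Op);
  MVT VT = Op.getSimpleValueType();    // v4f32; only two elements are computed
  SDValue Src = Op.getOperand(1);      // v2i64 source
  SDValue PassThru = Op.getOperand(2);
  SDValue Mask = Op.getOperand(3);

  // With an all-ones mask the plain conversion node is sufficient.
  if (isAllOnesConstant(Mask))
    return DAG.getNode(Opc0, dl, VT, Src);

  // Otherwise emit the masked node. A vselect around CVTSI2P/CVTUI2P cannot
  // express "the upper half of the result is always zero", so pass-through
  // and mask are operands of the node itself. Per the SDTMVintToFP profile
  // added in X86InstrFragmentsSIMD.td below, the mask operand must be a vXi1
  // value; producing it from the intrinsic's scalar mask is omitted here.
  return DAG.getNode(Opc1, dl, VT, Src, PassThru, Mask);
}

Because the hardware zeroes elements 2 and 3 of the v4f32 result regardless of the mask, the
merge semantics live in the node itself rather than in a separate vselect.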
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 2239951c407..bc8475cd87b 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -8383,8 +8383,7 @@ multiclass avx512_cvttps2qq opc, string OpcodeStr, SDNode OpNode,
 // Convert Signed/Unsigned Quardword to Float
 multiclass avx512_cvtqq2ps opc, string OpcodeStr, SDNode OpNode,
-                           SDNode OpNode128, SDNode OpNodeRnd,
-                           X86SchedWriteWidths sched> {
+                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
   let Predicates = [HasDQI] in {
     defm Z : avx512_vcvt_fp,
@@ -8396,9 +8395,9 @@ multiclass avx512_cvtqq2ps opc, string OpcodeStr, SDNode OpNode,
   // memory forms of these instructions in Asm Parcer. They have the same
   // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
   // due to the same reason.
-    defm Z128 : avx512_vcvt_fp, EVEX_V128,
-                NotEVEX2VEXConvertible;
+    defm Z128 : avx512_vcvt_fp,
+                EVEX_V128, NotEVEX2VEXConvertible;
     defm Z256 : avx512_vcvt_fp, EVEX_V256,
                 NotEVEX2VEXConvertible;
@@ -8501,11 +8500,11 @@ defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
                                   X86VUintToFpRnd, SchedWriteCvtDQ2PD>,
                                   VEX_W, XS, EVEX_CD8<64, CD8VF>;

-defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, X86VSintToFP,
+defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp,
                                  X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
                                  VEX_W, PS, EVEX_CD8<64, CD8VF>;

-defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, X86VUintToFP,
+defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp,
                                  X86VUintToFpRnd, SchedWriteCvtDQ2PS>,
                                  VEX_W, XD, EVEX_CD8<64, CD8VF>;
@@ -8815,6 +8814,64 @@ let Predicates = [HasDQI, HasVLX] in {
   def : Pat<(X86vzmovl (v2f64 (bitconvert (v4f32 (X86VUintToFP (v2i64 VR128X:$src)))))),
             (VCVTUQQ2PSZ128rr VR128X:$src)>;
+
+  // Special patterns to allow use of X86VMSintToFP for masking. Instruction
+  // patterns have been disabled with null_frag.
+  def : Pat<(v4f32 (X86VSintToFP (v2i64 VR128X:$src))),
+            (VCVTQQ2PSZ128rr VR128X:$src)>;
+  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
+                           VK2WM:$mask),
+            (VCVTQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
+                           VK2WM:$mask),
+            (VCVTQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
+
+  def : Pat<(v4f32 (X86VSintToFP (loadv2i64 addr:$src))),
+            (VCVTQQ2PSZ128rm addr:$src)>;
+  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
+                           VK2WM:$mask),
+            (VCVTQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
+                           VK2WM:$mask),
+            (VCVTQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
+
+  def : Pat<(v4f32 (X86VSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))))),
+            (VCVTQQ2PSZ128rmb addr:$src)>;
+  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
+                           (v4f32 VR128X:$src0), VK2WM:$mask),
+            (VCVTQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
+                           v4f32x_info.ImmAllZerosV, VK2WM:$mask),
+            (VCVTQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
+
+  // Special patterns to allow use of X86VMUintToFP for masking. Instruction
+  // patterns have been disabled with null_frag.
+  def : Pat<(v4f32 (X86VUintToFP (v2i64 VR128X:$src))),
+            (VCVTUQQ2PSZ128rr VR128X:$src)>;
+  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
+                           VK2WM:$mask),
+            (VCVTUQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
+                           VK2WM:$mask),
+            (VCVTUQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
+
+  def : Pat<(v4f32 (X86VUintToFP (loadv2i64 addr:$src))),
+            (VCVTUQQ2PSZ128rm addr:$src)>;
+  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
+                           VK2WM:$mask),
+            (VCVTUQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
+                           VK2WM:$mask),
+            (VCVTUQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
+
+  def : Pat<(v4f32 (X86VUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))))),
+            (VCVTUQQ2PSZ128rmb addr:$src)>;
+  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
+                           (v4f32 VR128X:$src0), VK2WM:$mask),
+            (VCVTUQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
+                           v4f32x_info.ImmAllZerosV, VK2WM:$mask),
+            (VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
 }

 let Predicates = [HasDQI, NoVLX] in {
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index ef90e53df64..03baf8054de 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -597,6 +597,13 @@ def X86cvtp2Int : SDNode<"X86ISD::CVTP2SI", SDTFloatToInt>;
 def X86cvtp2UInt : SDNode<"X86ISD::CVTP2UI", SDTFloatToInt>;

+// Masked versions of above
+def SDTMVintToFP: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>,
+                                       SDTCisFP<0>, SDTCisInt<1>,
+                                       SDTCisSameSizeAs<0, 1>,
+                                       SDTCisSameAs<0, 2>,
+                                       SDTCVecEltisVT<3, i1>,
+                                       SDTCisSameNumEltsAs<1, 3>]>;
 def SDTMFloatToInt: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>,
                                          SDTCisInt<0>, SDTCisFP<1>,
                                          SDTCisSameSizeAs<0, 1>,
@@ -604,6 +611,9 @@ def SDTMFloatToInt: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>,
                                          SDTCVecEltisVT<3, i1>,
                                          SDTCisSameNumEltsAs<1, 3>]>;

+def X86VMSintToFP : SDNode<"X86ISD::MCVTSI2P", SDTMVintToFP>;
+def X86VMUintToFP : SDNode<"X86ISD::MCVTUI2P", SDTMVintToFP>;
+
 def X86mcvtp2Int : SDNode<"X86ISD::MCVTP2SI", SDTMFloatToInt>;
 def X86mcvtp2UInt : SDNode<"X86ISD::MCVTP2UI", SDTMFloatToInt>;
 def X86mcvttp2si : SDNode<"X86ISD::MCVTTP2SI", SDTMFloatToInt>;
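Note: for reference, the user-level intrinsics that reach the avx512_mask_cvtqq2ps_128 table
entry below are the standard AVX512VL/AVX512DQ ones from immintrin.h. A small usage sketch
(not part of this patch; the function name is illustrative):

#include <immintrin.h>

// Build with AVX512VL and AVX512DQ enabled (e.g. clang++ -O2 -mavx512vl -mavx512dq).
// The masked 128-bit form converts into lanes 0-1 where the mask bit is set,
// keeps lanes 0-1 of `src` where it is clear, and zeroes lanes 2-3 of the
// result unconditionally.
__m128 convert_lo_qq2ps(__m128i a, __m128 src, __mmask8 k) {
  return _mm_mask_cvtepi64_ps(src, k, a);  // lowers to the masked 128-bit vcvtqq2ps
}

Only lanes 0 and 1 carry converted values; lanes 2 and 3 are always zero, which is the behavior
the dedicated MCVTSI2P/MCVTUI2P nodes model and a plain vselect could not.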
diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h
index e33e66e2ded..01a56a41396 100644
--- a/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/lib/Target/X86/X86IntrinsicsInfo.h
@@ -30,7 +30,7 @@ enum IntrinsicType : uint16_t {
   IFMA_OP, VPERM_2OP, INTR_TYPE_SCALAR_MASK,
   INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK,
   COMPRESS_EXPAND_IN_REG,
-  TRUNCATE_TO_REG, CVTPS2PH_MASK, CVTPD2I_MASK,
+  TRUNCATE_TO_REG, CVTPS2PH_MASK, CVTPD2DQ_MASK, CVTQQ2PS_MASK,
   TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
   FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS, FIXUPIMMS_MASKZ, GATHER_AVX2,
@@ -509,7 +509,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
                      X86ISD::CONFLICT, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvtdq2ps_512, INTR_TYPE_1OP_MASK,
                      ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND), //er
-  X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_128, CVTPD2I_MASK,
+  X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_128, CVTPD2DQ_MASK,
                      X86ISD::CVTP2SI, X86ISD::MCVTP2SI),
   X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_512, INTR_TYPE_1OP_MASK,
                      X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
@@ -523,7 +523,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
                      X86ISD::CVTP2SI, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_512, INTR_TYPE_1OP_MASK,
                      X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
-  X86_INTRINSIC_DATA(avx512_mask_cvtpd2udq_128, CVTPD2I_MASK,
+  X86_INTRINSIC_DATA(avx512_mask_cvtpd2udq_128, CVTPD2DQ_MASK,
                      X86ISD::CVTP2UI, X86ISD::MCVTP2UI),
   X86_INTRINSIC_DATA(avx512_mask_cvtpd2udq_256, INTR_TYPE_1OP_MASK,
                      X86ISD::CVTP2UI, 0),
@@ -563,8 +563,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
                      X86ISD::CVTP2UI, X86ISD::CVTP2UI_RND),
   X86_INTRINSIC_DATA(avx512_mask_cvtqq2pd_512, INTR_TYPE_1OP_MASK,
                      ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND),
-  X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_128, INTR_TYPE_1OP_MASK,
-                     X86ISD::CVTSI2P, 0),
+  X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_128, CVTQQ2PS_MASK,
+                     X86ISD::CVTSI2P, X86ISD::MCVTSI2P),
   X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_256, INTR_TYPE_1OP_MASK,
                      ISD::SINT_TO_FP, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_512, INTR_TYPE_1OP_MASK,
@@ -573,7 +573,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
                      X86ISD::VFPROUNDS_RND, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvtss2sd_round, INTR_TYPE_SCALAR_MASK_RM,
                      X86ISD::VFPEXTS_RND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_128, CVTPD2I_MASK,
+  X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_128, CVTPD2DQ_MASK,
                      X86ISD::CVTTP2SI, X86ISD::MCVTTP2SI),
   X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_512, INTR_TYPE_1OP_MASK,
                      X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_RND),
@@ -583,7 +583,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
                      X86ISD::CVTTP2SI, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_512, INTR_TYPE_1OP_MASK,
                      X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_RND),
-  X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_128, CVTPD2I_MASK,
+  X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_128, CVTPD2DQ_MASK,
                      X86ISD::CVTTP2UI, X86ISD::MCVTTP2UI),
   X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_256, INTR_TYPE_1OP_MASK,
                      X86ISD::CVTTP2UI, 0),
@@ -619,8 +619,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
                      ISD::UINT_TO_FP, X86ISD::UINT_TO_FP_RND),
   X86_INTRINSIC_DATA(avx512_mask_cvtuqq2pd_512, INTR_TYPE_1OP_MASK,
                      ISD::UINT_TO_FP, X86ISD::UINT_TO_FP_RND),
-  X86_INTRINSIC_DATA(avx512_mask_cvtuqq2ps_128, INTR_TYPE_1OP_MASK,
-                     X86ISD::CVTUI2P, 0),
+  X86_INTRINSIC_DATA(avx512_mask_cvtuqq2ps_128, CVTQQ2PS_MASK,
+                     X86ISD::CVTUI2P, X86ISD::MCVTUI2P),
   X86_INTRINSIC_DATA(avx512_mask_cvtuqq2ps_256, INTR_TYPE_1OP_MASK,
                      ISD::UINT_TO_FP, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvtuqq2ps_512, INTR_TYPE_1OP_MASK,