From d20252fbdf1d3cdd6209adb5b10ada9152d38b37 Mon Sep 17 00:00:00 2001 From: "Kevin P. Neal" Date: Wed, 28 Aug 2019 16:33:36 +0000 Subject: [PATCH] [FPEnv] Add fptosi and fptoui constrained intrinsics. This implements constrained floating point intrinsics for FP to signed and unsigned integers. Quoting from D32319: The purpose of the constrained intrinsics is to force the optimizer to respect the restrictions that will be necessary to support things like the STDC FENV_ACCESS ON pragma without interfering with optimizations when these restrictions are not needed. Reviewed by: Andrew Kaylor, Craig Topper, Hal Finkel, Cameron McInally, Roman Lebedev, Kit Barton Approved by: Craig Topper Differential Revision: http://reviews.llvm.org/D63782 llvm-svn: 370228 --- docs/LangRef.rst | 66 ++ include/llvm/CodeGen/ISDOpcodes.h | 7 + include/llvm/CodeGen/SelectionDAGNodes.h | 2 + include/llvm/CodeGen/TargetLowering.h | 4 +- include/llvm/IR/IntrinsicInst.h | 2 + include/llvm/IR/Intrinsics.td | 11 +- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 29 +- .../SelectionDAG/LegalizeIntegerTypes.cpp | 20 +- lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorOps.cpp | 12 +- .../SelectionDAG/LegalizeVectorTypes.cpp | 31 + lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 2 + .../SelectionDAG/SelectionDAGBuilder.cpp | 8 + .../SelectionDAG/SelectionDAGDumper.cpp | 2 + lib/CodeGen/SelectionDAG/TargetLowering.cpp | 48 +- lib/CodeGen/TargetLoweringBase.cpp | 2 + lib/IR/IntrinsicInst.cpp | 2 + lib/IR/Verifier.cpp | 29 + .../PowerPC/fp-intrinsics-fptosi-legal.ll | 19 + test/CodeGen/X86/fp-intrinsics.ll | 36 +- .../X86/vector-constrained-fp-intrinsics.ll | 882 ++++++++++++++++++ test/Feature/fp-intrinsics.ll | 25 + 22 files changed, 1223 insertions(+), 17 deletions(-) create mode 100644 test/CodeGen/PowerPC/fp-intrinsics-fptosi-legal.ll diff --git a/docs/LangRef.rst b/docs/LangRef.rst index 55ba443dddf..39095303361 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -15280,6 
+15280,72 @@ The result produced is the product of the first two operands added to the third operand computed with infinite precision, and then rounded to the target precision. +'``llvm.experimental.constrained.fptoui``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare <ty2> + @llvm.experimental.constrained.fptoui(<type> <value>, + metadata <behaviour>) + +Overview: +""""""""" + +The '``llvm.experimental.constrained.fptoui``' intrinsic converts a +floating-point ``value`` to its unsigned integer equivalent of type ``ty2``. + +Arguments: +"""""""""" + +The first argument to the '``llvm.experimental.constrained.fptoui``' +intrinsic must be :ref:`floating point <t_floating>` or :ref:`vector +<t_vector>` of floating point values. + +The second argument specifies the exception behavior as described above. + +Semantics: +"""""""""" + +The result produced is an unsigned integer converted from the floating +point operand. The value is truncated, so it is rounded towards zero. + +'``llvm.experimental.constrained.fptosi``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare <ty2> + @llvm.experimental.constrained.fptosi(<type> <value>, + metadata <behaviour>) + +Overview: +""""""""" + +The '``llvm.experimental.constrained.fptosi``' intrinsic converts +:ref:`floating-point <t_floating>` ``value`` to type ``ty2``. + +Arguments: +"""""""""" + +The first argument to the '``llvm.experimental.constrained.fptosi``' +intrinsic must be :ref:`floating point <t_floating>` or :ref:`vector +<t_vector>` of floating point values. + +The second argument specifies the exception behavior as described above. + +Semantics: +"""""""""" + +The result produced is a signed integer converted from the floating +point operand. The value is truncated, so it is rounded towards zero. 
+ '``llvm.experimental.constrained.fptrunc``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h index 0d8060bcb8c..b1a72b17e78 100644 --- a/include/llvm/CodeGen/ISDOpcodes.h +++ b/include/llvm/CodeGen/ISDOpcodes.h @@ -302,6 +302,13 @@ namespace ISD { STRICT_FRINT, STRICT_FNEARBYINT, STRICT_FMAXNUM, STRICT_FMINNUM, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND, STRICT_FTRUNC, + /// STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or + /// unsigned integer. These have the same semantics as fptosi and fptoui + /// in IR. + /// They are used to limit optimizations while the DAG is being optimized. + STRICT_FP_TO_SINT, + STRICT_FP_TO_UINT, + /// X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating /// point type down to the precision of the destination VT. TRUNC is a /// flag, which is always an integer that is zero or one. If TRUNC is 0, diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index 62764004d77..57fa8f26965 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -709,6 +709,8 @@ public: case ISD::STRICT_FFLOOR: case ISD::STRICT_FROUND: case ISD::STRICT_FTRUNC: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::STRICT_FP_ROUND: case ISD::STRICT_FP_EXTEND: return true; diff --git a/include/llvm/CodeGen/TargetLowering.h b/include/llvm/CodeGen/TargetLowering.h index 87145a7d78f..6b623e881ad 100644 --- a/include/llvm/CodeGen/TargetLowering.h +++ b/include/llvm/CodeGen/TargetLowering.h @@ -960,6 +960,8 @@ public: case ISD::STRICT_FFLOOR: EqOpc = ISD::FFLOOR; break; case ISD::STRICT_FROUND: EqOpc = ISD::FROUND; break; case ISD::STRICT_FTRUNC: EqOpc = ISD::FTRUNC; break; + case ISD::STRICT_FP_TO_SINT: EqOpc = ISD::FP_TO_SINT; break; + case ISD::STRICT_FP_TO_UINT: EqOpc = ISD::FP_TO_UINT; break; case ISD::STRICT_FP_ROUND: EqOpc = 
ISD::FP_ROUND; break; case ISD::STRICT_FP_EXTEND: EqOpc = ISD::FP_EXTEND; break; } @@ -3999,7 +4001,7 @@ public: /// \param N Node to expand /// \param Result output after conversion /// \returns True, if the expansion was successful, false otherwise - bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; + bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const; /// Expand UINT(i64) to double(f64) conversion /// \param N Node to expand diff --git a/include/llvm/IR/IntrinsicInst.h b/include/llvm/IR/IntrinsicInst.h index 438bdb29b70..f415336119f 100644 --- a/include/llvm/IR/IntrinsicInst.h +++ b/include/llvm/IR/IntrinsicInst.h @@ -259,6 +259,8 @@ namespace llvm { case Intrinsic::experimental_constrained_fdiv: case Intrinsic::experimental_constrained_frem: case Intrinsic::experimental_constrained_fma: + case Intrinsic::experimental_constrained_fptosi: + case Intrinsic::experimental_constrained_fptoui: case Intrinsic::experimental_constrained_fptrunc: case Intrinsic::experimental_constrained_fpext: case Intrinsic::experimental_constrained_sqrt: diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td index acf3489ab41..1a67f830435 100644 --- a/include/llvm/IR/Intrinsics.td +++ b/include/llvm/IR/Intrinsics.td @@ -622,6 +622,14 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in { llvm_metadata_ty, llvm_metadata_ty ]>; + def int_experimental_constrained_fptosi : Intrinsic<[ llvm_anyint_ty ], + [ llvm_anyfloat_ty, + llvm_metadata_ty ]>; + + def int_experimental_constrained_fptoui : Intrinsic<[ llvm_anyint_ty ], + [ llvm_anyfloat_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_fptrunc : Intrinsic<[ llvm_anyfloat_ty ], [ llvm_anyfloat_ty, llvm_metadata_ty, @@ -712,7 +720,8 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in { llvm_metadata_ty, llvm_metadata_ty ]>; } -// FIXME: Add intrinsics for fcmp, fptoui and fptosi. 
+// FIXME: Add intrinsic for fcmp. +// FIXME: Consider maybe adding intrinsics for sitofp, uitofp. //===------------------------- Expect Intrinsics --------------------------===// // diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index e1775d5cb53..7ceab40c1a5 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -236,6 +236,16 @@ public: } ReplacedNode(Old); } + + void ReplaceNodeWithValue(SDValue Old, SDValue New) { + LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG); + dbgs() << " with: "; New->dump(&DAG)); + + DAG.ReplaceAllUsesOfValueWith(Old, New); + if (UpdatedNodes) + UpdatedNodes->insert(New.getNode()); + ReplacedNode(Old.getNode()); + } }; } // end anonymous namespace @@ -2880,10 +2890,27 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG)) Results.push_back(Tmp1); break; + case ISD::STRICT_FP_TO_SINT: + if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG)) { + ReplaceNode(Node, Tmp1.getNode()); + LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_TO_SINT node\n"); + return true; + } + break; case ISD::FP_TO_UINT: - if (TLI.expandFP_TO_UINT(Node, Tmp1, DAG)) + if (TLI.expandFP_TO_UINT(Node, Tmp1, Tmp2, DAG)) Results.push_back(Tmp1); break; + case ISD::STRICT_FP_TO_UINT: + if (TLI.expandFP_TO_UINT(Node, Tmp1, Tmp2, DAG)) { + // Relink the chain. + DAG.ReplaceAllUsesOfValueWith(SDValue(Node,1), Tmp2); + // Replace the new UINT result. 
+ ReplaceNodeWithValue(SDValue(Node, 0), Tmp1); + LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_TO_UINT node\n"); + return true; + } + break; case ISD::LROUND: Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LROUND_F32, RTLIB::LROUND_F64, RTLIB::LROUND_F80, diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 5a2d335ca1e..cb8ab110142 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -112,6 +112,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break; + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break; @@ -494,7 +496,20 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT)) NewOpc = ISD::FP_TO_SINT; - SDValue Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0)); + if (N->getOpcode() == ISD::STRICT_FP_TO_UINT && + !TLI.isOperationLegal(ISD::STRICT_FP_TO_UINT, NVT) && + TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT)) + NewOpc = ISD::STRICT_FP_TO_SINT; + + SDValue Res; + if (N->isStrictFPOpcode()) { + Res = DAG.getNode(NewOpc, dl, { NVT, MVT::Other }, + { N->getOperand(0), N->getOperand(1) }); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + } else + Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0)); // Assert that the converted value fits in the original type. If it doesn't // (eg: because the value being converted is too big), then the result of the @@ -503,7 +518,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { // NOTE: fp-to-uint to fp-to-sint promotion guarantees zero extend. 
For example: // before legalization: fp-to-uint16, 65534. -> 0xfffe // after legalization: fp-to-sint32, 65534. -> 0x0000fffe - return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? + return DAG.getNode((N->getOpcode() == ISD::FP_TO_UINT || + N->getOpcode() == ISD::STRICT_FP_TO_UINT) ? ISD::AssertZext : ISD::AssertSext, dl, NVT, Res, DAG.getValueType(N->getValueType(0).getScalarType())); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 1d0f8d07797..4fd04229fc4 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -715,6 +715,7 @@ private: bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); SDValue ScalarizeVecOp_BITCAST(SDNode *N); SDValue ScalarizeVecOp_UnaryOp(SDNode *N); + SDValue ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N); SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N); SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecOp_VSELECT(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index a892fccc8a1..4b8656805f3 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -333,6 +333,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::STRICT_FFLOOR: case ISD::STRICT_FROUND: case ISD::STRICT_FTRUNC: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::STRICT_FP_ROUND: case ISD::STRICT_FP_EXTEND: Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); @@ -862,6 +864,8 @@ SDValue VectorLegalizer::Expand(SDValue Op) { case ISD::STRICT_FFLOOR: case ISD::STRICT_FROUND: case ISD::STRICT_FTRUNC: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: return ExpandStrictFPOp(Op); case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_MUL: @@ -1186,9 +1190,13 @@ SDValue VectorLegalizer::ExpandABS(SDValue Op) { SDValue VectorLegalizer::ExpandFP_TO_UINT(SDValue Op) { // 
Attempt to expand using TargetLowering. - SDValue Result; - if (TLI.expandFP_TO_UINT(Op.getNode(), Result, DAG)) + SDValue Result, Chain; + if (TLI.expandFP_TO_UINT(Op.getNode(), Result, Chain, DAG)) { + if (Op.getNode()->isStrictFPOpcode()) + // Relink the chain + DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Chain); return Result; + } // Otherwise go ahead and unroll. return DAG.UnrollVectorOp(Op.getNode()); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 1cfa68a232d..24f7f7edd1f 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -171,6 +171,8 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::STRICT_FFLOOR: case ISD::STRICT_FROUND: case ISD::STRICT_FTRUNC: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::STRICT_FP_EXTEND: R = ScalarizeVecRes_StrictFPOp(N); break; @@ -604,6 +606,10 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { case ISD::UINT_TO_FP: Res = ScalarizeVecOp_UnaryOp(N); break; + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: + Res = ScalarizeVecOp_UnaryOp_StrictFP(N); + break; case ISD::CONCAT_VECTORS: Res = ScalarizeVecOp_CONCAT_VECTORS(N); break; @@ -679,6 +685,23 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) { return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Op); } +/// If the input is a vector that needs to be scalarized, it must be <1 x ty>. +/// Do the strict FP operation on the element instead. 
+SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N) { + assert(N->getValueType(0).getVectorNumElements() == 1 && + "Unexpected vector type!"); + SDValue Elt = GetScalarizedVector(N->getOperand(1)); + SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), + { N->getValueType(0).getScalarType(), MVT::Other }, + { N->getOperand(0), Elt }); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + // Revectorize the result so the types line up with what the uses of this + // expression expect. + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); +} + /// The vectors to concatenate have length one - use a BUILD_VECTOR instead. SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { SmallVector Ops(N->getNumOperands()); @@ -883,7 +906,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FP_ROUND: case ISD::STRICT_FP_ROUND: case ISD::FP_TO_SINT: + case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_UINT: + case ISD::STRICT_FP_TO_UINT: case ISD::FRINT: case ISD::FROUND: case ISD::FSIN: @@ -1987,6 +2012,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { break; case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::CTTZ: case ISD::CTLZ: case ISD::CTPOP: @@ -2814,6 +2841,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::STRICT_FP_EXTEND: case ISD::STRICT_FP_ROUND: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: Res = WidenVecRes_Convert_StrictFP(N); break; @@ -4129,7 +4158,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::FP_EXTEND: case ISD::STRICT_FP_EXTEND: case ISD::FP_TO_SINT: + case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_UINT: + case ISD::STRICT_FP_TO_UINT: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::TRUNCATE: diff --git 
a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index fe3996bdb7d..7a9ef2b327e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -7778,6 +7778,8 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) { case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; break; case ISD::STRICT_FP_ROUND: NewOpc = ISD::FP_ROUND; break; case ISD::STRICT_FP_EXTEND: NewOpc = ISD::FP_EXTEND; break; + case ISD::STRICT_FP_TO_SINT: NewOpc = ISD::FP_TO_SINT; break; + case ISD::STRICT_FP_TO_UINT: NewOpc = ISD::FP_TO_UINT; break; } assert(Node->getNumValues() == 2 && "Unexpected number of results!"); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index cf438691308..105fb42de61 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6106,6 +6106,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::experimental_constrained_fdiv: case Intrinsic::experimental_constrained_frem: case Intrinsic::experimental_constrained_fma: + case Intrinsic::experimental_constrained_fptosi: + case Intrinsic::experimental_constrained_fptoui: case Intrinsic::experimental_constrained_fptrunc: case Intrinsic::experimental_constrained_fpext: case Intrinsic::experimental_constrained_sqrt: @@ -6899,6 +6901,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( case Intrinsic::experimental_constrained_fma: Opcode = ISD::STRICT_FMA; break; + case Intrinsic::experimental_constrained_fptosi: + Opcode = ISD::STRICT_FP_TO_SINT; + break; + case Intrinsic::experimental_constrained_fptoui: + Opcode = ISD::STRICT_FP_TO_UINT; + break; case Intrinsic::experimental_constrained_fptrunc: Opcode = ISD::STRICT_FP_ROUND; break; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index da3049881d3..ff2e34d5255 100644 --- 
a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -325,7 +325,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::SINT_TO_FP: return "sint_to_fp"; case ISD::UINT_TO_FP: return "uint_to_fp"; case ISD::FP_TO_SINT: return "fp_to_sint"; + case ISD::STRICT_FP_TO_SINT: return "strict_fp_to_sint"; case ISD::FP_TO_UINT: return "fp_to_uint"; + case ISD::STRICT_FP_TO_UINT: return "strict_fp_to_uint"; case ISD::BITCAST: return "bitcast"; case ISD::ADDRSPACECAST: return "addrspacecast"; case ISD::FP16_TO_FP: return "fp16_to_fp"; diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 6fd90ef3301..8c322df862d 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -5597,7 +5597,8 @@ bool TargetLowering::expandROT(SDNode *Node, SDValue &Result, bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const { - SDValue Src = Node->getOperand(0); + unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0; + SDValue Src = Node->getOperand(OpNo); EVT SrcVT = Src.getValueType(); EVT DstVT = Node->getValueType(0); SDLoc dl(SDValue(Node, 0)); @@ -5606,6 +5607,13 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, if (SrcVT != MVT::f32 || DstVT != MVT::i64) return false; + if (Node->isStrictFPOpcode()) + // When a NaN is converted to an integer a trap is allowed. We can't + // use this expansion here because it would eliminate that trap. Other + // traps are also allowed and cannot be eliminated. See + // IEEE 754-2008 sec 5.8. 
+ return false; + // Expand f32 -> i64 conversion // This algorithm comes from compiler-rt's implementation of fixsfdi: // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c @@ -5659,9 +5667,11 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, } bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, + SDValue &Chain, SelectionDAG &DAG) const { SDLoc dl(SDValue(Node, 0)); - SDValue Src = Node->getOperand(0); + unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0; + SDValue Src = Node->getOperand(OpNo); EVT SrcVT = Src.getValueType(); EVT DstVT = Node->getValueType(0); @@ -5669,7 +5679,9 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT); // Only expand vector types if we have the appropriate vector bit operations. - if (DstVT.isVector() && (!isOperationLegalOrCustom(ISD::FP_TO_SINT, DstVT) || + unsigned SIntOpcode = Node->isStrictFPOpcode() ? 
ISD::STRICT_FP_TO_SINT : + ISD::FP_TO_SINT; + if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) || !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT))) return false; @@ -5681,14 +5693,21 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits()); if (APFloat::opOverflow & APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) { - Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src); + if (Node->isStrictFPOpcode()) { + Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other }, + { Node->getOperand(0), Src }); + Chain = Result.getValue(1); + } else + Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src); return true; } SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT); SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT); - bool Strict = shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false); + bool Strict = Node->isStrictFPOpcode() || + shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false); + if (Strict) { // Expand based on maximum range of FP_TO_SINT, if the value exceeds the // signmask then offset (the result of which should be fully representable). @@ -5698,12 +5717,23 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, // Result = fp_to_sint(Val) ^ Ofs // TODO: Should any fast-math-flags be set for the FSUB? 
- SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src, - DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst)); + SDValue SrcBiased; + if (Node->isStrictFPOpcode()) + SrcBiased = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other }, + { Node->getOperand(0), Src, Cst }); + else + SrcBiased = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst); + SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src, SrcBiased); SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), DAG.getConstant(SignMask, dl, DstVT)); - Result = DAG.getNode(ISD::XOR, dl, DstVT, - DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val), Ofs); + SDValue SInt; + if (Node->isStrictFPOpcode()) { + SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other }, + { SrcBiased.getValue(1), Val }); + Chain = SInt.getValue(1); + } else + SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val); + Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, Ofs); } else { // Expand based on maximum range of FP_TO_SINT: // True = fp_to_sint(Src) diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 40ecef5855d..dcedca7e9fb 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -716,6 +716,8 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::STRICT_FMINNUM, VT, Expand); setOperationAction(ISD::STRICT_FP_ROUND, VT, Expand); setOperationAction(ISD::STRICT_FP_EXTEND, VT, Expand); + setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Expand); + setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Expand); // For most targets @llvm.get.dynamic.area.offset just returns 0. 
setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand); diff --git a/lib/IR/IntrinsicInst.cpp b/lib/IR/IntrinsicInst.cpp index b2471128007..a426c9b4df1 100644 --- a/lib/IR/IntrinsicInst.cpp +++ b/lib/IR/IntrinsicInst.cpp @@ -189,6 +189,8 @@ bool ConstrainedFPIntrinsic::isUnaryOp() const { switch (getIntrinsicID()) { default: return false; + case Intrinsic::experimental_constrained_fptosi: + case Intrinsic::experimental_constrained_fptoui: case Intrinsic::experimental_constrained_fptrunc: case Intrinsic::experimental_constrained_fpext: case Intrinsic::experimental_constrained_sqrt: diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index 0350edb2454..59e404b7953 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -4282,6 +4282,8 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { case Intrinsic::experimental_constrained_fdiv: case Intrinsic::experimental_constrained_frem: case Intrinsic::experimental_constrained_fma: + case Intrinsic::experimental_constrained_fptosi: + case Intrinsic::experimental_constrained_fptoui: case Intrinsic::experimental_constrained_fptrunc: case Intrinsic::experimental_constrained_fpext: case Intrinsic::experimental_constrained_sqrt: @@ -4773,6 +4775,33 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) { HasRoundingMD = true; break; + case Intrinsic::experimental_constrained_fptosi: + case Intrinsic::experimental_constrained_fptoui: { + Assert((NumOperands == 2), + "invalid arguments for constrained FP intrinsic", &FPI); + HasExceptionMD = true; + + Value *Operand = FPI.getArgOperand(0); + uint64_t NumSrcElem = 0; + Assert(Operand->getType()->isFPOrFPVectorTy(), + "Intrinsic first argument must be floating point", &FPI); + if (auto *OperandT = dyn_cast<VectorType>(Operand->getType())) { + NumSrcElem = OperandT->getNumElements(); + } + + Operand = &FPI; + Assert((NumSrcElem > 0) == Operand->getType()->isVectorTy(), + "Intrinsic first argument and result disagree on vector use", &FPI); + 
Assert(Operand->getType()->isIntOrIntVectorTy(), + "Intrinsic result must be an integer", &FPI); + if (auto *OperandT = dyn_cast<VectorType>(Operand->getType())) { + Assert(NumSrcElem == OperandT->getNumElements(), + "Intrinsic first argument and result vector lengths must be equal", + &FPI); + } + } + break; + + case Intrinsic::experimental_constrained_fptrunc: + case Intrinsic::experimental_constrained_fpext: { if (FPI.getIntrinsicID() == Intrinsic::experimental_constrained_fptrunc) { diff --git a/test/CodeGen/PowerPC/fp-intrinsics-fptosi-legal.ll b/test/CodeGen/PowerPC/fp-intrinsics-fptosi-legal.ll new file mode 100644 index 00000000000..98f2f36db74 --- /dev/null +++ b/test/CodeGen/PowerPC/fp-intrinsics-fptosi-legal.ll @@ -0,0 +1,19 @@ +; RUN: llc -O3 -mtriple=powerpc-unknown-linux-gnu -mcpu=e500 -mattr=spe < %s | FileCheck %s + +; PowerPC SPE is a rare in-tree target that has the FP_TO_SINT node marked +; as Legal. + +; Verify that fptosi(42.1) isn't simplified when the rounding mode is +; unknown. +; Verify that no gross errors happen. +; CHECK-LABEL: @f20 +; COMMON: cfdctsiz +define i32 @f20(double %a) { +entry: + %result = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double 42.1, + metadata !"fpexcept.strict") + ret i32 %result +} + +@llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata" +declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata) diff --git a/test/CodeGen/X86/fp-intrinsics.ll b/test/CodeGen/X86/fp-intrinsics.ll index ffcbdef3da1..cbb1c386a62 100644 --- a/test/CodeGen/X86/fp-intrinsics.ll +++ b/test/CodeGen/X86/fp-intrinsics.ll @@ -286,6 +286,39 @@ entry: ret double %rem } + +; Verify that fptoui(%x) isn't simplified when the rounding mode is +; unknown. The expansion should have only one conversion instruction. +; Verify that no gross errors happen. 
+; CHECK-LABEL: @f20u +; NO-FMA: cmpltsd +; NO-FMA: movapd +; NO-FMA: andpd +; NO-FMA: xorl +; NO-FMA: ucomisd +; NO-FMA: subsd +; NO-FMA: andnpd +; NO-FMA: orpd +; NO-FMA: cvttsd2si +; NO-FMA: setae +; NO-FMA: shll +; NO-FMA: xorl +; +; HAS-FMA: vcmpltsd +; HAS-FMA: vsubsd +; HAS-FMA: vblendvpd +; HAS-FMA: vcvttsd2si +; HAS-FMA: xorl +; HAS-FMA: vucomisd +; HAS-FMA: setae +; HAS-FMA: shll +; HAS-FMA: xorl +define i32 @f20u(double %x) { +entry: + %result = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %x, + metadata !"fpexcept.strict") + ret i32 %result +} + ; Verify that round(42.1) isn't simplified when the rounding mode is ; unknown. ; Verify that no gross errors happen. @@ -329,6 +362,7 @@ declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadat declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata) declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata) declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata) +declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata) +declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata) declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata) - diff --git a/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll index c0d7252ddb7..19b2b4864ea 100644 --- a/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll +++ b/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll @@ -3868,6 +3868,856 @@ entry: ret <4 x double> %min } +define <1 x i32> @constrained_vector_fptosi_v1i32_v1f32() { +; CHECK-LABEL: constrained_vector_fptosi_v1i32_v1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fptosi_v1i32_v1f32: 
+; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax +; AVX-NEXT: retq +entry: + %result = call <1 x i32> @llvm.experimental.constrained.fptosi.v1i32.v1f32( + <1 x float>, + metadata !"fpexcept.strict") + ret <1 x i32> %result +} + +define <2 x i32> @constrained_vector_fptosi_v2i32_v2f32() { +; CHECK-LABEL: constrained_vector_fptosi_v2i32_v2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax +; CHECK-NEXT: movd %eax, %xmm1 +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax +; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fptosi_v2i32_v2f32: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax +; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; AVX-NEXT: retq +entry: + %result = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f32( + <2 x float>, + metadata !"fpexcept.strict") + ret <2 x i32> %result +} + +define <3 x i32> @constrained_vector_fptosi_v3i32_v3f32() { +; CHECK-LABEL: constrained_vector_fptosi_v3i32_v3f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax +; CHECK-NEXT: movd %eax, %xmm1 +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax +; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax +; CHECK-NEXT: movd %eax, %xmm1 +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fptosi_v3i32_v3f32: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax +; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax +; AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 +; AVX-NEXT: retq +entry: + %result = call <3 x i32> 
@llvm.experimental.constrained.fptosi.v3i32.v3f32( + <3 x float>, + metadata !"fpexcept.strict") + ret <3 x i32> %result +} + +define <4 x i32> @constrained_vector_fptosi_v4i32_v4f32() { +; CHECK-LABEL: constrained_vector_fptosi_v4i32_v4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax +; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax +; CHECK-NEXT: movd %eax, %xmm1 +; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax +; CHECK-NEXT: movd %eax, %xmm2 +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax +; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fptosi_v4i32_v4f32: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax +; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax +; AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax +; AVX-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 +; AVX-NEXT: retq +entry: + %result = call <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f32( + <4 x float>, + metadata !"fpexcept.strict") + ret <4 x i32> %result +} + +define <1 x i64> @constrained_vector_fptosi_v1i64_v1f32() { +; CHECK-LABEL: constrained_vector_fptosi_v1i64_v1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fptosi_v1i64_v1f32: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax +; AVX-NEXT: retq +entry: + %result = call <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f32( + <1 x float>, + metadata !"fpexcept.strict") + ret <1 x i64> %result +} + +define <2 x i64> @constrained_vector_fptosi_v2i64_v2f32() { +; CHECK-LABEL: 
constrained_vector_fptosi_v2i64_v2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax +; CHECK-NEXT: movq %rax, %xmm1 +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax +; CHECK-NEXT: movq %rax, %xmm0 +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fptosi_v2i64_v2f32: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm0 +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm1 +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX-NEXT: retq +entry: + %result = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f32( + <2 x float>, + metadata !"fpexcept.strict") + ret <2 x i64> %result +} + +define <3 x i64> @constrained_vector_fptosi_v3i64_v3f32() { +; CHECK-LABEL: constrained_vector_fptosi_v3i64_v3f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rdx +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rcx +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fptosi_v3i64_v3f32: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm0 +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm1 +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm1 +; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX-NEXT: retq +entry: + %result = call <3 x i64> @llvm.experimental.constrained.fptosi.v3i64.v3f32( + <3 x float>, + metadata !"fpexcept.strict") + ret <3 x i64> %result +} + +define <4 x i64> @constrained_vector_fptosi_v4i64_v4f32() { +; CHECK-LABEL: constrained_vector_fptosi_v4i64_v4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax +; CHECK-NEXT: movq %rax, %xmm1 +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax +; CHECK-NEXT: movq %rax, %xmm0 +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = 
xmm0[0],xmm1[0] +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax +; CHECK-NEXT: movq %rax, %xmm2 +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax +; CHECK-NEXT: movq %rax, %xmm1 +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fptosi_v4i64_v4f32: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm0 +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm1 +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm1 +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm2 +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX-NEXT: retq +entry: + %result = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f32( + <4 x float>, + metadata !"fpexcept.strict") + ret <4 x i64> %result +} + +define <1 x i32> @constrained_vector_fptosi_v1i32_v1f64() { +; CHECK-LABEL: constrained_vector_fptosi_v1i32_v1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fptosi_v1i32_v1f64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax +; AVX-NEXT: retq +entry: + %result = call <1 x i32> @llvm.experimental.constrained.fptosi.v1i32.v1f64( + <1 x double>, + metadata !"fpexcept.strict") + ret <1 x i32> %result +} + + +define <2 x i32> @constrained_vector_fptosi_v2i32_v2f64() { +; CHECK-LABEL: constrained_vector_fptosi_v2i32_v2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax +; CHECK-NEXT: movd %eax, %xmm1 +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax +; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fptosi_v2i32_v2f64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax +; 
AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax +; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; AVX-NEXT: retq +entry: + %result = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f64( + <2 x double>, + metadata !"fpexcept.strict") + ret <2 x i32> %result +} + +define <3 x i32> @constrained_vector_fptosi_v3i32_v3f64() { +; CHECK-LABEL: constrained_vector_fptosi_v3i32_v3f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax +; CHECK-NEXT: movd %eax, %xmm1 +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax +; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax +; CHECK-NEXT: movd %eax, %xmm1 +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fptosi_v3i32_v3f64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax +; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax +; AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 +; AVX-NEXT: retq +entry: + %result = call <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f64( + <3 x double>, + metadata !"fpexcept.strict") + ret <3 x i32> %result +} + +define <4 x i32> @constrained_vector_fptosi_v4i32_v4f64() { +; CHECK-LABEL: constrained_vector_fptosi_v4i32_v4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax +; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax +; CHECK-NEXT: movd %eax, %xmm1 +; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax +; CHECK-NEXT: movd %eax, %xmm2 +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax +; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: retq 
+; +; AVX-LABEL: constrained_vector_fptosi_v4i32_v4f64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax +; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax +; AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax +; AVX-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 +; AVX-NEXT: retq +entry: + %result = call <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f64( + <4 x double>, + metadata !"fpexcept.strict") + ret <4 x i32> %result +} + +define <1 x i64> @constrained_vector_fptosi_v1i64_v1f64() { +; CHECK-LABEL: constrained_vector_fptosi_v1i64_v1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fptosi_v1i64_v1f64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %rax +; AVX-NEXT: retq +entry: + %result = call <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f64( + <1 x double>, + metadata !"fpexcept.strict") + ret <1 x i64> %result +} + +define <2 x i64> @constrained_vector_fptosi_v2i64_v2f64() { +; CHECK-LABEL: constrained_vector_fptosi_v2i64_v2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax +; CHECK-NEXT: movq %rax, %xmm1 +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax +; CHECK-NEXT: movq %rax, %xmm0 +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fptosi_v2i64_v2f64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm0 +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm1 +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX-NEXT: retq +entry: + %result = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64( + <2 x double>, + metadata !"fpexcept.strict") + ret <2 x i64> %result +} + +define <3 x i64> 
@constrained_vector_fptosi_v3i64_v3f64() { +; CHECK-LABEL: constrained_vector_fptosi_v3i64_v3f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rdx +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rcx +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fptosi_v3i64_v3f64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm0 +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm1 +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm1 +; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX-NEXT: retq +entry: + %result = call <3 x i64> @llvm.experimental.constrained.fptosi.v3i64.v3f64( + <3 x double>, + metadata !"fpexcept.strict") + ret <3 x i64> %result +} + +define <4 x i64> @constrained_vector_fptosi_v4i64_v4f64() { +; CHECK-LABEL: constrained_vector_fptosi_v4i64_v4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax +; CHECK-NEXT: movq %rax, %xmm1 +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax +; CHECK-NEXT: movq %rax, %xmm0 +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax +; CHECK-NEXT: movq %rax, %xmm2 +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax +; CHECK-NEXT: movq %rax, %xmm1 +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fptosi_v4i64_v4f64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm0 +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm1 +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm1 +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm2 +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; 
AVX-NEXT: retq +entry: + %result = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f64( + <4 x double>, + metadata !"fpexcept.strict") + ret <4 x i64> %result +} + +define <1 x i32> @constrained_vector_fptoui_v1i32_v1f32() { +; CHECK-LABEL: constrained_vector_fptoui_v1i32_v1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fptoui_v1i32_v1f32: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax +; AVX-NEXT: retq +entry: + %result = call <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f32( + <1 x float>, + metadata !"fpexcept.strict") + ret <1 x i32> %result +} + +define <2 x i32> @constrained_vector_fptoui_v2i32_v2f32() { +; CHECK-LABEL: constrained_vector_fptoui_v2i32_v2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax +; CHECK-NEXT: movd %eax, %xmm1 +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax +; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fptoui_v2i32_v2f32: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %ecx +; AVX-NEXT: vmovd %ecx, %xmm0 +; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; AVX-NEXT: retq +entry: + %result = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f32( + <2 x float>, + metadata !"fpexcept.strict") + ret <2 x i32> %result +} + +define <3 x i32> @constrained_vector_fptoui_v3i32_v3f32() { +; CHECK-LABEL: constrained_vector_fptoui_v3i32_v3f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax +; CHECK-NEXT: movd %eax, %xmm1 +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax +; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax +; CHECK-NEXT: movd %eax, %xmm1 +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = 
xmm0[0],xmm1[0] +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fptoui_v3i32_v3f32: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %ecx +; AVX-NEXT: vmovd %ecx, %xmm0 +; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax +; AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 +; AVX-NEXT: retq +entry: + %result = call <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f32( + <3 x float>, + metadata !"fpexcept.strict") + ret <3 x i32> %result +} + +define <4 x i32> @constrained_vector_fptoui_v4i32_v4f32() { +; CHECK-LABEL: constrained_vector_fptoui_v4i32_v4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax +; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax +; CHECK-NEXT: movd %eax, %xmm1 +; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax +; CHECK-NEXT: movd %eax, %xmm2 +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax +; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fptoui_v4i32_v4f32: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %ecx +; AVX-NEXT: vmovd %ecx, %xmm0 +; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax +; AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax +; AVX-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 +; AVX-NEXT: retq +entry: + %result = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32( + <4 x float>, + metadata !"fpexcept.strict") + ret <4 x i32> %result +} + +define <1 x i64> @constrained_vector_fptoui_v1i64_v1f32() { +; CHECK-LABEL: constrained_vector_fptoui_v1i64_v1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cvttss2si {{.*}}(%rip), 
%rax +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fptoui_v1i64_v1f32: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax +; AVX-NEXT: retq +entry: + %result = call <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f32( + <1 x float>, + metadata !"fpexcept.strict") + ret <1 x i64> %result +} + +define <2 x i64> @constrained_vector_fptoui_v2i64_v2f32() { +; CHECK-LABEL: constrained_vector_fptoui_v2i64_v2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax +; CHECK-NEXT: movq %rax, %xmm1 +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax +; CHECK-NEXT: movq %rax, %xmm0 +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fptoui_v2i64_v2f32: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm0 +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm1 +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX-NEXT: retq +entry: + %result = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f32( + <2 x float>, + metadata !"fpexcept.strict") + ret <2 x i64> %result +} + +define <3 x i64> @constrained_vector_fptoui_v3i64_v3f32() { +; CHECK-LABEL: constrained_vector_fptoui_v3i64_v3f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rdx +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rcx +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fptoui_v3i64_v3f32: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm0 +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm1 +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm1 +; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX-NEXT: retq +entry: + %result = call <3 x i64> @llvm.experimental.constrained.fptoui.v3i64.v3f32( + <3 x float>, + 
metadata !"fpexcept.strict") + ret <3 x i64> %result +} + +define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() { +; CHECK-LABEL: constrained_vector_fptoui_v4i64_v4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax +; CHECK-NEXT: movq %rax, %xmm1 +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax +; CHECK-NEXT: movq %rax, %xmm0 +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax +; CHECK-NEXT: movq %rax, %xmm2 +; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax +; CHECK-NEXT: movq %rax, %xmm1 +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fptoui_v4i64_v4f32: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm0 +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm1 +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm1 +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm2 +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX-NEXT: retq +entry: + %result = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f32( + <4 x float>, + metadata !"fpexcept.strict") + ret <4 x i64> %result +} + +define <1 x i32> @constrained_vector_fptoui_v1i32_v1f64() { +; CHECK-LABEL: constrained_vector_fptoui_v1i32_v1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fptoui_v1i32_v1f64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax +; AVX-NEXT: retq +entry: + %result = call <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f64( + <1 x double>, + metadata !"fpexcept.strict") + ret <1 x i32> %result +} + +define <2 x i32> @constrained_vector_fptoui_v2i32_v2f64() { +; CHECK-LABEL: constrained_vector_fptoui_v2i32_v2f64: +; 
CHECK: # %bb.0: # %entry +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax +; CHECK-NEXT: movd %eax, %xmm1 +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax +; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fptoui_v2i32_v2f64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %ecx +; AVX-NEXT: vmovd %ecx, %xmm0 +; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; AVX-NEXT: retq +entry: + %result = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f64( + <2 x double>, + metadata !"fpexcept.strict") + ret <2 x i32> %result +} + +define <3 x i32> @constrained_vector_fptoui_v3i32_v3f64() { +; CHECK-LABEL: constrained_vector_fptoui_v3i32_v3f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax +; CHECK-NEXT: movd %eax, %xmm1 +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax +; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax +; CHECK-NEXT: movd %eax, %xmm1 +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fptoui_v3i32_v3f64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %ecx +; AVX-NEXT: vmovd %ecx, %xmm0 +; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax +; AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 +; AVX-NEXT: retq +entry: + %result = call <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f64( + <3 x double>, + metadata !"fpexcept.strict") + ret <3 x i32> %result +} + +define <4 x i32> @constrained_vector_fptoui_v4i32_v4f64() { +; CHECK-LABEL: constrained_vector_fptoui_v4i32_v4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax +; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax +; 
CHECK-NEXT: movd %eax, %xmm1 +; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax +; CHECK-NEXT: movd %eax, %xmm2 +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax +; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fptoui_v4i32_v4f64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %ecx +; AVX-NEXT: vmovd %ecx, %xmm0 +; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax +; AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax +; AVX-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 +; AVX-NEXT: retq +entry: + %result = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f64( + <4 x double>, + metadata !"fpexcept.strict") + ret <4 x i32> %result +} + +define <1 x i64> @constrained_vector_fptoui_v1i64_v1f64() { +; CHECK-LABEL: constrained_vector_fptoui_v1i64_v1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fptoui_v1i64_v1f64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %rax +; AVX-NEXT: retq +entry: + %result = call <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f64( + <1 x double>, + metadata !"fpexcept.strict") + ret <1 x i64> %result +} + +define <2 x i64> @constrained_vector_fptoui_v2i64_v2f64() { +; CHECK-LABEL: constrained_vector_fptoui_v2i64_v2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax +; CHECK-NEXT: movq %rax, %xmm1 +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax +; CHECK-NEXT: movq %rax, %xmm0 +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fptoui_v2i64_v2f64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvttsd2si 
{{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm0 +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm1 +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX-NEXT: retq +entry: + %result = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64( + <2 x double>, + metadata !"fpexcept.strict") + ret <2 x i64> %result +} + +define <3 x i64> @constrained_vector_fptoui_v3i64_v3f64() { +; CHECK-LABEL: constrained_vector_fptoui_v3i64_v3f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rdx +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rcx +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fptoui_v3i64_v3f64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm0 +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm1 +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm1 +; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX-NEXT: retq +entry: + %result = call <3 x i64> @llvm.experimental.constrained.fptoui.v3i64.v3f64( + <3 x double>, + metadata !"fpexcept.strict") + ret <3 x i64> %result +} + +define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() { +; CHECK-LABEL: constrained_vector_fptoui_v4i64_v4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax +; CHECK-NEXT: movq %rax, %xmm1 +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax +; CHECK-NEXT: movq %rax, %xmm0 +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax +; CHECK-NEXT: movq %rax, %xmm2 +; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax +; CHECK-NEXT: movq %rax, %xmm1 +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fptoui_v4i64_v4f64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm0 +; AVX-NEXT: 
vcvttsd2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm1 +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm1 +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovq %rax, %xmm2 +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX-NEXT: retq +entry: + %result = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f64( + <4 x double>, + metadata !"fpexcept.strict") + ret <4 x i64> %result +} + + define <1 x float> @constrained_vector_fptrunc_v1f64() { ; CHECK-LABEL: constrained_vector_fptrunc_v1f64: ; CHECK: # %bb.0: # %entry @@ -4661,6 +5511,14 @@ declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, met declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.maxnum.v2f64(<2 x double>, <2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.minnum.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f32(<2 x float>, metadata) +declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f32(<2 x float>, metadata) +declare <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f64(<2 x double>, metadata) +declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double>, metadata) +declare <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f32(<2 x float>, metadata) +declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f32(<2 x float>, metadata) +declare <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f64(<2 x double>, metadata) +declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double>, metadata) declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata) declare <2 x double> 
@llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata) declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata, metadata) @@ -4688,6 +5546,14 @@ declare <1 x float> @llvm.experimental.constrained.rint.v1f32(<1 x float>, metad declare <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(<1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.maxnum.v1f32(<1 x float>, <1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.minnum.v1f32(<1 x float>, <1 x float>, metadata, metadata) +declare <1 x i32> @llvm.experimental.constrained.fptosi.v1i32.v1f32(<1 x float>, metadata) +declare <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f32(<1 x float>, metadata) +declare <1 x i32> @llvm.experimental.constrained.fptosi.v1i32.v1f64(<1 x double>, metadata) +declare <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f64(<1 x double>, metadata) +declare <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f32(<1 x float>, metadata) +declare <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f32(<1 x float>, metadata) +declare <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f64(<1 x double>, metadata) +declare <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f64(<1 x double>, metadata) declare <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64(<1 x double>, metadata, metadata) declare <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32(<1 x float>, metadata) declare <1 x float> @llvm.experimental.constrained.ceil.v1f32(<1 x float>, metadata, metadata) @@ -4734,6 +5600,14 @@ declare <3 x float> @llvm.experimental.constrained.maxnum.v3f32(<3 x float>, <3 declare <3 x double> @llvm.experimental.constrained.maxnum.v3f64(<3 x double>, <3 x double>, metadata, metadata) declare <3 x float> @llvm.experimental.constrained.minnum.v3f32(<3 x float>, <3 x float>, metadata, metadata) declare <3 x double> 
@llvm.experimental.constrained.minnum.v3f64(<3 x double>, <3 x double>, metadata, metadata) +declare <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f32(<3 x float>, metadata) +declare <3 x i64> @llvm.experimental.constrained.fptosi.v3i64.v3f32(<3 x float>, metadata) +declare <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f64(<3 x double>, metadata) +declare <3 x i64> @llvm.experimental.constrained.fptosi.v3i64.v3f64(<3 x double>, metadata) +declare <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f32(<3 x float>, metadata) +declare <3 x i64> @llvm.experimental.constrained.fptoui.v3i64.v3f32(<3 x float>, metadata) +declare <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f64(<3 x double>, metadata) +declare <3 x i64> @llvm.experimental.constrained.fptoui.v3i64.v3f64(<3 x double>, metadata) declare <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double>, metadata, metadata) declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(<3 x float>, metadata) declare <3 x float> @llvm.experimental.constrained.ceil.v3f32(<3 x float>, metadata, metadata) @@ -4765,6 +5639,14 @@ declare <4 x double> @llvm.experimental.constrained.rint.v4f64(<4 x double>, met declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.maxnum.v4f64(<4 x double>, <4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.minnum.v4f64(<4 x double>, <4 x double>, metadata, metadata) +declare <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f32(<4 x float>, metadata) +declare <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f32(<4 x float>, metadata) +declare <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f64(<4 x double>, metadata) +declare <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f64(<4 x double>, metadata) +declare <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32(<4 
x float>, metadata) +declare <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f32(<4 x float>, metadata) +declare <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f64(<4 x double>, metadata) +declare <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f64(<4 x double>, metadata) declare <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(<4 x float>, metadata) declare <4 x double> @llvm.experimental.constrained.ceil.v4f64(<4 x double>, metadata, metadata) diff --git a/test/Feature/fp-intrinsics.ll b/test/Feature/fp-intrinsics.ll index 895858e5a91..40641472d1c 100644 --- a/test/Feature/fp-intrinsics.ll +++ b/test/Feature/fp-intrinsics.ll @@ -242,6 +242,29 @@ entry: ret double %result } +; Verify that fptoui(42.1) isn't simplified when the exception behavior is +; strict (the conversion is inexact, so folding could hide an FP exception). +; CHECK-LABEL: f18 +; CHECK: call zeroext i32 @llvm.experimental.constrained.fptoui +define zeroext i32 @f18() { +entry: + %result = call zeroext i32 @llvm.experimental.constrained.fptoui.i32.f64( + double 42.1, + metadata !"fpexcept.strict") + ret i32 %result +} + +; Verify that fptosi(42.1) isn't simplified when the exception behavior is +; strict (the conversion is inexact, so folding could hide an FP exception). +; CHECK-LABEL: f19 +; CHECK: call i32 @llvm.experimental.constrained.fptosi +define i32 @f19() { +entry: + %result = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double 42.1, + metadata !"fpexcept.strict") + ret i32 %result +} + ; Verify that fptrunc(42.1) isn't simplified when the rounding mode is ; unknown. 
; CHECK-LABEL: f20 @@ -284,5 +307,7 @@ declare double @llvm.experimental.constrained.log2.f64(double, metadata, metadat declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata) +declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata) +declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata) declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)