From eeea5b4c4ac728e7aba7f13bc76a9be441861aac Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 9 Jul 2021 11:48:25 +0100 Subject: [PATCH] [X86] ReplaceNodeResults - fp_to_sint/uint - manually widen v2i32 results to let us add AssertSext/AssertZext It's proving tricky to move this to the generic legalizer code, so manually insert the v2i32 subvector into v4i32, insert the AssertSext/AssertZext node, then extract the subvector again. This avoids masks in the truncation/pack code, which means we avoid a PSHUFB in the fp_to_sint/uint code for sub-128 bit types (specific targets can still combine the packs to a pshufb if they have fast variable per-lane shuffles). This was noticed when I was trying to improve fp_to_sint/uint costs with D103695 (and some targets had very high fp_to_sint costs due to the PSHUFB), so we can then update the fp_to_uint codegen from D89697. --- lib/Target/X86/X86ISelLowering.cpp | 19 ++-- test/CodeGen/X86/vec-strict-fptoint-128.ll | 108 +++++++++++---------- test/CodeGen/X86/vec_cast3.ll | 10 +- test/CodeGen/X86/vec_fp_to_int.ll | 60 +++++++----- 4 files changed, 109 insertions(+), 88 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 35b4f62abb3..0410a692331 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -30749,12 +30749,19 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, } else Res = DAG.getNode(ISD::FP_TO_SINT, dl, PromoteVT, Src); - // Preserve what we know about the size of the original result. Except - // when the result is v2i32 since we can't widen the assert. - if (PromoteVT != MVT::v2i32) - Res = DAG.getNode(!IsSigned ? ISD::AssertZext : ISD::AssertSext, - dl, PromoteVT, Res, - DAG.getValueType(VT.getVectorElementType())); + // Preserve what we know about the size of the original result. If the + // result is v2i32, we have to manually widen the assert. 
+ if (PromoteVT == MVT::v2i32) + Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Res, + DAG.getUNDEF(MVT::v2i32)); + + Res = DAG.getNode(!IsSigned ? ISD::AssertZext : ISD::AssertSext, dl, + Res.getValueType(), Res, + DAG.getValueType(VT.getVectorElementType())); + + if (PromoteVT == MVT::v2i32) + Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, + DAG.getIntPtrConstant(0, dl)); // Truncate back to the original width. Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res); diff --git a/test/CodeGen/X86/vec-strict-fptoint-128.ll b/test/CodeGen/X86/vec-strict-fptoint-128.ll index 8e790e97288..7fae417a169 100644 --- a/test/CodeGen/X86/vec-strict-fptoint-128.ll +++ b/test/CodeGen/X86/vec-strict-fptoint-128.ll @@ -1829,43 +1829,43 @@ define <2 x i16> @strict_vector_fptosi_v2f64_to_v2i16(<2 x double> %a) #0 { ; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i16: ; SSE-32: # %bb.0: ; SSE-32-NEXT: cvttpd2dq %xmm0, %xmm0 -; SSE-32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSE-32-NEXT: packssdw %xmm0, %xmm0 ; SSE-32-NEXT: retl ; ; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i16: ; SSE-64: # %bb.0: ; SSE-64-NEXT: cvttpd2dq %xmm0, %xmm0 -; SSE-64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSE-64-NEXT: packssdw %xmm0, %xmm0 ; SSE-64-NEXT: retq ; ; AVX-LABEL: strict_vector_fptosi_v2f64_to_v2i16: ; AVX: # %bb.0: ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 ; AVX-NEXT: ret{{[l|q]}} ; ; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 ; AVX512F-NEXT: ret{{[l|q]}} ; ; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i16: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX512VL-NEXT: vpackssdw %xmm0, 
%xmm0, %xmm0 ; AVX512VL-NEXT: ret{{[l|q]}} ; ; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i16: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX512DQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 ; AVX512DQ-NEXT: ret{{[l|q]}} ; ; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i16: ; AVX512VLDQ: # %bb.0: ; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX512VLDQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX512VLDQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 ; AVX512VLDQ-NEXT: ret{{[l|q]}} %ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f64(<2 x double> %a, metadata !"fpexcept.strict") #0 @@ -1888,31 +1888,31 @@ define <2 x i16> @strict_vector_fptoui_v2f64_to_v2i16(<2 x double> %a) #0 { ; AVX-LABEL: strict_vector_fptoui_v2f64_to_v2i16: ; AVX: # %bb.0: ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 ; AVX-NEXT: ret{{[l|q]}} ; ; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 ; AVX512F-NEXT: ret{{[l|q]}} ; ; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i16: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX512VL-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: ret{{[l|q]}} ; ; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i16: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX512DQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 ; AVX512DQ-NEXT: ret{{[l|q]}} ; ; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i16: ; AVX512VLDQ: # %bb.0: ; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX512VLDQ-NEXT: vpshuflw {{.*#+}} xmm0 = 
xmm0[0,2,2,3,4,5,6,7] +; AVX512VLDQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 ; AVX512VLDQ-NEXT: ret{{[l|q]}} %ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f64(<2 x double> %a, metadata !"fpexcept.strict") #0 @@ -1924,49 +1924,49 @@ define <2 x i16> @strict_vector_fptosi_v2f32_to_v2i16(<2 x float> %a) #0 { ; SSE-32: # %bb.0: ; SSE-32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero ; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0 -; SSE-32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSE-32-NEXT: packssdw %xmm0, %xmm0 ; SSE-32-NEXT: retl ; ; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i16: ; SSE-64: # %bb.0: ; SSE-64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero ; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0 -; SSE-64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSE-64-NEXT: packssdw %xmm0, %xmm0 ; SSE-64-NEXT: retq ; ; AVX-LABEL: strict_vector_fptosi_v2f32_to_v2i16: ; AVX: # %bb.0: ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 ; AVX-NEXT: ret{{[l|q]}} ; ; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 ; AVX512F-NEXT: ret{{[l|q]}} ; ; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i16: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX512VL-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: ret{{[l|q]}} ; ; AVX512DQ-LABEL: strict_vector_fptosi_v2f32_to_v2i16: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512DQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX512DQ-NEXT: vpackssdw 
%xmm0, %xmm0, %xmm0 ; AVX512DQ-NEXT: ret{{[l|q]}} ; ; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i16: ; AVX512VLDQ: # %bb.0: ; AVX512VLDQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512VLDQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX512VLDQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 ; AVX512VLDQ-NEXT: ret{{[l|q]}} %ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f32(<2 x float> %a, metadata !"fpexcept.strict") #0 @@ -1992,35 +1992,35 @@ define <2 x i16> @strict_vector_fptoui_v2f32_to_v2i16(<2 x float> %a) #0 { ; AVX: # %bb.0: ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 ; AVX-NEXT: ret{{[l|q]}} ; ; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 ; AVX512F-NEXT: ret{{[l|q]}} ; ; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i16: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX512VL-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: ret{{[l|q]}} ; ; AVX512DQ-LABEL: strict_vector_fptoui_v2f32_to_v2i16: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512DQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 ; AVX512DQ-NEXT: ret{{[l|q]}} ; ; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i16: ; AVX512VLDQ: # %bb.0: ; AVX512VLDQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512VLDQ-NEXT: vpshuflw {{.*#+}} xmm0 = 
xmm0[0,2,2,3,4,5,6,7] +; AVX512VLDQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 ; AVX512VLDQ-NEXT: ret{{[l|q]}} %ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f32(<2 x float> %a, metadata !"fpexcept.strict") #0 @@ -2031,29 +2031,29 @@ define <2 x i8> @strict_vector_fptosi_v2f64_to_v2i8(<2 x double> %a) #0 { ; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i8: ; SSE-32: # %bb.0: ; SSE-32-NEXT: cvttpd2dq %xmm0, %xmm0 -; SSE-32-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; SSE-32-NEXT: packuswb %xmm0, %xmm0 -; SSE-32-NEXT: packuswb %xmm0, %xmm0 +; SSE-32-NEXT: packssdw %xmm0, %xmm0 +; SSE-32-NEXT: packsswb %xmm0, %xmm0 ; SSE-32-NEXT: retl ; ; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i8: ; SSE-64: # %bb.0: ; SSE-64-NEXT: cvttpd2dq %xmm0, %xmm0 -; SSE-64-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE-64-NEXT: packuswb %xmm0, %xmm0 -; SSE-64-NEXT: packuswb %xmm0, %xmm0 +; SSE-64-NEXT: packssdw %xmm0, %xmm0 +; SSE-64-NEXT: packsswb %xmm0, %xmm0 ; SSE-64-NEXT: retq ; ; AVX-LABEL: strict_vector_fptosi_v2f64_to_v2i8: ; AVX: # %bb.0: ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; AVX-NEXT: ret{{[l|q]}} ; ; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; AVX512F-NEXT: ret{{[l|q]}} ; ; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i8: @@ -2065,7 +2065,8 @@ define <2 x i8> @strict_vector_fptosi_v2f64_to_v2i8(<2 x double> %a) #0 { ; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i8: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, 
%xmm0 +; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; AVX512DQ-NEXT: ret{{[l|q]}} ; ; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i8: @@ -2082,7 +2083,6 @@ define <2 x i8> @strict_vector_fptoui_v2f64_to_v2i8(<2 x double> %a) #0 { ; SSE-32-LABEL: strict_vector_fptoui_v2f64_to_v2i8: ; SSE-32: # %bb.0: ; SSE-32-NEXT: cvttpd2dq %xmm0, %xmm0 -; SSE-32-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 ; SSE-32-NEXT: packuswb %xmm0, %xmm0 ; SSE-32-NEXT: packuswb %xmm0, %xmm0 ; SSE-32-NEXT: retl @@ -2090,7 +2090,6 @@ define <2 x i8> @strict_vector_fptoui_v2f64_to_v2i8(<2 x double> %a) #0 { ; SSE-64-LABEL: strict_vector_fptoui_v2f64_to_v2i8: ; SSE-64: # %bb.0: ; SSE-64-NEXT: cvttpd2dq %xmm0, %xmm0 -; SSE-64-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE-64-NEXT: packuswb %xmm0, %xmm0 ; SSE-64-NEXT: packuswb %xmm0, %xmm0 ; SSE-64-NEXT: retq @@ -2098,13 +2097,15 @@ define <2 x i8> @strict_vector_fptoui_v2f64_to_v2i8(<2 x double> %a) #0 { ; AVX-LABEL: strict_vector_fptoui_v2f64_to_v2i8: ; AVX: # %bb.0: ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 ; AVX-NEXT: ret{{[l|q]}} ; ; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 ; AVX512F-NEXT: ret{{[l|q]}} ; ; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i8: @@ -2116,7 +2117,8 @@ define <2 x i8> @strict_vector_fptoui_v2f64_to_v2i8(<2 x double> %a) #0 { ; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i8: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 
; AVX512DQ-NEXT: ret{{[l|q]}} ; ; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i8: @@ -2134,32 +2136,32 @@ define <2 x i8> @strict_vector_fptosi_v2f32_to_v2i8(<2 x float> %a) #0 { ; SSE-32: # %bb.0: ; SSE-32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero ; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0 -; SSE-32-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; SSE-32-NEXT: packuswb %xmm0, %xmm0 -; SSE-32-NEXT: packuswb %xmm0, %xmm0 +; SSE-32-NEXT: packssdw %xmm0, %xmm0 +; SSE-32-NEXT: packsswb %xmm0, %xmm0 ; SSE-32-NEXT: retl ; ; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i8: ; SSE-64: # %bb.0: ; SSE-64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero ; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0 -; SSE-64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE-64-NEXT: packuswb %xmm0, %xmm0 -; SSE-64-NEXT: packuswb %xmm0, %xmm0 +; SSE-64-NEXT: packssdw %xmm0, %xmm0 +; SSE-64-NEXT: packsswb %xmm0, %xmm0 ; SSE-64-NEXT: retq ; ; AVX-LABEL: strict_vector_fptosi_v2f32_to_v2i8: ; AVX: # %bb.0: ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; AVX-NEXT: ret{{[l|q]}} ; ; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; AVX512F-NEXT: ret{{[l|q]}} ; ; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i8: @@ -2173,7 +2175,8 @@ define <2 x i8> @strict_vector_fptosi_v2f32_to_v2i8(<2 x float> %a) #0 { ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, 
%xmm0 +; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; AVX512DQ-NEXT: ret{{[l|q]}} ; ; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i8: @@ -2192,7 +2195,6 @@ define <2 x i8> @strict_vector_fptoui_v2f32_to_v2i8(<2 x float> %a) #0 { ; SSE-32: # %bb.0: ; SSE-32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero ; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0 -; SSE-32-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 ; SSE-32-NEXT: packuswb %xmm0, %xmm0 ; SSE-32-NEXT: packuswb %xmm0, %xmm0 ; SSE-32-NEXT: retl @@ -2201,7 +2203,6 @@ define <2 x i8> @strict_vector_fptoui_v2f32_to_v2i8(<2 x float> %a) #0 { ; SSE-64: # %bb.0: ; SSE-64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero ; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0 -; SSE-64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE-64-NEXT: packuswb %xmm0, %xmm0 ; SSE-64-NEXT: packuswb %xmm0, %xmm0 ; SSE-64-NEXT: retq @@ -2210,14 +2211,16 @@ define <2 x i8> @strict_vector_fptoui_v2f32_to_v2i8(<2 x float> %a) #0 { ; AVX: # %bb.0: ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 ; AVX-NEXT: ret{{[l|q]}} ; ; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 ; AVX512F-NEXT: ret{{[l|q]}} ; ; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i8: @@ -2231,7 +2234,8 @@ define <2 x i8> @strict_vector_fptoui_v2f32_to_v2i8(<2 x float> %a) #0 { ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; 
AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 ; AVX512DQ-NEXT: ret{{[l|q]}} ; ; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i8: diff --git a/test/CodeGen/X86/vec_cast3.ll b/test/CodeGen/X86/vec_cast3.ll index 749188c5542..57911e1eb96 100644 --- a/test/CodeGen/X86/vec_cast3.ll +++ b/test/CodeGen/X86/vec_cast3.ll @@ -67,7 +67,8 @@ define <2 x i8> @cvt_v2f32_v2i8(<2 x float> %src) { ; CHECK-LABEL: cvt_v2f32_v2i8: ; CHECK: ## %bb.0: ; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0 -; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; CHECK-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: retl %res = fptosi <2 x float> %src to <2 x i8> ret <2 x i8> %res @@ -77,7 +78,7 @@ define <2 x i16> @cvt_v2f32_v2i16(<2 x float> %src) { ; CHECK-LABEL: cvt_v2f32_v2i16: ; CHECK: ## %bb.0: ; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0 -; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; CHECK-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: retl %res = fptosi <2 x float> %src to <2 x i16> ret <2 x i16> %res @@ -96,7 +97,8 @@ define <2 x i8> @cvt_v2f32_v2u8(<2 x float> %src) { ; CHECK-LABEL: cvt_v2f32_v2u8: ; CHECK: ## %bb.0: ; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0 -; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; CHECK-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: retl %res = fptoui <2 x float> %src to <2 x i8> ret <2 x i8> %res @@ -106,7 +108,7 @@ define <2 x i16> @cvt_v2f32_v2u16(<2 x float> %src) { ; CHECK-LABEL: cvt_v2f32_v2u16: ; CHECK: ## %bb.0: ; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0 -; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; CHECK-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: retl %res = fptoui <2 x float> %src to <2 x i16> ret <2 x i16> %res diff --git a/test/CodeGen/X86/vec_fp_to_int.ll b/test/CodeGen/X86/vec_fp_to_int.ll index dfbebfd6a8c..ff8e59c04c6 100644 --- a/test/CodeGen/X86/vec_fp_to_int.ll +++ 
b/test/CodeGen/X86/vec_fp_to_int.ll @@ -2297,21 +2297,22 @@ define <2 x i8> @fptosi_2f32_to_2i8(<2 x float> %a) { ; SSE-LABEL: fptosi_2f32_to_2i8: ; SSE: # %bb.0: ; SSE-NEXT: cvttps2dq %xmm0, %xmm0 -; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE-NEXT: packuswb %xmm0, %xmm0 -; SSE-NEXT: packuswb %xmm0, %xmm0 +; SSE-NEXT: packssdw %xmm0, %xmm0 +; SSE-NEXT: packsswb %xmm0, %xmm0 ; SSE-NEXT: retq ; ; VEX-LABEL: fptosi_2f32_to_2i8: ; VEX: # %bb.0: ; VEX-NEXT: vcvttps2dq %xmm0, %xmm0 -; VEX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; VEX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; VEX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; VEX-NEXT: retq ; ; AVX512F-LABEL: fptosi_2f32_to_2i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: fptosi_2f32_to_2i8: @@ -2323,7 +2324,8 @@ define <2 x i8> @fptosi_2f32_to_2i8(<2 x float> %a) { ; AVX512DQ-LABEL: fptosi_2f32_to_2i8: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; AVX512DQ-NEXT: retq ; ; AVX512VLDQ-LABEL: fptosi_2f32_to_2i8: @@ -2339,13 +2341,13 @@ define <2 x i16> @fptosi_2f32_to_2i16(<2 x float> %a) { ; SSE-LABEL: fptosi_2f32_to_2i16: ; SSE: # %bb.0: ; SSE-NEXT: cvttps2dq %xmm0, %xmm0 -; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSE-NEXT: packssdw %xmm0, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: fptosi_2f32_to_2i16: ; AVX: # %bb.0: ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq %cvt = fptosi <2 x float> %a to <2 x i16> ret <2 x i16> %cvt @@ -2355,7 +2357,6 @@ define 
<2 x i8> @fptoui_2f32_to_2i8(<2 x float> %a) { ; SSE-LABEL: fptoui_2f32_to_2i8: ; SSE: # %bb.0: ; SSE-NEXT: cvttps2dq %xmm0, %xmm0 -; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE-NEXT: packuswb %xmm0, %xmm0 ; SSE-NEXT: packuswb %xmm0, %xmm0 ; SSE-NEXT: retq @@ -2363,13 +2364,15 @@ define <2 x i8> @fptoui_2f32_to_2i8(<2 x float> %a) { ; VEX-LABEL: fptoui_2f32_to_2i8: ; VEX: # %bb.0: ; VEX-NEXT: vcvttps2dq %xmm0, %xmm0 -; VEX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; VEX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; VEX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 ; VEX-NEXT: retq ; ; AVX512F-LABEL: fptoui_2f32_to_2i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: fptoui_2f32_to_2i8: @@ -2381,7 +2384,8 @@ define <2 x i8> @fptoui_2f32_to_2i8(<2 x float> %a) { ; AVX512DQ-LABEL: fptoui_2f32_to_2i8: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 ; AVX512DQ-NEXT: retq ; ; AVX512VLDQ-LABEL: fptoui_2f32_to_2i8: @@ -2403,7 +2407,7 @@ define <2 x i16> @fptoui_2f32_to_2i16(<2 x float> %a) { ; AVX-LABEL: fptoui_2f32_to_2i16: ; AVX: # %bb.0: ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq %cvt = fptoui <2 x float> %a to <2 x i16> ret <2 x i16> %cvt @@ -2413,21 +2417,22 @@ define <2 x i8> @fptosi_2f64_to_2i8(<2 x double> %a) { ; SSE-LABEL: fptosi_2f64_to_2i8: ; SSE: # %bb.0: ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 -; SSE-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE-NEXT: packuswb %xmm0, %xmm0 -; SSE-NEXT: packuswb %xmm0, %xmm0 +; 
SSE-NEXT: packssdw %xmm0, %xmm0 +; SSE-NEXT: packsswb %xmm0, %xmm0 ; SSE-NEXT: retq ; ; VEX-LABEL: fptosi_2f64_to_2i8: ; VEX: # %bb.0: ; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0 -; VEX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; VEX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; VEX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; VEX-NEXT: retq ; ; AVX512F-LABEL: fptosi_2f64_to_2i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: fptosi_2f64_to_2i8: @@ -2439,7 +2444,8 @@ define <2 x i8> @fptosi_2f64_to_2i8(<2 x double> %a) { ; AVX512DQ-LABEL: fptosi_2f64_to_2i8: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; AVX512DQ-NEXT: retq ; ; AVX512VLDQ-LABEL: fptosi_2f64_to_2i8: @@ -2455,13 +2461,13 @@ define <2 x i16> @fptosi_2f64_to_2i16(<2 x double> %a) { ; SSE-LABEL: fptosi_2f64_to_2i16: ; SSE: # %bb.0: ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 -; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSE-NEXT: packssdw %xmm0, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: fptosi_2f64_to_2i16: ; AVX: # %bb.0: ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq %cvt = fptosi <2 x double> %a to <2 x i16> ret <2 x i16> %cvt @@ -2471,7 +2477,6 @@ define <2 x i8> @fptoui_2f64_to_2i8(<2 x double> %a) { ; SSE-LABEL: fptoui_2f64_to_2i8: ; SSE: # %bb.0: ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 -; SSE-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE-NEXT: packuswb %xmm0, %xmm0 ; SSE-NEXT: packuswb %xmm0, %xmm0 ; SSE-NEXT: retq @@ -2479,13 +2484,15 @@ define <2 x i8> 
@fptoui_2f64_to_2i8(<2 x double> %a) { ; VEX-LABEL: fptoui_2f64_to_2i8: ; VEX: # %bb.0: ; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0 -; VEX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; VEX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; VEX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 ; VEX-NEXT: retq ; ; AVX512F-LABEL: fptoui_2f64_to_2i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: fptoui_2f64_to_2i8: @@ -2497,7 +2504,8 @@ define <2 x i8> @fptoui_2f64_to_2i8(<2 x double> %a) { ; AVX512DQ-LABEL: fptoui_2f64_to_2i8: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 ; AVX512DQ-NEXT: retq ; ; AVX512VLDQ-LABEL: fptoui_2f64_to_2i8: @@ -2519,7 +2527,7 @@ define <2 x i16> @fptoui_2f64_to_2i16(<2 x double> %a) { ; AVX-LABEL: fptoui_2f64_to_2i16: ; AVX: # %bb.0: ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq %cvt = fptoui <2 x double> %a to <2 x i16> ret <2 x i16> %cvt