1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 12:12:47 +01:00

[X86] ReplaceNodeResults - fp_to_sint/uint - manually widen v2i32 results to let us add AssertSext/AssertZext

It's proving tricky to move this to the generic legalizer code, so manually insert the v2i32 subvector into v4i32, insert the AssertSext/AssertZext node, then extract the subvector again.

This avoids masks in the truncation/pack code, which means we avoid a PSHUFB in the fp_to_sint/uint code for sub-128-bit types (specific targets can still combine the packs to a pshufb if they have fast variable per-lane shuffles).

This was noticed when I was trying to improve fp_to_sint/uint costs with D103695 (and some targets had very high fp_to_sint costs due to the PSHUFB), so we can then update the fp_to_uint codegen from D89697.
This commit is contained in:
Simon Pilgrim 2021-07-09 11:48:25 +01:00
parent 974341bf3b
commit eeea5b4c4a
4 changed files with 109 additions and 88 deletions

View File

@ -30749,12 +30749,19 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
} else } else
Res = DAG.getNode(ISD::FP_TO_SINT, dl, PromoteVT, Src); Res = DAG.getNode(ISD::FP_TO_SINT, dl, PromoteVT, Src);
// Preserve what we know about the size of the original result. Except // Preserve what we know about the size of the original result. If the
// when the result is v2i32 since we can't widen the assert. // result is v2i32, we have to manually widen the assert.
if (PromoteVT != MVT::v2i32) if (PromoteVT == MVT::v2i32)
Res = DAG.getNode(!IsSigned ? ISD::AssertZext : ISD::AssertSext, Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Res,
dl, PromoteVT, Res, DAG.getUNDEF(MVT::v2i32));
DAG.getValueType(VT.getVectorElementType()));
Res = DAG.getNode(!IsSigned ? ISD::AssertZext : ISD::AssertSext, dl,
Res.getValueType(), Res,
DAG.getValueType(VT.getVectorElementType()));
if (PromoteVT == MVT::v2i32)
Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
DAG.getIntPtrConstant(0, dl));
// Truncate back to the original width. // Truncate back to the original width.
Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res); Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);

View File

@ -1829,43 +1829,43 @@ define <2 x i16> @strict_vector_fptosi_v2f64_to_v2i16(<2 x double> %a) #0 {
; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i16: ; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
; SSE-32: # %bb.0: ; SSE-32: # %bb.0:
; SSE-32-NEXT: cvttpd2dq %xmm0, %xmm0 ; SSE-32-NEXT: cvttpd2dq %xmm0, %xmm0
; SSE-32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; SSE-32-NEXT: packssdw %xmm0, %xmm0
; SSE-32-NEXT: retl ; SSE-32-NEXT: retl
; ;
; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i16: ; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
; SSE-64: # %bb.0: ; SSE-64: # %bb.0:
; SSE-64-NEXT: cvttpd2dq %xmm0, %xmm0 ; SSE-64-NEXT: cvttpd2dq %xmm0, %xmm0
; SSE-64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; SSE-64-NEXT: packssdw %xmm0, %xmm0
; SSE-64-NEXT: retq ; SSE-64-NEXT: retq
; ;
; AVX-LABEL: strict_vector_fptosi_v2f64_to_v2i16: ; AVX-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
; AVX: # %bb.0: ; AVX: # %bb.0:
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}} ; AVX-NEXT: ret{{[l|q]}}
; ;
; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i16: ; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
; AVX512F: # %bb.0: ; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0 ; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}} ; AVX512F-NEXT: ret{{[l|q]}}
; ;
; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i16: ; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
; AVX512VL: # %bb.0: ; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0 ; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512VL-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: ret{{[l|q]}} ; AVX512VL-NEXT: ret{{[l|q]}}
; ;
; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i16: ; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
; AVX512DQ: # %bb.0: ; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0 ; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512DQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}} ; AVX512DQ-NEXT: ret{{[l|q]}}
; ;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i16: ; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
; AVX512VLDQ: # %bb.0: ; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0 ; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512VLDQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512VLDQ-NEXT: ret{{[l|q]}} ; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f64(<2 x double> %a, %ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f64(<2 x double> %a,
metadata !"fpexcept.strict") #0 metadata !"fpexcept.strict") #0
@ -1888,31 +1888,31 @@ define <2 x i16> @strict_vector_fptoui_v2f64_to_v2i16(<2 x double> %a) #0 {
; AVX-LABEL: strict_vector_fptoui_v2f64_to_v2i16: ; AVX-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
; AVX: # %bb.0: ; AVX: # %bb.0:
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}} ; AVX-NEXT: ret{{[l|q]}}
; ;
; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i16: ; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
; AVX512F: # %bb.0: ; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0 ; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}} ; AVX512F-NEXT: ret{{[l|q]}}
; ;
; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i16: ; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
; AVX512VL: # %bb.0: ; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0 ; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512VL-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: ret{{[l|q]}} ; AVX512VL-NEXT: ret{{[l|q]}}
; ;
; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i16: ; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
; AVX512DQ: # %bb.0: ; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0 ; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512DQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}} ; AVX512DQ-NEXT: ret{{[l|q]}}
; ;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i16: ; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
; AVX512VLDQ: # %bb.0: ; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0 ; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512VLDQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX512VLDQ-NEXT: ret{{[l|q]}} ; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f64(<2 x double> %a, %ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f64(<2 x double> %a,
metadata !"fpexcept.strict") #0 metadata !"fpexcept.strict") #0
@ -1924,49 +1924,49 @@ define <2 x i16> @strict_vector_fptosi_v2f32_to_v2i16(<2 x float> %a) #0 {
; SSE-32: # %bb.0: ; SSE-32: # %bb.0:
; SSE-32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero ; SSE-32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0 ; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0
; SSE-32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; SSE-32-NEXT: packssdw %xmm0, %xmm0
; SSE-32-NEXT: retl ; SSE-32-NEXT: retl
; ;
; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i16: ; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
; SSE-64: # %bb.0: ; SSE-64: # %bb.0:
; SSE-64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero ; SSE-64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0 ; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0
; SSE-64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; SSE-64-NEXT: packssdw %xmm0, %xmm0
; SSE-64-NEXT: retq ; SSE-64-NEXT: retq
; ;
; AVX-LABEL: strict_vector_fptosi_v2f32_to_v2i16: ; AVX-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
; AVX: # %bb.0: ; AVX: # %bb.0:
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}} ; AVX-NEXT: ret{{[l|q]}}
; ;
; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i16: ; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
; AVX512F: # %bb.0: ; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0 ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}} ; AVX512F-NEXT: ret{{[l|q]}}
; ;
; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i16: ; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
; AVX512VL: # %bb.0: ; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0 ; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512VL-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: ret{{[l|q]}} ; AVX512VL-NEXT: ret{{[l|q]}}
; ;
; AVX512DQ-LABEL: strict_vector_fptosi_v2f32_to_v2i16: ; AVX512DQ-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
; AVX512DQ: # %bb.0: ; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0 ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512DQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}} ; AVX512DQ-NEXT: ret{{[l|q]}}
; ;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i16: ; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
; AVX512VLDQ: # %bb.0: ; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512VLDQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 ; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512VLDQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512VLDQ-NEXT: ret{{[l|q]}} ; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f32(<2 x float> %a, %ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f32(<2 x float> %a,
metadata !"fpexcept.strict") #0 metadata !"fpexcept.strict") #0
@ -1992,35 +1992,35 @@ define <2 x i16> @strict_vector_fptoui_v2f32_to_v2i16(<2 x float> %a) #0 {
; AVX: # %bb.0: ; AVX: # %bb.0:
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}} ; AVX-NEXT: ret{{[l|q]}}
; ;
; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i16: ; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
; AVX512F: # %bb.0: ; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0 ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}} ; AVX512F-NEXT: ret{{[l|q]}}
; ;
; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i16: ; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
; AVX512VL: # %bb.0: ; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0 ; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512VL-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: ret{{[l|q]}} ; AVX512VL-NEXT: ret{{[l|q]}}
; ;
; AVX512DQ-LABEL: strict_vector_fptoui_v2f32_to_v2i16: ; AVX512DQ-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
; AVX512DQ: # %bb.0: ; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0 ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512DQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}} ; AVX512DQ-NEXT: ret{{[l|q]}}
; ;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i16: ; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
; AVX512VLDQ: # %bb.0: ; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512VLDQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 ; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512VLDQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX512VLDQ-NEXT: ret{{[l|q]}} ; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f32(<2 x float> %a, %ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f32(<2 x float> %a,
metadata !"fpexcept.strict") #0 metadata !"fpexcept.strict") #0
@ -2031,29 +2031,29 @@ define <2 x i8> @strict_vector_fptosi_v2f64_to_v2i8(<2 x double> %a) #0 {
; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i8: ; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
; SSE-32: # %bb.0: ; SSE-32: # %bb.0:
; SSE-32-NEXT: cvttpd2dq %xmm0, %xmm0 ; SSE-32-NEXT: cvttpd2dq %xmm0, %xmm0
; SSE-32-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 ; SSE-32-NEXT: packssdw %xmm0, %xmm0
; SSE-32-NEXT: packuswb %xmm0, %xmm0 ; SSE-32-NEXT: packsswb %xmm0, %xmm0
; SSE-32-NEXT: packuswb %xmm0, %xmm0
; SSE-32-NEXT: retl ; SSE-32-NEXT: retl
; ;
; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i8: ; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
; SSE-64: # %bb.0: ; SSE-64: # %bb.0:
; SSE-64-NEXT: cvttpd2dq %xmm0, %xmm0 ; SSE-64-NEXT: cvttpd2dq %xmm0, %xmm0
; SSE-64-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE-64-NEXT: packssdw %xmm0, %xmm0
; SSE-64-NEXT: packuswb %xmm0, %xmm0 ; SSE-64-NEXT: packsswb %xmm0, %xmm0
; SSE-64-NEXT: packuswb %xmm0, %xmm0
; SSE-64-NEXT: retq ; SSE-64-NEXT: retq
; ;
; AVX-LABEL: strict_vector_fptosi_v2f64_to_v2i8: ; AVX-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
; AVX: # %bb.0: ; AVX: # %bb.0:
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}} ; AVX-NEXT: ret{{[l|q]}}
; ;
; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i8: ; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
; AVX512F: # %bb.0: ; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0 ; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}} ; AVX512F-NEXT: ret{{[l|q]}}
; ;
; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i8: ; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
@ -2065,7 +2065,8 @@ define <2 x i8> @strict_vector_fptosi_v2f64_to_v2i8(<2 x double> %a) #0 {
; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i8: ; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
; AVX512DQ: # %bb.0: ; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0 ; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}} ; AVX512DQ-NEXT: ret{{[l|q]}}
; ;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i8: ; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
@ -2082,7 +2083,6 @@ define <2 x i8> @strict_vector_fptoui_v2f64_to_v2i8(<2 x double> %a) #0 {
; SSE-32-LABEL: strict_vector_fptoui_v2f64_to_v2i8: ; SSE-32-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
; SSE-32: # %bb.0: ; SSE-32: # %bb.0:
; SSE-32-NEXT: cvttpd2dq %xmm0, %xmm0 ; SSE-32-NEXT: cvttpd2dq %xmm0, %xmm0
; SSE-32-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; SSE-32-NEXT: packuswb %xmm0, %xmm0 ; SSE-32-NEXT: packuswb %xmm0, %xmm0
; SSE-32-NEXT: packuswb %xmm0, %xmm0 ; SSE-32-NEXT: packuswb %xmm0, %xmm0
; SSE-32-NEXT: retl ; SSE-32-NEXT: retl
@ -2090,7 +2090,6 @@ define <2 x i8> @strict_vector_fptoui_v2f64_to_v2i8(<2 x double> %a) #0 {
; SSE-64-LABEL: strict_vector_fptoui_v2f64_to_v2i8: ; SSE-64-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
; SSE-64: # %bb.0: ; SSE-64: # %bb.0:
; SSE-64-NEXT: cvttpd2dq %xmm0, %xmm0 ; SSE-64-NEXT: cvttpd2dq %xmm0, %xmm0
; SSE-64-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-64-NEXT: packuswb %xmm0, %xmm0 ; SSE-64-NEXT: packuswb %xmm0, %xmm0
; SSE-64-NEXT: packuswb %xmm0, %xmm0 ; SSE-64-NEXT: packuswb %xmm0, %xmm0
; SSE-64-NEXT: retq ; SSE-64-NEXT: retq
@ -2098,13 +2097,15 @@ define <2 x i8> @strict_vector_fptoui_v2f64_to_v2i8(<2 x double> %a) #0 {
; AVX-LABEL: strict_vector_fptoui_v2f64_to_v2i8: ; AVX-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
; AVX: # %bb.0: ; AVX: # %bb.0:
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}} ; AVX-NEXT: ret{{[l|q]}}
; ;
; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i8: ; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
; AVX512F: # %bb.0: ; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0 ; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}} ; AVX512F-NEXT: ret{{[l|q]}}
; ;
; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i8: ; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
@ -2116,7 +2117,8 @@ define <2 x i8> @strict_vector_fptoui_v2f64_to_v2i8(<2 x double> %a) #0 {
; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i8: ; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
; AVX512DQ: # %bb.0: ; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0 ; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}} ; AVX512DQ-NEXT: ret{{[l|q]}}
; ;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i8: ; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
@ -2134,32 +2136,32 @@ define <2 x i8> @strict_vector_fptosi_v2f32_to_v2i8(<2 x float> %a) #0 {
; SSE-32: # %bb.0: ; SSE-32: # %bb.0:
; SSE-32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero ; SSE-32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0 ; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0
; SSE-32-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 ; SSE-32-NEXT: packssdw %xmm0, %xmm0
; SSE-32-NEXT: packuswb %xmm0, %xmm0 ; SSE-32-NEXT: packsswb %xmm0, %xmm0
; SSE-32-NEXT: packuswb %xmm0, %xmm0
; SSE-32-NEXT: retl ; SSE-32-NEXT: retl
; ;
; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i8: ; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
; SSE-64: # %bb.0: ; SSE-64: # %bb.0:
; SSE-64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero ; SSE-64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0 ; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0
; SSE-64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE-64-NEXT: packssdw %xmm0, %xmm0
; SSE-64-NEXT: packuswb %xmm0, %xmm0 ; SSE-64-NEXT: packsswb %xmm0, %xmm0
; SSE-64-NEXT: packuswb %xmm0, %xmm0
; SSE-64-NEXT: retq ; SSE-64-NEXT: retq
; ;
; AVX-LABEL: strict_vector_fptosi_v2f32_to_v2i8: ; AVX-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
; AVX: # %bb.0: ; AVX: # %bb.0:
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}} ; AVX-NEXT: ret{{[l|q]}}
; ;
; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i8: ; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
; AVX512F: # %bb.0: ; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0 ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}} ; AVX512F-NEXT: ret{{[l|q]}}
; ;
; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i8: ; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
@ -2173,7 +2175,8 @@ define <2 x i8> @strict_vector_fptosi_v2f32_to_v2i8(<2 x float> %a) #0 {
; AVX512DQ: # %bb.0: ; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0 ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}} ; AVX512DQ-NEXT: ret{{[l|q]}}
; ;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i8: ; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
@ -2192,7 +2195,6 @@ define <2 x i8> @strict_vector_fptoui_v2f32_to_v2i8(<2 x float> %a) #0 {
; SSE-32: # %bb.0: ; SSE-32: # %bb.0:
; SSE-32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero ; SSE-32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0 ; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0
; SSE-32-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; SSE-32-NEXT: packuswb %xmm0, %xmm0 ; SSE-32-NEXT: packuswb %xmm0, %xmm0
; SSE-32-NEXT: packuswb %xmm0, %xmm0 ; SSE-32-NEXT: packuswb %xmm0, %xmm0
; SSE-32-NEXT: retl ; SSE-32-NEXT: retl
@ -2201,7 +2203,6 @@ define <2 x i8> @strict_vector_fptoui_v2f32_to_v2i8(<2 x float> %a) #0 {
; SSE-64: # %bb.0: ; SSE-64: # %bb.0:
; SSE-64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero ; SSE-64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0 ; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0
; SSE-64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-64-NEXT: packuswb %xmm0, %xmm0 ; SSE-64-NEXT: packuswb %xmm0, %xmm0
; SSE-64-NEXT: packuswb %xmm0, %xmm0 ; SSE-64-NEXT: packuswb %xmm0, %xmm0
; SSE-64-NEXT: retq ; SSE-64-NEXT: retq
@ -2210,14 +2211,16 @@ define <2 x i8> @strict_vector_fptoui_v2f32_to_v2i8(<2 x float> %a) #0 {
; AVX: # %bb.0: ; AVX: # %bb.0:
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}} ; AVX-NEXT: ret{{[l|q]}}
; ;
; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i8: ; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
; AVX512F: # %bb.0: ; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0 ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}} ; AVX512F-NEXT: ret{{[l|q]}}
; ;
; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i8: ; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
@ -2231,7 +2234,8 @@ define <2 x i8> @strict_vector_fptoui_v2f32_to_v2i8(<2 x float> %a) #0 {
; AVX512DQ: # %bb.0: ; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0 ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}} ; AVX512DQ-NEXT: ret{{[l|q]}}
; ;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i8: ; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i8:

View File

@ -67,7 +67,8 @@ define <2 x i8> @cvt_v2f32_v2i8(<2 x float> %src) {
; CHECK-LABEL: cvt_v2f32_v2i8: ; CHECK-LABEL: cvt_v2f32_v2i8:
; CHECK: ## %bb.0: ; CHECK: ## %bb.0:
; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0 ; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; CHECK-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retl ; CHECK-NEXT: retl
%res = fptosi <2 x float> %src to <2 x i8> %res = fptosi <2 x float> %src to <2 x i8>
ret <2 x i8> %res ret <2 x i8> %res
@ -77,7 +78,7 @@ define <2 x i16> @cvt_v2f32_v2i16(<2 x float> %src) {
; CHECK-LABEL: cvt_v2f32_v2i16: ; CHECK-LABEL: cvt_v2f32_v2i16:
; CHECK: ## %bb.0: ; CHECK: ## %bb.0:
; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0 ; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; CHECK-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retl ; CHECK-NEXT: retl
%res = fptosi <2 x float> %src to <2 x i16> %res = fptosi <2 x float> %src to <2 x i16>
ret <2 x i16> %res ret <2 x i16> %res
@ -96,7 +97,8 @@ define <2 x i8> @cvt_v2f32_v2u8(<2 x float> %src) {
; CHECK-LABEL: cvt_v2f32_v2u8: ; CHECK-LABEL: cvt_v2f32_v2u8:
; CHECK: ## %bb.0: ; CHECK: ## %bb.0:
; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0 ; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; CHECK-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retl ; CHECK-NEXT: retl
%res = fptoui <2 x float> %src to <2 x i8> %res = fptoui <2 x float> %src to <2 x i8>
ret <2 x i8> %res ret <2 x i8> %res
@ -106,7 +108,7 @@ define <2 x i16> @cvt_v2f32_v2u16(<2 x float> %src) {
; CHECK-LABEL: cvt_v2f32_v2u16: ; CHECK-LABEL: cvt_v2f32_v2u16:
; CHECK: ## %bb.0: ; CHECK: ## %bb.0:
; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0 ; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; CHECK-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retl ; CHECK-NEXT: retl
%res = fptoui <2 x float> %src to <2 x i16> %res = fptoui <2 x float> %src to <2 x i16>
ret <2 x i16> %res ret <2 x i16> %res

View File

@ -2297,21 +2297,22 @@ define <2 x i8> @fptosi_2f32_to_2i8(<2 x float> %a) {
; SSE-LABEL: fptosi_2f32_to_2i8: ; SSE-LABEL: fptosi_2f32_to_2i8:
; SSE: # %bb.0: ; SSE: # %bb.0:
; SSE-NEXT: cvttps2dq %xmm0, %xmm0 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE-NEXT: packssdw %xmm0, %xmm0
; SSE-NEXT: packuswb %xmm0, %xmm0 ; SSE-NEXT: packsswb %xmm0, %xmm0
; SSE-NEXT: packuswb %xmm0, %xmm0
; SSE-NEXT: retq ; SSE-NEXT: retq
; ;
; VEX-LABEL: fptosi_2f32_to_2i8: ; VEX-LABEL: fptosi_2f32_to_2i8:
; VEX: # %bb.0: ; VEX: # %bb.0:
; VEX-NEXT: vcvttps2dq %xmm0, %xmm0 ; VEX-NEXT: vcvttps2dq %xmm0, %xmm0
; VEX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; VEX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; VEX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; VEX-NEXT: retq ; VEX-NEXT: retq
; ;
; AVX512F-LABEL: fptosi_2f32_to_2i8: ; AVX512F-LABEL: fptosi_2f32_to_2i8:
; AVX512F: # %bb.0: ; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0 ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: retq ; AVX512F-NEXT: retq
; ;
; AVX512VL-LABEL: fptosi_2f32_to_2i8: ; AVX512VL-LABEL: fptosi_2f32_to_2i8:
@ -2323,7 +2324,8 @@ define <2 x i8> @fptosi_2f32_to_2i8(<2 x float> %a) {
; AVX512DQ-LABEL: fptosi_2f32_to_2i8: ; AVX512DQ-LABEL: fptosi_2f32_to_2i8:
; AVX512DQ: # %bb.0: ; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0 ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: retq ; AVX512DQ-NEXT: retq
; ;
; AVX512VLDQ-LABEL: fptosi_2f32_to_2i8: ; AVX512VLDQ-LABEL: fptosi_2f32_to_2i8:
@ -2339,13 +2341,13 @@ define <2 x i16> @fptosi_2f32_to_2i16(<2 x float> %a) {
; SSE-LABEL: fptosi_2f32_to_2i16: ; SSE-LABEL: fptosi_2f32_to_2i16:
; SSE: # %bb.0: ; SSE: # %bb.0:
; SSE-NEXT: cvttps2dq %xmm0, %xmm0 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; SSE-NEXT: packssdw %xmm0, %xmm0
; SSE-NEXT: retq ; SSE-NEXT: retq
; ;
; AVX-LABEL: fptosi_2f32_to_2i16: ; AVX-LABEL: fptosi_2f32_to_2i16:
; AVX: # %bb.0: ; AVX: # %bb.0:
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq ; AVX-NEXT: retq
%cvt = fptosi <2 x float> %a to <2 x i16> %cvt = fptosi <2 x float> %a to <2 x i16>
ret <2 x i16> %cvt ret <2 x i16> %cvt
@ -2355,7 +2357,6 @@ define <2 x i8> @fptoui_2f32_to_2i8(<2 x float> %a) {
; SSE-LABEL: fptoui_2f32_to_2i8: ; SSE-LABEL: fptoui_2f32_to_2i8:
; SSE: # %bb.0: ; SSE: # %bb.0:
; SSE-NEXT: cvttps2dq %xmm0, %xmm0 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: packuswb %xmm0, %xmm0 ; SSE-NEXT: packuswb %xmm0, %xmm0
; SSE-NEXT: packuswb %xmm0, %xmm0 ; SSE-NEXT: packuswb %xmm0, %xmm0
; SSE-NEXT: retq ; SSE-NEXT: retq
@ -2363,13 +2364,15 @@ define <2 x i8> @fptoui_2f32_to_2i8(<2 x float> %a) {
; VEX-LABEL: fptoui_2f32_to_2i8: ; VEX-LABEL: fptoui_2f32_to_2i8:
; VEX: # %bb.0: ; VEX: # %bb.0:
; VEX-NEXT: vcvttps2dq %xmm0, %xmm0 ; VEX-NEXT: vcvttps2dq %xmm0, %xmm0
; VEX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; VEX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; VEX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; VEX-NEXT: retq ; VEX-NEXT: retq
; ;
; AVX512F-LABEL: fptoui_2f32_to_2i8: ; AVX512F-LABEL: fptoui_2f32_to_2i8:
; AVX512F: # %bb.0: ; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0 ; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: retq ; AVX512F-NEXT: retq
; ;
; AVX512VL-LABEL: fptoui_2f32_to_2i8: ; AVX512VL-LABEL: fptoui_2f32_to_2i8:
@ -2381,7 +2384,8 @@ define <2 x i8> @fptoui_2f32_to_2i8(<2 x float> %a) {
; AVX512DQ-LABEL: fptoui_2f32_to_2i8: ; AVX512DQ-LABEL: fptoui_2f32_to_2i8:
; AVX512DQ: # %bb.0: ; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0 ; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: retq ; AVX512DQ-NEXT: retq
; ;
; AVX512VLDQ-LABEL: fptoui_2f32_to_2i8: ; AVX512VLDQ-LABEL: fptoui_2f32_to_2i8:
@ -2403,7 +2407,7 @@ define <2 x i16> @fptoui_2f32_to_2i16(<2 x float> %a) {
; AVX-LABEL: fptoui_2f32_to_2i16: ; AVX-LABEL: fptoui_2f32_to_2i16:
; AVX: # %bb.0: ; AVX: # %bb.0:
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq ; AVX-NEXT: retq
%cvt = fptoui <2 x float> %a to <2 x i16> %cvt = fptoui <2 x float> %a to <2 x i16>
ret <2 x i16> %cvt ret <2 x i16> %cvt
@ -2413,21 +2417,22 @@ define <2 x i8> @fptosi_2f64_to_2i8(<2 x double> %a) {
; SSE-LABEL: fptosi_2f64_to_2i8: ; SSE-LABEL: fptosi_2f64_to_2i8:
; SSE: # %bb.0: ; SSE: # %bb.0:
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
; SSE-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE-NEXT: packssdw %xmm0, %xmm0
; SSE-NEXT: packuswb %xmm0, %xmm0 ; SSE-NEXT: packsswb %xmm0, %xmm0
; SSE-NEXT: packuswb %xmm0, %xmm0
; SSE-NEXT: retq ; SSE-NEXT: retq
; ;
; VEX-LABEL: fptosi_2f64_to_2i8: ; VEX-LABEL: fptosi_2f64_to_2i8:
; VEX: # %bb.0: ; VEX: # %bb.0:
; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0 ; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0
; VEX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; VEX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; VEX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; VEX-NEXT: retq ; VEX-NEXT: retq
; ;
; AVX512F-LABEL: fptosi_2f64_to_2i8: ; AVX512F-LABEL: fptosi_2f64_to_2i8:
; AVX512F: # %bb.0: ; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0 ; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: retq ; AVX512F-NEXT: retq
; ;
; AVX512VL-LABEL: fptosi_2f64_to_2i8: ; AVX512VL-LABEL: fptosi_2f64_to_2i8:
@ -2439,7 +2444,8 @@ define <2 x i8> @fptosi_2f64_to_2i8(<2 x double> %a) {
; AVX512DQ-LABEL: fptosi_2f64_to_2i8: ; AVX512DQ-LABEL: fptosi_2f64_to_2i8:
; AVX512DQ: # %bb.0: ; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0 ; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: retq ; AVX512DQ-NEXT: retq
; ;
; AVX512VLDQ-LABEL: fptosi_2f64_to_2i8: ; AVX512VLDQ-LABEL: fptosi_2f64_to_2i8:
@ -2455,13 +2461,13 @@ define <2 x i16> @fptosi_2f64_to_2i16(<2 x double> %a) {
; SSE-LABEL: fptosi_2f64_to_2i16: ; SSE-LABEL: fptosi_2f64_to_2i16:
; SSE: # %bb.0: ; SSE: # %bb.0:
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; SSE-NEXT: packssdw %xmm0, %xmm0
; SSE-NEXT: retq ; SSE-NEXT: retq
; ;
; AVX-LABEL: fptosi_2f64_to_2i16: ; AVX-LABEL: fptosi_2f64_to_2i16:
; AVX: # %bb.0: ; AVX: # %bb.0:
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq ; AVX-NEXT: retq
%cvt = fptosi <2 x double> %a to <2 x i16> %cvt = fptosi <2 x double> %a to <2 x i16>
ret <2 x i16> %cvt ret <2 x i16> %cvt
@ -2471,7 +2477,6 @@ define <2 x i8> @fptoui_2f64_to_2i8(<2 x double> %a) {
; SSE-LABEL: fptoui_2f64_to_2i8: ; SSE-LABEL: fptoui_2f64_to_2i8:
; SSE: # %bb.0: ; SSE: # %bb.0:
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
; SSE-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: packuswb %xmm0, %xmm0 ; SSE-NEXT: packuswb %xmm0, %xmm0
; SSE-NEXT: packuswb %xmm0, %xmm0 ; SSE-NEXT: packuswb %xmm0, %xmm0
; SSE-NEXT: retq ; SSE-NEXT: retq
@ -2479,13 +2484,15 @@ define <2 x i8> @fptoui_2f64_to_2i8(<2 x double> %a) {
; VEX-LABEL: fptoui_2f64_to_2i8: ; VEX-LABEL: fptoui_2f64_to_2i8:
; VEX: # %bb.0: ; VEX: # %bb.0:
; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0 ; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0
; VEX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; VEX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; VEX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; VEX-NEXT: retq ; VEX-NEXT: retq
; ;
; AVX512F-LABEL: fptoui_2f64_to_2i8: ; AVX512F-LABEL: fptoui_2f64_to_2i8:
; AVX512F: # %bb.0: ; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0 ; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: retq ; AVX512F-NEXT: retq
; ;
; AVX512VL-LABEL: fptoui_2f64_to_2i8: ; AVX512VL-LABEL: fptoui_2f64_to_2i8:
@ -2497,7 +2504,8 @@ define <2 x i8> @fptoui_2f64_to_2i8(<2 x double> %a) {
; AVX512DQ-LABEL: fptoui_2f64_to_2i8: ; AVX512DQ-LABEL: fptoui_2f64_to_2i8:
; AVX512DQ: # %bb.0: ; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0 ; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: retq ; AVX512DQ-NEXT: retq
; ;
; AVX512VLDQ-LABEL: fptoui_2f64_to_2i8: ; AVX512VLDQ-LABEL: fptoui_2f64_to_2i8:
@ -2519,7 +2527,7 @@ define <2 x i16> @fptoui_2f64_to_2i16(<2 x double> %a) {
; AVX-LABEL: fptoui_2f64_to_2i16: ; AVX-LABEL: fptoui_2f64_to_2i16:
; AVX: # %bb.0: ; AVX: # %bb.0:
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 ; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq ; AVX-NEXT: retq
%cvt = fptoui <2 x double> %a to <2 x i16> %cvt = fptoui <2 x double> %a to <2 x i16>
ret <2 x i16> %cvt ret <2 x i16> %cvt