mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 12:12:47 +01:00
[X86] ReplaceNodeResults - fp_to_sint/uint - manually widen v2i32 results to let us add AssertSext/AssertZext
It's proving tricky to move this to the generic legalizer code, so manually insert the v2i32 subvector into v4i32, insert the AssertSext/AssertZext node, then extract the subvector again. This avoids masks in the truncation/pack code, which means we avoid a PSHUFB in the fp_to_sint/uint code for sub-128-bit types (specific targets can still combine the packs to a pshufb if they have fast variable per-lane shuffles). This was noticed when I was trying to improve fp_to_sint/uint costs with D103695 (and some targets had very high fp_to_sint costs due to the PSHUFB), so we can then update the fp_to_uint codegen from D89697.
This commit is contained in:
parent
974341bf3b
commit
eeea5b4c4a
@ -30749,13 +30749,20 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
|
|||||||
} else
|
} else
|
||||||
Res = DAG.getNode(ISD::FP_TO_SINT, dl, PromoteVT, Src);
|
Res = DAG.getNode(ISD::FP_TO_SINT, dl, PromoteVT, Src);
|
||||||
|
|
||||||
// Preserve what we know about the size of the original result. Except
|
// Preserve what we know about the size of the original result. If the
|
||||||
// when the result is v2i32 since we can't widen the assert.
|
// result is v2i32, we have to manually widen the assert.
|
||||||
if (PromoteVT != MVT::v2i32)
|
if (PromoteVT == MVT::v2i32)
|
||||||
Res = DAG.getNode(!IsSigned ? ISD::AssertZext : ISD::AssertSext,
|
Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Res,
|
||||||
dl, PromoteVT, Res,
|
DAG.getUNDEF(MVT::v2i32));
|
||||||
|
|
||||||
|
Res = DAG.getNode(!IsSigned ? ISD::AssertZext : ISD::AssertSext, dl,
|
||||||
|
Res.getValueType(), Res,
|
||||||
DAG.getValueType(VT.getVectorElementType()));
|
DAG.getValueType(VT.getVectorElementType()));
|
||||||
|
|
||||||
|
if (PromoteVT == MVT::v2i32)
|
||||||
|
Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
|
||||||
|
DAG.getIntPtrConstant(0, dl));
|
||||||
|
|
||||||
// Truncate back to the original width.
|
// Truncate back to the original width.
|
||||||
Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
|
Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
|
||||||
|
|
||||||
|
@ -1829,43 +1829,43 @@ define <2 x i16> @strict_vector_fptosi_v2f64_to_v2i16(<2 x double> %a) #0 {
|
|||||||
; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
|
; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
|
||||||
; SSE-32: # %bb.0:
|
; SSE-32: # %bb.0:
|
||||||
; SSE-32-NEXT: cvttpd2dq %xmm0, %xmm0
|
; SSE-32-NEXT: cvttpd2dq %xmm0, %xmm0
|
||||||
; SSE-32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
; SSE-32-NEXT: packssdw %xmm0, %xmm0
|
||||||
; SSE-32-NEXT: retl
|
; SSE-32-NEXT: retl
|
||||||
;
|
;
|
||||||
; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
|
; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
|
||||||
; SSE-64: # %bb.0:
|
; SSE-64: # %bb.0:
|
||||||
; SSE-64-NEXT: cvttpd2dq %xmm0, %xmm0
|
; SSE-64-NEXT: cvttpd2dq %xmm0, %xmm0
|
||||||
; SSE-64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
; SSE-64-NEXT: packssdw %xmm0, %xmm0
|
||||||
; SSE-64-NEXT: retq
|
; SSE-64-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
|
; AVX-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
|
||||||
; AVX: # %bb.0:
|
; AVX: # %bb.0:
|
||||||
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
|
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||||
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
|
||||||
; AVX-NEXT: ret{{[l|q]}}
|
; AVX-NEXT: ret{{[l|q]}}
|
||||||
;
|
;
|
||||||
; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
|
; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
|
||||||
; AVX512F: # %bb.0:
|
; AVX512F: # %bb.0:
|
||||||
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
|
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||||
; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
|
||||||
; AVX512F-NEXT: ret{{[l|q]}}
|
; AVX512F-NEXT: ret{{[l|q]}}
|
||||||
;
|
;
|
||||||
; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
|
; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
|
||||||
; AVX512VL: # %bb.0:
|
; AVX512VL: # %bb.0:
|
||||||
; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
|
; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||||
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
; AVX512VL-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
|
||||||
; AVX512VL-NEXT: ret{{[l|q]}}
|
; AVX512VL-NEXT: ret{{[l|q]}}
|
||||||
;
|
;
|
||||||
; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
|
; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
|
||||||
; AVX512DQ: # %bb.0:
|
; AVX512DQ: # %bb.0:
|
||||||
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
|
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||||
; AVX512DQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
|
||||||
; AVX512DQ-NEXT: ret{{[l|q]}}
|
; AVX512DQ-NEXT: ret{{[l|q]}}
|
||||||
;
|
;
|
||||||
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
|
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
|
||||||
; AVX512VLDQ: # %bb.0:
|
; AVX512VLDQ: # %bb.0:
|
||||||
; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0
|
; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||||
; AVX512VLDQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
; AVX512VLDQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
|
||||||
; AVX512VLDQ-NEXT: ret{{[l|q]}}
|
; AVX512VLDQ-NEXT: ret{{[l|q]}}
|
||||||
%ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f64(<2 x double> %a,
|
%ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f64(<2 x double> %a,
|
||||||
metadata !"fpexcept.strict") #0
|
metadata !"fpexcept.strict") #0
|
||||||
@ -1888,31 +1888,31 @@ define <2 x i16> @strict_vector_fptoui_v2f64_to_v2i16(<2 x double> %a) #0 {
|
|||||||
; AVX-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
|
; AVX-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
|
||||||
; AVX: # %bb.0:
|
; AVX: # %bb.0:
|
||||||
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
|
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||||
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
|
||||||
; AVX-NEXT: ret{{[l|q]}}
|
; AVX-NEXT: ret{{[l|q]}}
|
||||||
;
|
;
|
||||||
; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
|
; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
|
||||||
; AVX512F: # %bb.0:
|
; AVX512F: # %bb.0:
|
||||||
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
|
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||||
; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
|
||||||
; AVX512F-NEXT: ret{{[l|q]}}
|
; AVX512F-NEXT: ret{{[l|q]}}
|
||||||
;
|
;
|
||||||
; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
|
; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
|
||||||
; AVX512VL: # %bb.0:
|
; AVX512VL: # %bb.0:
|
||||||
; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
|
; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||||
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
; AVX512VL-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
|
||||||
; AVX512VL-NEXT: ret{{[l|q]}}
|
; AVX512VL-NEXT: ret{{[l|q]}}
|
||||||
;
|
;
|
||||||
; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
|
; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
|
||||||
; AVX512DQ: # %bb.0:
|
; AVX512DQ: # %bb.0:
|
||||||
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
|
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||||
; AVX512DQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
|
||||||
; AVX512DQ-NEXT: ret{{[l|q]}}
|
; AVX512DQ-NEXT: ret{{[l|q]}}
|
||||||
;
|
;
|
||||||
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
|
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
|
||||||
; AVX512VLDQ: # %bb.0:
|
; AVX512VLDQ: # %bb.0:
|
||||||
; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0
|
; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||||
; AVX512VLDQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
; AVX512VLDQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
|
||||||
; AVX512VLDQ-NEXT: ret{{[l|q]}}
|
; AVX512VLDQ-NEXT: ret{{[l|q]}}
|
||||||
%ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f64(<2 x double> %a,
|
%ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f64(<2 x double> %a,
|
||||||
metadata !"fpexcept.strict") #0
|
metadata !"fpexcept.strict") #0
|
||||||
@ -1924,49 +1924,49 @@ define <2 x i16> @strict_vector_fptosi_v2f32_to_v2i16(<2 x float> %a) #0 {
|
|||||||
; SSE-32: # %bb.0:
|
; SSE-32: # %bb.0:
|
||||||
; SSE-32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
|
; SSE-32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
|
||||||
; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0
|
; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0
|
||||||
; SSE-32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
; SSE-32-NEXT: packssdw %xmm0, %xmm0
|
||||||
; SSE-32-NEXT: retl
|
; SSE-32-NEXT: retl
|
||||||
;
|
;
|
||||||
; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
|
; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
|
||||||
; SSE-64: # %bb.0:
|
; SSE-64: # %bb.0:
|
||||||
; SSE-64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
|
; SSE-64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
|
||||||
; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0
|
; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0
|
||||||
; SSE-64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
; SSE-64-NEXT: packssdw %xmm0, %xmm0
|
||||||
; SSE-64-NEXT: retq
|
; SSE-64-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
|
; AVX-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
|
||||||
; AVX: # %bb.0:
|
; AVX: # %bb.0:
|
||||||
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||||
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
|
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||||
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
|
||||||
; AVX-NEXT: ret{{[l|q]}}
|
; AVX-NEXT: ret{{[l|q]}}
|
||||||
;
|
;
|
||||||
; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
|
; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
|
||||||
; AVX512F: # %bb.0:
|
; AVX512F: # %bb.0:
|
||||||
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||||
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
|
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||||
; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
|
||||||
; AVX512F-NEXT: ret{{[l|q]}}
|
; AVX512F-NEXT: ret{{[l|q]}}
|
||||||
;
|
;
|
||||||
; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
|
; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
|
||||||
; AVX512VL: # %bb.0:
|
; AVX512VL: # %bb.0:
|
||||||
; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||||
; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
|
; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||||
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
; AVX512VL-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
|
||||||
; AVX512VL-NEXT: ret{{[l|q]}}
|
; AVX512VL-NEXT: ret{{[l|q]}}
|
||||||
;
|
;
|
||||||
; AVX512DQ-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
|
; AVX512DQ-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
|
||||||
; AVX512DQ: # %bb.0:
|
; AVX512DQ: # %bb.0:
|
||||||
; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||||
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
|
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||||
; AVX512DQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
|
||||||
; AVX512DQ-NEXT: ret{{[l|q]}}
|
; AVX512DQ-NEXT: ret{{[l|q]}}
|
||||||
;
|
;
|
||||||
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
|
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
|
||||||
; AVX512VLDQ: # %bb.0:
|
; AVX512VLDQ: # %bb.0:
|
||||||
; AVX512VLDQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
; AVX512VLDQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||||
; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
|
; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||||
; AVX512VLDQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
; AVX512VLDQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
|
||||||
; AVX512VLDQ-NEXT: ret{{[l|q]}}
|
; AVX512VLDQ-NEXT: ret{{[l|q]}}
|
||||||
%ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f32(<2 x float> %a,
|
%ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f32(<2 x float> %a,
|
||||||
metadata !"fpexcept.strict") #0
|
metadata !"fpexcept.strict") #0
|
||||||
@ -1992,35 +1992,35 @@ define <2 x i16> @strict_vector_fptoui_v2f32_to_v2i16(<2 x float> %a) #0 {
|
|||||||
; AVX: # %bb.0:
|
; AVX: # %bb.0:
|
||||||
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||||
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
|
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||||
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
|
||||||
; AVX-NEXT: ret{{[l|q]}}
|
; AVX-NEXT: ret{{[l|q]}}
|
||||||
;
|
;
|
||||||
; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
|
; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
|
||||||
; AVX512F: # %bb.0:
|
; AVX512F: # %bb.0:
|
||||||
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||||
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
|
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||||
; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
|
||||||
; AVX512F-NEXT: ret{{[l|q]}}
|
; AVX512F-NEXT: ret{{[l|q]}}
|
||||||
;
|
;
|
||||||
; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
|
; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
|
||||||
; AVX512VL: # %bb.0:
|
; AVX512VL: # %bb.0:
|
||||||
; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||||
; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
|
; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||||
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
; AVX512VL-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
|
||||||
; AVX512VL-NEXT: ret{{[l|q]}}
|
; AVX512VL-NEXT: ret{{[l|q]}}
|
||||||
;
|
;
|
||||||
; AVX512DQ-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
|
; AVX512DQ-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
|
||||||
; AVX512DQ: # %bb.0:
|
; AVX512DQ: # %bb.0:
|
||||||
; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||||
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
|
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||||
; AVX512DQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
|
||||||
; AVX512DQ-NEXT: ret{{[l|q]}}
|
; AVX512DQ-NEXT: ret{{[l|q]}}
|
||||||
;
|
;
|
||||||
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
|
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
|
||||||
; AVX512VLDQ: # %bb.0:
|
; AVX512VLDQ: # %bb.0:
|
||||||
; AVX512VLDQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
; AVX512VLDQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||||
; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
|
; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||||
; AVX512VLDQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
; AVX512VLDQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
|
||||||
; AVX512VLDQ-NEXT: ret{{[l|q]}}
|
; AVX512VLDQ-NEXT: ret{{[l|q]}}
|
||||||
%ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f32(<2 x float> %a,
|
%ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f32(<2 x float> %a,
|
||||||
metadata !"fpexcept.strict") #0
|
metadata !"fpexcept.strict") #0
|
||||||
@ -2031,29 +2031,29 @@ define <2 x i8> @strict_vector_fptosi_v2f64_to_v2i8(<2 x double> %a) #0 {
|
|||||||
; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
|
; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
|
||||||
; SSE-32: # %bb.0:
|
; SSE-32: # %bb.0:
|
||||||
; SSE-32-NEXT: cvttpd2dq %xmm0, %xmm0
|
; SSE-32-NEXT: cvttpd2dq %xmm0, %xmm0
|
||||||
; SSE-32-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
; SSE-32-NEXT: packssdw %xmm0, %xmm0
|
||||||
; SSE-32-NEXT: packuswb %xmm0, %xmm0
|
; SSE-32-NEXT: packsswb %xmm0, %xmm0
|
||||||
; SSE-32-NEXT: packuswb %xmm0, %xmm0
|
|
||||||
; SSE-32-NEXT: retl
|
; SSE-32-NEXT: retl
|
||||||
;
|
;
|
||||||
; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
|
; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
|
||||||
; SSE-64: # %bb.0:
|
; SSE-64: # %bb.0:
|
||||||
; SSE-64-NEXT: cvttpd2dq %xmm0, %xmm0
|
; SSE-64-NEXT: cvttpd2dq %xmm0, %xmm0
|
||||||
; SSE-64-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
; SSE-64-NEXT: packssdw %xmm0, %xmm0
|
||||||
; SSE-64-NEXT: packuswb %xmm0, %xmm0
|
; SSE-64-NEXT: packsswb %xmm0, %xmm0
|
||||||
; SSE-64-NEXT: packuswb %xmm0, %xmm0
|
|
||||||
; SSE-64-NEXT: retq
|
; SSE-64-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
|
; AVX-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
|
||||||
; AVX: # %bb.0:
|
; AVX: # %bb.0:
|
||||||
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
|
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||||
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
|
||||||
|
; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
||||||
; AVX-NEXT: ret{{[l|q]}}
|
; AVX-NEXT: ret{{[l|q]}}
|
||||||
;
|
;
|
||||||
; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
|
; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
|
||||||
; AVX512F: # %bb.0:
|
; AVX512F: # %bb.0:
|
||||||
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
|
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||||
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
|
||||||
|
; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
||||||
; AVX512F-NEXT: ret{{[l|q]}}
|
; AVX512F-NEXT: ret{{[l|q]}}
|
||||||
;
|
;
|
||||||
; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
|
; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
|
||||||
@ -2065,7 +2065,8 @@ define <2 x i8> @strict_vector_fptosi_v2f64_to_v2i8(<2 x double> %a) #0 {
|
|||||||
; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
|
; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
|
||||||
; AVX512DQ: # %bb.0:
|
; AVX512DQ: # %bb.0:
|
||||||
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
|
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||||
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
|
||||||
|
; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
||||||
; AVX512DQ-NEXT: ret{{[l|q]}}
|
; AVX512DQ-NEXT: ret{{[l|q]}}
|
||||||
;
|
;
|
||||||
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
|
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
|
||||||
@ -2082,7 +2083,6 @@ define <2 x i8> @strict_vector_fptoui_v2f64_to_v2i8(<2 x double> %a) #0 {
|
|||||||
; SSE-32-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
|
; SSE-32-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
|
||||||
; SSE-32: # %bb.0:
|
; SSE-32: # %bb.0:
|
||||||
; SSE-32-NEXT: cvttpd2dq %xmm0, %xmm0
|
; SSE-32-NEXT: cvttpd2dq %xmm0, %xmm0
|
||||||
; SSE-32-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
|
||||||
; SSE-32-NEXT: packuswb %xmm0, %xmm0
|
; SSE-32-NEXT: packuswb %xmm0, %xmm0
|
||||||
; SSE-32-NEXT: packuswb %xmm0, %xmm0
|
; SSE-32-NEXT: packuswb %xmm0, %xmm0
|
||||||
; SSE-32-NEXT: retl
|
; SSE-32-NEXT: retl
|
||||||
@ -2090,7 +2090,6 @@ define <2 x i8> @strict_vector_fptoui_v2f64_to_v2i8(<2 x double> %a) #0 {
|
|||||||
; SSE-64-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
|
; SSE-64-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
|
||||||
; SSE-64: # %bb.0:
|
; SSE-64: # %bb.0:
|
||||||
; SSE-64-NEXT: cvttpd2dq %xmm0, %xmm0
|
; SSE-64-NEXT: cvttpd2dq %xmm0, %xmm0
|
||||||
; SSE-64-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
|
||||||
; SSE-64-NEXT: packuswb %xmm0, %xmm0
|
; SSE-64-NEXT: packuswb %xmm0, %xmm0
|
||||||
; SSE-64-NEXT: packuswb %xmm0, %xmm0
|
; SSE-64-NEXT: packuswb %xmm0, %xmm0
|
||||||
; SSE-64-NEXT: retq
|
; SSE-64-NEXT: retq
|
||||||
@ -2098,13 +2097,15 @@ define <2 x i8> @strict_vector_fptoui_v2f64_to_v2i8(<2 x double> %a) #0 {
|
|||||||
; AVX-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
|
; AVX-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
|
||||||
; AVX: # %bb.0:
|
; AVX: # %bb.0:
|
||||||
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
|
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||||
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
|
||||||
|
; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
|
||||||
; AVX-NEXT: ret{{[l|q]}}
|
; AVX-NEXT: ret{{[l|q]}}
|
||||||
;
|
;
|
||||||
; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
|
; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
|
||||||
; AVX512F: # %bb.0:
|
; AVX512F: # %bb.0:
|
||||||
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
|
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||||
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
|
||||||
|
; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
|
||||||
; AVX512F-NEXT: ret{{[l|q]}}
|
; AVX512F-NEXT: ret{{[l|q]}}
|
||||||
;
|
;
|
||||||
; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
|
; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
|
||||||
@ -2116,7 +2117,8 @@ define <2 x i8> @strict_vector_fptoui_v2f64_to_v2i8(<2 x double> %a) #0 {
|
|||||||
; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
|
; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
|
||||||
; AVX512DQ: # %bb.0:
|
; AVX512DQ: # %bb.0:
|
||||||
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
|
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||||
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
|
||||||
|
; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
|
||||||
; AVX512DQ-NEXT: ret{{[l|q]}}
|
; AVX512DQ-NEXT: ret{{[l|q]}}
|
||||||
;
|
;
|
||||||
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
|
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
|
||||||
@ -2134,32 +2136,32 @@ define <2 x i8> @strict_vector_fptosi_v2f32_to_v2i8(<2 x float> %a) #0 {
|
|||||||
; SSE-32: # %bb.0:
|
; SSE-32: # %bb.0:
|
||||||
; SSE-32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
|
; SSE-32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
|
||||||
; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0
|
; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0
|
||||||
; SSE-32-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
; SSE-32-NEXT: packssdw %xmm0, %xmm0
|
||||||
; SSE-32-NEXT: packuswb %xmm0, %xmm0
|
; SSE-32-NEXT: packsswb %xmm0, %xmm0
|
||||||
; SSE-32-NEXT: packuswb %xmm0, %xmm0
|
|
||||||
; SSE-32-NEXT: retl
|
; SSE-32-NEXT: retl
|
||||||
;
|
;
|
||||||
; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
|
; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
|
||||||
; SSE-64: # %bb.0:
|
; SSE-64: # %bb.0:
|
||||||
; SSE-64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
|
; SSE-64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
|
||||||
; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0
|
; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0
|
||||||
; SSE-64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
; SSE-64-NEXT: packssdw %xmm0, %xmm0
|
||||||
; SSE-64-NEXT: packuswb %xmm0, %xmm0
|
; SSE-64-NEXT: packsswb %xmm0, %xmm0
|
||||||
; SSE-64-NEXT: packuswb %xmm0, %xmm0
|
|
||||||
; SSE-64-NEXT: retq
|
; SSE-64-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
|
; AVX-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
|
||||||
; AVX: # %bb.0:
|
; AVX: # %bb.0:
|
||||||
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||||
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
|
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||||
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
|
||||||
|
; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
||||||
; AVX-NEXT: ret{{[l|q]}}
|
; AVX-NEXT: ret{{[l|q]}}
|
||||||
;
|
;
|
||||||
; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
|
; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
|
||||||
; AVX512F: # %bb.0:
|
; AVX512F: # %bb.0:
|
||||||
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||||
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
|
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||||
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
|
||||||
|
; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
||||||
; AVX512F-NEXT: ret{{[l|q]}}
|
; AVX512F-NEXT: ret{{[l|q]}}
|
||||||
;
|
;
|
||||||
; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
|
; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
|
||||||
@ -2173,7 +2175,8 @@ define <2 x i8> @strict_vector_fptosi_v2f32_to_v2i8(<2 x float> %a) #0 {
|
|||||||
; AVX512DQ: # %bb.0:
|
; AVX512DQ: # %bb.0:
|
||||||
; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||||
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
|
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||||
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
|
||||||
|
; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
||||||
; AVX512DQ-NEXT: ret{{[l|q]}}
|
; AVX512DQ-NEXT: ret{{[l|q]}}
|
||||||
;
|
;
|
||||||
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
|
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
|
||||||
@ -2192,7 +2195,6 @@ define <2 x i8> @strict_vector_fptoui_v2f32_to_v2i8(<2 x float> %a) #0 {
|
|||||||
; SSE-32: # %bb.0:
|
; SSE-32: # %bb.0:
|
||||||
; SSE-32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
|
; SSE-32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
|
||||||
; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0
|
; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0
|
||||||
; SSE-32-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
|
||||||
; SSE-32-NEXT: packuswb %xmm0, %xmm0
|
; SSE-32-NEXT: packuswb %xmm0, %xmm0
|
||||||
; SSE-32-NEXT: packuswb %xmm0, %xmm0
|
; SSE-32-NEXT: packuswb %xmm0, %xmm0
|
||||||
; SSE-32-NEXT: retl
|
; SSE-32-NEXT: retl
|
||||||
@ -2201,7 +2203,6 @@ define <2 x i8> @strict_vector_fptoui_v2f32_to_v2i8(<2 x float> %a) #0 {
|
|||||||
; SSE-64: # %bb.0:
|
; SSE-64: # %bb.0:
|
||||||
; SSE-64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
|
; SSE-64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
|
||||||
; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0
|
; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0
|
||||||
; SSE-64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
|
||||||
; SSE-64-NEXT: packuswb %xmm0, %xmm0
|
; SSE-64-NEXT: packuswb %xmm0, %xmm0
|
||||||
; SSE-64-NEXT: packuswb %xmm0, %xmm0
|
; SSE-64-NEXT: packuswb %xmm0, %xmm0
|
||||||
; SSE-64-NEXT: retq
|
; SSE-64-NEXT: retq
|
||||||
@ -2210,14 +2211,16 @@ define <2 x i8> @strict_vector_fptoui_v2f32_to_v2i8(<2 x float> %a) #0 {
|
|||||||
; AVX: # %bb.0:
|
; AVX: # %bb.0:
|
||||||
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||||
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
|
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||||
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
|
||||||
|
; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
|
||||||
; AVX-NEXT: ret{{[l|q]}}
|
; AVX-NEXT: ret{{[l|q]}}
|
||||||
;
|
;
|
||||||
; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
|
; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
|
||||||
; AVX512F: # %bb.0:
|
; AVX512F: # %bb.0:
|
||||||
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||||
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
|
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||||
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
|
||||||
|
; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
|
||||||
; AVX512F-NEXT: ret{{[l|q]}}
|
; AVX512F-NEXT: ret{{[l|q]}}
|
||||||
;
|
;
|
||||||
; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
|
; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
|
||||||
@ -2231,7 +2234,8 @@ define <2 x i8> @strict_vector_fptoui_v2f32_to_v2i8(<2 x float> %a) #0 {
|
|||||||
; AVX512DQ: # %bb.0:
|
; AVX512DQ: # %bb.0:
|
||||||
; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||||
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
|
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||||
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
|
||||||
|
; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
|
||||||
; AVX512DQ-NEXT: ret{{[l|q]}}
|
; AVX512DQ-NEXT: ret{{[l|q]}}
|
||||||
;
|
;
|
||||||
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
|
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
|
||||||
|
@ -67,7 +67,8 @@ define <2 x i8> @cvt_v2f32_v2i8(<2 x float> %src) {
|
|||||||
; CHECK-LABEL: cvt_v2f32_v2i8:
|
; CHECK-LABEL: cvt_v2f32_v2i8:
|
||||||
; CHECK: ## %bb.0:
|
; CHECK: ## %bb.0:
|
||||||
; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
|
; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||||
; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
; CHECK-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
|
||||||
|
; CHECK-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
||||||
; CHECK-NEXT: retl
|
; CHECK-NEXT: retl
|
||||||
%res = fptosi <2 x float> %src to <2 x i8>
|
%res = fptosi <2 x float> %src to <2 x i8>
|
||||||
ret <2 x i8> %res
|
ret <2 x i8> %res
|
||||||
@ -77,7 +78,7 @@ define <2 x i16> @cvt_v2f32_v2i16(<2 x float> %src) {
|
|||||||
; CHECK-LABEL: cvt_v2f32_v2i16:
|
; CHECK-LABEL: cvt_v2f32_v2i16:
|
||||||
; CHECK: ## %bb.0:
|
; CHECK: ## %bb.0:
|
||||||
; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
|
; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||||
; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
; CHECK-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
|
||||||
; CHECK-NEXT: retl
|
; CHECK-NEXT: retl
|
||||||
%res = fptosi <2 x float> %src to <2 x i16>
|
%res = fptosi <2 x float> %src to <2 x i16>
|
||||||
ret <2 x i16> %res
|
ret <2 x i16> %res
|
||||||
@ -96,7 +97,8 @@ define <2 x i8> @cvt_v2f32_v2u8(<2 x float> %src) {
|
|||||||
; CHECK-LABEL: cvt_v2f32_v2u8:
|
; CHECK-LABEL: cvt_v2f32_v2u8:
|
||||||
; CHECK: ## %bb.0:
|
; CHECK: ## %bb.0:
|
||||||
; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
|
; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||||
; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
; CHECK-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
|
||||||
|
; CHECK-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
|
||||||
; CHECK-NEXT: retl
|
; CHECK-NEXT: retl
|
||||||
%res = fptoui <2 x float> %src to <2 x i8>
|
%res = fptoui <2 x float> %src to <2 x i8>
|
||||||
ret <2 x i8> %res
|
ret <2 x i8> %res
|
||||||
@ -106,7 +108,7 @@ define <2 x i16> @cvt_v2f32_v2u16(<2 x float> %src) {
|
|||||||
; CHECK-LABEL: cvt_v2f32_v2u16:
|
; CHECK-LABEL: cvt_v2f32_v2u16:
|
||||||
; CHECK: ## %bb.0:
|
; CHECK: ## %bb.0:
|
||||||
; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
|
; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||||
; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
; CHECK-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
|
||||||
; CHECK-NEXT: retl
|
; CHECK-NEXT: retl
|
||||||
%res = fptoui <2 x float> %src to <2 x i16>
|
%res = fptoui <2 x float> %src to <2 x i16>
|
||||||
ret <2 x i16> %res
|
ret <2 x i16> %res
|
||||||
|
@ -2297,21 +2297,22 @@ define <2 x i8> @fptosi_2f32_to_2i8(<2 x float> %a) {
|
|||||||
; SSE-LABEL: fptosi_2f32_to_2i8:
|
; SSE-LABEL: fptosi_2f32_to_2i8:
|
||||||
; SSE: # %bb.0:
|
; SSE: # %bb.0:
|
||||||
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
|
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
|
||||||
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
; SSE-NEXT: packssdw %xmm0, %xmm0
|
||||||
; SSE-NEXT: packuswb %xmm0, %xmm0
|
; SSE-NEXT: packsswb %xmm0, %xmm0
|
||||||
; SSE-NEXT: packuswb %xmm0, %xmm0
|
|
||||||
; SSE-NEXT: retq
|
; SSE-NEXT: retq
|
||||||
;
|
;
|
||||||
; VEX-LABEL: fptosi_2f32_to_2i8:
|
; VEX-LABEL: fptosi_2f32_to_2i8:
|
||||||
; VEX: # %bb.0:
|
; VEX: # %bb.0:
|
||||||
; VEX-NEXT: vcvttps2dq %xmm0, %xmm0
|
; VEX-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||||
; VEX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
; VEX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
|
||||||
|
; VEX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
||||||
; VEX-NEXT: retq
|
; VEX-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512F-LABEL: fptosi_2f32_to_2i8:
|
; AVX512F-LABEL: fptosi_2f32_to_2i8:
|
||||||
; AVX512F: # %bb.0:
|
; AVX512F: # %bb.0:
|
||||||
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
|
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||||
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
|
||||||
|
; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
||||||
; AVX512F-NEXT: retq
|
; AVX512F-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512VL-LABEL: fptosi_2f32_to_2i8:
|
; AVX512VL-LABEL: fptosi_2f32_to_2i8:
|
||||||
@ -2323,7 +2324,8 @@ define <2 x i8> @fptosi_2f32_to_2i8(<2 x float> %a) {
|
|||||||
; AVX512DQ-LABEL: fptosi_2f32_to_2i8:
|
; AVX512DQ-LABEL: fptosi_2f32_to_2i8:
|
||||||
; AVX512DQ: # %bb.0:
|
; AVX512DQ: # %bb.0:
|
||||||
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
|
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||||
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
|
||||||
|
; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
||||||
; AVX512DQ-NEXT: retq
|
; AVX512DQ-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512VLDQ-LABEL: fptosi_2f32_to_2i8:
|
; AVX512VLDQ-LABEL: fptosi_2f32_to_2i8:
|
||||||
@ -2339,13 +2341,13 @@ define <2 x i16> @fptosi_2f32_to_2i16(<2 x float> %a) {
|
|||||||
; SSE-LABEL: fptosi_2f32_to_2i16:
|
; SSE-LABEL: fptosi_2f32_to_2i16:
|
||||||
; SSE: # %bb.0:
|
; SSE: # %bb.0:
|
||||||
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
|
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
|
||||||
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
; SSE-NEXT: packssdw %xmm0, %xmm0
|
||||||
; SSE-NEXT: retq
|
; SSE-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: fptosi_2f32_to_2i16:
|
; AVX-LABEL: fptosi_2f32_to_2i16:
|
||||||
; AVX: # %bb.0:
|
; AVX: # %bb.0:
|
||||||
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
|
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||||
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
|
||||||
; AVX-NEXT: retq
|
; AVX-NEXT: retq
|
||||||
%cvt = fptosi <2 x float> %a to <2 x i16>
|
%cvt = fptosi <2 x float> %a to <2 x i16>
|
||||||
ret <2 x i16> %cvt
|
ret <2 x i16> %cvt
|
||||||
@ -2355,7 +2357,6 @@ define <2 x i8> @fptoui_2f32_to_2i8(<2 x float> %a) {
|
|||||||
; SSE-LABEL: fptoui_2f32_to_2i8:
|
; SSE-LABEL: fptoui_2f32_to_2i8:
|
||||||
; SSE: # %bb.0:
|
; SSE: # %bb.0:
|
||||||
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
|
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
|
||||||
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
|
||||||
; SSE-NEXT: packuswb %xmm0, %xmm0
|
; SSE-NEXT: packuswb %xmm0, %xmm0
|
||||||
; SSE-NEXT: packuswb %xmm0, %xmm0
|
; SSE-NEXT: packuswb %xmm0, %xmm0
|
||||||
; SSE-NEXT: retq
|
; SSE-NEXT: retq
|
||||||
@ -2363,13 +2364,15 @@ define <2 x i8> @fptoui_2f32_to_2i8(<2 x float> %a) {
|
|||||||
; VEX-LABEL: fptoui_2f32_to_2i8:
|
; VEX-LABEL: fptoui_2f32_to_2i8:
|
||||||
; VEX: # %bb.0:
|
; VEX: # %bb.0:
|
||||||
; VEX-NEXT: vcvttps2dq %xmm0, %xmm0
|
; VEX-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||||
; VEX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
; VEX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
|
||||||
|
; VEX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
|
||||||
; VEX-NEXT: retq
|
; VEX-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512F-LABEL: fptoui_2f32_to_2i8:
|
; AVX512F-LABEL: fptoui_2f32_to_2i8:
|
||||||
; AVX512F: # %bb.0:
|
; AVX512F: # %bb.0:
|
||||||
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
|
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||||
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
|
||||||
|
; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
|
||||||
; AVX512F-NEXT: retq
|
; AVX512F-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512VL-LABEL: fptoui_2f32_to_2i8:
|
; AVX512VL-LABEL: fptoui_2f32_to_2i8:
|
||||||
@ -2381,7 +2384,8 @@ define <2 x i8> @fptoui_2f32_to_2i8(<2 x float> %a) {
|
|||||||
; AVX512DQ-LABEL: fptoui_2f32_to_2i8:
|
; AVX512DQ-LABEL: fptoui_2f32_to_2i8:
|
||||||
; AVX512DQ: # %bb.0:
|
; AVX512DQ: # %bb.0:
|
||||||
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
|
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||||
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
|
||||||
|
; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
|
||||||
; AVX512DQ-NEXT: retq
|
; AVX512DQ-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512VLDQ-LABEL: fptoui_2f32_to_2i8:
|
; AVX512VLDQ-LABEL: fptoui_2f32_to_2i8:
|
||||||
@ -2403,7 +2407,7 @@ define <2 x i16> @fptoui_2f32_to_2i16(<2 x float> %a) {
|
|||||||
; AVX-LABEL: fptoui_2f32_to_2i16:
|
; AVX-LABEL: fptoui_2f32_to_2i16:
|
||||||
; AVX: # %bb.0:
|
; AVX: # %bb.0:
|
||||||
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
|
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||||
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
|
||||||
; AVX-NEXT: retq
|
; AVX-NEXT: retq
|
||||||
%cvt = fptoui <2 x float> %a to <2 x i16>
|
%cvt = fptoui <2 x float> %a to <2 x i16>
|
||||||
ret <2 x i16> %cvt
|
ret <2 x i16> %cvt
|
||||||
@ -2413,21 +2417,22 @@ define <2 x i8> @fptosi_2f64_to_2i8(<2 x double> %a) {
|
|||||||
; SSE-LABEL: fptosi_2f64_to_2i8:
|
; SSE-LABEL: fptosi_2f64_to_2i8:
|
||||||
; SSE: # %bb.0:
|
; SSE: # %bb.0:
|
||||||
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
|
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
|
||||||
; SSE-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
; SSE-NEXT: packssdw %xmm0, %xmm0
|
||||||
; SSE-NEXT: packuswb %xmm0, %xmm0
|
; SSE-NEXT: packsswb %xmm0, %xmm0
|
||||||
; SSE-NEXT: packuswb %xmm0, %xmm0
|
|
||||||
; SSE-NEXT: retq
|
; SSE-NEXT: retq
|
||||||
;
|
;
|
||||||
; VEX-LABEL: fptosi_2f64_to_2i8:
|
; VEX-LABEL: fptosi_2f64_to_2i8:
|
||||||
; VEX: # %bb.0:
|
; VEX: # %bb.0:
|
||||||
; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0
|
; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||||
; VEX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
; VEX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
|
||||||
|
; VEX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
||||||
; VEX-NEXT: retq
|
; VEX-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512F-LABEL: fptosi_2f64_to_2i8:
|
; AVX512F-LABEL: fptosi_2f64_to_2i8:
|
||||||
; AVX512F: # %bb.0:
|
; AVX512F: # %bb.0:
|
||||||
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
|
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||||
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
|
||||||
|
; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
||||||
; AVX512F-NEXT: retq
|
; AVX512F-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512VL-LABEL: fptosi_2f64_to_2i8:
|
; AVX512VL-LABEL: fptosi_2f64_to_2i8:
|
||||||
@ -2439,7 +2444,8 @@ define <2 x i8> @fptosi_2f64_to_2i8(<2 x double> %a) {
|
|||||||
; AVX512DQ-LABEL: fptosi_2f64_to_2i8:
|
; AVX512DQ-LABEL: fptosi_2f64_to_2i8:
|
||||||
; AVX512DQ: # %bb.0:
|
; AVX512DQ: # %bb.0:
|
||||||
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
|
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||||
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
|
||||||
|
; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
||||||
; AVX512DQ-NEXT: retq
|
; AVX512DQ-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512VLDQ-LABEL: fptosi_2f64_to_2i8:
|
; AVX512VLDQ-LABEL: fptosi_2f64_to_2i8:
|
||||||
@ -2455,13 +2461,13 @@ define <2 x i16> @fptosi_2f64_to_2i16(<2 x double> %a) {
|
|||||||
; SSE-LABEL: fptosi_2f64_to_2i16:
|
; SSE-LABEL: fptosi_2f64_to_2i16:
|
||||||
; SSE: # %bb.0:
|
; SSE: # %bb.0:
|
||||||
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
|
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
|
||||||
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
; SSE-NEXT: packssdw %xmm0, %xmm0
|
||||||
; SSE-NEXT: retq
|
; SSE-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: fptosi_2f64_to_2i16:
|
; AVX-LABEL: fptosi_2f64_to_2i16:
|
||||||
; AVX: # %bb.0:
|
; AVX: # %bb.0:
|
||||||
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
|
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||||
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
|
||||||
; AVX-NEXT: retq
|
; AVX-NEXT: retq
|
||||||
%cvt = fptosi <2 x double> %a to <2 x i16>
|
%cvt = fptosi <2 x double> %a to <2 x i16>
|
||||||
ret <2 x i16> %cvt
|
ret <2 x i16> %cvt
|
||||||
@ -2471,7 +2477,6 @@ define <2 x i8> @fptoui_2f64_to_2i8(<2 x double> %a) {
|
|||||||
; SSE-LABEL: fptoui_2f64_to_2i8:
|
; SSE-LABEL: fptoui_2f64_to_2i8:
|
||||||
; SSE: # %bb.0:
|
; SSE: # %bb.0:
|
||||||
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
|
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
|
||||||
; SSE-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
|
||||||
; SSE-NEXT: packuswb %xmm0, %xmm0
|
; SSE-NEXT: packuswb %xmm0, %xmm0
|
||||||
; SSE-NEXT: packuswb %xmm0, %xmm0
|
; SSE-NEXT: packuswb %xmm0, %xmm0
|
||||||
; SSE-NEXT: retq
|
; SSE-NEXT: retq
|
||||||
@ -2479,13 +2484,15 @@ define <2 x i8> @fptoui_2f64_to_2i8(<2 x double> %a) {
|
|||||||
; VEX-LABEL: fptoui_2f64_to_2i8:
|
; VEX-LABEL: fptoui_2f64_to_2i8:
|
||||||
; VEX: # %bb.0:
|
; VEX: # %bb.0:
|
||||||
; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0
|
; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||||
; VEX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
; VEX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
|
||||||
|
; VEX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
|
||||||
; VEX-NEXT: retq
|
; VEX-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512F-LABEL: fptoui_2f64_to_2i8:
|
; AVX512F-LABEL: fptoui_2f64_to_2i8:
|
||||||
; AVX512F: # %bb.0:
|
; AVX512F: # %bb.0:
|
||||||
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
|
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||||
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
|
||||||
|
; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
|
||||||
; AVX512F-NEXT: retq
|
; AVX512F-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512VL-LABEL: fptoui_2f64_to_2i8:
|
; AVX512VL-LABEL: fptoui_2f64_to_2i8:
|
||||||
@ -2497,7 +2504,8 @@ define <2 x i8> @fptoui_2f64_to_2i8(<2 x double> %a) {
|
|||||||
; AVX512DQ-LABEL: fptoui_2f64_to_2i8:
|
; AVX512DQ-LABEL: fptoui_2f64_to_2i8:
|
||||||
; AVX512DQ: # %bb.0:
|
; AVX512DQ: # %bb.0:
|
||||||
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
|
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||||
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
|
||||||
|
; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
|
||||||
; AVX512DQ-NEXT: retq
|
; AVX512DQ-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512VLDQ-LABEL: fptoui_2f64_to_2i8:
|
; AVX512VLDQ-LABEL: fptoui_2f64_to_2i8:
|
||||||
@ -2519,7 +2527,7 @@ define <2 x i16> @fptoui_2f64_to_2i16(<2 x double> %a) {
|
|||||||
; AVX-LABEL: fptoui_2f64_to_2i16:
|
; AVX-LABEL: fptoui_2f64_to_2i16:
|
||||||
; AVX: # %bb.0:
|
; AVX: # %bb.0:
|
||||||
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
|
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
|
||||||
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
|
||||||
; AVX-NEXT: retq
|
; AVX-NEXT: retq
|
||||||
%cvt = fptoui <2 x double> %a to <2 x i16>
|
%cvt = fptoui <2 x double> %a to <2 x i16>
|
||||||
ret <2 x i16> %cvt
|
ret <2 x i16> %cvt
|
||||||
|
Loading…
Reference in New Issue
Block a user