[X86] Remove setOperationAction for FP_TO_SINT v8i16.

This is no longer needed after widening legalization as we custom legalize v8i8 ourselves. Added entries to the cost model, but bumped the cost slightly to account for the truncate shuffle that wasn't costed before.
2024-11-23 19:23:23 +01:00 · 2019-11-12 22:44:48 -08:00 · 2019-11-12 22:44:48 -08:00 · e9ebc959cb
commit e9ebc959cb
parent d054b2dd05
3 changed files with 15 additions and 20 deletions
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@ -937,14 +937,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
    setOperationAction(ISD::FP_TO_UINT,         MVT::v2i16, Custom);
    setOperationAction(ISD::FP_TO_UINT,         MVT::v4i16, Custom);

-    // By marking FP_TO_SINT v8i16 as Custom, will trick type legalization into
-    // promoting v8i8 FP_TO_UINT into FP_TO_SINT. When the v8i16 FP_TO_SINT is
-    // split again based on the input type, this will cause an AssertSExt i16 to
-    // be emitted instead of an AssertZExt. This will allow packssdw followed by
-    // packuswb to be used to truncate to v8i8. This is necessary since packusdw
-    // isn't available until sse4.1.
-    setOperationAction(ISD::FP_TO_SINT,         MVT::v8i16, Custom);
-
    setOperationAction(ISD::SINT_TO_FP,         MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP,         MVT::v2i32, Custom);

--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@ -1576,6 +1576,9 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 6 },
    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },

+    { ISD::FP_TO_SINT,  MVT::v4i16,  MVT::v4f32,  2 },
+    { ISD::FP_TO_SINT,  MVT::v2i16,  MVT::v2f64,  2 },
+
    { ISD::FP_TO_SINT,  MVT::v2i32,  MVT::v2f64,  3 },

    { ISD::UINT_TO_FP,  MVT::f64,    MVT::i64,    6 },
--- a/test/Analysis/CostModel/X86/fptosi.ll
+++ b/test/Analysis/CostModel/X86/fptosi.ll
@ -92,28 +92,28 @@ define i32 @fptosi_double_i32(i32 %arg) {
 define i32 @fptosi_double_i16(i32 %arg) {
 ; SSE-LABEL: 'fptosi_double_i16'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'fptosi_double_i16'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'fptosi_double_i16'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; BTVER2-LABEL: 'fptosi_double_i16'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
@ -235,28 +235,28 @@ define i32 @fptosi_float_i32(i32 %arg) {
 define i32 @fptosi_float_i16(i32 %arg) {
 ; SSE-LABEL: 'fptosi_float_i16'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'fptosi_float_i16'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'fptosi_float_i16'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; BTVER2-LABEL: 'fptosi_float_i16'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef