1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 12:12:47 +01:00

[x86] fix cost of SINT_TO_FP for i32 --> float (PR21356, PR28434)

This is "cvtdq2ps", which does not appear to be particularly slow on any CPU
according to Agner Fog's instruction tables. Choosing "5" as the cost here, as suggested in:
https://llvm.org/bugs/show_bug.cgi?id=21356
...but it seems very conservative given that the instruction is fully pipelined,
and I think these costs are supposed to model throughput.

Note that related costs are also most likely too high, but this fixes PR21356
and partly fixes PR28434.

llvm-svn: 274658
This commit is contained in:
Sanjay Patel 2016-07-06 19:15:54 +00:00
parent c71eae70be
commit cd5177a158
2 changed files with 7 additions and 7 deletions

View File

@@ -752,7 +752,7 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
-{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 },
+{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 5 },
 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },

View File

@@ -341,7 +341,7 @@ define <2 x float> @sitofpv2i8v2float(<2 x i8> %a) {
 define <4 x float> @sitofpv4i8v4float(<4 x i8> %a) {
 ; SSE2-LABEL: sitofpv4i8v4float
-; SSE2: cost of 15 {{.*}} sitofp
+; SSE2: cost of 5 {{.*}} sitofp
 ;
 ; AVX1-LABEL: sitofpv4i8v4float
 ; AVX1: cost of 3 {{.*}} sitofp
@@ -421,7 +421,7 @@ define <2 x float> @sitofpv2i16v2float(<2 x i16> %a) {
 define <4 x float> @sitofpv4i16v4float(<4 x i16> %a) {
 ; SSE2-LABEL: sitofpv4i16v4float
-; SSE2: cost of 15 {{.*}} sitofp
+; SSE2: cost of 5 {{.*}} sitofp
 ;
 ; AVX1-LABEL: sitofpv4i16v4float
 ; AVX1: cost of 3 {{.*}} sitofp
@@ -501,7 +501,7 @@ define <2 x float> @sitofpv2i32v2float(<2 x i32> %a) {
 define <4 x float> @sitofpv4i32v4float(<4 x i32> %a) {
 ; SSE2-LABEL: sitofpv4i32v4float
-; SSE2: cost of 15 {{.*}} sitofp
+; SSE2: cost of 5 {{.*}} sitofp
 ;
 ; AVX1-LABEL: sitofpv4i32v4float
 ; AVX1: cost of 1 {{.*}} sitofp
@@ -517,7 +517,7 @@ define <4 x float> @sitofpv4i32v4float(<4 x i32> %a) {
 define <8 x float> @sitofpv8i32v8float(<8 x i32> %a) {
 ; SSE2-LABEL: sitofpv8i32v8float
-; SSE2: cost of 30 {{.*}} sitofp
+; SSE2: cost of 10 {{.*}} sitofp
 ;
 ; AVX1-LABEL: sitofpv8i32v8float
 ; AVX1: cost of 1 {{.*}} sitofp
@@ -533,7 +533,7 @@ define <8 x float> @sitofpv8i32v8float(<8 x i32> %a) {
 define <16 x float> @sitofpv16i32v16float(<16 x i32> %a) {
 ; SSE2-LABEL: sitofpv16i32v16float
-; SSE2: cost of 60 {{.*}} sitofp
+; SSE2: cost of 20 {{.*}} sitofp
 ;
 ; AVX1-LABEL: sitofpv16i32v16float
 ; AVX1: cost of 3 {{.*}} sitofp
@@ -549,7 +549,7 @@ define <16 x float> @sitofpv16i32v16float(<16 x i32> %a) {
 define <32 x float> @sitofpv32i32v32float(<32 x i32> %a) {
 ; SSE2-LABEL: sitofpv32i32v32float
-; SSE2: cost of 120 {{.*}} sitofp
+; SSE2: cost of 40 {{.*}} sitofp
 ;
 ; AVX1-LABEL: sitofpv32i32v32float
 ; AVX1: cost of 7 {{.*}} sitofp