mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 12:12:47 +01:00
[x86] fix cost of SINT_TO_FP for i32 --> float (PR21356, PR28434)
This is "cvtdq2ps", which does not appear to be particularly slow on any CPU according to Agner's tables. This patch uses "5" as the cost, as suggested in https://llvm.org/bugs/show_bug.cgi?id=21356, but that seems very conservative given that the instruction is fully pipelined, and I think these costs are supposed to model throughput. Note that related costs are also most likely too high, but this fixes PR21356 and partly fixes PR28434. llvm-svn: 274658
This commit is contained in:
parent
c71eae70be
commit
cd5177a158
@@ -752,7 +752,7 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
|
||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
|
||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 5 },
|
||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
|
||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
|
||||
|
@@ -341,7 +341,7 @@ define <2 x float> @sitofpv2i8v2float(<2 x i8> %a) {
|
||||
|
||||
define <4 x float> @sitofpv4i8v4float(<4 x i8> %a) {
|
||||
; SSE2-LABEL: sitofpv4i8v4float
|
||||
; SSE2: cost of 15 {{.*}} sitofp
|
||||
; SSE2: cost of 5 {{.*}} sitofp
|
||||
;
|
||||
; AVX1-LABEL: sitofpv4i8v4float
|
||||
; AVX1: cost of 3 {{.*}} sitofp
|
||||
@@ -421,7 +421,7 @@ define <2 x float> @sitofpv2i16v2float(<2 x i16> %a) {
|
||||
|
||||
define <4 x float> @sitofpv4i16v4float(<4 x i16> %a) {
|
||||
; SSE2-LABEL: sitofpv4i16v4float
|
||||
; SSE2: cost of 15 {{.*}} sitofp
|
||||
; SSE2: cost of 5 {{.*}} sitofp
|
||||
;
|
||||
; AVX1-LABEL: sitofpv4i16v4float
|
||||
; AVX1: cost of 3 {{.*}} sitofp
|
||||
@@ -501,7 +501,7 @@ define <2 x float> @sitofpv2i32v2float(<2 x i32> %a) {
|
||||
|
||||
define <4 x float> @sitofpv4i32v4float(<4 x i32> %a) {
|
||||
; SSE2-LABEL: sitofpv4i32v4float
|
||||
; SSE2: cost of 15 {{.*}} sitofp
|
||||
; SSE2: cost of 5 {{.*}} sitofp
|
||||
;
|
||||
; AVX1-LABEL: sitofpv4i32v4float
|
||||
; AVX1: cost of 1 {{.*}} sitofp
|
||||
@@ -517,7 +517,7 @@ define <4 x float> @sitofpv4i32v4float(<4 x i32> %a) {
|
||||
|
||||
define <8 x float> @sitofpv8i32v8float(<8 x i32> %a) {
|
||||
; SSE2-LABEL: sitofpv8i32v8float
|
||||
; SSE2: cost of 30 {{.*}} sitofp
|
||||
; SSE2: cost of 10 {{.*}} sitofp
|
||||
;
|
||||
; AVX1-LABEL: sitofpv8i32v8float
|
||||
; AVX1: cost of 1 {{.*}} sitofp
|
||||
@@ -533,7 +533,7 @@ define <8 x float> @sitofpv8i32v8float(<8 x i32> %a) {
|
||||
|
||||
define <16 x float> @sitofpv16i32v16float(<16 x i32> %a) {
|
||||
; SSE2-LABEL: sitofpv16i32v16float
|
||||
; SSE2: cost of 60 {{.*}} sitofp
|
||||
; SSE2: cost of 20 {{.*}} sitofp
|
||||
;
|
||||
; AVX1-LABEL: sitofpv16i32v16float
|
||||
; AVX1: cost of 3 {{.*}} sitofp
|
||||
@@ -549,7 +549,7 @@ define <16 x float> @sitofpv16i32v16float(<16 x i32> %a) {
|
||||
|
||||
define <32 x float> @sitofpv32i32v32float(<32 x i32> %a) {
|
||||
; SSE2-LABEL: sitofpv32i32v32float
|
||||
; SSE2: cost of 120 {{.*}} sitofp
|
||||
; SSE2: cost of 40 {{.*}} sitofp
|
||||
;
|
||||
; AVX1-LABEL: sitofpv32i32v32float
|
||||
; AVX1: cost of 7 {{.*}} sitofp
|
||||
|
Loading…
Reference in New Issue
Block a user