mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
[X86] Lower the cost of v2i32->v2f64 sint_to_fp under vector widening legalization.
I don't really understand the costs we're using for sint_to_fp, but prior to widening legalization we used 20 as the cost for this via the v2i64->v2f64 entry. That number seems better than the 40 we got with widening legalization. So now we need either a v2i32->v2f64 entry or a v4i32->v2f64 entry, depending on whether AVX is enabled or not, since we skip the first SSE2 table lookup under AVX. llvm-svn: 369628
This commit is contained in:
parent
757587cb21
commit
9c998ad3e9
@ -1568,6 +1568,11 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
|
||||
{ ISD::UINT_TO_FP, MVT::f64, MVT::i64, 4 },
|
||||
};
|
||||
|
||||
static const TypeConversionCostTblEntry SSE2ConversionTblWide[] = {
|
||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 2*10 },
|
||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2*10 },
|
||||
};
|
||||
|
||||
static const TypeConversionCostTblEntry SSE2ConversionTbl[] = {
|
||||
// These are somewhat magic numbers justified by looking at the output of
|
||||
// Intel's IACA, running some kernels and making sure when we take
|
||||
@ -1633,6 +1638,13 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
|
||||
std::pair<int, MVT> LTSrc = TLI->getTypeLegalizationCost(DL, Src);
|
||||
std::pair<int, MVT> LTDest = TLI->getTypeLegalizationCost(DL, Dst);
|
||||
|
||||
if (ST->hasSSE2() && !ST->hasAVX() &&
|
||||
ExperimentalVectorWideningLegalization) {
|
||||
if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTblWide, ISD,
|
||||
LTDest.second, LTSrc.second))
|
||||
return LTSrc.first * Entry->Cost;
|
||||
}
|
||||
|
||||
if (ST->hasSSE2() && !ST->hasAVX()) {
|
||||
if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD,
|
||||
LTDest.second, LTSrc.second))
|
||||
@ -1705,6 +1717,12 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
|
||||
return Entry->Cost;
|
||||
}
|
||||
|
||||
if (ST->hasSSE2() && ExperimentalVectorWideningLegalization) {
|
||||
if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTblWide, ISD,
|
||||
SimpleDstTy, SimpleSrcTy))
|
||||
return Entry->Cost;
|
||||
}
|
||||
|
||||
if (ST->hasSSE2()) {
|
||||
if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD,
|
||||
SimpleDstTy, SimpleSrcTy))
|
||||
|
@ -314,13 +314,13 @@ define i32 @masks4(<4 x i1> %in) {
|
||||
define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
|
||||
; SSE-LABEL: 'sitofp4'
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %D1 = sitofp <4 x i32> %d to <4 x float>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||
;
|
||||
; AVX-LABEL: 'sitofp4'
|
||||
|
@ -85,28 +85,28 @@ define i32 @sitofp_i16_double() {
|
||||
define i32 @sitofp_i32_double() {
|
||||
; SSE-LABEL: 'sitofp_i32_double'
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX-LABEL: 'sitofp_i32_double'
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; AVX512-LABEL: 'sitofp_i32_double'
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double>
|
||||
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; BTVER2-LABEL: 'sitofp_i32_double'
|
||||
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double
|
||||
; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
|
||||
; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
|
||||
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double>
|
||||
; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double>
|
||||
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
|
Loading…
Reference in New Issue
Block a user