mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
[CostModel][X86] Updated vXi8 and vXi16 Reverse/Alternate shuffle costs
Actual codegen is much better than the extract+insert patterns that were assumed. llvm-svn: 290962
This commit is contained in:
parent
3b5ff305e9
commit
f1fa399ee0
@ -645,7 +645,8 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
|
||||
{ TTI::SK_Reverse, MVT::v16i16, 2 }, // vperm2i128 + pshufb
|
||||
{ TTI::SK_Reverse, MVT::v32i8, 2 }, // vperm2i128 + pshufb
|
||||
|
||||
{ TTI::SK_Alternate, MVT::v16i16, 1 } // vpblendw
|
||||
{ TTI::SK_Alternate, MVT::v16i16, 1 }, // vpblendw
|
||||
{ TTI::SK_Alternate, MVT::v32i8, 1 } // vpblendvb
|
||||
};
|
||||
|
||||
if (ST->hasAVX2())
|
||||
@ -666,11 +667,8 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
|
||||
{ TTI::SK_Alternate, MVT::v4f64, 1 }, // vblendpd
|
||||
{ TTI::SK_Alternate, MVT::v8i32, 1 }, // vblendps
|
||||
{ TTI::SK_Alternate, MVT::v8f32, 1 }, // vblendps
|
||||
|
||||
{ TTI::SK_Alternate, MVT::v16i16, 5 }, // 2*vextractf128 + 2*vpblendw
|
||||
// + vinsertf128
|
||||
{ TTI::SK_Alternate, MVT::v32i8, 9 } // 2*vextractf128 + 4*vpshufb
|
||||
// + 2*vpor + vinsertf128
|
||||
{ TTI::SK_Alternate, MVT::v16i16, 3 }, // vpand + vpandn + vpor
|
||||
{ TTI::SK_Alternate, MVT::v32i8, 3 } // vpand + vpandn + vpor
|
||||
};
|
||||
|
||||
if (ST->hasAVX())
|
||||
@ -683,7 +681,7 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
|
||||
{ TTI::SK_Alternate, MVT::v4i32, 1 }, // pblendw
|
||||
{ TTI::SK_Alternate, MVT::v4f32, 1 }, // blendps
|
||||
{ TTI::SK_Alternate, MVT::v8i16, 1 }, // pblendw
|
||||
{ TTI::SK_Alternate, MVT::v16i8, 3 } // 2*pshufb + por
|
||||
{ TTI::SK_Alternate, MVT::v16i8, 1 } // pblendvb
|
||||
};
|
||||
|
||||
if (ST->hasSSE41())
|
||||
@ -713,8 +711,8 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
|
||||
{ TTI::SK_Alternate, MVT::v2i64, 1 }, // movsd
|
||||
{ TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd
|
||||
{ TTI::SK_Alternate, MVT::v4i32, 2 }, // 2*shufps
|
||||
{ TTI::SK_Alternate, MVT::v8i16, 8 }, // 4*pextrw + 4*pinsrw.
|
||||
{ TTI::SK_Alternate, MVT::v16i8, 48 }, // 8*(pinsrw + pextrw + and +movb + movzb + or)
|
||||
{ TTI::SK_Alternate, MVT::v8i16, 3 }, // pand + pandn + por
|
||||
{ TTI::SK_Alternate, MVT::v16i8, 3 } // pand + pandn + por
|
||||
};
|
||||
|
||||
if (ST->hasSSE2())
|
||||
@ -722,8 +720,8 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
static const CostTblEntry SSE1ShuffleTbl[] = {
|
||||
{ TTI::SK_Reverse, MVT::v4f32, 1 }, // shufps
|
||||
{ TTI::SK_Alternate, MVT::v4f32, 2 } // 2*shufps
|
||||
{ TTI::SK_Reverse, MVT::v4f32, 1 }, // shufps
|
||||
{ TTI::SK_Alternate, MVT::v4f32, 2 } // 2*shufps
|
||||
};
|
||||
|
||||
if (ST->hasSSE1())
|
||||
|
@ -207,7 +207,7 @@ define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) {
|
||||
ret <8 x i16> %1
|
||||
}
|
||||
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v8i16':
|
||||
; SSE2: Cost Model: {{.*}} 8 for instruction: %1 = shufflevector
|
||||
; SSE2: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector
|
||||
; SSSE3: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector
|
||||
; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
|
||||
; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
|
||||
@ -219,7 +219,7 @@ define <8 x i16> @test_v8i16_2(<8 x i16> %a, <8 x i16> %b) {
|
||||
ret <8 x i16> %1
|
||||
}
|
||||
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v8i16_2':
|
||||
; SSE2: Cost Model: {{.*}} 8 for instruction: %1 = shufflevector
|
||||
; SSE2: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector
|
||||
; SSSE3: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector
|
||||
; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
|
||||
; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
|
||||
@ -280,11 +280,11 @@ define <16 x i8> @test_v16i8(<16 x i8> %a, <16 x i8> %b) {
|
||||
ret <16 x i8> %1
|
||||
}
|
||||
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v16i8':
|
||||
; SSE2: Cost Model: {{.*}} 48 for instruction: %1 = shufflevector
|
||||
; SSE2: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector
|
||||
; SSSE3: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector
|
||||
; SSE41: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector
|
||||
; AVX: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector
|
||||
; AVX2: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector
|
||||
; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
|
||||
; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
|
||||
; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
|
||||
|
||||
|
||||
define <16 x i8> @test_v16i8_2(<16 x i8> %a, <16 x i8> %b) {
|
||||
@ -292,11 +292,11 @@ define <16 x i8> @test_v16i8_2(<16 x i8> %a, <16 x i8> %b) {
|
||||
ret <16 x i8> %1
|
||||
}
|
||||
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v16i8_2':
|
||||
; SSE2: Cost Model: {{.*}} 48 for instruction: %1 = shufflevector
|
||||
; SSE2: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector
|
||||
; SSSE3: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector
|
||||
; SSE41: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector
|
||||
; AVX: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector
|
||||
; AVX2: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector
|
||||
; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
|
||||
; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
|
||||
; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
|
||||
|
||||
|
||||
define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) {
|
||||
@ -304,10 +304,10 @@ define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) {
|
||||
ret <16 x i16> %1
|
||||
}
|
||||
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v16i16':
|
||||
; SSE2: Cost Model: {{.*}} 16 for instruction: %1 = shufflevector
|
||||
; SSE2: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector
|
||||
; SSSE3: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector
|
||||
; SSE41: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
|
||||
; AVX: Cost Model: {{.*}} 5 for instruction: %1 = shufflevector
|
||||
; AVX: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector
|
||||
; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
|
||||
|
||||
|
||||
@ -316,10 +316,10 @@ define <16 x i16> @test_v16i16_2(<16 x i16> %a, <16 x i16> %b) {
|
||||
ret <16 x i16> %1
|
||||
}
|
||||
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v16i16_2':
|
||||
; SSE2: Cost Model: {{.*}} 16 for instruction: %1 = shufflevector
|
||||
; SSE2: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector
|
||||
; SSSE3: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector
|
||||
; SSE41: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
|
||||
; AVX: Cost Model: {{.*}} 5 for instruction: %1 = shufflevector
|
||||
; AVX: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector
|
||||
; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
|
||||
|
||||
define <32 x i8> @test_v32i8(<32 x i8> %a, <32 x i8> %b) {
|
||||
@ -327,11 +327,11 @@ define <32 x i8> @test_v32i8(<32 x i8> %a, <32 x i8> %b) {
|
||||
ret <32 x i8> %1
|
||||
}
|
||||
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v32i8':
|
||||
; SSE2: Cost Model: {{.*}} 96 for instruction: %1 = shufflevector
|
||||
; SSE2: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector
|
||||
; SSSE3: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector
|
||||
; SSE41: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector
|
||||
; AVX: Cost Model: {{.*}} 9 for instruction: %1 = shufflevector
|
||||
; AVX2: Cost Model: {{.*}} 9 for instruction: %1 = shufflevector
|
||||
; SSE41: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
|
||||
; AVX: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector
|
||||
; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
|
||||
|
||||
|
||||
define <32 x i8> @test_v32i8_2(<32 x i8> %a, <32 x i8> %b) {
|
||||
@ -339,9 +339,9 @@ define <32 x i8> @test_v32i8_2(<32 x i8> %a, <32 x i8> %b) {
|
||||
ret <32 x i8> %1
|
||||
}
|
||||
; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v32i8_2':
|
||||
; SSE2: Cost Model: {{.*}} 96 for instruction: %1 = shufflevector
|
||||
; SSE2: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector
|
||||
; SSSE3: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector
|
||||
; SSE41: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector
|
||||
; AVX: Cost Model: {{.*}} 9 for instruction: %1 = shufflevector
|
||||
; AVX2: Cost Model: {{.*}} 9 for instruction: %1 = shufflevector
|
||||
; SSE41: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector
|
||||
; AVX: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector
|
||||
; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user