1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

[CostModel][X86] Fix AVX2 v16i16 shift 'splat' costs.

llvm-svn: 291366
This commit is contained in:
Simon Pilgrim 2017-01-07 22:08:09 +00:00
parent 269a611dc0
commit c8cdf126d3
3 changed files with 31 additions and 14 deletions

View File

@ -291,6 +291,20 @@ int X86TTIImpl::getArithmeticInstrCost(
return LT.first * Entry->Cost;
}
static const CostTblEntry AVX2UniformCostTable[] = {
// Uniform splats are cheaper for the following instructions.
{ ISD::SRL, MVT::v16i16, 1 }, // psrlw.
{ ISD::SRA, MVT::v16i16, 1 }, // psraw.
};
if (ST->hasAVX2() &&
((Op2Info == TargetTransformInfo::OK_UniformConstantValue) ||
(Op2Info == TargetTransformInfo::OK_UniformValue))) {
if (const auto *Entry =
CostTableLookup(AVX2UniformCostTable, ISD, LT.second))
return LT.first * Entry->Cost;
}
static const CostTblEntry XOPShiftCostTable[] = {
// 128bit shifts take 1cy, but right shifts require negation beforehand.
{ ISD::SHL, MVT::v16i8, 1 },
@ -325,8 +339,7 @@ int X86TTIImpl::getArithmeticInstrCost(
if (const auto *Entry = CostTableLookup(XOPShiftCostTable, ISD, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry
SSE2UniformCostTable[] = {
static const CostTblEntry SSE2UniformCostTable[] = {
// Uniform splats are cheaper for the following instructions.
{ ISD::SHL, MVT::v16i8, 1 }, // psllw.
{ ISD::SHL, MVT::v32i8, 2 }, // psllw.

View File

@ -578,9 +578,10 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
; SSE2: Found an estimated cost of 2 for instruction: %shift
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
; AVX2: Found an estimated cost of 2 for instruction: %shift
; AVX512: Found an estimated cost of 2 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
ret <16 x i16> %shift
}
@ -590,10 +591,11 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
; SSE2: Found an estimated cost of 4 for instruction: %shift
; SSE41: Found an estimated cost of 4 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 4 for instruction: %shift
; AVX512F: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 2 for instruction: %shift
; AVX512F: Found an estimated cost of 2 for instruction: %shift
; AVX512BW: Found an estimated cost of 1 for instruction: %shift
; XOP: Found an estimated cost of 8 for instruction: %shift
; XOPAVX: Found an estimated cost of 8 for instruction: %shift
; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
%shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
ret <32 x i16> %shift
}

View File

@ -589,9 +589,10 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
; SSE2: Found an estimated cost of 2 for instruction: %shift
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
; AVX2: Found an estimated cost of 2 for instruction: %shift
; AVX512: Found an estimated cost of 2 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; AVX512: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
ret <16 x i16> %shift
}
@ -601,10 +602,11 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
; SSE2: Found an estimated cost of 4 for instruction: %shift
; SSE41: Found an estimated cost of 4 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 4 for instruction: %shift
; AVX512F: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 2 for instruction: %shift
; AVX512F: Found an estimated cost of 2 for instruction: %shift
; AVX512BW: Found an estimated cost of 1 for instruction: %shift
; XOP: Found an estimated cost of 8 for instruction: %shift
; XOPAVX: Found an estimated cost of 8 for instruction: %shift
; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
%shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
ret <32 x i16> %shift
}