mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
[CostModel][X86] Fix AVX2 v16i16 shift 'splat' costs.
llvm-svn: 291366
This commit is contained in:
parent
269a611dc0
commit
c8cdf126d3
@ -291,6 +291,20 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
return LT.first * Entry->Cost;
|
||||
}
|
||||
|
||||
static const CostTblEntry AVX2UniformCostTable[] = {
|
||||
// Uniform splats are cheaper for the following instructions.
|
||||
{ ISD::SRL, MVT::v16i16, 1 }, // psrlw.
|
||||
{ ISD::SRA, MVT::v16i16, 1 }, // psraw.
|
||||
};
|
||||
|
||||
if (ST->hasAVX2() &&
|
||||
((Op2Info == TargetTransformInfo::OK_UniformConstantValue) ||
|
||||
(Op2Info == TargetTransformInfo::OK_UniformValue))) {
|
||||
if (const auto *Entry =
|
||||
CostTableLookup(AVX2UniformCostTable, ISD, LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
}
|
||||
|
||||
static const CostTblEntry XOPShiftCostTable[] = {
|
||||
// 128bit shifts take 1cy, but right shifts require negation beforehand.
|
||||
{ ISD::SHL, MVT::v16i8, 1 },
|
||||
@ -325,8 +339,7 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
if (const auto *Entry = CostTableLookup(XOPShiftCostTable, ISD, LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
static const CostTblEntry
|
||||
SSE2UniformCostTable[] = {
|
||||
static const CostTblEntry SSE2UniformCostTable[] = {
|
||||
// Uniform splats are cheaper for the following instructions.
|
||||
{ ISD::SHL, MVT::v16i8, 1 }, // psllw.
|
||||
{ ISD::SHL, MVT::v32i8, 2 }, // psllw.
|
||||
|
@ -578,9 +578,10 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
|
||||
; SSE2: Found an estimated cost of 2 for instruction: %shift
|
||||
; SSE41: Found an estimated cost of 2 for instruction: %shift
|
||||
; AVX: Found an estimated cost of 2 for instruction: %shift
|
||||
; AVX2: Found an estimated cost of 2 for instruction: %shift
|
||||
; AVX512: Found an estimated cost of 2 for instruction: %shift
|
||||
; XOP: Found an estimated cost of 4 for instruction: %shift
|
||||
; AVX2: Found an estimated cost of 1 for instruction: %shift
|
||||
; AVX512: Found an estimated cost of 1 for instruction: %shift
|
||||
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
|
||||
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
|
||||
%shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
|
||||
ret <16 x i16> %shift
|
||||
}
|
||||
@ -590,10 +591,11 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
|
||||
; SSE2: Found an estimated cost of 4 for instruction: %shift
|
||||
; SSE41: Found an estimated cost of 4 for instruction: %shift
|
||||
; AVX: Found an estimated cost of 4 for instruction: %shift
|
||||
; AVX2: Found an estimated cost of 4 for instruction: %shift
|
||||
; AVX512F: Found an estimated cost of 4 for instruction: %shift
|
||||
; AVX2: Found an estimated cost of 2 for instruction: %shift
|
||||
; AVX512F: Found an estimated cost of 2 for instruction: %shift
|
||||
; AVX512BW: Found an estimated cost of 1 for instruction: %shift
|
||||
; XOP: Found an estimated cost of 8 for instruction: %shift
|
||||
; XOPAVX: Found an estimated cost of 8 for instruction: %shift
|
||||
; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
|
||||
%shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
|
||||
ret <32 x i16> %shift
|
||||
}
|
||||
|
@ -589,9 +589,10 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
|
||||
; SSE2: Found an estimated cost of 2 for instruction: %shift
|
||||
; SSE41: Found an estimated cost of 2 for instruction: %shift
|
||||
; AVX: Found an estimated cost of 2 for instruction: %shift
|
||||
; AVX2: Found an estimated cost of 2 for instruction: %shift
|
||||
; AVX512: Found an estimated cost of 2 for instruction: %shift
|
||||
; XOP: Found an estimated cost of 4 for instruction: %shift
|
||||
; AVX2: Found an estimated cost of 1 for instruction: %shift
|
||||
; AVX512: Found an estimated cost of 1 for instruction: %shift
|
||||
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
|
||||
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
|
||||
%shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
|
||||
ret <16 x i16> %shift
|
||||
}
|
||||
@ -601,10 +602,11 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
|
||||
; SSE2: Found an estimated cost of 4 for instruction: %shift
|
||||
; SSE41: Found an estimated cost of 4 for instruction: %shift
|
||||
; AVX: Found an estimated cost of 4 for instruction: %shift
|
||||
; AVX2: Found an estimated cost of 4 for instruction: %shift
|
||||
; AVX512F: Found an estimated cost of 4 for instruction: %shift
|
||||
; AVX2: Found an estimated cost of 2 for instruction: %shift
|
||||
; AVX512F: Found an estimated cost of 2 for instruction: %shift
|
||||
; AVX512BW: Found an estimated cost of 1 for instruction: %shift
|
||||
; XOP: Found an estimated cost of 8 for instruction: %shift
|
||||
; XOPAVX: Found an estimated cost of 8 for instruction: %shift
|
||||
; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
|
||||
%shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
|
||||
ret <32 x i16> %shift
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user