From c8cdf126d3dd0a4799e88551e83c76a0eda6259d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 7 Jan 2017 22:08:09 +0000 Subject: [PATCH] [CostModel][X86] Fix AVX2 v16i16 shift 'splat' costs. llvm-svn: 291366 --- lib/Target/X86/X86TargetTransformInfo.cpp | 17 +++++++++++++++-- test/Analysis/CostModel/X86/vshift-ashr-cost.ll | 14 ++++++++------ test/Analysis/CostModel/X86/vshift-lshr-cost.ll | 14 ++++++++------ 3 files changed, 31 insertions(+), 14 deletions(-) diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 829b47b7aa2..30b20555257 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -291,6 +291,20 @@ int X86TTIImpl::getArithmeticInstrCost( return LT.first * Entry->Cost; } + static const CostTblEntry AVX2UniformCostTable[] = { + // Uniform splats are cheaper for the following instructions. + { ISD::SRL, MVT::v16i16, 1 }, // psrlw. + { ISD::SRA, MVT::v16i16, 1 }, // psraw. + }; + + if (ST->hasAVX2() && + ((Op2Info == TargetTransformInfo::OK_UniformConstantValue) || + (Op2Info == TargetTransformInfo::OK_UniformValue))) { + if (const auto *Entry = + CostTableLookup(AVX2UniformCostTable, ISD, LT.second)) + return LT.first * Entry->Cost; + } + static const CostTblEntry XOPShiftCostTable[] = { // 128bit shifts take 1cy, but right shifts require negation beforehand. { ISD::SHL, MVT::v16i8, 1 }, @@ -325,8 +339,7 @@ int X86TTIImpl::getArithmeticInstrCost( if (const auto *Entry = CostTableLookup(XOPShiftCostTable, ISD, LT.second)) return LT.first * Entry->Cost; - static const CostTblEntry - SSE2UniformCostTable[] = { + static const CostTblEntry SSE2UniformCostTable[] = { // Uniform splats are cheaper for the following instructions. { ISD::SHL, MVT::v16i8, 1 }, // psllw. { ISD::SHL, MVT::v32i8, 2 }, // psllw. diff --git a/test/Analysis/CostModel/X86/vshift-ashr-cost.ll b/test/Analysis/CostModel/X86/vshift-ashr-cost.ll index ab1eb730109..e75b5dc3ddc 100644 --- a/test/Analysis/CostModel/X86/vshift-ashr-cost.ll +++ b/test/Analysis/CostModel/X86/vshift-ashr-cost.ll @@ -578,9 +578,10 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE2: Found an estimated cost of 2 for instruction: %shift ; SSE41: Found an estimated cost of 2 for instruction: %shift ; AVX: Found an estimated cost of 2 for instruction: %shift -; AVX2: Found an estimated cost of 2 for instruction: %shift -; AVX512: Found an estimated cost of 2 for instruction: %shift -; XOP: Found an estimated cost of 4 for instruction: %shift +; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift +; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = ashr <16 x i16> %a, ret <16 x i16> %shift } @@ -590,10 +591,11 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE2: Found an estimated cost of 4 for instruction: %shift ; SSE41: Found an estimated cost of 4 for instruction: %shift ; AVX: Found an estimated cost of 4 for instruction: %shift -; AVX2: Found an estimated cost of 4 for instruction: %shift -; AVX512F: Found an estimated cost of 4 for instruction: %shift +; AVX2: Found an estimated cost of 2 for instruction: %shift +; AVX512F: Found an estimated cost of 2 for instruction: %shift ; AVX512BW: Found an estimated cost of 1 for instruction: %shift -; XOP: Found an estimated cost of 8 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %shift = ashr <32 x i16> %a, ret <32 x i16> %shift } diff --git a/test/Analysis/CostModel/X86/vshift-lshr-cost.ll b/test/Analysis/CostModel/X86/vshift-lshr-cost.ll index fea727147ff..c1c674875c9 100644 --- a/test/Analysis/CostModel/X86/vshift-lshr-cost.ll +++ b/test/Analysis/CostModel/X86/vshift-lshr-cost.ll @@ -589,9 +589,10 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE2: Found an estimated cost of 2 for instruction: %shift ; SSE41: Found an estimated cost of 2 for instruction: %shift ; AVX: Found an estimated cost of 2 for instruction: %shift -; AVX2: Found an estimated cost of 2 for instruction: %shift -; AVX512: Found an estimated cost of 2 for instruction: %shift -; XOP: Found an estimated cost of 4 for instruction: %shift +; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift +; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = lshr <16 x i16> %a, ret <16 x i16> %shift } @@ -601,10 +602,11 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE2: Found an estimated cost of 4 for instruction: %shift ; SSE41: Found an estimated cost of 4 for instruction: %shift ; AVX: Found an estimated cost of 4 for instruction: %shift -; AVX2: Found an estimated cost of 4 for instruction: %shift -; AVX512F: Found an estimated cost of 4 for instruction: %shift +; AVX2: Found an estimated cost of 2 for instruction: %shift +; AVX512F: Found an estimated cost of 2 for instruction: %shift ; AVX512BW: Found an estimated cost of 1 for instruction: %shift -; XOP: Found an estimated cost of 8 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %shift = lshr <32 x i16> %a, ret <32 x i16> %shift }