mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
[CostModel][X86] Fixed AVX1/AVX512 sdiv/udiv uniformconst costs for 256/512 bit integer vectors
We weren't checking the uniform-constant cost tables before the general cost tables, resulting in very high cost estimates for sdiv/udiv by a uniform constant. llvm-svn: 284755
This commit is contained in:
parent
4cd983fcee
commit
6773ac2510
@ -140,6 +140,30 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
return Cost;
|
||||
}
|
||||
|
||||
static const CostTblEntry AVX512BWUniformConstCostTable[] = {
|
||||
{ ISD::SDIV, MVT::v32i16, 6 }, // vpmulhw sequence
|
||||
{ ISD::UDIV, MVT::v32i16, 6 }, // vpmulhuw sequence
|
||||
};
|
||||
|
||||
if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
|
||||
ST->hasBWI()) {
|
||||
if (const auto *Entry = CostTableLookup(AVX512BWUniformConstCostTable, ISD,
|
||||
LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
}
|
||||
|
||||
static const CostTblEntry AVX512UniformConstCostTable[] = {
|
||||
{ ISD::SDIV, MVT::v16i32, 15 }, // vpmuldq sequence
|
||||
{ ISD::UDIV, MVT::v16i32, 15 }, // vpmuludq sequence
|
||||
};
|
||||
|
||||
if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
|
||||
ST->hasAVX512()) {
|
||||
if (const auto *Entry = CostTableLookup(AVX512UniformConstCostTable, ISD,
|
||||
LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
}
|
||||
|
||||
static const CostTblEntry AVX2UniformConstCostTable[] = {
|
||||
{ ISD::SRA, MVT::v4i64, 4 }, // 2 x psrad + shuffle.
|
||||
|
||||
@ -156,6 +180,30 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
return LT.first * Entry->Cost;
|
||||
}
|
||||
|
||||
static const CostTblEntry SSE2UniformConstCostTable[] = {
|
||||
{ ISD::SDIV, MVT::v16i16, 12 }, // pmulhw sequence
|
||||
{ ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence
|
||||
{ ISD::UDIV, MVT::v16i16, 12 }, // pmulhuw sequence
|
||||
{ ISD::UDIV, MVT::v8i16, 6 }, // pmulhuw sequence
|
||||
{ ISD::SDIV, MVT::v8i32, 38 }, // pmuludq sequence
|
||||
{ ISD::SDIV, MVT::v4i32, 19 }, // pmuludq sequence
|
||||
{ ISD::UDIV, MVT::v8i32, 30 }, // pmuludq sequence
|
||||
{ ISD::UDIV, MVT::v4i32, 15 }, // pmuludq sequence
|
||||
};
|
||||
|
||||
if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
|
||||
ST->hasSSE2()) {
|
||||
// pmuldq sequence.
|
||||
if (ISD == ISD::SDIV && LT.second == MVT::v8i32 && ST->hasAVX())
|
||||
return LT.first * 30;
|
||||
if (ISD == ISD::SDIV && LT.second == MVT::v4i32 && ST->hasSSE41())
|
||||
return LT.first * 15;
|
||||
|
||||
if (const auto *Entry = CostTableLookup(SSE2UniformConstCostTable, ISD,
|
||||
LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
}
|
||||
|
||||
static const CostTblEntry AVX512BWCostTable[] = {
|
||||
// Vectorizing division is a bad idea. See the SSE2 table for more comments.
|
||||
{ ISD::SDIV, MVT::v64i8, 64*20 },
|
||||
@ -291,15 +339,6 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
return LT.first * Entry->Cost;
|
||||
}
|
||||
|
||||
static const CostTblEntry
|
||||
SSE2UniformConstCostTable[] = {
|
||||
// Constant splats are cheaper for the following instructions.
|
||||
{ ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence
|
||||
{ ISD::UDIV, MVT::v8i16, 6 }, // pmulhuw sequence
|
||||
{ ISD::SDIV, MVT::v4i32, 19 }, // pmuludq sequence
|
||||
{ ISD::UDIV, MVT::v4i32, 15 }, // pmuludq sequence
|
||||
};
|
||||
|
||||
static const CostTblEntry
|
||||
SSE2UniformCostTable[] = {
|
||||
// Uniform splats are cheaper for the following instructions.
|
||||
@ -334,14 +373,6 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
if (ST->hasSSE2() &&
|
||||
((Op2Info == TargetTransformInfo::OK_UniformConstantValue) ||
|
||||
(Op2Info == TargetTransformInfo::OK_UniformValue))) {
|
||||
if (Op2Info == TargetTransformInfo::OK_UniformConstantValue) {
|
||||
// pmuldq sequence.
|
||||
if (ISD == ISD::SDIV && LT.second == MVT::v4i32 && ST->hasSSE41())
|
||||
return LT.first * 15;
|
||||
if (const auto *Entry =
|
||||
CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
}
|
||||
if (const auto *Entry =
|
||||
CostTableLookup(SSE2UniformCostTable, ISD, LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
|
@ -113,8 +113,8 @@ define i32 @udiv() {
|
||||
%V64i8 = udiv <64 x i8> undef, undef
|
||||
|
||||
ret i32 undef
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
; CHECK-LABEL: 'sdiv_uniformconst'
|
||||
define i32 @sdiv_uniformconst() {
|
||||
; CHECK: cost of 1 {{.*}} %I64 = sdiv
|
||||
@ -139,17 +139,16 @@ define i32 @sdiv_uniformconst() {
|
||||
; SSE2: cost of 38 {{.*}} %V8i32 = sdiv
|
||||
; SSSE3: cost of 38 {{.*}} %V8i32 = sdiv
|
||||
; SSE42: cost of 30 {{.*}} %V8i32 = sdiv
|
||||
; AVX1: cost of 160 {{.*}} %V8i32 = sdiv
|
||||
; AVX1: cost of 30 {{.*}} %V8i32 = sdiv
|
||||
; AVX2: cost of 15 {{.*}} %V8i32 = sdiv
|
||||
; AVX512: cost of 15 {{.*}} %V8i32 = sdiv
|
||||
%V8i32 = sdiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
|
||||
; SSE2: cost of 76 {{.*}} %V16i32 = sdiv
|
||||
; SSSE3: cost of 76 {{.*}} %V16i32 = sdiv
|
||||
; SSE42: cost of 60 {{.*}} %V16i32 = sdiv
|
||||
; AVX1: cost of 320 {{.*}} %V16i32 = sdiv
|
||||
; AVX1: cost of 60 {{.*}} %V16i32 = sdiv
|
||||
; AVX2: cost of 30 {{.*}} %V16i32 = sdiv
|
||||
; AVX512F: cost of 48 {{.*}} %V16i32 = sdiv
|
||||
; AVX512BW: cost of 320 {{.*}} %V16i32 = sdiv
|
||||
; AVX512: cost of 15 {{.*}} %V16i32 = sdiv
|
||||
%V16i32 = sdiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
|
||||
|
||||
; CHECK: cost of 1 {{.*}} %I16 = sdiv
|
||||
@ -158,15 +157,15 @@ define i32 @sdiv_uniformconst() {
|
||||
; AVX: cost of 6 {{.*}} %V8i16 = sdiv
|
||||
%V8i16 = sdiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
|
||||
; SSE: cost of 12 {{.*}} %V16i16 = sdiv
|
||||
; AVX1: cost of 320 {{.*}} %V16i16 = sdiv
|
||||
; AVX1: cost of 12 {{.*}} %V16i16 = sdiv
|
||||
; AVX2: cost of 6 {{.*}} %V16i16 = sdiv
|
||||
; AVX512: cost of 6 {{.*}} %V16i16 = sdiv
|
||||
%V16i16 = sdiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
|
||||
; SSE: cost of 24 {{.*}} %V32i16 = sdiv
|
||||
; AVX1: cost of 640 {{.*}} %V32i16 = sdiv
|
||||
; AVX1: cost of 24 {{.*}} %V32i16 = sdiv
|
||||
; AVX2: cost of 12 {{.*}} %V32i16 = sdiv
|
||||
; AVX512F: cost of 12 {{.*}} %V32i16 = sdiv
|
||||
; AVX512BW: cost of 640 {{.*}} %V32i16 = sdiv
|
||||
; AVX512BW: cost of 6 {{.*}} %V32i16 = sdiv
|
||||
%V32i16 = sdiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
|
||||
|
||||
; CHECK: cost of 1 {{.*}} %I8 = sdiv
|
||||
@ -182,8 +181,8 @@ define i32 @sdiv_uniformconst() {
|
||||
%V64i8 = sdiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
|
||||
|
||||
ret i32 undef
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
; CHECK-LABEL: 'udiv_uniformconst'
|
||||
define i32 @udiv_uniformconst() {
|
||||
; CHECK: cost of 1 {{.*}} %I64 = udiv
|
||||
@ -204,15 +203,14 @@ define i32 @udiv_uniformconst() {
|
||||
; AVX: cost of 15 {{.*}} %V4i32 = udiv
|
||||
%V4i32 = udiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
|
||||
; SSE: cost of 30 {{.*}} %V8i32 = udiv
|
||||
; AVX1: cost of 160 {{.*}} %V8i32 = udiv
|
||||
; AVX1: cost of 30 {{.*}} %V8i32 = udiv
|
||||
; AVX2: cost of 15 {{.*}} %V8i32 = udiv
|
||||
; AVX512: cost of 15 {{.*}} %V8i32 = udiv
|
||||
%V8i32 = udiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
|
||||
; SSE: cost of 60 {{.*}} %V16i32 = udiv
|
||||
; AVX1: cost of 320 {{.*}} %V16i32 = udiv
|
||||
; AVX1: cost of 60 {{.*}} %V16i32 = udiv
|
||||
; AVX2: cost of 30 {{.*}} %V16i32 = udiv
|
||||
; AVX512F: cost of 48 {{.*}} %V16i32 = udiv
|
||||
; AVX512BW: cost of 320 {{.*}} %V16i32 = udiv
|
||||
; AVX512: cost of 15 {{.*}} %V16i32 = udiv
|
||||
%V16i32 = udiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
|
||||
|
||||
; CHECK: cost of 1 {{.*}} %I16 = udiv
|
||||
@ -221,15 +219,15 @@ define i32 @udiv_uniformconst() {
|
||||
; AVX: cost of 6 {{.*}} %V8i16 = udiv
|
||||
%V8i16 = udiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
|
||||
; SSE: cost of 12 {{.*}} %V16i16 = udiv
|
||||
; AVX1: cost of 320 {{.*}} %V16i16 = udiv
|
||||
; AVX1: cost of 12 {{.*}} %V16i16 = udiv
|
||||
; AVX2: cost of 6 {{.*}} %V16i16 = udiv
|
||||
; AVX512: cost of 6 {{.*}} %V16i16 = udiv
|
||||
%V16i16 = udiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
|
||||
; SSE: cost of 24 {{.*}} %V32i16 = udiv
|
||||
; AVX1: cost of 640 {{.*}} %V32i16 = udiv
|
||||
; AVX1: cost of 24 {{.*}} %V32i16 = udiv
|
||||
; AVX2: cost of 12 {{.*}} %V32i16 = udiv
|
||||
; AVX512F: cost of 12 {{.*}} %V32i16 = udiv
|
||||
; AVX512BW: cost of 640 {{.*}} %V32i16 = udiv
|
||||
; AVX512BW: cost of 6 {{.*}} %V32i16 = udiv
|
||||
%V32i16 = udiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
|
||||
|
||||
; CHECK: cost of 1 {{.*}} %I8 = udiv
|
||||
@ -245,8 +243,8 @@ define i32 @udiv_uniformconst() {
|
||||
%V64i8 = udiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
|
||||
|
||||
ret i32 undef
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
; CHECK-LABEL: 'sdiv_uniformconstpow2'
|
||||
define i32 @sdiv_uniformconstpow2() {
|
||||
; CHECK: cost of 1 {{.*}} %I64 = sdiv
|
||||
@ -271,17 +269,16 @@ define i32 @sdiv_uniformconstpow2() {
|
||||
; SSE2: cost of 38 {{.*}} %V8i32 = sdiv
|
||||
; SSSE3: cost of 38 {{.*}} %V8i32 = sdiv
|
||||
; SSE42: cost of 30 {{.*}} %V8i32 = sdiv
|
||||
; AVX1: cost of 160 {{.*}} %V8i32 = sdiv
|
||||
; AVX1: cost of 30 {{.*}} %V8i32 = sdiv
|
||||
; AVX2: cost of 15 {{.*}} %V8i32 = sdiv
|
||||
; AVX512: cost of 15 {{.*}} %V8i32 = sdiv
|
||||
%V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
|
||||
; SSE2: cost of 76 {{.*}} %V16i32 = sdiv
|
||||
; SSSE3: cost of 76 {{.*}} %V16i32 = sdiv
|
||||
; SSE42: cost of 60 {{.*}} %V16i32 = sdiv
|
||||
; AVX1: cost of 320 {{.*}} %V16i32 = sdiv
|
||||
; AVX1: cost of 60 {{.*}} %V16i32 = sdiv
|
||||
; AVX2: cost of 30 {{.*}} %V16i32 = sdiv
|
||||
; AVX512F: cost of 48 {{.*}} %V16i32 = sdiv
|
||||
; AVX512BW: cost of 320 {{.*}} %V16i32 = sdiv
|
||||
; AVX512: cost of 15 {{.*}} %V16i32 = sdiv
|
||||
%V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
|
||||
|
||||
; CHECK: cost of 1 {{.*}} %I16 = sdiv
|
||||
@ -290,15 +287,15 @@ define i32 @sdiv_uniformconstpow2() {
|
||||
; AVX: cost of 6 {{.*}} %V8i16 = sdiv
|
||||
%V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
|
||||
; SSE: cost of 12 {{.*}} %V16i16 = sdiv
|
||||
; AVX1: cost of 320 {{.*}} %V16i16 = sdiv
|
||||
; AVX1: cost of 12 {{.*}} %V16i16 = sdiv
|
||||
; AVX2: cost of 6 {{.*}} %V16i16 = sdiv
|
||||
; AVX512: cost of 6 {{.*}} %V16i16 = sdiv
|
||||
%V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
|
||||
; SSE: cost of 24 {{.*}} %V32i16 = sdiv
|
||||
; AVX1: cost of 640 {{.*}} %V32i16 = sdiv
|
||||
; AVX1: cost of 24 {{.*}} %V32i16 = sdiv
|
||||
; AVX2: cost of 12 {{.*}} %V32i16 = sdiv
|
||||
; AVX512F: cost of 12 {{.*}} %V32i16 = sdiv
|
||||
; AVX512BW: cost of 640 {{.*}} %V32i16 = sdiv
|
||||
; AVX512BW: cost of 6 {{.*}} %V32i16 = sdiv
|
||||
%V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
|
||||
|
||||
; CHECK: cost of 1 {{.*}} %I8 = sdiv
|
||||
@ -315,7 +312,7 @@ define i32 @sdiv_uniformconstpow2() {
|
||||
|
||||
ret i32 undef
|
||||
}
|
||||
|
||||
|
||||
; CHECK-LABEL: 'udiv_uniformconstpow2'
|
||||
define i32 @udiv_uniformconstpow2() {
|
||||
; CHECK: cost of 1 {{.*}} %I64 = udiv
|
||||
@ -336,15 +333,14 @@ define i32 @udiv_uniformconstpow2() {
|
||||
; AVX: cost of 15 {{.*}} %V4i32 = udiv
|
||||
%V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
|
||||
; SSE: cost of 30 {{.*}} %V8i32 = udiv
|
||||
; AVX1: cost of 160 {{.*}} %V8i32 = udiv
|
||||
; AVX1: cost of 30 {{.*}} %V8i32 = udiv
|
||||
; AVX2: cost of 15 {{.*}} %V8i32 = udiv
|
||||
; AVX512: cost of 15 {{.*}} %V8i32 = udiv
|
||||
%V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
|
||||
; SSE: cost of 60 {{.*}} %V16i32 = udiv
|
||||
; AVX1: cost of 320 {{.*}} %V16i32 = udiv
|
||||
; AVX1: cost of 60 {{.*}} %V16i32 = udiv
|
||||
; AVX2: cost of 30 {{.*}} %V16i32 = udiv
|
||||
; AVX512F: cost of 48 {{.*}} %V16i32 = udiv
|
||||
; AVX512BW: cost of 320 {{.*}} %V16i32 = udiv
|
||||
; AVX512: cost of 15 {{.*}} %V16i32 = udiv
|
||||
%V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
|
||||
|
||||
; CHECK: cost of 1 {{.*}} %I16 = udiv
|
||||
@ -353,15 +349,15 @@ define i32 @udiv_uniformconstpow2() {
|
||||
; AVX: cost of 6 {{.*}} %V8i16 = udiv
|
||||
%V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
|
||||
; SSE: cost of 12 {{.*}} %V16i16 = udiv
|
||||
; AVX1: cost of 320 {{.*}} %V16i16 = udiv
|
||||
; AVX1: cost of 12 {{.*}} %V16i16 = udiv
|
||||
; AVX2: cost of 6 {{.*}} %V16i16 = udiv
|
||||
; AVX512: cost of 6 {{.*}} %V16i16 = udiv
|
||||
%V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
|
||||
; SSE: cost of 24 {{.*}} %V32i16 = udiv
|
||||
; AVX1: cost of 640 {{.*}} %V32i16 = udiv
|
||||
; AVX1: cost of 24 {{.*}} %V32i16 = udiv
|
||||
; AVX2: cost of 12 {{.*}} %V32i16 = udiv
|
||||
; AVX512F: cost of 12 {{.*}} %V32i16 = udiv
|
||||
; AVX512BW: cost of 640 {{.*}} %V32i16 = udiv
|
||||
; AVX512BW: cost of 6 {{.*}} %V32i16 = udiv
|
||||
%V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
|
||||
|
||||
; CHECK: cost of 1 {{.*}} %I8 = udiv
|
||||
@ -377,4 +373,4 @@ define i32 @udiv_uniformconstpow2() {
|
||||
%V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
|
||||
|
||||
ret i32 undef
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user