1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-20 19:42:54 +02:00

[CostModel][X86] teach TTI calculate cost of chain of vector inserts/extracts more precisely and correctly:In each 128-lane, if there is at least one index is demanded and not all indices are demanded...

In each 128-lane, if there is at least one index is demanded and not all
indices are demanded and this 128-lane is not the first 128-lane of the
legalized-vector, then this 128-lane needs a extracti128;
If in each 128-lane, there is at least one index is demanded, this 128-lane
needs a inserti128.

The following cases will help you build a better understanding:
Assume we insert several elements into a v8i32 vector in avx2,
Case#1: inserting into 1th index needs vpinsrd + inserti128
Case#2: inserting into 5th index needs extracti128 + vpinsrd +
inserti128
Case#3: inserting into 4,5,6,7 index needs 4*vpinsrd + inserti128.

Reviewed By: pengfei, RKSimon

Differential Revision: https://reviews.llvm.org/D89767
This commit is contained in:
Bing1 Yu 2020-10-27 10:34:07 +08:00
parent b58f72c901
commit 450c5e89bb
7 changed files with 86 additions and 62 deletions

View File

@ -3074,8 +3074,32 @@ unsigned X86TTIImpl::getScalarizationOverhead(VectorType *Ty,
Cost +=
BaseT::getScalarizationOverhead(Ty, DemandedElts, Insert, false);
} else {
unsigned NumSubVecs = LT.second.getSizeInBits() / 128;
Cost += (PowerOf2Ceil(NumSubVecs) - 1) * LT.first;
// In each 128-lane, if at least one index is demanded but not all
// indices are demanded and this 128-lane is not the first 128-lane of
// the legalized-vector, then this 128-lane needs a extracti128; If in
// each 128-lane, there is at least one demanded index, this 128-lane
// needs a inserti128.
// The following cases will help you build a better understanding:
// Assume we insert several elements into a v8i32 vector in avx2,
// Case#1: inserting into 1th index needs vpinsrd + inserti128.
// Case#2: inserting into 5th index needs extracti128 + vpinsrd +
// inserti128.
// Case#3: inserting into 4,5,6,7 index needs 4*vpinsrd + inserti128.
unsigned Num128Lanes = LT.second.getSizeInBits() / 128 * LT.first;
unsigned NumElts = LT.second.getVectorNumElements() * LT.first;
APInt WidenedDemandedElts = DemandedElts.zextOrSelf(NumElts);
unsigned Scale = NumElts / Num128Lanes;
// We iterate each 128-lane, and check if we need a
// extracti128/inserti128 for this 128-lane.
for (unsigned I = 0; I < NumElts; I += Scale) {
APInt Mask = WidenedDemandedElts.getBitsSet(NumElts, I, I + Scale);
APInt MaskedDE = Mask & WidenedDemandedElts;
unsigned Population = MaskedDE.countPopulation();
Cost += (Population > 0 && Population != Scale &&
I % LT.second.getVectorNumElements() != 0);
Cost += Population > 0;
}
Cost += DemandedElts.countPopulation();
// For vXf32 cases, insertion into the 0'th index in each v4f32

View File

@ -707,8 +707,8 @@ define i32 @frem(i32 %arg) {
; AVX-LABEL: 'frem'
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef
; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = frem <4 x float> undef, undef
; AVX-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8F32 = frem <8 x float> undef, undef
; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16F32 = frem <16 x float> undef, undef
; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8F32 = frem <8 x float> undef, undef
; AVX-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V16F32 = frem <16 x float> undef, undef
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef
; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef
; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F64 = frem <4 x double> undef, undef
@ -718,8 +718,8 @@ define i32 @frem(i32 %arg) {
; AVX512-LABEL: 'frem'
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef
; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = frem <4 x float> undef, undef
; AVX512-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8F32 = frem <8 x float> undef, undef
; AVX512-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V16F32 = frem <16 x float> undef, undef
; AVX512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8F32 = frem <8 x float> undef, undef
; AVX512-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16F32 = frem <16 x float> undef, undef
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef
; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef
; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F64 = frem <4 x double> undef, undef
@ -1093,8 +1093,8 @@ define i32 @fma(i32 %arg) {
; AVX-LABEL: 'fma'
; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)

View File

@ -28,15 +28,15 @@ define i32 @fptosi_double_i64(i32 %arg) {
; AVX-LABEL: 'fptosi_double_i64'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64
; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'fptosi_double_i64'
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'fptosi_double_i64'
@ -181,17 +181,17 @@ define i32 @fptosi_float_i64(i32 %arg) {
; AVX-LABEL: 'fptosi_float_i64'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64
; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'fptosi_float_i64'
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'fptosi_float_i64'

View File

@ -28,15 +28,15 @@ define i32 @fptoui_double_i64(i32 %arg) {
; AVX-LABEL: 'fptoui_double_i64'
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64
; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'fptoui_double_i64'
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptoui double undef to i64
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'fptoui_double_i64'
@ -195,17 +195,17 @@ define i32 @fptoui_float_i64(i32 %arg) {
; AVX-LABEL: 'fptoui_float_i64'
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64
; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'fptoui_float_i64'
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptoui float undef to i64
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'fptoui_float_i64'

View File

@ -129,8 +129,8 @@ define i32 @loads(i32 %arg) {
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 4
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 4
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 4
; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4
; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4
; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4
; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'loads'
@ -149,8 +149,8 @@ define i32 @loads(i32 %arg) {
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 4
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 4
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 4
; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4
; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4
; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4
; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
load i8, i8* undef, align 4

View File

@ -80,12 +80,12 @@ define i32 @masked_load() {
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 290 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 292 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
@ -107,12 +107,12 @@ define i32 @masked_load() {
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 147 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 307 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 308 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
@ -798,24 +798,24 @@ define i32 @masked_expandload() {
; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
@ -825,24 +825,24 @@ define i32 @masked_expandload() {
; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0

View File

@ -253,8 +253,8 @@ define i32 @sitofp_i64_float() {
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = sitofp i64 undef to float
; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'sitofp_i64_float'