1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 10:42:39 +01:00

[AArch64] Add worst case shuffle costs

This adds some missing single-source shuffle costs for AArch64, for i16
and i8 vectors. v4i16 is costed the same as v4i32, with a worst-case cost
of 3 coming from the perfect shuffle tables. The larger vector sizes expand
into a constant pool entry, plus a load (and adrp) and a tbl. I arbitrarily
chose 8 for the cost to be expensive but not too expensive.

Differential Revision: https://reviews.llvm.org/D106241
This commit is contained in:
David Green 2021-07-23 09:01:58 +01:00
parent 865c54f488
commit f41dff2733
7 changed files with 82 additions and 75 deletions

View File

@ -2124,13 +2124,20 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
{ TTI::SK_Select, MVT::v4f32, 2 }, // rev+trn (or similar).
{ TTI::SK_Select, MVT::v2f64, 1 }, // mov.
// PermuteSingleSrc shuffle kinds.
// TODO: handle vXi8/vXi16.
{ TTI::SK_PermuteSingleSrc, MVT::v2i32, 1 }, // mov.
{ TTI::SK_PermuteSingleSrc, MVT::v4i32, 3 }, // perfectshuffle worst case.
{ TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // mov.
{ TTI::SK_PermuteSingleSrc, MVT::v2f32, 1 }, // mov.
{ TTI::SK_PermuteSingleSrc, MVT::v4f32, 3 }, // perfectshuffle worst case.
{ TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // mov.
{ TTI::SK_PermuteSingleSrc, MVT::v4i16, 3 }, // perfectshuffle worst case.
{ TTI::SK_PermuteSingleSrc, MVT::v4f16, 3 }, // perfectshuffle worst case.
{ TTI::SK_PermuteSingleSrc, MVT::v4bf16, 3 }, // perfectshuffle worst case.
{ TTI::SK_PermuteSingleSrc, MVT::v8i16, 8 }, // constpool + load + tbl
{ TTI::SK_PermuteSingleSrc, MVT::v8f16, 8 }, // constpool + load + tbl
{ TTI::SK_PermuteSingleSrc, MVT::v8bf16, 8 }, // constpool + load + tbl
{ TTI::SK_PermuteSingleSrc, MVT::v8i8, 8 }, // constpool + load + tbl
{ TTI::SK_PermuteSingleSrc, MVT::v16i8, 8 }, // constpool + load + tbl
// Reverse can be lowered with `rev`.
{ TTI::SK_Reverse, MVT::v2i32, 1 }, // mov.
{ TTI::SK_Reverse, MVT::v4i32, 2 }, // REV64; EXT

View File

@ -12,7 +12,7 @@ define void @reduce() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 181 for instruction: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 362 for instruction: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1i8 = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V3i8 = call i8 @llvm.vector.reduce.and.v3i8(<3 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V3i8 = call i8 @llvm.vector.reduce.and.v3i8(<3 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i8 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16i8 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef)

View File

@ -4,16 +4,16 @@
define void @reduce_umin() {
; CHECK-LABEL: 'reduce_umin'
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1i8 = call i8 @llvm.vector.reduce.umin.v1i8(<1 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V3i8 = call i8 @llvm.vector.reduce.umin.v3i8(<3 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4i8 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %V8i8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 608 for instruction: %V16i8 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 760 for instruction: %V32i8 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1064 for instruction: %V64i8 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V3i8 = call i8 @llvm.vector.reduce.umin.v3i8(<3 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4i8 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V8i8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %V16i8 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 432 for instruction: %V32i8 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 736 for instruction: %V64i8 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4i16 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %V8i16 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V16i16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4i16 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V8i16 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %V16i16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i32 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4i32 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V8i32 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
@ -43,16 +43,16 @@ define void @reduce_umin() {
define void @reduce_umax() {
; CHECK-LABEL: 'reduce_umax'
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1i8 = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V3i8 = call i8 @llvm.vector.reduce.umax.v3i8(<3 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4i8 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %V8i8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 608 for instruction: %V16i8 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 760 for instruction: %V32i8 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1064 for instruction: %V64i8 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V3i8 = call i8 @llvm.vector.reduce.umax.v3i8(<3 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4i8 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V8i8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %V16i8 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 432 for instruction: %V32i8 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 736 for instruction: %V64i8 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4i16 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %V8i16 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V16i16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4i16 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V8i16 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %V16i16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i32 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4i32 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V8i32 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
@ -82,16 +82,16 @@ define void @reduce_umax() {
define void @reduce_smin() {
; CHECK-LABEL: 'reduce_smin'
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1i8 = call i8 @llvm.vector.reduce.smin.v1i8(<1 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V3i8 = call i8 @llvm.vector.reduce.smin.v3i8(<3 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4i8 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %V8i8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 608 for instruction: %V16i8 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 760 for instruction: %V32i8 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1064 for instruction: %V64i8 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V3i8 = call i8 @llvm.vector.reduce.smin.v3i8(<3 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4i8 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V8i8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %V16i8 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 432 for instruction: %V32i8 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 736 for instruction: %V64i8 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4i16 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %V8i16 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V16i16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4i16 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V8i16 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %V16i16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i32 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4i32 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V8i32 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
@ -121,16 +121,16 @@ define void @reduce_smin() {
define void @reduce_smax() {
; CHECK-LABEL: 'reduce_smax'
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1i8 = call i8 @llvm.vector.reduce.smax.v1i8(<1 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V3i8 = call i8 @llvm.vector.reduce.smax.v3i8(<3 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4i8 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %V8i8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 608 for instruction: %V16i8 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 760 for instruction: %V32i8 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1064 for instruction: %V64i8 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V3i8 = call i8 @llvm.vector.reduce.smax.v3i8(<3 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4i8 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V8i8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %V16i8 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 432 for instruction: %V32i8 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 736 for instruction: %V64i8 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4i16 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %V8i16 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V16i16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4i16 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V8i16 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %V16i16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i32 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4i32 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V8i32 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
@ -159,10 +159,10 @@ define void @reduce_smax() {
define void @reduce_fmin() {
; CHECK-LABEL: 'reduce_fmin'
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 300 for instruction: %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 400 for instruction: %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 298 for instruction: %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2f32 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
@ -184,10 +184,10 @@ define void @reduce_fmin() {
define void @reduce_fmax() {
; CHECK-LABEL: 'reduce_fmax'
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 300 for instruction: %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 400 for instruction: %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 298 for instruction: %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2f32 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)

View File

@ -12,7 +12,7 @@ define void @reduce() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 181 for instruction: %V64 = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 362 for instruction: %V128 = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1i8 = call i8 @llvm.vector.reduce.or.v1i8(<1 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V3i8 = call i8 @llvm.vector.reduce.or.v3i8(<3 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V3i8 = call i8 @llvm.vector.reduce.or.v3i8(<3 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i8 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16i8 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef)

View File

@ -5,14 +5,14 @@ define void @reduce() {
; CHECK-LABEL: 'reduce'
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 129 for instruction: %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 364 for instruction: %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 455 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 637 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1001 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 309 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 673 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1i8 = call i8 @llvm.vector.reduce.xor.v1i8(<1 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V3i8 = call i8 @llvm.vector.reduce.xor.v3i8(<3 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V3i8 = call i8 @llvm.vector.reduce.xor.v3i8(<3 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i8 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16i8 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef)

View File

@ -4,12 +4,12 @@
define void @shuffle() {
; CHECK-LABEL: 'shuffle'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
; CHECK-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>

View File

@ -48,7 +48,7 @@ define i32 @add.i32.v4i32(<4 x i32> %v) {
define i8 @umin.i8.v8i8(<8 x i8> %v) {
; COST-LABEL: 'umin.i8.v8i8'
; COST-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %v)
; COST-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %r = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %v)
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r
;
%r = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %v)
@ -57,7 +57,7 @@ define i8 @umin.i8.v8i8(<8 x i8> %v) {
define i8 @umin.i8.v16i8(<16 x i8> %v) {
; COST-LABEL: 'umin.i8.v16i8'
; COST-NEXT: Cost Model: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %v)
; COST-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %r = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %v)
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r
;
%r = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %v)
@ -66,7 +66,7 @@ define i8 @umin.i8.v16i8(<16 x i8> %v) {
define i16 @umin.i16.v4i16(<4 x i16> %v) {
; COST-LABEL: 'umin.i16.v4i16'
; COST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> %v)
; COST-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %r = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> %v)
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
;
%r = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> %v)
@ -75,7 +75,7 @@ define i16 @umin.i16.v4i16(<4 x i16> %v) {
define i16 @umin.i16.v8i16(<8 x i16> %v) {
; COST-LABEL: 'umin.i16.v8i16'
; COST-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %v)
; COST-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %v)
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
;
%r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %v)
@ -93,7 +93,7 @@ define i32 @umin.i32.v4i32(<4 x i32> %v) {
define i8 @umax.i8.v8i8(<8 x i8> %v) {
; COST-LABEL: 'umax.i8.v8i8'
; COST-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> %v)
; COST-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %r = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> %v)
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r
;
%r = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> %v)
@ -102,7 +102,7 @@ define i8 @umax.i8.v8i8(<8 x i8> %v) {
define i8 @umax.i8.v16i8(<16 x i8> %v) {
; COST-LABEL: 'umax.i8.v16i8'
; COST-NEXT: Cost Model: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %v)
; COST-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %r = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %v)
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r
;
%r = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %v)
@ -111,7 +111,7 @@ define i8 @umax.i8.v16i8(<16 x i8> %v) {
define i16 @umax.i16.v4i16(<4 x i16> %v) {
; COST-LABEL: 'umax.i16.v4i16'
; COST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> %v)
; COST-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %r = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> %v)
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
;
%r = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> %v)
@ -120,7 +120,7 @@ define i16 @umax.i16.v4i16(<4 x i16> %v) {
define i16 @umax.i16.v8i16(<8 x i16> %v) {
; COST-LABEL: 'umax.i16.v8i16'
; COST-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %v)
; COST-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %r = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %v)
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
;
%r = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %v)
@ -138,7 +138,7 @@ define i32 @umax.i32.v4i32(<4 x i32> %v) {
define i8 @smin.i8.v8i8(<8 x i8> %v) {
; COST-LABEL: 'smin.i8.v8i8'
; COST-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %v)
; COST-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %r = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %v)
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r
;
%r = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %v)
@ -147,7 +147,7 @@ define i8 @smin.i8.v8i8(<8 x i8> %v) {
define i8 @smin.i8.v16i8(<16 x i8> %v) {
; COST-LABEL: 'smin.i8.v16i8'
; COST-NEXT: Cost Model: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %v)
; COST-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %r = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %v)
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r
;
%r = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %v)
@ -156,7 +156,7 @@ define i8 @smin.i8.v16i8(<16 x i8> %v) {
define i16 @smin.i16.v4i16(<4 x i16> %v) {
; COST-LABEL: 'smin.i16.v4i16'
; COST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %v)
; COST-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %r = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %v)
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
;
%r = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %v)
@ -165,7 +165,7 @@ define i16 @smin.i16.v4i16(<4 x i16> %v) {
define i16 @smin.i16.v8i16(<8 x i16> %v) {
; COST-LABEL: 'smin.i16.v8i16'
; COST-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %v)
; COST-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %r = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %v)
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
;
%r = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %v)
@ -183,7 +183,7 @@ define i32 @smin.i32.v4i32(<4 x i32> %v) {
define i8 @smax.i8.v8i8(<8 x i8> %v) {
; COST-LABEL: 'smax.i8.v8i8'
; COST-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %v)
; COST-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %r = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %v)
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r
;
%r = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %v)
@ -192,7 +192,7 @@ define i8 @smax.i8.v8i8(<8 x i8> %v) {
define i8 @smax.i8.v16i8(<16 x i8> %v) {
; COST-LABEL: 'smax.i8.v16i8'
; COST-NEXT: Cost Model: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %v)
; COST-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %r = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %v)
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r
;
%r = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %v)
@ -201,7 +201,7 @@ define i8 @smax.i8.v16i8(<16 x i8> %v) {
define i16 @smax.i16.v4i16(<4 x i16> %v) {
; COST-LABEL: 'smax.i16.v4i16'
; COST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %v)
; COST-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %r = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %v)
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
;
%r = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %v)
@ -210,7 +210,7 @@ define i16 @smax.i16.v4i16(<4 x i16> %v) {
define i16 @smax.i16.v8i16(<8 x i16> %v) {
; COST-LABEL: 'smax.i16.v8i16'
; COST-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %v)
; COST-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %r = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %v)
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
;
%r = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %v)