1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-24 11:42:57 +01:00

[TTI, AArch64] Allow the cost model analysis to test vector reduce intrinsics

This patch considers the experimental vector reduce intrinsics in the default
implementation of getIntrinsicInstrCost. The cost of these intrinsics is
computed with getArithmeticReductionCost and getMinMaxReductionCost. This patch
also adds a test case for AArch64 that indicates the costs we currently compute
for vector reduce intrinsics. These costs are inaccurate and will be updated in
a follow-on patch.

Differential Revision: https://reviews.llvm.org/D44489

llvm-svn: 327698
This commit is contained in:
Matthew Simpson 2018-03-16 10:00:30 +00:00
parent d21c6cef5d
commit a4d65d9cf6
2 changed files with 326 additions and 0 deletions

View File

@ -916,6 +916,20 @@ public:
RetTy, Args[0], VarMask,
Alignment);
}
case Intrinsic::experimental_vector_reduce_add:
case Intrinsic::experimental_vector_reduce_mul:
case Intrinsic::experimental_vector_reduce_and:
case Intrinsic::experimental_vector_reduce_or:
case Intrinsic::experimental_vector_reduce_xor:
case Intrinsic::experimental_vector_reduce_fadd:
case Intrinsic::experimental_vector_reduce_fmul:
case Intrinsic::experimental_vector_reduce_smax:
case Intrinsic::experimental_vector_reduce_smin:
case Intrinsic::experimental_vector_reduce_fmax:
case Intrinsic::experimental_vector_reduce_fmin:
case Intrinsic::experimental_vector_reduce_umax:
case Intrinsic::experimental_vector_reduce_umin:
return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF);
}
}
@ -1039,6 +1053,39 @@ public:
case Intrinsic::masked_load:
return static_cast<T *>(this)
->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
case Intrinsic::experimental_vector_reduce_add:
return static_cast<T *>(this)->getArithmeticReductionCost(
Instruction::Add, Tys[0], /*IsPairwiseForm=*/false);
case Intrinsic::experimental_vector_reduce_mul:
return static_cast<T *>(this)->getArithmeticReductionCost(
Instruction::Mul, Tys[0], /*IsPairwiseForm=*/false);
case Intrinsic::experimental_vector_reduce_and:
return static_cast<T *>(this)->getArithmeticReductionCost(
Instruction::And, Tys[0], /*IsPairwiseForm=*/false);
case Intrinsic::experimental_vector_reduce_or:
return static_cast<T *>(this)->getArithmeticReductionCost(
Instruction::Or, Tys[0], /*IsPairwiseForm=*/false);
case Intrinsic::experimental_vector_reduce_xor:
return static_cast<T *>(this)->getArithmeticReductionCost(
Instruction::Xor, Tys[0], /*IsPairwiseForm=*/false);
case Intrinsic::experimental_vector_reduce_fadd:
return static_cast<T *>(this)->getArithmeticReductionCost(
Instruction::FAdd, Tys[0], /*IsPairwiseForm=*/false);
case Intrinsic::experimental_vector_reduce_fmul:
return static_cast<T *>(this)->getArithmeticReductionCost(
Instruction::FMul, Tys[0], /*IsPairwiseForm=*/false);
case Intrinsic::experimental_vector_reduce_smax:
case Intrinsic::experimental_vector_reduce_smin:
case Intrinsic::experimental_vector_reduce_fmax:
case Intrinsic::experimental_vector_reduce_fmin:
return static_cast<T *>(this)->getMinMaxReductionCost(
Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
/*IsSigned=*/true);
case Intrinsic::experimental_vector_reduce_umax:
case Intrinsic::experimental_vector_reduce_umin:
return static_cast<T *>(this)->getMinMaxReductionCost(
Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
/*IsSigned=*/false);
case Intrinsic::ctpop:
ISDs.push_back(ISD::CTPOP);
// In case of legalization use TCC_Expensive. This is cheaper than a

View File

@ -0,0 +1,279 @@
; RUN: opt < %s -mtriple=aarch64--linux-gnu -cost-model -analyze | FileCheck %s --check-prefix=COST
; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=CODE
; Integer add reductions.
;
; Each function below reduces one vector argument to a scalar with the
; experimental vector-reduce add intrinsic. The paired directives check
; (a) the cost the cost-model analysis prints for the intrinsic call and
; (b) that llc's AArch64 output contains the matching ADDV instruction.
;
; NOTE(review): the expected costs record what the cost model currently
; reports; the commit introducing this test describes them as inaccurate and
; due to be updated, so treat them as a baseline rather than a target.

; <8 x i8> -> i8
; COST-LABEL: add.i8.v8i8
; COST: Found an estimated cost of 27 for instruction: %r = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> %v)
; CODE-LABEL: add.i8.v8i8
; CODE: addv b0, v0.8b
define i8 @add.i8.v8i8(<8 x i8> %v) {
%r = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> %v)
ret i8 %r
}
; <16 x i8> -> i8
; COST-LABEL: add.i8.v16i8
; COST: Found an estimated cost of 53 for instruction: %r = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> %v)
; CODE-LABEL: add.i8.v16i8
; CODE: addv b0, v0.16b
define i8 @add.i8.v16i8(<16 x i8> %v) {
%r = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> %v)
ret i8 %r
}
; <4 x i16> -> i16
; COST-LABEL: add.i16.v4i16
; COST: Found an estimated cost of 13 for instruction: %r = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> %v)
; CODE-LABEL: add.i16.v4i16
; CODE: addv h0, v0.4h
define i16 @add.i16.v4i16(<4 x i16> %v) {
%r = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> %v)
ret i16 %r
}
; <8 x i16> -> i16
; COST-LABEL: add.i16.v8i16
; COST: Found an estimated cost of 27 for instruction: %r = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> %v)
; CODE-LABEL: add.i16.v8i16
; CODE: addv h0, v0.8h
define i16 @add.i16.v8i16(<8 x i16> %v) {
%r = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> %v)
ret i16 %r
}
; <4 x i32> -> i32
; COST-LABEL: add.i32.v4i32
; COST: Found an estimated cost of 13 for instruction: %r = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> %v)
; CODE-LABEL: add.i32.v4i32
; CODE: addv s0, v0.4s
define i32 @add.i32.v4i32(<4 x i32> %v) {
%r = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> %v)
ret i32 %r
}
; Unsigned integer minimum reductions.
;
; Each function reduces one vector argument to a scalar with the experimental
; vector-reduce umin intrinsic. The paired directives check the cost printed
; by the cost-model analysis for the call and that llc's AArch64 output
; contains the matching UMINV instruction.
;
; NOTE(review): the expected costs are the values the cost model currently
; reports, not tuned figures; they are expected to change when the model is
; corrected.

; <8 x i8> -> i8
; COST-LABEL: umin.i8.v8i8
; COST: Found an estimated cost of 157 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> %v)
; CODE-LABEL: umin.i8.v8i8
; CODE: uminv b0, v0.8b
define i8 @umin.i8.v8i8(<8 x i8> %v) {
%r = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> %v)
ret i8 %r
}
; <16 x i8> -> i8
; COST-LABEL: umin.i8.v16i8
; COST: Found an estimated cost of 388 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> %v)
; CODE-LABEL: umin.i8.v16i8
; CODE: uminv b0, v0.16b
define i8 @umin.i8.v16i8(<16 x i8> %v) {
%r = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> %v)
ret i8 %r
}
; <4 x i16> -> i16
; COST-LABEL: umin.i16.v4i16
; COST: Found an estimated cost of 58 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> %v)
; CODE-LABEL: umin.i16.v4i16
; CODE: uminv h0, v0.4h
define i16 @umin.i16.v4i16(<4 x i16> %v) {
%r = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> %v)
ret i16 %r
}
; <8 x i16> -> i16
; COST-LABEL: umin.i16.v8i16
; COST: Found an estimated cost of 157 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> %v)
; CODE-LABEL: umin.i16.v8i16
; CODE: uminv h0, v0.8h
define i16 @umin.i16.v8i16(<8 x i16> %v) {
%r = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> %v)
ret i16 %r
}
; <4 x i32> -> i32
; COST-LABEL: umin.i32.v4i32
; COST: Found an estimated cost of 58 for instruction: %r = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> %v)
; CODE-LABEL: umin.i32.v4i32
; CODE: uminv s0, v0.4s
define i32 @umin.i32.v4i32(<4 x i32> %v) {
%r = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> %v)
ret i32 %r
}
; Unsigned integer maximum reductions.
;
; Each function reduces one vector argument to a scalar with the experimental
; vector-reduce umax intrinsic. The paired directives check the cost printed
; by the cost-model analysis for the call and that llc's AArch64 output
; contains the matching UMAXV instruction.
;
; NOTE(review): the expected costs are the values the cost model currently
; reports, not tuned figures; they are expected to change when the model is
; corrected.

; <8 x i8> -> i8
; COST-LABEL: umax.i8.v8i8
; COST: Found an estimated cost of 157 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> %v)
; CODE-LABEL: umax.i8.v8i8
; CODE: umaxv b0, v0.8b
define i8 @umax.i8.v8i8(<8 x i8> %v) {
%r = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> %v)
ret i8 %r
}
; <16 x i8> -> i8
; COST-LABEL: umax.i8.v16i8
; COST: Found an estimated cost of 388 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> %v)
; CODE-LABEL: umax.i8.v16i8
; CODE: umaxv b0, v0.16b
define i8 @umax.i8.v16i8(<16 x i8> %v) {
%r = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> %v)
ret i8 %r
}
; <4 x i16> -> i16
; COST-LABEL: umax.i16.v4i16
; COST: Found an estimated cost of 58 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> %v)
; CODE-LABEL: umax.i16.v4i16
; CODE: umaxv h0, v0.4h
define i16 @umax.i16.v4i16(<4 x i16> %v) {
%r = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> %v)
ret i16 %r
}
; <8 x i16> -> i16
; COST-LABEL: umax.i16.v8i16
; COST: Found an estimated cost of 157 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> %v)
; CODE-LABEL: umax.i16.v8i16
; CODE: umaxv h0, v0.8h
define i16 @umax.i16.v8i16(<8 x i16> %v) {
%r = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> %v)
ret i16 %r
}
; <4 x i32> -> i32
; COST-LABEL: umax.i32.v4i32
; COST: Found an estimated cost of 58 for instruction: %r = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> %v)
; CODE-LABEL: umax.i32.v4i32
; CODE: umaxv s0, v0.4s
define i32 @umax.i32.v4i32(<4 x i32> %v) {
%r = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> %v)
ret i32 %r
}
; Signed integer minimum reductions.
;
; Each function reduces one vector argument to a scalar with the experimental
; vector-reduce smin intrinsic. The paired directives check the cost printed
; by the cost-model analysis for the call and that llc's AArch64 output
; contains the matching SMINV instruction.
;
; NOTE(review): the expected costs are the values the cost model currently
; reports, not tuned figures; they are expected to change when the model is
; corrected.

; <8 x i8> -> i8
; COST-LABEL: smin.i8.v8i8
; COST: Found an estimated cost of 157 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> %v)
; CODE-LABEL: smin.i8.v8i8
; CODE: sminv b0, v0.8b
define i8 @smin.i8.v8i8(<8 x i8> %v) {
%r = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> %v)
ret i8 %r
}
; <16 x i8> -> i8
; COST-LABEL: smin.i8.v16i8
; COST: Found an estimated cost of 388 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> %v)
; CODE-LABEL: smin.i8.v16i8
; CODE: sminv b0, v0.16b
define i8 @smin.i8.v16i8(<16 x i8> %v) {
%r = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> %v)
ret i8 %r
}
; <4 x i16> -> i16
; COST-LABEL: smin.i16.v4i16
; COST: Found an estimated cost of 58 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> %v)
; CODE-LABEL: smin.i16.v4i16
; CODE: sminv h0, v0.4h
define i16 @smin.i16.v4i16(<4 x i16> %v) {
%r = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> %v)
ret i16 %r
}
; <8 x i16> -> i16
; COST-LABEL: smin.i16.v8i16
; COST: Found an estimated cost of 157 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> %v)
; CODE-LABEL: smin.i16.v8i16
; CODE: sminv h0, v0.8h
define i16 @smin.i16.v8i16(<8 x i16> %v) {
%r = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> %v)
ret i16 %r
}
; <4 x i32> -> i32
; COST-LABEL: smin.i32.v4i32
; COST: Found an estimated cost of 58 for instruction: %r = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> %v)
; CODE-LABEL: smin.i32.v4i32
; CODE: sminv s0, v0.4s
define i32 @smin.i32.v4i32(<4 x i32> %v) {
%r = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> %v)
ret i32 %r
}
; Signed integer maximum reductions.
;
; Each function reduces one vector argument to a scalar with the experimental
; vector-reduce smax intrinsic. The paired directives check the cost printed
; by the cost-model analysis for the call and that llc's AArch64 output
; contains the matching SMAXV instruction.
;
; NOTE(review): the expected costs are the values the cost model currently
; reports, not tuned figures; they are expected to change when the model is
; corrected.

; <8 x i8> -> i8
; COST-LABEL: smax.i8.v8i8
; COST: Found an estimated cost of 157 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> %v)
; CODE-LABEL: smax.i8.v8i8
; CODE: smaxv b0, v0.8b
define i8 @smax.i8.v8i8(<8 x i8> %v) {
%r = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> %v)
ret i8 %r
}
; <16 x i8> -> i8
; COST-LABEL: smax.i8.v16i8
; COST: Found an estimated cost of 388 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> %v)
; CODE-LABEL: smax.i8.v16i8
; CODE: smaxv b0, v0.16b
define i8 @smax.i8.v16i8(<16 x i8> %v) {
%r = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> %v)
ret i8 %r
}
; <4 x i16> -> i16
; COST-LABEL: smax.i16.v4i16
; COST: Found an estimated cost of 58 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> %v)
; CODE-LABEL: smax.i16.v4i16
; CODE: smaxv h0, v0.4h
define i16 @smax.i16.v4i16(<4 x i16> %v) {
%r = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> %v)
ret i16 %r
}
; <8 x i16> -> i16
; COST-LABEL: smax.i16.v8i16
; COST: Found an estimated cost of 157 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> %v)
; CODE-LABEL: smax.i16.v8i16
; CODE: smaxv h0, v0.8h
define i16 @smax.i16.v8i16(<8 x i16> %v) {
%r = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> %v)
ret i16 %r
}
; <4 x i32> -> i32
; COST-LABEL: smax.i32.v4i32
; COST: Found an estimated cost of 58 for instruction: %r = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> %v)
; CODE-LABEL: smax.i32.v4i32
; CODE: smaxv s0, v0.4s
define i32 @smax.i32.v4i32(<4 x i32> %v) {
%r = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> %v)
ret i32 %r
}
; Floating-point minimum / maximum reductions over <4 x float>.
;
; Both calls carry the nnan fast-math flag, and the expected cost-model line
; includes that flag verbatim. The codegen expectations are FMINNMV for the
; minimum and FMAXNMV for the maximum.
;
; NOTE(review): the expected costs are the values the cost model currently
; reports, not tuned figures; they are expected to change when the model is
; corrected.

; fmin: <4 x float> -> float
; COST-LABEL: fmin.f32.v4f32
; COST: Found an estimated cost of 58 for instruction: %r = call nnan float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float> %v)
; CODE-LABEL: fmin.f32.v4f32
; CODE: fminnmv s0, v0.4s
define float @fmin.f32.v4f32(<4 x float> %v) {
%r = call nnan float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float> %v)
ret float %r
}
; fmax: <4 x float> -> float
; COST-LABEL: fmax.f32.v4f32
; COST: Found an estimated cost of 58 for instruction: %r = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float> %v)
; CODE-LABEL: fmax.f32.v4f32
; CODE: fmaxnmv s0, v0.4s
define float @fmax.f32.v4f32(<4 x float> %v) {
%r = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float> %v)
ret float %r
}
; Declarations of the reduction intrinsics called by the test functions in
; this file: one per (operation, vector type) pair. Each takes a single vector
; operand and returns a scalar of the vector's element type.

; Integer add.
declare i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8>)
declare i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8>)
declare i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16>)
declare i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16>)
declare i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32>)
; Unsigned minimum.
declare i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8>)
declare i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8>)
declare i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16>)
declare i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16>)
declare i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32>)
; Unsigned maximum.
declare i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8>)
declare i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8>)
declare i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16>)
declare i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16>)
declare i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32>)
; Signed minimum.
declare i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8>)
declare i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8>)
declare i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16>)
declare i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16>)
declare i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32>)
; Signed maximum.
declare i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8>)
declare i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8>)
declare i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16>)
declare i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16>)
declare i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32>)
; Floating-point minimum / maximum.
declare float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float>)
declare float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float>)